Skip to content

Commit

Permalink
Add local_checksum parameter to Collection::put
Browse files Browse the repository at this point in the history
Adding this parameter allows a callable to be supplied which can
obtain per-file checksums during a recursive put operation. e.g. by
reading from an accompanying .md5 file or by deciding, based on file
size, whether to calculate checksums or to insist that an earlier
process supply one pre-calculated.

Improve test coverage of checksum operations.
  • Loading branch information
kjsanger committed Sep 17, 2024
1 parent efb5b15 commit 1231af1
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 3 deletions.
15 changes: 12 additions & 3 deletions src/partisan/irods.py
Original file line number Diff line number Diff line change
Expand Up @@ -2194,7 +2194,7 @@ def put(
against the remote checksum calculated by the iRODS server for data
objects.
local_checksum: A caller-supplied checksum of the local file. This may be a
string, a path to a file containing a string, or a file name
string, a path to a file containing a string, or a file path
transformation function. If the latter, it must accept the local path as
its only argument and return a string checksum. Typically, this is
useful when this checksum is available from an earlier process that
Expand Down Expand Up @@ -2239,9 +2239,9 @@ def put(
else:
raise ValueError(
f"Invalid type for local_checksum: {type(local_checksum)}; must be "
"a string or a path of a file containing a string"
"a string, a path of a file containing a string, or a callable "
"taking a path of a file and returning a string"
)

if fill and self.exists() and self.checksum() == chk:
log.info(
"Data object already exists in iRODS with matching checksum; skipping",
Expand Down Expand Up @@ -2656,6 +2656,7 @@ def put(
recurse=False,
calculate_checksum=False,
verify_checksum=False,
local_checksum=None,
compare_checksums=False,
fill=False,
force=True,
Expand All @@ -2673,6 +2674,12 @@ def put(
verify_checksum: Verify the local checksum calculated by the iRODS C API
against the remote checksum calculated by the iRODS server for data
objects. See DataObject.put() for more information.
local_checksum: A callable that returns a checksum for a local file. See
DataObject.put() for more information. This is called for each file
encountered while recursing, with the file path as its argument.
(Also accepts a string or a path to a file containing a string, as does
DataObject.put(); however, this is not useful for collections except in
the edge case where all the files have identical contents).
compare_checksums: Compare caller-supplied local checksums to the remote
checksums calculated by the iRODS server after the put operation for
data objects. If the checksums do not match, raise an error. See
Expand Down Expand Up @@ -2707,6 +2714,7 @@ def put(
p,
calculate_checksum=calculate_checksum,
verify_checksum=verify_checksum,
local_checksum=local_checksum,
compare_checksums=compare_checksums,
fill=fill,
force=force,
Expand All @@ -2726,6 +2734,7 @@ def put(
p,
calculate_checksum=calculate_checksum,
verify_checksum=verify_checksum,
local_checksum=local_checksum,
compare_checksums=compare_checksums,
force=force,
timeout=timeout,
Expand Down
67 changes: 67 additions & 0 deletions tests/test_irods.py
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,73 @@ def test_data_object_put_checksum_no_verify(self, simple_collection):
assert obj.size() == 555
assert obj.checksum() == "39a4aa291ca849d601e4e5b8ed627a04"

@m.it("Can put put from a local file with checksum calculated on the fly")
def test_data_object_put_checksum_calculated(self, simple_collection):
    """Put a local file with calculate_checksum=True and no local_checksum.

    The put is made with compare_checksums=True, after which the data object
    must exist and report the expected checksum.

    This test has a name of its own on purpose: it used to share the name
    test_data_object_put_checksum_supplied with the test for a supplied
    checksum string. The later definition replaced this one in the class
    namespace, so this test was never collected or run.
    """
    obj = DataObject(simple_collection / "new.txt")
    # Precondition: the target must not already exist in the collection.
    assert not obj.exists()

    local_path = Path("./tests/data/simple/data_object/lorem.txt").absolute()
    checksum = "39a4aa291ca849d601e4e5b8ed627a04"

    # No local_checksum is passed here; only the calculate/compare flags.
    obj.put(local_path, calculate_checksum=True, compare_checksums=True)
    assert obj.exists()
    assert obj.checksum() == checksum

@m.it("Can put put from a local file with a supplied local checksum string")
def test_data_object_put_checksum_supplied(self, simple_collection):
    """Put a local file, passing the expected checksum as a plain string.

    The string is given as local_checksum together with
    calculate_checksum=True and compare_checksums=True; afterwards the data
    object must exist and report that same checksum.
    """
    expected = "39a4aa291ca849d601e4e5b8ed627a04"

    target = DataObject(simple_collection / "new.txt")
    # Precondition: nothing has been uploaded to this path yet.
    assert not target.exists()

    source = Path("./tests/data/simple/data_object/lorem.txt").absolute()
    put_options = {
        "calculate_checksum": True,
        "compare_checksums": True,
        "local_checksum": expected,
    }
    target.put(source, **put_options)

    assert target.exists()
    assert target.checksum() == expected

@m.it("Can put put from a local file with a supplied local checksum callable")
def test_data_object_put_callable_supplied(self, simple_collection):
    """Put a local file, passing the expected checksum via a callable.

    The callable ignores its single argument and returns a fixed checksum
    string. After the put the data object must exist and report that
    checksum.
    """
    expected = "39a4aa291ca849d601e4e5b8ed627a04"

    def supplied_checksum(_):
        # Stand-in for a real lookup: always answers with the known checksum.
        return expected

    target = DataObject(simple_collection / "new.txt")
    # Precondition: nothing has been uploaded to this path yet.
    assert not target.exists()

    source = Path("./tests/data/simple/data_object/lorem.txt").absolute()
    target.put(
        source,
        calculate_checksum=True,
        compare_checksums=True,
        local_checksum=supplied_checksum,
    )

    assert target.exists()
    assert target.checksum() == expected

@m.it("Raises an error if a supplied local checksum callable does not match")
def test_data_object_put_callable_supplied_mismatch(self, simple_collection):
    """Expect a ValueError when a checksum callable returns a wrong value.

    The callable passed as local_checksum returns a string that is not the
    file's checksum; with compare_checksums=True the put is expected to raise
    ValueError with a message matching "mismatch".

    The name carries a ``_mismatch`` suffix, in line with
    test_data_object_put_checksum_supplied_mismatch. It used to be named
    test_data_object_put_callable_supplied, the same as the positive callable
    test defined before it, and so replaced that test in the class namespace
    and stopped it from running.
    """
    obj = DataObject(simple_collection / "new.txt")
    # Precondition: the target must not already exist in the collection.
    assert not obj.exists()

    local_path = Path("./tests/data/simple/data_object/lorem.txt").absolute()
    with pytest.raises(ValueError, match="mismatch"):
        obj.put(
            local_path,
            calculate_checksum=True,
            compare_checksums=True,
            local_checksum=lambda _: "a bad checksum",
        )

@m.it("Raises an error if a supplied local checksum string does not match")
def test_data_object_put_checksum_supplied_mismatch(self, simple_collection):
    """Expect a ValueError when a supplied checksum string is wrong.

    A string that is not the file's checksum is passed as local_checksum
    with compare_checksums=True; the put is expected to raise ValueError
    with a message matching "mismatch".
    """
    target = DataObject(simple_collection / "new.txt")
    # Precondition: nothing has been uploaded to this path yet.
    assert not target.exists()

    source = Path("./tests/data/simple/data_object/lorem.txt").absolute()
    put_options = {"compare_checksums": True, "local_checksum": "a bad checksum"}
    with pytest.raises(ValueError, match="mismatch"):
        target.put(source, **put_options)

@m.describe("Operations on an existing DataObject")
@m.context("When a DataObject exists")
@m.it("Can be detected")
Expand Down

0 comments on commit 1231af1

Please sign in to comment.