Skip to content

Commit

Permalink
[python] Support default-coord reads of corner-written dense ND arrays
Browse files Browse the repository at this point in the history
  • Loading branch information
johnkerl committed Oct 19, 2023
1 parent d992cc8 commit b7874fa
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 2 deletions.
32 changes: 30 additions & 2 deletions apis/python/src/tiledbsoma/_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import pyarrow as pa
import somacore
import tiledb
from somacore import options
from typing_extensions import Self

Expand Down Expand Up @@ -71,6 +72,18 @@ class DenseNDArray(NDArray, somacore.DenseNDArray):

__slots__ = ()

def non_empty_domain(self) -> Tuple[Tuple[int, int], ...]:
    """
    Returns the non-empty domain of this dense array: for each dimension,
    the smallest and largest indices at which data has been written.

    This usually coincides with the domain given at creation time. When only
    a corner of the available domain has been written, the range returned
    here is tighter.

    The value is passed through unchanged from TileDB's ``nonempty_domain()``.
    NOTE(review): callers guard against a ``None`` result for an array with
    no data written yet -- presumably TileDB's behavior; confirm.
    """
    # Open a TileDB handle on this array's URI, using this object's context.
    with tiledb.open(self.uri, ctx=self.context.tiledb_ctx) as array:
        ned = array.nonempty_domain()
    return ned  # type: ignore

def read(
self,
coords: options.DenseNDCoords = (),
Expand Down Expand Up @@ -108,8 +121,23 @@ def read(
self._check_open_read()
result_order = somacore.ResultOrder(result_order)

schema = self._handle.schema
target_shape = dense_indices_to_shape(coords, schema.shape, result_order)
# The dense_indices_to_shape includes, as one of its roles, how to handle default
# coordinates -- e.g. `dnda.read()`. The default for a DenseNDArray should be "all the data"
# -- but what is that? If the schema shape matches the non-empty domain -- e.g. at create,
# shape was 100x200, and at write, 100x200 cells were written, those are both the same. But
# if the array was written with room for growth -- e.g. created with shape
# 1,000,000x1,000,000 but only 100x200 cells were written -- then we need the non-empty
# domain.
#
# The non-empty domain is the correct choice in either case.
#
# The only exception is if the array has been created but no data have been written at
# all, in which case the best we can do is use the schema shape.
data_shape = self._handle.schema.shape
ned = self.non_empty_domain()
if ned is not None:
data_shape = tuple(slot[1] + 1 for slot in ned)
target_shape = dense_indices_to_shape(coords, data_shape, result_order)

sr = self._soma_reader(result_order=result_order)

Expand Down
17 changes: 17 additions & 0 deletions apis/python/tests/test_dense_nd_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,23 @@ def test_dense_nd_array_requires_shape(tmp_path, shape_is_numeric):
soma.DenseNDArray.create(uri, type=pa.float32(), shape=(None, None)).close()


def test_dense_nd_array_ned_write(tmp_path):
    """A default-coordinate read returns just the data written into a much larger array."""
    uri = tmp_path.as_posix()
    expected = np.asarray([100, 101, 102, 103])

    # Create the array with plenty of room for growth, then write only a
    # four-element tensor at the low end of the domain.
    with soma.DenseNDArray.create(
        uri=uri, type=pa.int32(), shape=[1000000]
    ) as dnda:
        tensor = pa.Tensor.from_numpy(expected)
        dnda.write((slice(0, 4),), tensor)

    # Reading with no coordinates must yield exactly what was written.
    with soma.DenseNDArray.open(uri) as dnda:
        actual = dnda.read().to_numpy()
        assert (actual == expected).all()


@pytest.mark.parametrize(
"io",
[
Expand Down

0 comments on commit b7874fa

Please sign in to comment.