From 0e6493a05ea64c58935c6216d579a671b3d95059 Mon Sep 17 00:00:00 2001 From: John Kerl Date: Mon, 28 Oct 2024 18:02:23 -0400 Subject: [PATCH] iterating on R dense --- apis/python/src/tiledbsoma/_dense_nd_array.py | 11 ++++++++++- apis/python/tests/test_dense_nd_array.py | 10 ++++++++-- apis/r/R/SOMADataFrame.R | 1 + apis/r/R/SOMADenseNDArray.R | 2 +- apis/r/R/SOMASparseNDArray.R | 1 + apis/r/src/arrow.cpp | 15 ++++++++++++++- 6 files changed, 35 insertions(+), 5 deletions(-) diff --git a/apis/python/src/tiledbsoma/_dense_nd_array.py b/apis/python/src/tiledbsoma/_dense_nd_array.py index 14738254d6..dd2a77c245 100644 --- a/apis/python/src/tiledbsoma/_dense_nd_array.py +++ b/apis/python/src/tiledbsoma/_dense_nd_array.py @@ -263,7 +263,16 @@ def read( timestamp=handle.timestamp and (0, handle.timestamp), ) - # XXX comment this, and make it conditional on core 2.27 or more, and, new-shape feature-flag enabled + # Scenario to avoid: + # * Query dense array with coords not provided + # * When coords not provided, core uses the core domain + # For old shape: + # * Core current domain did not exist + # * Core max domain may be small; .shape returns this + # For new shape (core 2.27 and tiledbsoma 1.15): + # * Core current domain exists and will be small; .shape returns this + # * Core max domain will be huge + # In either case, applying these coords is the right thing to do if coords == (): coords = tuple(slice(0, e - 1) for e in data_shape) diff --git a/apis/python/tests/test_dense_nd_array.py b/apis/python/tests/test_dense_nd_array.py index 87b7c29415..942dd6e24d 100644 --- a/apis/python/tests/test_dense_nd_array.py +++ b/apis/python/tests/test_dense_nd_array.py @@ -9,6 +9,7 @@ import pytest import tiledbsoma as soma +import tiledbsoma.pytiledbsoma as clib from tiledbsoma.options import SOMATileDBContext from . import NDARRAY_ARROW_TYPES_NOT_SUPPORTED, NDARRAY_ARROW_TYPES_SUPPORTED @@ -180,8 +181,13 @@ def test_dense_nd_array_requires_shape(tmp_path, shape_is_numeric): with soma.DenseNDArray.open(uri) as dnda: assert dnda.shape == (2, 3) else: - with pytest.raises(ValueError): - soma.DenseNDArray.create(uri, type=pa.float32(), shape=(None, None)).close() + soma.DenseNDArray.create(uri, type=pa.float32(), shape=(None, None)).close() + with soma.DenseNDArray.open(uri) as dnda: + if ( + soma._flags.NEW_SHAPE_FEATURE_FLAG_ENABLED + and clib.embedded_version_triple() >= (2, 27, 0) + ): + assert dnda.shape == (1, 1) def test_dense_nd_array_ned_write(tmp_path): diff --git a/apis/r/R/SOMADataFrame.R b/apis/r/R/SOMADataFrame.R index 0f3d2854fe..1d04379ec5 100644 --- a/apis/r/R/SOMADataFrame.R +++ b/apis/r/R/SOMADataFrame.R @@ -145,6 +145,7 @@ SOMADataFrame <- R6::R6Class( uri = self$uri, naap = naap, nasp = nasp, + coords_list = list(), # only used for SOMADenseNDArray ctxxp = private$.soma_context, arraytype = "SOMADataFrame", config = NULL, diff --git a/apis/r/R/SOMADenseNDArray.R b/apis/r/R/SOMADenseNDArray.R index 114edbb811..8e628fd83f 100644 --- a/apis/r/R/SOMADenseNDArray.R +++ b/apis/r/R/SOMADenseNDArray.R @@ -149,9 +149,9 @@ SOMADenseNDArray <- R6::R6Class( #arr[] <- values writeArrayFromArrow( uri = self$uri, - coords, naap = naap, nasp = nasp, + coords_list = coords, ctxxp = private$.soma_context, arraytype = "SOMADenseNDArray", config = NULL, diff --git a/apis/r/R/SOMASparseNDArray.R b/apis/r/R/SOMASparseNDArray.R index ac2d249732..d71a83a8fe 100644 --- a/apis/r/R/SOMASparseNDArray.R +++ b/apis/r/R/SOMASparseNDArray.R @@ -282,6 +282,7 @@ SOMASparseNDArray <- R6::R6Class( uri = self$uri, naap = naap, nasp = nasp, + coords_list = list(), # only used for SOMADenseNDArray ctxxp = private$.soma_context, arraytype = "SOMASparseNDArray", config = NULL, diff --git a/apis/r/src/arrow.cpp b/apis/r/src/arrow.cpp index 565a406fde..1fb024ca11 100644 --- a/apis/r/src/arrow.cpp +++ b/apis/r/src/arrow.cpp @@ -273,7 +273,20 @@ void writeArrayFromArrow( int lo = *std::min_element(slot_values.begin(), slot_values.end()); int hi = *std::max_element(slot_values.begin(), slot_values.end()); spdl::debug( - "dense array write: dim {} set range lo {} hi {}", dim_name, lo, hi); + "dense array write: dim {} set 1-up range lo {} hi {}", + dim_name, + lo, + hi); + // These are 1-up indices from R. Convert to 0-up for C++. + if (lo < 1) { + Rcpp::stop(tfm::format( + "dense array write: expected lower bound %d >= 1 for dim " + "name %s", + lo, + dim_name)); + } + lo--; + hi--; std::pair lo_hi(int64_t{lo}, int64_t{hi}); std::vector> range({lo_hi}); arrup.get()->set_dim_ranges(dim_name, range);