From f6cc014b843f5e4bc87a07059e46ab8d318279e2 Mon Sep 17 00:00:00 2001 From: Kevin Phan <98072684+ph-kev@users.noreply.github.com> Date: Wed, 16 Oct 2024 15:34:15 -0700 Subject: [PATCH] Add Var.replace --- NEWS.md | 8 ++++++++ docs/src/api.md | 1 + docs/src/howdoi.md | 9 +++++++++ src/Var.jl | 24 ++++++++++++++++++++++- test/test_Var.jl | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 90 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 1091a65..85fa342 100644 --- a/NEWS.md +++ b/NEWS.md @@ -47,6 +47,14 @@ ts_max_var = ClimaAnalysis.get(simdir, short_name = "ts", reduction = "max", per pfull_var = ClimaAnalysis.get(simdir, short_name = "pfull", reduction = "2.0d", period = "inst") ``` +### Replace values in data of an `OutputVar` +When dealing with land or ocean data, there can potentially be `missing` or `NaN` values in +the data. The function `replace` can be used to replace `missing` or `NaN` values in +`Var.data` with another value like 0.0. See the example below of this usage. +```julia +ClimaAnalysis.replace(var, NaN => 0.0, missing => 0.0) +``` + ## Bug fixes - Masking now affects the colorbar. - `Var.shift_to_start_of_previous_month` now checks for duplicate dates and throws an error diff --git a/docs/src/api.md b/docs/src/api.md index e885742..36c4618 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -70,6 +70,7 @@ Var.global_rmse Var.shift_to_start_of_previous_month Var.apply_landmask Var.apply_oceanmask +Base.replace(var::OutputVar, old_new::Pair...) ``` ## Leaderboard diff --git a/docs/src/howdoi.md b/docs/src/howdoi.md index 18e64eb..352851d 100644 --- a/docs/src/howdoi.md +++ b/docs/src/howdoi.md @@ -164,3 +164,12 @@ data of `var`, where any coordinate corresponding to ocean is zero. var_no_land = ClimaAnalysis.apply_landmask(var) var_no_ocean = ClimaAnalysis.apply_oceanmask(var) ``` + +## How do I replace `NaN` and `missing` values in the data of an `OutputVar` with 0.0? 
+ +You can use `replace` to replace all `NaN` and `missing` values in the data of an +`OutputVar` with 0.0. See the example below of this usage. + +```julia +var_no_nan_and_missing = ClimaAnalysis.replace(var, missing => 0.0, NaN => 0.0) +``` diff --git a/src/Var.jl b/src/Var.jl index 0e936d3..92f6b4b 100644 --- a/src/Var.jl +++ b/src/Var.jl @@ -55,7 +55,8 @@ export OutputVar, set_units, shift_to_start_of_previous_month, apply_landmask, - apply_oceanmask + apply_oceanmask, + replace """ Representing an output variable @@ -1654,6 +1655,27 @@ function _apply_lonlat_mask(var, mask::AbstractString) return OutputVar(ret_attribs, ret_dims, ret_dim_attributes, masked_data) end +""" + replace(var::OutputVar, old_new::Pair...) + +Return an `OutputVar` where, for each pair `old => new`, all occurrences of `old` are +replaced by `new` in `Var.data`. + +This function is useful if there are `NaN`s or `missing` values in the data. For instance, +you want to use the ocean mask, but there are `NaN`s in the ocean. You can replace all the +`NaN` and `missing` values with 0.0 and apply the ocean mask afterward. +""" +function Base.replace(var::OutputVar, old_new::Pair...) + # Replace each `old` value with its paired `new` value + no_nan_data = replace(var.data, old_new...) 
+ + # Remake OutputVar with the new data + ret_attribs = deepcopy(var.attributes) + ret_dims = deepcopy(var.dims) + ret_dim_attributes = deepcopy(var.dim_attributes) + return OutputVar(ret_attribs, ret_dims, ret_dim_attributes, no_nan_data) +end + """ overload_binary_op(op) diff --git a/test/test_Var.jl b/test/test_Var.jl index c9ee5ae..392845f 100644 --- a/test/test_Var.jl +++ b/test/test_Var.jl @@ -1694,3 +1694,52 @@ end atol = 10^(-2.5), ) end + +@testset "Replace" begin + times = collect(range(0.0, 100, 2 * 180)) + data = ones(length(times)) + data[1:5] .= NaN + dims = OrderedDict(["time" => times]) + attribs = Dict("long_name" => "hi") + dim_attribs = OrderedDict(["time" => Dict("units" => "s")]) + var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data) + var_no_nan = ClimaAnalysis.replace(var, NaN => 0.0) + @test var_no_nan.dims == var.dims + @test var_no_nan.data == vcat(zeros(5), ones(355)) + @test var_no_nan.attributes == var.attributes + @test var_no_nan.dim_attributes == var.dim_attributes + + lat = collect(range(-89.5, 89.5, 180)) + lon = collect(range(-179.5, 179.5, 360)) + data = ones(length(lat), length(lon)) + data[42:47] .= NaN + data[32042:32047] .= NaN + dims = OrderedDict(["lat" => lat, "lon" => lon]) + attribs = Dict("long_name" => "hi") + dim_attribs = OrderedDict([ + "lat" => Dict("units" => "deg"), + "lon" => Dict("units" => "deg"), + ]) + var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data) + var_no_nan = ClimaAnalysis.replace(var, NaN => 1.0) + @test var_no_nan.dims == var.dims + @test var_no_nan.data == ones(length(lat), length(lon)) + @test var_no_nan.attributes == var.attributes + @test var_no_nan.dim_attributes == var.dim_attributes + + lat = collect(range(-89.5, 89.5, 2)) + lon = collect(range(-179.5, 179.5, 2)) + data = [[missing, NaN] [NaN, missing]] + dims = OrderedDict(["lat" => lat, "lon" => lon]) + attribs = Dict("long_name" => "hi") + dim_attribs = OrderedDict([ + "lat" => Dict("units" => "deg"), + "lon" => 
Dict("units" => "deg"), + ]) + var = ClimaAnalysis.OutputVar(attribs, dims, dim_attribs, data) + var_no_nan = ClimaAnalysis.replace(var, missing => 1.0, NaN => 2.0) + @test var_no_nan.dims == var.dims + @test var_no_nan.data == [[1.0, 2.0] [2.0, 1.0]] + @test var_no_nan.attributes == var.attributes + @test var_no_nan.dim_attributes == var.dim_attributes +end