Skip to content

Commit

Permalink
Add Var.replace
Browse files Browse the repository at this point in the history
  • Loading branch information
ph-kev committed Oct 18, 2024
1 parent a189286 commit f6cc014
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 1 deletion.
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ ts_max_var = ClimaAnalysis.get(simdir, short_name = "ts", reduction = "max", per
pfull_var = ClimaAnalysis.get(simdir, short_name = "pfull", reduction = "2.0d", period = "inst")
```

### Replace values in data of an `OutputVar`
When dealing with land or ocean data, the data can contain `missing` or `NaN` values. The
function `replace` can be used to replace `missing` or `NaN` values in `Var.data` with
another value such as 0.0. See the example below.
```julia
ClimaAnalysis.replace(var, NaN => 0.0, missing => 0.0)
```

## Bug fixes
- Masking now affects the colorbar.
- `Var.shift_to_start_of_previous_month` now checks for duplicate dates and throws an error
Expand Down
1 change: 1 addition & 0 deletions docs/src/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ Var.global_rmse
Var.shift_to_start_of_previous_month
Var.apply_landmask
Var.apply_oceanmask
Base.replace(var::OutputVar, old_new::Pair...)
```

## Leaderboard
Expand Down
9 changes: 9 additions & 0 deletions docs/src/howdoi.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,3 +164,12 @@ data of `var`, where any coordinate corresponding to ocean is zero.
var_no_land = ClimaAnalysis.apply_landmask(var)
var_no_ocean = ClimaAnalysis.apply_oceanmask(var)
```
## How do I replace `NaN` and `missing` values in the data of an `OutputVar` with 0.0?
You can use `replace` to replace all `NaN` and `missing` values in the data of an
`OutputVar` with 0.0. See the example below.
```julia
var_no_nan_and_missing = ClimaAnalysis.replace(var, missing => 0.0, NaN => 0.0)
```
24 changes: 23 additions & 1 deletion src/Var.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ export OutputVar,
set_units,
shift_to_start_of_previous_month,
apply_landmask,
apply_oceanmask
apply_oceanmask,
replace

"""
Representing an output variable
Expand Down Expand Up @@ -1654,6 +1655,27 @@ function _apply_lonlat_mask(var, mask::AbstractString)
return OutputVar(ret_attribs, ret_dims, ret_dim_attributes, masked_data)
end

"""
    replace(var::OutputVar, old_new::Pair...)

Return an `OutputVar` built from `var` in which, for each pair `old => new`, every
occurrence of `old` in `var.data` is replaced by `new`.

The substitution is delegated to `replace` applied to `var.data`; the attributes,
dimensions, and dimension attributes of the result are deep copies of those of `var`.

This function is useful when the data contains `NaN` or `missing` values. For instance, if
you want to use the ocean mask but there are `NaN`s in the ocean, you can first replace
all the `NaN` and `missing` values with 0.0 and apply the ocean mask afterward.
"""
function Base.replace(var::OutputVar, old_new::Pair...)
    # Apply every `old => new` substitution to the underlying data
    replaced_data = replace(var.data, old_new...)

    # Rebuild the OutputVar around the substituted data with copied metadata
    return OutputVar(
        deepcopy(var.attributes),
        deepcopy(var.dims),
        deepcopy(var.dim_attributes),
        replaced_data,
    )
end

"""
overload_binary_op(op)
Expand Down
49 changes: 49 additions & 0 deletions test/test_Var.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1694,3 +1694,52 @@ end
atol = 10^(-2.5),
)
end

@testset "Replace" begin
    # Case 1: one-dimensional data whose first five entries are NaN; NaN => 0.0
    time_vals = collect(range(0.0, 100, 2 * 180))
    data_1d = ones(length(time_vals))
    data_1d[1:5] .= NaN
    attribs_1d = Dict("long_name" => "hi")
    dims_1d = OrderedDict(["time" => time_vals])
    dim_attribs_1d = OrderedDict(["time" => Dict("units" => "s")])
    var_1d = ClimaAnalysis.OutputVar(attribs_1d, dims_1d, dim_attribs_1d, data_1d)
    replaced_1d = ClimaAnalysis.replace(var_1d, NaN => 0.0)
    @test replaced_1d.dims == var_1d.dims
    @test replaced_1d.data == vcat(zeros(5), ones(355))
    @test replaced_1d.attributes == var_1d.attributes
    @test replaced_1d.dim_attributes == var_1d.dim_attributes

    # Case 2: two-dimensional data with two runs of NaN (linear indexing); NaN => 1.0
    # turns the array back into all ones
    lat_2d = collect(range(-89.5, 89.5, 180))
    lon_2d = collect(range(-179.5, 179.5, 360))
    data_2d = ones(length(lat_2d), length(lon_2d))
    data_2d[42:47] .= NaN
    data_2d[32042:32047] .= NaN
    attribs_2d = Dict("long_name" => "hi")
    dims_2d = OrderedDict(["lat" => lat_2d, "lon" => lon_2d])
    dim_attribs_2d = OrderedDict([
        "lat" => Dict("units" => "deg"),
        "lon" => Dict("units" => "deg"),
    ])
    var_2d = ClimaAnalysis.OutputVar(attribs_2d, dims_2d, dim_attribs_2d, data_2d)
    replaced_2d = ClimaAnalysis.replace(var_2d, NaN => 1.0)
    @test replaced_2d.dims == var_2d.dims
    @test replaced_2d.data == ones(length(lat_2d), length(lon_2d))
    @test replaced_2d.attributes == var_2d.attributes
    @test replaced_2d.dim_attributes == var_2d.dim_attributes

    # Case 3: several pairs at once on data mixing `missing` and NaN
    lat_mixed = collect(range(-89.5, 89.5, 2))
    lon_mixed = collect(range(-179.5, 179.5, 2))
    data_mixed = [[missing, NaN] [NaN, missing]]
    attribs_mixed = Dict("long_name" => "hi")
    dims_mixed = OrderedDict(["lat" => lat_mixed, "lon" => lon_mixed])
    dim_attribs_mixed = OrderedDict([
        "lat" => Dict("units" => "deg"),
        "lon" => Dict("units" => "deg"),
    ])
    var_mixed =
        ClimaAnalysis.OutputVar(attribs_mixed, dims_mixed, dim_attribs_mixed, data_mixed)
    replaced_mixed = ClimaAnalysis.replace(var_mixed, missing => 1.0, NaN => 2.0)
    @test replaced_mixed.dims == var_mixed.dims
    @test replaced_mixed.data == [[1.0, 2.0] [2.0, 1.0]]
    @test replaced_mixed.attributes == var_mixed.attributes
    @test replaced_mixed.dim_attributes == var_mixed.dim_attributes
end

0 comments on commit f6cc014

Please sign in to comment.