79 improve docs further (#108)

* include pandoc install in sphinx-doc action * embedded notebooks in sphinx docs * added additional dependencies to pyproject.toml * updated notebooks * updated docstrings * updated history.rst
Deltares · Jul 5, 2024 · eebc070 · eebc070
1 parent f6ee6f0
commit eebc070
Show file tree

Hide file tree

Showing 12 changed files with 220 additions and 146 deletions.
diff --git a/.github/workflows/sphinx-docs.yml b/.github/workflows/sphinx-docs.yml
@@ -1,4 +1,4 @@
-name: documentation
+name: sphinx-docs
 
 on:
   push:
@@ -11,10 +11,14 @@ permissions:
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    runs-on: windows-latest
     steps:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v3
+      - name: install pandoc
+        # pip install is not seen somehow, so via choco
+        run: |
+          choco install pandoc
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip

diff --git a/HISTORY.rst b/HISTORY.rst
@@ -2,6 +2,11 @@
 History
 =======
 
+UNRELEASED
+------------------
+* avoid timezone conversion for tz-naive input in `ddlpy.dataframe_to_xarray` in https://github.com/Deltares/ddlpy/pull/106
+* added some missing dependencies in https://github.com/Deltares/ddlpy/pull/108
+
 0.5.0 (2024-04-26)
 ------------------
 * avoid duplicated periods in dataframe returned by `ddlpy.measurements_amount()` in https://github.com/Deltares/ddlpy/pull/93

diff --git a/README.md b/README.md
@@ -22,13 +22,13 @@ Documentation: <https://deltares.github.io/ddlpy>
 
 In the examples/notebooks folders you will find the following examples to get you started:
 
-* [minimal_example.py](https://github.com/Deltares/ddlpy/blob/main/examples/minimal_example.py) -> minimal code to retrieve data.
+* [minimal_example.py](https://github.com/Deltares/ddlpy/blob/main/docs/examples/minimal_example.py) -> minimal code to retrieve data.
 
-* [retrieve_parallel_to_netcdf.py](https://github.com/Deltares/ddlpy/blob/main/examples/retrieve_parallel_to_netcdf.py) -> Code to retrieve a bulk of observations and write to netcdf files for each station.
+* [retrieve_parallel_to_netcdf.py](https://github.com/Deltares/ddlpy/blob/main/docs/examples/retrieve_parallel_to_netcdf.py) -> Code to retrieve a bulk of observations and write to netcdf files for each station.
 
-* [measurements.ipynb](https://github.com/Deltares/ddlpy/blob/main/notebooks/measurements.ipynb) -> interactive notebook to subset/inspect locations and download/plot measurements
+* [measurements.ipynb](https://github.com/Deltares/ddlpy/blob/main/docs/notebooks/measurements.ipynb) -> interactive notebook to subset/inspect locations and download/plot measurements
 
-* [waterinfo.ipynb](https://github.com/Deltares/ddlpy/blob/main/notebooks/waterinfo.ipynb) -> interactive notebook to read csv's obained from waterinfo.rws.nl
+* [waterinfo.ipynb](https://github.com/Deltares/ddlpy/blob/main/docs/notebooks/waterinfo.ipynb) -> interactive notebook to read csv's obtained from waterinfo.rws.nl
 
 
 # Run ddlpy from console

diff --git a/ddlpy/ddlpy.py b/ddlpy/ddlpy.py
@@ -73,10 +73,20 @@ def catalog(catalog_filter=None):
     return result
 
 
-def locations(catalog_filter=None):
+def locations(catalog_filter:list = None) -> pd.DataFrame:
     """
-    get station information from DDL (metadata from Catalogue). All metadata regarding stations.
-    The response (result) retrieves more keys
+    Get station information from DDL (metadata from Catalogue). All metadata regarding stations.
+
+    Parameters
+    ----------
+    catalog_filter : list, optional
+        list of catalogs to pass on to OphalenCatalogus CatalogusFilter, 
+        if None the list from endpoints.json is retrieved. The default is None.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame with a combination of available locations and measurements.
 
     """
 
@@ -144,10 +154,24 @@ def _get_request_dicts(location):
     return request_dicts
 
 
-def measurements_available(location, start_date, end_date):
-    """checks if there are measurements for location, for the period start_date, end_date
-    gives None if check was unsuccesfull
-    gives True/False if there are / are no measurement available
+def measurements_available(location:pd.Series, start_date:(str,pd.Timestamp), end_date:(str,pd.Timestamp)) -> bool:
+    """
+    Checks if there are measurements available for a location in the requested period.
+    
+    Parameters
+    ----------
+    location : pd.Series
+        Single row of the `ddlpy.locations()` DataFrame.
+    start_date : (str,pd.Timestamp)
+        The start date of the requested period.
+    end_date : (str,pd.Timestamp)
+        The end date of the requested period.
+
+    Returns
+    -------
+    bool
+        Whether there are measurements available or not.
+
     """
     endpoint = ENDPOINTS['check_observations_available']
 
@@ -174,10 +198,27 @@ def measurements_available(location, start_date, end_date):
         return False  
 
 
-def measurements_amount(location, start_date, end_date, period="Jaar"):
-    """checks how much measurements are available for a location, for the period start_date, end_date
-    returns a DataFrame with columns Groeperingsperiode and AantalMetingen
-    possible for Jaar/Maand/Dag
+def measurements_amount(location:pd.Series, start_date:(str,pd.Timestamp), end_date:(str,pd.Timestamp), 
+                        period:str = "Jaar") -> pd.DataFrame:
+    """
+    Retrieves the amount of measurements available for a location for the requested period.
+
+    Parameters
+    ----------
+    location : pd.Series
+        Single row of the `ddlpy.locations()` DataFrame.
+    start_date : (str,pd.Timestamp)
+        The start date of the requested period.
+    end_date : (str,pd.Timestamp)
+        The end date of the requested period.
+    period : str, optional
+        "Jaar", "Maand" or "Dag". The default is "Jaar".
+
+    Returns
+    -------
+    df_amount : pd.DataFrame
+        A DataFrame with the number of measurements (AantalMetingen) per period (Groeperingsperiode).
+
     """
     # TODO: there are probably more Groeperingsperiodes accepted by ddl, but not supported by ddlpy yet
     accepted_period = ["Jaar","Maand","Dag"]
@@ -222,9 +263,9 @@ def measurements_amount(location, start_date, end_date, period="Jaar"):
         df_list.append(df)
 
         # concatenate and sum duplicated index
-        amount_all = pd.concat(df_list).sort_index()
-        amount_all = amount_all.groupby(amount_all.index).sum()
-        return amount_all
+        df_amount = pd.concat(df_list).sort_index()
+        df_amount = df_amount.groupby(df_amount.index).sum()
+        return df_amount
 
 
 def _combine_waarnemingenlijst(result, location):
@@ -330,19 +371,20 @@ def _clean_dataframe(measurements):
     return measurements
 
 
-def measurements(location, start_date, end_date, freq=dateutil.rrule.MONTHLY, clean_df=True):
+def measurements(location:pd.Series, start_date:(str,pd.Timestamp), end_date:(str,pd.Timestamp), 
+                 freq:int = dateutil.rrule.MONTHLY, clean_df:bool = True):
     """
-    Return measurements for the given location and time window (start_date, end_date)
+    Returns measurements for the given location and requested period.
 
     Parameters
     ----------
     location : pd.Series
         Single row of the `ddlpy.locations()` DataFrame.
-    start_date : str, dt.datetime, pd.Timestamp
+    start_date : str, pd.Timestamp
         Start of the retrieval period.
-    end_date : str, dt.datetime, pd.Timestamp
+    end_date : str, pd.Timestamp
         End of the retrieval period.
-    freq : None, dateutil.rrule.MONTHLY, dateutil.rrule.YEARLY, etc., optional
+    freq : int or None, e.g. dateutil.rrule.MONTHLY, dateutil.rrule.YEARLY, etc., optional
         The frequency in which to divide the requested period (e.g. yearly or monthly).
         Can also be None, in which case the entire dataset will be retrieved at once.
         Please note that 10-minute measurements can often not be downloaded in yearly (or larger) chunks 
@@ -353,6 +395,10 @@ def measurements(location, start_date, end_date, freq=dateutil.rrule.MONTHLY, cl
     clean_df : bool, optional
         Whether to sort the dataframe and remove duplicate rows. The default is True.
     
+    Returns
+    -------
+    measurements : pd.DataFrame
+        DataFrame with measurements.
     """
 
     if isinstance(location, pd.DataFrame):
@@ -399,10 +445,20 @@ def measurements(location, start_date, end_date, freq=dateutil.rrule.MONTHLY, cl
     return measurements
 
 
-def measurements_latest(location):
-    """checks if there are measurements for location, for the period start_date, end_date
-    gives None if check was unsuccesfull
-    gives True/False if there are / are no measurement available
+def measurements_latest(location:pd.Series) -> pd.DataFrame:
+    """
+    Returns the latest available measurement for the given location.
+
+    Parameters
+    ----------
+    location : pd.Series
+        Single row of the `ddlpy.locations()` DataFrame.
+
+    Returns
+    -------
+    df : pd.DataFrame
+        DataFrame with measurements.
+
     """
     endpoint = ENDPOINTS['collect_latest_observations']
 

diff --git a/docs/conf.py b/docs/conf.py
@@ -32,7 +32,7 @@
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx_mdinclude']
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'sphinx_mdinclude', "nbsphinx"]
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -176,6 +176,3 @@
      'One line description of project.',
      'Miscellaneous'),
 ]
-
-
-
diff --git a/examples/minimal_example.py → docs/examples/minimal_example.py b/examples/minimal_example.py → docs/examples/minimal_example.py
diff --git a/examples/retrieve_parallel_to_netcdf.py → docs/examples/retrieve_parallel_to_netcdf.py b/examples/retrieve_parallel_to_netcdf.py → docs/examples/retrieve_parallel_to_netcdf.py
diff --git a/docs/minimal_example.rst b/docs/minimal_example.rst
@@ -0,0 +1,6 @@
+Minimal example
+===============
+
+.. literalinclude:: examples/minimal_example.py
+   :language: python
+   :linenos:
diff --git a/notebooks/measurements.ipynb → docs/notebooks/measurements.ipynb b/notebooks/measurements.ipynb → docs/notebooks/measurements.ipynb