Skip to content

Commit

Permalink
clib.Session.create_data: Improve docstrings and fix the comments for…
Browse files Browse the repository at this point in the history
… the `dim` parameter (#3532)

Co-authored-by: Michael Grund <[email protected]>
Co-authored-by: Yvonne Fröhlich <[email protected]>
  • Loading branch information
3 people authored Oct 25, 2024
1 parent ed18111 commit 68bb695
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 81 deletions.
2 changes: 1 addition & 1 deletion pygmt/clib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def vectors_to_arrays(vectors: Sequence[Any]) -> list[np.ndarray]:


def sequence_to_ctypes_array(
sequence: Sequence | None, ctype, size: int
sequence: Sequence[int | float] | np.ndarray | None, ctype, size: int
) -> ctp.Array | None:
"""
Convert a sequence of numbers into a ctypes array variable.
Expand Down
195 changes: 129 additions & 66 deletions pygmt/clib/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,19 @@

DIRECTIONS = ["GMT_IN", "GMT_OUT"]

MODES = ["GMT_CONTAINER_ONLY", "GMT_IS_OUTPUT"]
MODES = [
"GMT_CONTAINER_AND_DATA", # Create/Read/Write both container and the data array
"GMT_CONTAINER_ONLY", # Cread/Read/Write the container but no data array
"GMT_DATA_ONLY", # Create/Read/Write the container's data array only
"GMT_IS_OUTPUT", # For creating a resource as a container for output
]
MODE_MODIFIERS = [
"GMT_GRID_IS_CARTESIAN",
"GMT_GRID_IS_GEO",
"GMT_WITH_STRINGS",
"GMT_GRID_IS_CARTESIAN", # Grid is not geographic but Cartesian
"GMT_GRID_IS_GEO", # Grid is geographic, not Cartesian
"GMT_WITH_STRINGS", # Allocate string array for GMT_DATASET/GMT_VECTOR/GMT_MATRIX
]

REGISTRATIONS = ["GMT_GRID_PIXEL_REG", "GMT_GRID_NODE_REG"]
REGISTRATIONS = ["GMT_GRID_NODE_REG", "GMT_GRID_PIXEL_REG"]

DTYPES = {
np.int8: "GMT_CHAR",
Expand Down Expand Up @@ -643,53 +648,109 @@ def call_module(self, module: str, args: str | list[str]):

def create_data(
self,
family,
geometry,
mode,
dim=None,
ranges=None,
inc=None,
registration="GMT_GRID_NODE_REG",
pad=None,
):
family: str,
geometry: str,
mode: str,
dim: Sequence[int] | None = None,
ranges: Sequence[float] | None = None,
inc: Sequence[float] | None = None,
registration: Literal[
"GMT_GRID_NODE_REG", "GMT_GRID_PIXEL_REG"
] = "GMT_GRID_NODE_REG",
pad: int | None = None,
) -> ctp.c_void_p:
"""
Create an empty GMT data container.
Create an empty GMT data container and allocate space to hold data.
Valid data families and geometries are in ``FAMILIES`` and ``GEOMETRIES``.
There are two ways to define the dimensions needed to actually allocate memory:
1. Via ``ranges``, ``inc`` and ``registration``.
2. Via ``dim`` and ``registration``.
``dim`` contains up to 4 values and they have different meanings for
different GMT data families:
For ``GMT_DATASET``:
- 0: number of tables
- 1: number of segments per table
- 2: number of rows per segment
- 3: number of columns per row
For ``GMT_VECTOR``:
- 0: number of columns
- 1: number of rows [optional, can be 0 if unknown]
- 2: data type (e.g., ``GMT_DOUBLE``) [Will be overwritten by ``put_vector``]
For ``GMT_GRID``/``GMT_IMAGE``/``GMT_CUBE``/``GMT_MATRIX``:
- 0: number of columns
- 1: number of rows
- 2: number of bands or layers [Ignored for ``GMT_GRID``]
- 3: data type (e.g., ``GMT_DOUBLE``) [For ``GMT_MATRIX`` only, but will be
overwritten by ``put_matrix``]
In other words, ``inc`` is assumed to be 1.0, and ``ranges`` is
[0, dim[0], 0, dim[1]] for pixel registration or
[0, dim[0]-1.0, 0, dim[1]-1.0] for grid registration.
When creating a grid/image/cube, you can do it in one or two steps:
1. Call this function with ``mode="GMT_CONTAINER_AND_DATA"``. This creates
a header and allocates a grid or an image
2. Call this function twice:
1. First with ``mode="GMT_CONTAINER_ONLY"``, to create a header only and
compute the dimensions based on other parameters
2. Second with ``mode="GMT_DATA_ONLY"``, to allocate the grid/image/cube
array based on the dimensions already set. This time, you pass NULL for
``dim``/``ranges``/``inc``/``registration``/``pad`` and let ``data`` be
the void pointer returned in the first step.
**Note**: This is not implemented yet, since this function doesn't have the
``data`` parameter.
Parameters
----------
family : str
A valid GMT data family name (e.g., ``'GMT_IS_DATASET'``). See the
``FAMILIES`` attribute for valid names.
geometry : str
A valid GMT data geometry name (e.g., ``'GMT_IS_POINT'``). See the
``GEOMETRIES`` attribute for valid names.
mode : str
A valid GMT data mode (e.g., ``'GMT_IS_OUTPUT'``). See the
``MODES`` attribute for valid names.
dim : list of 4 integers
The dimensions of the dataset. See the documentation for the GMT C
API function ``GMT_Create_Data`` (``src/gmt_api.c``) for the full
range of options regarding 'dim'. If ``None``, will pass in the
NULL pointer.
ranges : list of 4 floats
The dataset extent. Also a bit of a complicated argument. See the C
function documentation. It's called ``range`` in the C function but
it would conflict with the Python built-in ``range`` function.
inc : list of 2 floats
The increments between points of the dataset. See the C function
documentation.
registration : str
The node registration (what the coordinates mean). Can be
``'GMT_GRID_PIXEL_REG'`` or ``'GMT_GRID_NODE_REG'``. Defaults to
``'GMT_GRID_NODE_REG'``.
pad : int
The grid padding. Defaults to ``GMT_PAD_DEFAULT``.
family
A valid GMT data family name (e.g., ``"GMT_IS_DATASET"``). See ``FAMILIES``
for valid names.
geometry
A valid GMT data geometry name (e.g., ``"GMT_IS_POINT"``). See
``GEOMETRIES`` for valid names.
mode
A valid GMT data mode. See ``MODES`` for valid names. For
``GMT_IS_DATASET``/``GMT_IS_MATRIX``/``GMT_IS_VECTOR``, adding
``GMT_WITH_STRINGS`` to the ``mode`` will allocate the corresponding arrays
of string pointers.
dim
The dimensions of the dataset, as explained above. If ``None``, will pass in
the NULL pointer.
ranges
The data extent.
inc
The increments between points of the dataset.
registration
The node registration. Can be ``"GMT_GRID_PIXEL_REG"`` or
``"GMT_GRID_NODE_REG"``.
pad
The padding for ``GMT_IS_GRID``/``GMT_IS_IMAGE``/``GMT_IS_CUBE``. If
``None``, defaults to ``"GMT_PAD_DEFAULT"``.
For ``GMT_IS_MATRIX``, it can be:
- 0: default row/col orientation [Default]
- 1: row-major format (C)
- 2: column-major format (FORTRAN)
Returns
-------
data_ptr : int
A ctypes pointer (an integer) to the allocated ``GMT_Dataset``
object.
data_ptr
A ctypes pointer (an integer) to the allocated GMT data container.
"""
c_create_data = self.get_libgmt_func(
"GMT_Create_Data",
Expand All @@ -703,8 +764,8 @@ def create_data(
ctp.POINTER(ctp.c_double), # inc
ctp.c_uint, # registration
ctp.c_int, # pad
ctp.c_void_p,
], # data
ctp.c_void_p, # data
],
restype=ctp.c_void_p,
)

Expand All @@ -717,31 +778,30 @@ def create_data(
geometry_int = self._parse_constant(geometry, valid=GEOMETRIES)
registration_int = self._parse_constant(registration, valid=REGISTRATIONS)

# Convert dim, ranges, and inc to ctypes arrays if given (will be None
# if not given to represent NULL pointers)
dim = sequence_to_ctypes_array(dim, ctp.c_uint64, 4)
ranges = sequence_to_ctypes_array(ranges, ctp.c_double, 4)
inc = sequence_to_ctypes_array(inc, ctp.c_double, 2)
# Convert dim, ranges, and inc to ctypes arrays if given (will be None if not
# given to represent NULL pointers)
dim_ctp = sequence_to_ctypes_array(dim, ctp.c_uint64, 4)
ranges_ctp = sequence_to_ctypes_array(ranges, ctp.c_double, 4)
inc_ctp = sequence_to_ctypes_array(inc, ctp.c_double, 2)

# Use a NULL pointer (None) for existing data to indicate that the
# container should be created empty. Fill it in later using put_vector
# and put_matrix.
# Use a NULL pointer (None) for existing data to indicate that the container
# should be created empty. Fill it in later using put_vector and put_matrix.
data_ptr = c_create_data(
self.session_pointer,
family_int,
geometry_int,
mode_int,
dim,
ranges,
inc,
dim_ctp,
ranges_ctp,
inc_ctp,
registration_int,
self._parse_pad(family, pad),
None,
)

if data_ptr is None:
raise GMTCLibError("Failed to create an empty GMT data pointer.")

msg = "Failed to create an empty GMT data pointer."
raise GMTCLibError(msg)
return data_ptr

def _parse_pad(self, family, pad):
Expand Down Expand Up @@ -1248,7 +1308,7 @@ def open_virtualfile(
... family=family,
... geometry=geometry,
... mode="GMT_CONTAINER_ONLY",
... dim=[2, 5, 1, 0], # columns, lines, segments, type
... dim=[2, 5, lib["GMT_INT"], 0], # ncolumns, nrows, dtype, unused
... )
... lib.put_vector(dataset, column=0, vector=x)
... lib.put_vector(dataset, column=1, vector=y)
Expand Down Expand Up @@ -1413,7 +1473,10 @@ def virtualfile_from_vectors(
geometry = "GMT_IS_POINT"

dataset = self.create_data(
family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, 1, 0]
family,
geometry,
mode="GMT_CONTAINER_ONLY",
dim=[columns, rows, self["GMT_DOUBLE"], 0],
)

# Use put_vector for columns with numerical type data
Expand Down Expand Up @@ -1501,12 +1564,13 @@ def virtualfile_from_matrix(self, matrix: np.ndarray) -> Generator[str, None, No
# around until the virtual file is closed.
matrix = np.ascontiguousarray(matrix)
rows, columns = matrix.shape
layers = 1

family = "GMT_IS_DATASET|GMT_VIA_MATRIX"
geometry = "GMT_IS_POINT"

dataset = self.create_data(
family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, 1, 0]
family, geometry, mode="GMT_CONTAINER_ONLY", dim=[columns, rows, layers, 0]
)

self.put_matrix(dataset, matrix)
Expand Down Expand Up @@ -1592,7 +1656,7 @@ def virtualfile_from_grid(self, grid: xr.DataArray) -> Generator[str, None, None
mode=f"GMT_CONTAINER_ONLY|{_gtype}",
ranges=region,
inc=inc,
registration=_reg,
registration=_reg, # type: ignore[arg-type]
)
self.put_matrix(gmt_grid, matrix)
with self.open_virtualfile(
Expand Down Expand Up @@ -1677,8 +1741,7 @@ def virtualfile_from_stringio(
mode="GMT_CONTAINER_ONLY|GMT_WITH_STRINGS",
dim=[n_tables, n_segments, n_rows, n_columns],
)
dataset = ctp.cast(dataset, ctp.POINTER(_GMT_DATASET))
table = dataset.contents.table[0].contents
table = ctp.cast(dataset, ctp.POINTER(_GMT_DATASET)).contents.table[0].contents
for i, segment in enumerate(segments):
seg = table.segment[i].contents
if segment["header"]:
Expand Down
8 changes: 4 additions & 4 deletions pygmt/tests/test_clib_create_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@ def test_create_data_dataset():
family="GMT_IS_DATASET|GMT_VIA_VECTOR",
geometry="GMT_IS_POINT",
mode="GMT_CONTAINER_ONLY",
dim=[10, 20, 1, 0], # columns, rows, layers, dtype
dim=[10, 20, 0, 0], # ncolumns, nrows, dtype, unused
)
# Dataset from matrices
data_matrix = lib.create_data(
family="GMT_IS_DATASET|GMT_VIA_MATRIX",
geometry="GMT_IS_POINT",
mode="GMT_CONTAINER_ONLY",
dim=[10, 20, 1, 0],
dim=[10, 20, 1, 0], # ncolumns, nrows, nlayer, dtype
)
assert data_vector != data_matrix

Expand All @@ -40,7 +40,7 @@ def test_create_data_grid_dim():
family="GMT_IS_GRID|GMT_VIA_MATRIX",
geometry="GMT_IS_SURFACE",
mode="GMT_CONTAINER_ONLY",
dim=[10, 20, 1, 0],
dim=[10, 20, 1, 0], # ncolumns, nrows, nlayer, dtype
)


Expand Down Expand Up @@ -94,5 +94,5 @@ def test_create_data_fails():
family="GMT_IS_DATASET",
geometry="GMT_IS_SURFACE",
mode="GMT_CONTAINER_ONLY",
dim=[11, 10, 2, 0],
dim=[11, 10, 2, 0], # n_tables, n_segments, n_rows, n_columns
)
2 changes: 1 addition & 1 deletion pygmt/tests/test_clib_put_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_put_matrix(dtypes):
family="GMT_IS_DATASET|GMT_VIA_MATRIX",
geometry="GMT_IS_POINT",
mode="GMT_CONTAINER_ONLY",
dim=[shape[1], shape[0], 1, 0], # columns, rows, layers, dtype
dim=[shape[1], shape[0], 1, 0], # ncolumns, nrows, nlayers, dtype
)
data = np.arange(shape[0] * shape[1], dtype=dtype).reshape(shape)
lib.put_matrix(dataset, matrix=data)
Expand Down
2 changes: 1 addition & 1 deletion pygmt/tests/test_clib_put_strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_put_strings():
family="GMT_IS_DATASET|GMT_VIA_VECTOR",
geometry="GMT_IS_POINT",
mode="GMT_CONTAINER_ONLY",
dim=[2, 5, 1, 0], # columns, rows, layers, dtype
dim=[2, 5, 0, 0], # ncolumns, nrows, dtype, unused
)
x = np.array([1, 2, 3, 4, 5], dtype=np.int32)
y = np.array([6, 7, 8, 9, 10], dtype=np.int32)
Expand Down
Loading

0 comments on commit 68bb695

Please sign in to comment.