Skip to content

I/O Utilities

The io module provides functions for reading and writing GeoTIFF raster files and vector formats (GeoJSON, GeoPackage, GeoParquet), as well as utilities for CRS handling.

io

I/O utilities for raster and vector data.

Provides functions for reading/writing GeoTIFF files, vector formats (GeoJSON, GeoPackage, GeoParquet), and CRS handling.

get_equal_area_crs

get_equal_area_crs() -> pyproj.CRS

Return the NSIDC EASE-Grid 2.0 equal-area CRS (EPSG:6933).

This CRS is used for accurate area calculations of field polygons.

Returns:

Type Description
CRS

EPSG:6933 equal-area cylindrical projection.

Source code in agribound/io/crs.py
def get_equal_area_crs() -> pyproj.CRS:
    """Build the equal-area CRS identified by EPSG:6933.

    EPSG:6933 is the NSIDC EASE-Grid 2.0 projection; it is the CRS used
    when accurate areas of field polygons are needed.

    Returns
    -------
    pyproj.CRS
        The EPSG:6933 equal-area cylindrical projection.
    """
    ease_grid_epsg = 6933
    return pyproj.CRS.from_epsg(ease_grid_epsg)

get_utm_crs

get_utm_crs(lon: float, lat: float) -> pyproj.CRS

Determine the UTM CRS for a given longitude/latitude.

Parameters:

Name Type Description Default
lon float

Longitude in degrees.

required
lat float

Latitude in degrees.

required

Returns:

Type Description
CRS

UTM CRS for the given location.

Source code in agribound/io/crs.py
def get_utm_crs(lon: float, lat: float) -> pyproj.CRS:
    """Determine the UTM CRS for a given longitude/latitude.

    The zone is derived from the plain 6-degree grid (no special-case
    zones are applied). Longitudes are wrapped into a single 360-degree
    turn first, so ``lon == 180`` and 0-360 style longitudes resolve to a
    valid zone in the range 1-60 instead of producing an EPSG code that is
    not a UTM zone.

    Parameters
    ----------
    lon : float
        Longitude in degrees. Values outside ``[-180, 180)`` are wrapped.
    lat : float
        Latitude in degrees. ``lat >= 0`` selects the northern-hemisphere
        zone (EPSG:326xx), otherwise the southern one (EPSG:327xx).

    Returns
    -------
    pyproj.CRS
        UTM CRS for the given location.
    """
    # Offset from the antimeridian, wrapped into [0, 360). For inputs that
    # already lie in [-180, 180) the modulo leaves the value unchanged.
    offset_from_antimeridian = (lon + 180.0) % 360.0

    # int() is a floor here because the offset is non-negative. The min()
    # guards against the float modulo rounding up to exactly 360.0, which
    # would otherwise yield the non-existent zone 61.
    utm_zone = min(int(offset_from_antimeridian / 6.0) + 1, 60)

    epsg_base = 32600 if lat >= 0 else 32700
    return pyproj.CRS.from_epsg(epsg_base + utm_zone)

reproject_raster

reproject_raster(src_path: str | Path, dst_path: str | Path, dst_crs: Any, resolution: float | None = None, resampling: str = 'nearest') -> str

Reproject a raster to a different CRS.

Parameters:

Name Type Description Default
src_path str or Path

Source raster file.

required
dst_path str or Path

Destination raster file.

required
dst_crs CRS or str

Target coordinate reference system.

required
resolution float or None

Output pixel resolution. If None, computed automatically.

None
resampling str

Resampling method: "nearest", "bilinear", "cubic". Any other value falls back to "nearest".

'nearest'

Returns:

Type Description
str

Path to the reprojected raster.

Source code in agribound/io/crs.py
def reproject_raster(
    src_path: str | Path,
    dst_path: str | Path,
    dst_crs: Any,
    resolution: float | None = None,
    resampling: str = "nearest",
) -> str:
    """Warp a raster file into another coordinate reference system.

    Every band of the source is reprojected into a new file that is
    written with LZW compression. Missing parent directories of the
    destination are created.

    Parameters
    ----------
    src_path : str or Path
        Raster file to read.
    dst_path : str or Path
        Raster file to create.
    dst_crs : CRS or str
        Coordinate reference system of the output.
    resolution : float or None
        Pixel size of the output. When *None*, the default computed by
        ``calculate_default_transform`` is used.
    resampling : str
        One of ``"nearest"``, ``"bilinear"`` or ``"cubic"``. Any other
        value falls back to nearest-neighbour.

    Returns
    -------
    str
        The destination path as a string.
    """
    source_file = Path(src_path)
    target_file = Path(dst_path)
    target_file.parent.mkdir(parents=True, exist_ok=True)

    # Unknown method names are not an error: they resolve to nearest.
    known_methods = {
        "nearest": Resampling.nearest,
        "bilinear": Resampling.bilinear,
        "cubic": Resampling.cubic,
    }
    method = known_methods.get(resampling, Resampling.nearest)

    with rasterio.open(source_file) as src:
        # Only forward ``resolution`` when the caller actually supplied one.
        grid_options = {} if resolution is None else {"resolution": resolution}
        out_transform, out_width, out_height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds, **grid_options
        )

        # Start from the source profile and override the georeferencing.
        profile = src.meta.copy()
        profile["crs"] = dst_crs
        profile["transform"] = out_transform
        profile["width"] = out_width
        profile["height"] = out_height
        profile["compress"] = "lzw"

        with rasterio.open(target_file, "w", **profile) as dst:
            band_number = 1
            while band_number <= src.count:
                reproject(
                    source=rasterio.band(src, band_number),
                    destination=rasterio.band(dst, band_number),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=out_transform,
                    dst_crs=dst_crs,
                    resampling=method,
                )
                band_number += 1

    return str(target_file)

get_raster_info

get_raster_info(path: str | Path) -> RasterInfo

Read metadata from a raster file without loading pixel data.

Parameters:

Name Type Description Default
path str or Path

Path to the raster file (GeoTIFF).

required

Returns:

Type Description
RasterInfo

Raster metadata.

Raises:

Type Description
FileNotFoundError

If the file does not exist.

Source code in agribound/io/raster.py
def get_raster_info(path: str | Path) -> RasterInfo:
    """Collect a raster's metadata without reading any pixel values.

    Parameters
    ----------
    path : str or Path
        Location of the raster file (GeoTIFF).

    Returns
    -------
    RasterInfo
        Dimensions, band count, CRS, transform, bounds, dtype of the first
        band, nodata value and resolution of the raster.

    Raises
    ------
    FileNotFoundError
        If nothing exists at *path*.
    """
    raster_path = Path(path)
    if not raster_path.exists():
        raise FileNotFoundError(f"Raster file not found: {raster_path}")

    with rasterio.open(raster_path) as dataset:
        # The reported dtype is that of the first band only.
        fields = {
            "path": str(raster_path),
            "width": dataset.width,
            "height": dataset.height,
            "count": dataset.count,
            "crs": dataset.crs,
            "transform": dataset.transform,
            "bounds": dataset.bounds,
            "dtype": str(dataset.dtypes[0]),
            "nodata": dataset.nodata,
            "res": dataset.res,
        }
        return RasterInfo(**fields)

read_raster

read_raster(path: str | Path, bands: list[int] | None = None, window: Window | None = None) -> tuple[np.ndarray, dict[str, Any]]

Read a raster file into a NumPy array.

Parameters:

Name Type Description Default
path str or Path

Path to the raster file.

required
bands list[int] or None

1-based band indices to read. None reads all bands.

None
window Window or None

Spatial sub-window to read. None reads the full extent.

None

Returns:

Name Type Description
data ndarray

Pixel data with shape (bands, height, width).

meta dict

Rasterio metadata dictionary (crs, transform, width, height, etc.).

Source code in agribound/io/raster.py
def read_raster(
    path: str | Path,
    bands: list[int] | None = None,
    window: rasterio.windows.Window | None = None,
) -> tuple[np.ndarray, dict[str, Any]]:
    """Load pixel data and metadata from a raster file.

    Parameters
    ----------
    path : str or Path
        Location of the raster file.
    bands : list[int] or None
        1-based indices of the bands to load. *None* loads every band.
    window : rasterio.windows.Window or None
        Sub-window to load. *None* loads the full extent.

    Returns
    -------
    data : numpy.ndarray
        Pixel data with shape ``(bands, height, width)``.
    meta : dict
        Copy of the dataset metadata. ``count`` reflects the number of
        bands requested; when a window is given, ``width``, ``height`` and
        ``transform`` describe that window rather than the whole file.
    """
    with rasterio.open(Path(path)) as dataset:
        selected = list(range(1, dataset.count + 1)) if bands is None else bands
        pixels = dataset.read(selected, window=window)

        profile = dataset.meta.copy()
        if window is not None:
            # Re-anchor the georeferencing on the window that was read.
            windowed = {
                "width": window.width,
                "height": window.height,
                "transform": dataset.window_transform(window),
            }
            profile.update(windowed)
        profile["count"] = len(selected)

    return pixels, profile

write_raster

write_raster(path: str | Path, data: ndarray, crs: Any, transform: Any, nodata: float | None = None, dtype: str | None = None, compress: str = 'lzw') -> str

Write a NumPy array as a GeoTIFF.

Parameters:

Name Type Description Default
path str or Path

Destination file path.

required
data ndarray

Pixel data with shape (bands, height, width) or (height, width).

required
crs CRS or str

Coordinate reference system.

required
transform Affine

Affine transform.

required
nodata float or None

Nodata value to encode in the file.

None
dtype str or None

Output data type. Defaults to the array dtype.

None
compress str

Compression method (default "lzw").

'lzw'

Returns:

Type Description
str

Path to the written file.

Source code in agribound/io/raster.py
def write_raster(
    path: str | Path,
    data: np.ndarray,
    crs: Any,
    transform: Any,
    nodata: float | None = None,
    dtype: str | None = None,
    compress: str = "lzw",
) -> str:
    """Save a NumPy array to disk as a tiled GeoTIFF.

    The file is written with 256x256 blocks and the ``BIGTIFF="YES"``
    creation option. Missing parent directories are created.

    Parameters
    ----------
    path : str or Path
        Where to write the file.
    data : numpy.ndarray
        Pixels shaped ``(bands, height, width)``; a 2-D ``(height, width)``
        array is treated as a single band.
    crs : rasterio.crs.CRS or str
        Coordinate reference system.
    transform : rasterio.transform.Affine
        Affine transform.
    nodata : float or None
        Nodata value recorded in the file.
    dtype : str or None
        Output data type; the array's own dtype is used when *None*.
    compress : str
        Compression method (default ``"lzw"``).

    Returns
    -------
    str
        The path that was written, as a string.
    """
    out_path = Path(path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # Promote a single 2-D band to the (bands, height, width) layout.
    cube = data[np.newaxis, ...] if data.ndim == 2 else data
    band_count, rows, cols = cube.shape

    profile = {
        "driver": "GTiff",
        "height": rows,
        "width": cols,
        "count": band_count,
        "dtype": str(cube.dtype) if dtype is None else dtype,
        "crs": crs,
        "transform": transform,
        "nodata": nodata,
        "compress": compress,
        "tiled": True,
        "blockxsize": 256,
        "blockysize": 256,
        "BIGTIFF": "YES",
    }
    with rasterio.open(out_path, "w", **profile) as dst:
        dst.write(cube)

    return str(out_path)

read_vector

read_vector(path: str | Path) -> gpd.GeoDataFrame

Read a vector file into a GeoDataFrame.

Supports GeoJSON, GeoPackage, Shapefile, FlatGeobuf, and GeoParquet formats.

Parameters:

Name Type Description Default
path str or Path

Path to the vector file.

required

Returns:

Type Description
GeoDataFrame

Loaded vector data.

Raises:

Type Description
FileNotFoundError

If the file does not exist.

ValueError

If the file format is not supported.

Source code in agribound/io/vector.py
def read_vector(path: str | Path) -> gpd.GeoDataFrame:
    """Load a vector file as a GeoDataFrame.

    The reader is chosen from the (case-insensitive) file extension:
    ``.parquet`` / ``.geoparquet`` are read as GeoParquet, while
    ``.geojson``, ``.json``, ``.gpkg``, ``.shp`` and ``.fgb`` go through
    ``geopandas.read_file``.

    Parameters
    ----------
    path : str or Path
        Location of the vector file.

    Returns
    -------
    geopandas.GeoDataFrame
        The loaded vector data.

    Raises
    ------
    FileNotFoundError
        If nothing exists at *path*.
    ValueError
        If the extension is not one of the supported formats.
    """
    vector_path = Path(path)
    if not vector_path.exists():
        raise FileNotFoundError(f"Vector file not found: {vector_path}")

    extension = vector_path.suffix.lower()

    parquet_extensions = (".parquet", ".geoparquet")
    if extension in parquet_extensions:
        return gpd.read_parquet(vector_path)

    file_extensions = (".geojson", ".json", ".gpkg", ".shp", ".fgb")
    if extension in file_extensions:
        return gpd.read_file(vector_path)

    raise ValueError(
        f"Unsupported vector format: {extension!r}. "
        "Supported: .geojson, .json, .gpkg, .shp, .parquet, .geoparquet, .fgb"
    )

write_vector

write_vector(gdf: GeoDataFrame, path: str | Path, format: str | None = None) -> str

Write a GeoDataFrame to a vector file.

When writing to .parquet, the output is fiboa-compliant GeoParquet.

Parameters:

Name Type Description Default
gdf GeoDataFrame

Vector data to write.

required
path str or Path

Destination file path.

required
format str or None

Override output format. If None, inferred from the file extension.

None

Returns:

Type Description
str

Path to the written file.

Source code in agribound/io/vector.py
def write_vector(
    gdf: gpd.GeoDataFrame,
    path: str | Path,
    format: str | None = None,
) -> str:
    """Save a GeoDataFrame to a vector file.

    Parquet output is delegated to ``_write_fiboa_parquet``. GeoJSON output
    is reprojected to EPSG:4326 when the frame has a different CRS.
    GeoPackage output replaces any existing file and is written to a layer
    named ``"fields"``. Missing parent directories are created.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        The vector data to save.
    path : str or Path
        Where to write the file.
    format : str or None
        Explicit output format (``"parquet"``, ``"geojson"``, ``"gpkg"``,
        ``"shp"`` or ``"fgb"``). When *None*, the format is derived from
        the file extension.

    Returns
    -------
    str
        The path that was written, as a string.

    Raises
    ------
    ValueError
        If the format cannot be derived from the extension, or the
        requested format is not supported.
    """
    out_path = Path(path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    chosen = format
    if chosen is None:
        extension = out_path.suffix.lower()
        by_extension = {
            ".gpkg": "gpkg",
            ".geojson": "geojson",
            ".json": "geojson",
            ".parquet": "parquet",
            ".geoparquet": "parquet",
            ".shp": "shp",
            ".fgb": "fgb",
        }
        chosen = by_extension.get(extension)
        if chosen is None:
            raise ValueError(f"Cannot infer format from extension {extension!r}")

    written = str(out_path)

    if chosen == "parquet":
        _write_fiboa_parquet(gdf, out_path)
        return written

    if chosen == "geojson":
        # GeoJSON is written in EPSG:4326; frames without a CRS are left as-is.
        has_other_crs = gdf.crs is not None and not gdf.crs.equals("EPSG:4326")
        frame = gdf.to_crs("EPSG:4326") if has_other_crs else gdf
        frame.to_file(out_path, driver="GeoJSON")
        return written

    if chosen == "gpkg":
        # Start from a clean file so no stale layers survive the rewrite.
        if out_path.exists():
            out_path.unlink()
        gdf.to_file(out_path, driver="GPKG", layer="fields")
        return written

    if chosen == "shp":
        gdf.to_file(out_path, driver="ESRI Shapefile")
        return written

    if chosen == "fgb":
        gdf.to_file(out_path, driver="FlatGeobuf")
        return written

    raise ValueError(f"Unsupported output format: {chosen!r}")