Skip to content

sudapy.vector.ops

Vector geoprocessing operations: reproject, clip, dissolve, area, buffer, simplify, fix-geometry.

sudapy.vector.ops

Vector geoprocessing operations.

All functions accept either a file path or a `geopandas.GeoDataFrame` as input and return a `geopandas.GeoDataFrame`. GeoPackage, GeoJSON, and Shapefile formats are supported on disk.

reproject

reproject(src: PathLike | GeoDataFrame, to_epsg: int, out: PathLike | None = None) -> gpd.GeoDataFrame

Reproject a vector dataset to a new CRS.

Parameters:

Name Type Description Default
src PathLike | GeoDataFrame

Input file path or GeoDataFrame.

required
to_epsg int

Target EPSG code.

required
out PathLike | None

Optional output file path. If given, the result is also saved.

None

Returns:

Type Description
GeoDataFrame

Reprojected GeoDataFrame.

Source code in src\sudapy\vector\ops.py
def reproject(
    src: PathLike | gpd.GeoDataFrame,
    to_epsg: int,
    out: PathLike | None = None,
) -> gpd.GeoDataFrame:
    """Transform a vector dataset into the CRS identified by an EPSG code.

    Args:
        src: Path of the dataset to load, or an already-loaded GeoDataFrame.
        to_epsg: EPSG code of the destination CRS. It is passed through
            ``validate_epsg`` before being handed to ``to_crs``.
        out: Destination path for the result. Nothing is written when
            this is ``None``.

    Returns:
        The dataset expressed in the target CRS.
    """
    # Only touch the disk when the caller did not hand us a frame already.
    if isinstance(src, gpd.GeoDataFrame):
        frame = src
    else:
        frame = _read(src)

    reprojected = frame.to_crs(validate_epsg(to_epsg))

    if out is not None:
        _write(reprojected, out)
    return reprojected

clip

clip(src: PathLike | GeoDataFrame, clip_src: PathLike | GeoDataFrame, out: PathLike | None = None) -> gpd.GeoDataFrame

Clip a vector dataset by another vector geometry.

Parameters:

Name Type Description Default
src PathLike | GeoDataFrame

Input file or GeoDataFrame.

required
clip_src PathLike | GeoDataFrame

Clipping geometry file or GeoDataFrame.

required
out PathLike | None

Optional output path.

None

Returns:

Type Description
GeoDataFrame

Clipped GeoDataFrame.

Source code in src\sudapy\vector\ops.py
def clip(
    src: PathLike | gpd.GeoDataFrame,
    clip_src: PathLike | gpd.GeoDataFrame,
    out: PathLike | None = None,
) -> gpd.GeoDataFrame:
    """Cut a vector dataset down to the extent of a clipping dataset.

    When both datasets carry a CRS and the two differ, the clipping
    geometry is reprojected to the input's CRS before clipping.

    Args:
        src: Path or GeoDataFrame holding the features to clip.
        clip_src: Path or GeoDataFrame holding the clipping geometry.
        out: Destination path for the result. Nothing is written when
            this is ``None``.

    Returns:
        The clipped GeoDataFrame.
    """
    if isinstance(src, gpd.GeoDataFrame):
        features = src
    else:
        features = _read(src)

    if isinstance(clip_src, gpd.GeoDataFrame):
        boundary = clip_src
    else:
        boundary = _read(clip_src)

    # Align the clip geometry with the input only when both sides
    # actually declare a CRS; otherwise there is nothing to compare.
    if features.crs and boundary.crs:
        if features.crs != boundary.crs:
            logger.info(
                "Reprojecting clip geometry to match input CRS (%s)", features.crs
            )
            boundary = boundary.to_crs(features.crs)

    clipped = gpd.clip(features, boundary)

    if out is not None:
        _write(clipped, out)
    return clipped

dissolve

dissolve(src: PathLike | GeoDataFrame, by: str, out: PathLike | None = None) -> gpd.GeoDataFrame

Dissolve geometries by an attribute field.

Parameters:

Name Type Description Default
src PathLike | GeoDataFrame

Input file or GeoDataFrame.

required
by str

Column name to dissolve on.

required
out PathLike | None

Optional output path.

None

Returns:

Type Description
GeoDataFrame

Dissolved GeoDataFrame.

Source code in src\sudapy\vector\ops.py
def dissolve(
    src: PathLike | gpd.GeoDataFrame,
    by: str,
    out: PathLike | None = None,
) -> gpd.GeoDataFrame:
    """Dissolve geometries by an attribute field.

    Args:
        src: Input file or GeoDataFrame.
        by: Column name to dissolve on.
        out: Optional output path. If given, the result is also saved.

    Returns:
        Dissolved GeoDataFrame, with the dissolve key restored as a
        regular column (the index is reset after dissolving).

    Raises:
        SudaPyError: If ``by`` is not a column of the dataset. The hint
            lists the columns that are available.
    """
    gdf = _read(src) if not isinstance(src, gpd.GeoDataFrame) else src
    if by not in gdf.columns:
        # Column labels are not guaranteed to be strings (e.g. integer
        # labels); stringify them so that building the hint cannot itself
        # raise a TypeError and mask the real error.
        available = ", ".join(str(column) for column in gdf.columns)
        raise SudaPyError(
            f"Column '{by}' not found in dataset.",
            hint=f"Available columns: {available}",
        )
    # dissolve() moves the key into the index; reset_index() turns it
    # back into an ordinary column.
    result = gdf.dissolve(by=by).reset_index()
    if out is not None:
        _write(result, out)
    return result

calculate_area

calculate_area(src: PathLike | GeoDataFrame, field: str = 'area_m2', out: PathLike | None = None) -> gpd.GeoDataFrame

Calculate geometry area in square meters.

If the CRS is geographic (lat/lon), a warning is emitted and geometries are temporarily projected to the appropriate UTM zone for accurate area calculation.

Parameters:

Name Type Description Default
src PathLike | GeoDataFrame

Input file or GeoDataFrame.

required
field str

Name of the new area column.

'area_m2'
out PathLike | None

Optional output path.

None

Returns:

Type Description
GeoDataFrame

GeoDataFrame with a new area column.

Source code in src\sudapy\vector\ops.py
def calculate_area(
    src: PathLike | gpd.GeoDataFrame,
    field: str = "area_m2",
    out: PathLike | None = None,
) -> gpd.GeoDataFrame:
    """Add a column holding the area of each geometry.

    For a geographic (lat/lon) CRS a ``UserWarning`` is issued and the
    area is measured on a temporary copy projected to the estimated UTM
    zone. For any other CRS the area is taken directly from the
    geometries, i.e. in that CRS's own units (presumably meters; confirm
    for CRSs that use other linear units).

    Args:
        src: Path or GeoDataFrame to measure. A GeoDataFrame argument is
            copied, so the caller's object is not modified.
        field: Name of the column that receives the area values.
        out: Destination path for the result. Nothing is written when
            this is ``None``.

    Returns:
        GeoDataFrame with the area column added, in its original CRS.

    Raises:
        CRSError: If the dataset has no CRS.
    """
    if isinstance(src, gpd.GeoDataFrame):
        frame = src.copy()
    else:
        frame = _read(src)

    crs = frame.crs
    if crs is None:
        raise CRSError(
            "Input dataset has no CRS.",
            hint="Set a CRS first, e.g. 'sudapy vector reproject --in file --out file --to 32635'.",
        )

    if not crs.is_geographic:
        frame[field] = frame.geometry.area
    else:
        warnings.warn(
            "Input CRS is geographic (lat/lon). Area will be computed by "
            "temporarily projecting to the estimated UTM zone. For best "
            "accuracy, reproject your data to a projected CRS first.",
            UserWarning,
            stacklevel=2,
        )
        # Measure on a throwaway UTM copy; the returned frame keeps its
        # original CRS and geometries.
        utm_frame = frame.to_crs(frame.estimate_utm_crs())
        frame[field] = utm_frame.geometry.area

    if out is not None:
        _write(frame, out)
    return frame

buffer

buffer(src: PathLike | GeoDataFrame, distance_m: float, out: PathLike | None = None) -> gpd.GeoDataFrame

Buffer geometries by a distance in meters.

If the CRS is geographic, the data is temporarily projected to the estimated UTM zone so the buffer distance is applied in meters.

Parameters:

Name Type Description Default
src PathLike | GeoDataFrame

Input file or GeoDataFrame.

required
distance_m float

Buffer distance in meters.

required
out PathLike | None

Optional output path.

None

Returns:

Type Description
GeoDataFrame

Buffered GeoDataFrame (in original CRS).

Source code in src\sudapy\vector\ops.py
def buffer(
    src: PathLike | gpd.GeoDataFrame,
    distance_m: float,
    out: PathLike | None = None,
) -> gpd.GeoDataFrame:
    """Buffer geometries by a distance in meters.

    If the CRS is geographic, the data is temporarily projected to the
    estimated UTM zone so the buffer distance is applied in meters, and
    the result is projected back to the original CRS. For any other CRS
    the distance is applied directly in that CRS's units (presumably
    meters; confirm for CRSs that use other linear units).

    Args:
        src: Input file or GeoDataFrame. A GeoDataFrame argument is not
            modified; the buffered geometries go into a new frame.
        distance_m: Buffer distance in meters.
        out: Optional output path. If given, the result is also saved.

    Returns:
        Buffered GeoDataFrame (in original CRS).

    Raises:
        CRSError: If the dataset has no CRS.
    """
    gdf = _read(src) if not isinstance(src, gpd.GeoDataFrame) else src
    original_crs = gdf.crs

    if original_crs is None:
        raise CRSError(
            "Input dataset has no CRS.",
            hint="Set a CRS so the buffer distance can be applied in meters.",
        )

    # Write the buffered shapes back into the *active* geometry column.
    # Hard-coding "geometry" would, for a frame whose geometry column has
    # another name, add a stray column and leave the real geometry
    # unbuffered (and the stray column un-reprojected).
    geom_col = gdf.geometry.name

    if original_crs.is_geographic:
        warnings.warn(
            "Input CRS is geographic. Temporarily projecting to UTM for "
            "accurate meter-based buffering.",
            UserWarning,
            stacklevel=2,
        )
        projected = gdf.to_crs(gdf.estimate_utm_crs())
        projected[geom_col] = projected.geometry.buffer(distance_m)
        result = projected.to_crs(original_crs)
    else:
        # Copy so the caller's GeoDataFrame is left untouched.
        result = gdf.copy()
        result[geom_col] = result.geometry.buffer(distance_m)

    if out is not None:
        _write(result, out)
    return result

simplify

simplify(src: PathLike | GeoDataFrame, tolerance_m: float, out: PathLike | None = None) -> gpd.GeoDataFrame

Simplify geometries to reduce vertex count.

If the CRS is geographic, the data is temporarily projected to UTM so the tolerance is applied in meters.

Parameters:

Name Type Description Default
src PathLike | GeoDataFrame

Input file or GeoDataFrame.

required
tolerance_m float

Simplification tolerance in meters.

required
out PathLike | None

Optional output path.

None

Returns:

Type Description
GeoDataFrame

Simplified GeoDataFrame.

Source code in src\sudapy\vector\ops.py
def simplify(
    src: PathLike | gpd.GeoDataFrame,
    tolerance_m: float,
    out: PathLike | None = None,
) -> gpd.GeoDataFrame:
    """Simplify geometries to reduce vertex count.

    If the CRS is geographic, the data is temporarily projected to UTM
    so the tolerance is applied in meters, and the result is projected
    back to the original CRS. Otherwise, including when the dataset has
    no CRS at all, the tolerance is applied directly in the units of the
    stored coordinates.

    Args:
        src: Input file or GeoDataFrame. A GeoDataFrame argument is not
            modified; the simplified geometries go into a new frame.
        tolerance_m: Simplification tolerance in meters.
        out: Optional output path. If given, the result is also saved.

    Returns:
        Simplified GeoDataFrame.
    """
    gdf = _read(src) if not isinstance(src, gpd.GeoDataFrame) else src
    original_crs = gdf.crs

    # Write the simplified shapes back into the *active* geometry column.
    # Hard-coding "geometry" would, for a frame whose geometry column has
    # another name, add a stray column and leave the real geometry as is.
    geom_col = gdf.geometry.name

    if original_crs and original_crs.is_geographic:
        projected = gdf.to_crs(gdf.estimate_utm_crs())
        projected[geom_col] = projected.geometry.simplify(tolerance_m)
        result = projected.to_crs(original_crs)
    else:
        # Copy so the caller's GeoDataFrame is left untouched.
        result = gdf.copy()
        result[geom_col] = result.geometry.simplify(tolerance_m)

    if out is not None:
        _write(result, out)
    return result

fix_geometry

fix_geometry(src: PathLike | GeoDataFrame, out: PathLike | None = None) -> gpd.GeoDataFrame

Repair invalid geometries using `shapely.validation.make_valid`.

Parameters:

Name Type Description Default
src PathLike | GeoDataFrame

Input file or GeoDataFrame.

required
out PathLike | None

Optional output path.

None

Returns:

Type Description
GeoDataFrame

GeoDataFrame with all geometries made valid.

Source code in src\sudapy\vector\ops.py
def fix_geometry(
    src: PathLike | gpd.GeoDataFrame,
    out: PathLike | None = None,
) -> gpd.GeoDataFrame:
    """Repair invalid geometries using :func:`shapely.validation.make_valid`.

    Missing (``None``) geometries are left as they are; they are neither
    repaired nor counted as invalid.

    Args:
        src: Input file or GeoDataFrame. A GeoDataFrame argument is
            copied, so the caller's object is not modified.
        out: Optional output path. If given, the result is also saved.

    Returns:
        GeoDataFrame in which every present geometry has been made valid.
    """
    gdf = _read(src) if not isinstance(src, gpd.GeoDataFrame) else src.copy()

    geometries = gdf.geometry
    # is_valid reports False for missing geometries too. Exclude them so
    # the logged count matches the geometries that are actually repaired.
    needs_fix = geometries.notna() & ~geometries.is_valid
    invalid_count = int(needs_fix.sum())

    if invalid_count > 0:
        logger.info("Fixing %d invalid geometries", invalid_count)
        # Assign to the active geometry column rather than a hard-coded
        # "geometry" label, which would add a stray column on frames whose
        # geometry column has another name.
        gdf[geometries.name] = geometries.apply(
            lambda g: make_valid(g) if g is not None and not g.is_valid else g
        )
    else:
        logger.info("All geometries are already valid")

    if out is not None:
        _write(gdf, out)
    return gdf