Source code for sgis.geopandas_tools.polygon_operations

"""Functions for polygon geometries."""

from collections.abc import Callable

import networkx as nx
import numpy as np
import pandas as pd
from geopandas import GeoDataFrame
from geopandas import GeoSeries
from shapely import STRtree
from shapely import area
from shapely import box
from shapely import buffer
from shapely import difference
from shapely import extract_unique_points
from shapely import get_exterior_ring
from shapely import get_interior_ring
from shapely import get_num_geometries
from shapely import get_num_interior_rings
from shapely import get_parts
from shapely import is_empty
from shapely import make_valid
from shapely import polygons
from shapely import union_all
from shapely.errors import GEOSException
from shapely.geometry import LinearRing
from shapely.ops import SplitOp

from ..conf import _get_instance
from ..conf import config
from ..debug_config import _DEBUG_CONFIG
from ..debug_config import _try_debug_print
from ..maps.maps import explore_locals
from .conversion import to_gdf
from .conversion import to_geoseries
from .duplicates import _get_intersecting_geometries
from .general import _push_geom_col
from .general import clean_geoms
from .general import extend_lines
from .general import get_grouped_centroids
from .general import get_line_segments
from .general import to_lines
from .geometry_types import get_geom_type
from .geometry_types import make_all_singlepart
from .geometry_types import to_single_geom_type
from .neighbors import get_neighbor_indices
from .overlay import clean_overlay
from .polygons_as_rings import PolygonsAsRings
from .runners import OverlayRunner
from .runners import UnionRunner
from .sfilter import sfilter
from .sfilter import sfilter_inverse
from .utils import _unary_union_for_notna

PRECISION = 1e-3
_BUFFER = False



[docs]
def get_polygon_clusters(
    *gdfs: GeoDataFrame | GeoSeries,
    cluster_col: str = "cluster",
    allow_multipart: bool = False,
    predicate: str | None = "intersects",
    as_string: bool = False,
) -> GeoDataFrame | tuple[GeoDataFrame]:
    """Find which polygons overlap without dissolving.

    Devides polygons into clusters in a fast and precice manner by using spatial join
    and networkx to find the connected components, i.e. overlapping geometries.
    If multiple GeoDataFrames are given, the clusters will be based on all
    combined.

    This can be used instead of dissolve+explode, or before dissolving by the cluster
    column. This has been tested to be a lot faster if there are many
    non-overlapping polygons, but somewhat slower than dissolve+explode if most
    polygons overlap.

    Args:
        gdfs: One or more GeoDataFrames of polygons.
        cluster_col: Name of the resulting cluster column.
        allow_multipart: Whether to allow mutipart geometries in the gdfs.
            Defaults to False to avoid confusing results.
        predicate: Spatial predicate. Defaults to "intersects".
        as_string: Whether to return the cluster column values as a string with x and y
            coordinates. Convinient to always get unique ids.
            Defaults to False because of speed.

    Returns:
        One or more GeoDataFrames (same amount as was given) with a new cluster column.

    Examples:
    ---------
    Create geometries with three clusters of overlapping polygons.

    >>> import sgis as sg
    >>> gdf = sg.to_gdf([(0, 0), (1, 1), (0, 1), (4, 4), (4, 3), (7, 7)])
    >>> buffered = sg.buff(gdf, 1)
    >>> gdf
                                                geometry
    0  POLYGON ((1.00000 0.00000, 0.99951 -0.03141, 0...
    1  POLYGON ((2.00000 1.00000, 1.99951 0.96859, 1....
    2  POLYGON ((1.00000 1.00000, 0.99951 0.96859, 0....
    3  POLYGON ((5.00000 4.00000, 4.99951 3.96859, 4....
    4  POLYGON ((5.00000 3.00000, 4.99951 2.96859, 4....
    5  POLYGON ((8.00000 7.00000, 7.99951 6.96859, 7....

    Add a cluster column to the GeoDataFrame:

    >>> gdf = sg.get_polygon_clusters(gdf, cluster_col="cluster")
    >>> gdf
       cluster                                           geometry
    0        0  POLYGON ((1.00000 0.00000, 0.99951 -0.03141, 0...
    1        0  POLYGON ((2.00000 1.00000, 1.99951 0.96859, 1....
    2        0  POLYGON ((1.00000 1.00000, 0.99951 0.96859, 0....
    3        1  POLYGON ((5.00000 4.00000, 4.99951 3.96859, 4....
    4        1  POLYGON ((5.00000 3.00000, 4.99951 2.96859, 4....
    5        2  POLYGON ((8.00000 7.00000, 7.99951 6.96859, 7....

    If multiple GeoDataFrames are given, all are returned with common
    cluster values.

    >>> gdf2 = sg.to_gdf([(0, 0), (7, 7)])
    >>> gdf, gdf2 = sg.get_polygon_clusters(gdf, gdf2, cluster_col="cluster")
    >>> gdf2
       cluster                 geometry
    0        0  POINT (0.00000 0.00000)
    1        2  POINT (7.00000 7.00000)
    >>> gdf
       cluster                                           geometry
    0        0  POLYGON ((1.00000 0.00000, 0.99951 -0.03141, 0...
    1        0  POLYGON ((2.00000 1.00000, 1.99951 0.96859, 1....
    2        0  POLYGON ((1.00000 1.00000, 0.99951 0.96859, 0....
    3        1  POLYGON ((5.00000 4.00000, 4.99951 3.96859, 4....
    4        1  POLYGON ((5.00000 3.00000, 4.99951 2.96859, 4....
    5        2  POLYGON ((8.00000 7.00000, 7.99951 6.96859, 7....

    Dissolving 'by' the cluster column will make the dissolve much
    faster if there are a lot of non-overlapping polygons.

    >>> dissolved = gdf.dissolve(by="cluster", as_index=False)
    >>> dissolved
       cluster                                           geometry
    0        0  POLYGON ((0.99951 -0.03141, 0.99803 -0.06279, ...
    1        1  POLYGON ((4.99951 2.96859, 4.99803 2.93721, 4....
    2        2  POLYGON ((8.00000 7.00000, 7.99951 6.96859, 7....
    """
    if isinstance(gdfs[-1], str):
        *gdfs, cluster_col = gdfs

    concated = []
    orig_indices = ()

    # take a copy only if there are gdfs with the same id
    # To not get any overwriting in the for loop
    if sum(df1 is df2 for df1 in gdfs for df2 in gdfs) > len(gdfs):
        gdfs = [gdf.copy() for gdf in gdfs]

    for i, gdf in enumerate(gdfs):
        if isinstance(gdf, GeoSeries):
            gdf = gdf.to_frame()

        if not isinstance(gdf, GeoDataFrame):
            raise TypeError("'gdfs' should be GeoDataFrames or GeoSeries.")

        if not allow_multipart and len(gdf) != len(gdf.explode(index_parts=False)):
            raise ValueError(
                "All geometries should be exploded to singlepart "
                "in order to get correct polygon clusters. "
                "To allow multipart geometries, set allow_multipart=True"
            )

        orig_indices = orig_indices + (gdf.index,)

        gdf = gdf.assign(i__=i)

        concated.append(gdf)

    concated = pd.concat(concated, ignore_index=True)

    if not len(concated):
        return concated.drop("i__", axis=1).assign(**{cluster_col: []})

    concated[cluster_col] = get_cluster_mapper(concated, predicate)

    if as_string:
        concated[cluster_col] = get_grouped_centroids(concated, groupby=cluster_col)

    concated = _push_geom_col(concated)

    n_gdfs = concated["i__"].unique()

    if len(n_gdfs) == 1:
        concated.index = orig_indices[0]
        return concated.drop(["i__"], axis=1)

    unconcated = ()
    for i in n_gdfs:
        gdf = concated[concated["i__"] == i]
        gdf.index = orig_indices[i]
        gdf = gdf.drop(["i__"], axis=1)
        unconcated = unconcated + (gdf,)

    return unconcated




[docs]
def get_cluster_mapper(
    gdf: GeoDataFrame | GeoSeries, predicate: str = "intersects"
) -> list[int]:
    """Returns a list of cluster indices corresponding to the order of the input GeoDataFrame or GeoSeries."""
    gdf = gdf.reset_index(drop=True)
    neighbors = get_neighbor_indices(gdf, gdf, predicate=predicate)

    edges = [(source, target) for source, target in neighbors.items()]

    graph = nx.Graph()
    graph.add_edges_from(edges)

    mapper = {
        i: cluster_index
        for cluster_index, component in enumerate(nx.connected_components(graph))
        for i in component
    }
    return list(dict(sorted(mapper.items())).values())




[docs]
def eliminate_by_longest(
    gdf: GeoDataFrame | tuple[GeoDataFrame],
    to_eliminate: GeoDataFrame,
    *,
    fix_double: bool = True,
    ignore_index: bool = False,
    aggfunc: str | dict | list | None = None,
    grid_size=None,
    n_jobs: int = 1,
    union_runner: UnionRunner | None = None,
    overlay_runner: OverlayRunner | None = None,
    **kwargs,
) -> tuple[GeoDataFrame]:
    """Dissolves selected polygons with the longest bordering neighbor polygon.

    Eliminates selected geometries by dissolving them with the neighboring
    polygon with the longest shared border. The index and column values of the
    large polygons will be kept, unless else is specified.

    Note that this might be a lot slower than eliminate_by_largest.

    Args:
        gdf: GeoDataFrame with polygon geometries, or a list of GeoDataFrames.
        to_eliminate: The geometries to be eliminated by 'gdf'.
        fix_double: If True, geometries to be eliminated will be erased by overlapping
            geometries to not get double surfaces if the geometries in 'to_eliminate'
            overlaps with multiple geometries in 'gdf'.
        ignore_index: If False (default), the resulting GeoDataFrame will keep the
            index of the large polygons. If True, the resulting axis will be labeled
            0, 1, …, n - 1.
        aggfunc: Aggregation function(s) to use when dissolving/eliminating.
            Defaults to None, meaning the values of 'gdf' is used. Otherwise,
            aggfunc will be passed to pandas groupby.agg. note: The geometries of
            'gdf' are sorted first, but if 'gdf' has missing values, the resulting
            polygons might get values from the polygons to be eliminated
            (if aggfunc="first").
        grid_size: Rounding of the coordinates. Defaults to None.
        n_jobs: Number of threads to use. Defaults to 1.
        union_runner: Optionally debug/manipulate the spatial union operations.
            See the 'runners' module for example implementations.
        overlay_runner: Optionally debug/manipulate the spatial overlay operations.
            See the 'runners' module for example implementations.
        **kwargs: Keyword arguments passed to the dissolve method.

    Returns:
        A tuple of the GeoDataFrame with the geometries of 'to_eliminate'
        dissolved in and a GeoDataFrame with the potentionally isolated
        polygons that could not be eliminated. If multiple GeoDataFrame
        are passed as 'gdf', the returned tuple will contain each frame
        plus the isolated polygons as the last item.

    Examples:
    ---------
    Create two polygons with a sliver in between:

    >>> sliver = sg.to_gdf(Polygon([(0, 0), (0.1, 1), (0, 2), (-0.1, 1)]))
    >>> small_poly = sg.to_gdf(
    ...     Polygon([(0, 0), (-0.1, 1), (0, 2), (-1, 2), (-2, 2), (-1, 1)])
    ... )
    >>> large_poly = sg.to_gdf(
    ...     Polygon([(0, 0), (0.1, 1), (1, 2), (2, 2), (3, 2), (3, 0)])
    ... )

    Using multiple GeoDataFrame as input, the sliver is eliminated into the small
    polygon (because it has the longest border with sliver).

    >>> small_poly_eliminated, large_poly_eliminated, isolated = sg.eliminate_by_longest(
    ...     [small_poly, large_poly], sliver
    ... )

    With only one input GeoDataFrame:

    >>> polys = pd.concat([small_poly, large_poly])
    >>> eliminated, isolated = sg.eliminate_by_longest(polys, sliver)
    """
    _recurse = kwargs.pop("_recurse", False)

    if not len(to_eliminate) or not len(gdf):
        if isinstance(gdf, (list, tuple)):
            return (*gdf, to_eliminate)
        return gdf, to_eliminate

    if isinstance(gdf, (list, tuple)):
        # concat, then break up the dataframes in the end
        was_multiple_gdfs = True
        original_cols = [df.columns for df in gdf]
        gdf = pd.concat(df.assign(**{"_df_idx": i}) for i, df in enumerate(gdf))
    else:
        was_multiple_gdfs = False

    crs = gdf.crs
    geom_type = get_geom_type(gdf)

    if not ignore_index:
        idx_mapper = dict(enumerate(gdf.index))
        idx_name = gdf.index.name

    gdf = gdf.reset_index(drop=True)

    # TODO: is it ok to singlepart here?
    gdf = make_all_singlepart(gdf, ignore_index=True).pipe(
        to_single_geom_type, "polygon"
    )

    if _BUFFER:
        gdf.geometry = gdf.buffer(
            PRECISION,
            resolution=1,
            join_style=2,
        )

        to_eliminate.geometry = to_eliminate.buffer(
            PRECISION,
            resolution=1,
            join_style=2,
        )

    # more_than_one = get_num_geometries(to_eliminate.geometry.values) > 1
    # filt = more_than_one, to_eliminate._geometry_column_name
    # to_eliminate.loc[*filt] = to_eliminate.loc[*filt].apply(_unary_union_for_notna)

    gdf["_dissolve_idx"] = gdf.index
    to_eliminate = to_eliminate.assign(_eliminate_idx=lambda x: range(len(x)))

    # convert to lines to get the borders
    lines_eliminate = to_lines(to_eliminate[["_eliminate_idx", "geometry"]])

    borders = clean_overlay(
        gdf[["_dissolve_idx", "geometry"]],
        lines_eliminate,
        keep_geom_type=False,
        grid_size=grid_size,
        n_jobs=n_jobs,
        overlay_runner=overlay_runner,
    ).loc[lambda x: x["_eliminate_idx"].notna()]

    borders["_length"] = borders.length

    # as DataFrame because GeoDataFrame constructor is expensive
    borders = pd.DataFrame(borders)

    longest_border = borders.sort_values("_length", ascending=False).drop_duplicates(
        "_eliminate_idx"
    )

    to_dissolve_idx = longest_border.set_index("_eliminate_idx")["_dissolve_idx"]
    to_eliminate["_dissolve_idx"] = to_eliminate["_eliminate_idx"].map(to_dissolve_idx)

    actually_eliminate = to_eliminate.loc[to_eliminate["_dissolve_idx"].notna()]

    isolated = to_eliminate.loc[to_eliminate["_dissolve_idx"].isna()]
    containing_eliminators = (
        pd.DataFrame(
            isolated.drop(columns="_dissolve_idx").sjoin(
                gdf[["_dissolve_idx", "geometry"]], predicate="contains"
            )
        )
        .drop(columns="index_right")
        .drop_duplicates("_eliminate_idx")
    )
    isolated = isolated.drop(
        ["_dissolve_idx", "_length", "_eliminate_idx"],
        axis=1,
        errors="ignore",
    )

    eliminated = _eliminate(
        pd.DataFrame(gdf),
        pd.concat([actually_eliminate, containing_eliminators]),
        aggfunc,
        crs,
        fix_double,
        grid_size=grid_size,
        n_jobs=n_jobs,
        union_runner=union_runner,
        overlay_runner=overlay_runner,
        **kwargs,
    )

    if not ignore_index:
        eliminated.index = eliminated.index.map(idx_mapper)
        eliminated.index.name = idx_name

    eliminated = eliminated.drop(
        ["_dissolve_idx", "_length", "_eliminate_idx"],
        axis=1,
        errors="ignore",
    )

    out = GeoDataFrame(eliminated, geometry="geometry", crs=crs).pipe(clean_geoms)

    if _BUFFER:
        out.geometry = out.buffer(
            -PRECISION,
            resolution=1,
            join_style=2,
        )
        isolated.geometry = isolated.buffer(
            -PRECISION,
            resolution=1,
            join_style=2,
        )

    if geom_type != "mixed":
        out = to_single_geom_type(out, geom_type)

    out = out.reset_index(drop=True) if ignore_index else out

    _try_debug_print("inni eliminate_by_longest")
    explore_locals(center=_DEBUG_CONFIG["center"])

    if not _recurse and len(isolated):
        out, isolated = _recursively_eliminate_new_neighbors(
            out,
            isolated,
            func=eliminate_by_longest,
            fix_double=fix_double,
            ignore_index=ignore_index,
            aggfunc=aggfunc,
            grid_size=grid_size,
            union_runner=union_runner,
            overlay_runner=overlay_runner,
            n_jobs=n_jobs,
        )

    _try_debug_print("inni eliminate_by_longest 2")
    explore_locals(center=_DEBUG_CONFIG["center"])

    # assert (
    #     out[["ARTYPE", "ARTRESLAG", "ARSKOGBON", "ARGRUNNF", "kilde"]]
    #     .notna()
    #     .all()
    #     .all()
    # ), out[["ARTYPE", "ARTRESLAG", "ARSKOGBON", "ARGRUNNF", "kilde"]].sort_values(
    #     ["ARTYPE", "ARTRESLAG", "ARSKOGBON", "ARGRUNNF", "kilde"]
    # )

    if not was_multiple_gdfs:
        return out, isolated

    gdfs = ()
    for i, cols in enumerate(original_cols):
        df = out.loc[out["_df_idx"] == i, cols]
        gdfs += (df,)

    return (*gdfs, isolated)



def _recursively_eliminate_new_neighbors(
    df: GeoDataFrame,
    isolated: GeoDataFrame,
    func: Callable,
    **kwargs,
):
    len_now = len(isolated)
    while len(isolated):
        _try_debug_print(f"recurse len({len(isolated)})")
        df, isolated = func(
            df,
            isolated,
            _recurse=True,
            **kwargs,
        )

        if len_now == len(isolated):
            break
        len_now = len(isolated)

    return df, isolated



[docs]
def eliminate_by_largest(
    gdf: GeoDataFrame | list[GeoDataFrame],
    to_eliminate: GeoDataFrame,
    *,
    max_distance: int | float | None = None,
    fix_double: bool = True,
    ignore_index: bool = False,
    aggfunc: str | dict | list | None = None,
    predicate: str = "intersects",
    grid_size=None,
    n_jobs: int = 1,
    union_runner: UnionRunner | None = None,
    overlay_runner: OverlayRunner | None = None,
    **kwargs,
) -> tuple[GeoDataFrame]:
    """Dissolves selected polygons with the largest neighbor polygon.

    Eliminates selected geometries by dissolving them with the neighboring
    polygon with the largest area. The index and column values of the
    large polygons will be kept, unless else is specified.

    Args:
        gdf: GeoDataFrame with polygon geometries, or a list of GeoDataFrames.
        to_eliminate: The geometries to be eliminated by 'gdf'.
        max_distance: Max distance to search for neighbors. Defaults to None, meaning
            0.
        fix_double: If True, geometries to be eliminated will be erased by overlapping
            geometries to not get double surfaces if the geometries in 'to_eliminate'
            overlaps with multiple geometries in 'gdf'.
        ignore_index: If False (default), the resulting GeoDataFrame will keep the
            index of the large polygons. If True, the resulting axis will be labeled
            0, 1, …, n - 1.
        aggfunc: Aggregation function(s) to use when dissolving/eliminating.
            Defaults to None, meaning the values of 'gdf' is used. Otherwise,
            aggfunc will be passed to pandas groupby.agg. note: The geometries of
            'gdf' are sorted first, but if 'gdf' has missing values, the resulting
            polygons might get values from the polygons to be eliminated
            (if aggfunc="first").
        predicate: Binary predicate passed to sjoin. Defaults to "intersects".
        grid_size: Rounding of the coordinates. Defaults to None.
        n_jobs: Number of threads to use. Defaults to 1.
        union_runner: Optionally debug/manipulate the spatial union operations.
            See the 'runners' module for example implementations.
        overlay_runner: Optionally debug/manipulate the spatial overlay operations.
            See the 'runners' module for example implementations.
        **kwargs: Keyword arguments passed to the dissolve method.

    Returns:
        A tuple of the GeoDataFrame with the geometries of 'to_eliminate'
        dissolved in and a GeoDataFrame with the potentionally isolated
        polygons that could not be eliminated. If multiple GeoDataFrame
        are passed as 'gdf', the returned tuple will contain each frame
        plus the isolated polygons as the last item.

    Examples:
    ---------
    Create two polygons with a sliver in between:

    >>> sliver = sg.to_gdf(Polygon([(0, 0), (0.1, 1), (0, 2), (-0.1, 1)]))
    >>> small_poly = sg.to_gdf(
    ...     Polygon([(0, 0), (-0.1, 1), (0, 2), (-1, 2), (-2, 2), (-1, 1)])
    ... )
    >>> large_poly = sg.to_gdf(
    ...     Polygon([(0, 0), (0.1, 1), (1, 2), (2, 2), (3, 2), (3, 0)])
    ... )

    Using multiple GeoDataFrame as input, the sliver is eliminated into
    the large polygon.

    >>> small_poly_eliminated, large_poly_eliminated, isolated = sg.eliminate_by_largest(
    ...     [small_poly, large_poly], sliver
    ... )

    With only one input GeoDataFrame:

    >>> polys = pd.concat([small_poly, large_poly])
    >>> eliminated, isolated = sg.eliminate_by_largest(polys, sliver)
    """
    return _eliminate_by_area(
        gdf,
        to_eliminate=to_eliminate,
        max_distance=max_distance,
        ignore_index=ignore_index,
        sort_ascending=False,
        aggfunc=aggfunc,
        predicate=predicate,
        fix_double=fix_double,
        grid_size=grid_size,
        n_jobs=n_jobs,
        union_runner=union_runner,
        overlay_runner=overlay_runner,
        **kwargs,
    )



def eliminate_by_smallest(
    gdf: GeoDataFrame | list[GeoDataFrame],
    to_eliminate: GeoDataFrame,
    *,
    max_distance: int | float | None = None,
    ignore_index: bool = False,
    aggfunc: str | dict | list | None = None,
    predicate: str = "intersects",
    fix_double: bool = True,
    grid_size=None,
    n_jobs: int = 1,
    union_runner: UnionRunner | None = None,
    overlay_runner: OverlayRunner | None = None,
    **kwargs,
) -> tuple[GeoDataFrame]:
    return _eliminate_by_area(
        gdf,
        to_eliminate=to_eliminate,
        max_distance=max_distance,
        ignore_index=ignore_index,
        sort_ascending=True,
        aggfunc=aggfunc,
        predicate=predicate,
        fix_double=fix_double,
        grid_size=grid_size,
        n_jobs=n_jobs,
        union_runner=union_runner,
        overlay_runner=overlay_runner,
        **kwargs,
    )


def _eliminate_by_area(
    gdf: GeoDataFrame,
    to_eliminate: GeoDataFrame,
    max_distance: int | float | None,
    sort_ascending: bool,
    ignore_index: bool,
    aggfunc: str | dict | list | None,
    predicate: str,
    fix_double: bool,
    grid_size,
    n_jobs: int,
    union_runner: UnionRunner,
    overlay_runner: OverlayRunner,
    **kwargs,
) -> GeoDataFrame:
    _recurse = kwargs.pop("_recurse", False)

    if not len(to_eliminate) or not len(gdf):
        return gdf, to_eliminate

    if isinstance(gdf, (list, tuple)):
        was_multiple_gdfs = True
        original_cols = [df.columns for df in gdf]
        gdf = pd.concat(df.assign(**{"_df_idx": i}) for i, df in enumerate(gdf))
    else:
        was_multiple_gdfs = False

    crs = gdf.crs
    geom_type = get_geom_type(gdf)

    if not ignore_index:
        idx_mapper = dict(enumerate(gdf.index))
        idx_name = gdf.index.name
        idx_mapper_to_eliminate = dict(enumerate(to_eliminate.index))
        idx_name_to_eliminate = to_eliminate.index.name

    gdf = make_all_singlepart(gdf).reset_index(drop=True)
    to_eliminate = make_all_singlepart(to_eliminate).reset_index(drop=True)

    gdf["_area"] = gdf.area
    gdf["_dissolve_idx"] = gdf.index

    if max_distance:
        to_join = gdf[["_area", "_dissolve_idx", "geometry"]]
        to_join.geometry = to_join.buffer(max_distance)
        joined = to_eliminate.sjoin(to_join, predicate=predicate, how="left")
    else:
        joined = to_eliminate.sjoin(
            gdf[["_area", "_dissolve_idx", "geometry"]], predicate=predicate, how="left"
        )

    # as DataFrames because GeoDataFrame constructor is expensive
    joined = (
        pd.DataFrame(joined)
        .drop(columns="index_right")
        .sort_values("_area", ascending=sort_ascending)
        .loc[lambda x: ~x.index.duplicated(keep="first")]
    )

    gdf = pd.DataFrame(gdf)

    notna = joined.loc[lambda x: x["_dissolve_idx"].notna()]

    eliminated = _eliminate(
        gdf,
        notna,
        aggfunc,
        crs,
        fix_double=fix_double,
        grid_size=grid_size,
        n_jobs=n_jobs,
        union_runner=union_runner,
        overlay_runner=overlay_runner,
        **kwargs,
    )

    eliminated = eliminated.drop(
        ["_dissolve_idx", "_area", "_eliminate_idx", "_dissolve_idx"],
        axis=1,
        errors="ignore",
    )

    out = GeoDataFrame(
        eliminated,
        geometry="geometry",
        crs=crs,
    ).pipe(clean_geoms)

    isolated = (
        GeoDataFrame(
            joined.loc[joined["_dissolve_idx"].isna()], geometry="geometry", crs=crs
        )
        .drop(
            ["_dissolve_idx", "_area", "_eliminate_idx", "_dissolve_idx"],
            axis=1,
            errors="ignore",
        )
        .pipe(clean_geoms)
    )

    if not ignore_index:
        out.index = out.index.map(idx_mapper)
        out.index.name = idx_name
        isolated.index = isolated.index.map(idx_mapper_to_eliminate)
        isolated.index.name = idx_name_to_eliminate

    if geom_type != "mixed":
        out = to_single_geom_type(out, geom_type)

    out = out.reset_index(drop=True) if ignore_index else out

    if not _recurse and len(isolated):
        out, isolated = _recursively_eliminate_new_neighbors(
            out,
            isolated,
            func=_eliminate_by_area,
            max_distance=max_distance,
            sort_ascending=sort_ascending,
            fix_double=fix_double,
            predicate=predicate,
            ignore_index=ignore_index,
            aggfunc=aggfunc,
            grid_size=grid_size,
            union_runner=union_runner,
            overlay_runner=overlay_runner,
            n_jobs=n_jobs,
        )

    if not was_multiple_gdfs:
        return out, isolated

    gdfs = ()
    for i, cols in enumerate(original_cols):
        df = out.loc[out["_df_idx"] == i, cols]
        gdfs += (df,)

    return (*gdfs, isolated)


def _eliminate(
    gdf,
    to_eliminate,
    aggfunc,
    crs,
    fix_double,
    grid_size,
    n_jobs,
    overlay_runner,
    union_runner,
    **kwargs,
):

    if not len(to_eliminate):
        return gdf

    if union_runner is None:
        union_runner = _get_instance(config, "union_runner", n_jobs=n_jobs)
    if overlay_runner is None:
        overlay_runner = _get_instance(config, "overlay_runner", n_jobs=n_jobs)

    gdf["_range_idx_elim"] = range(len(gdf))

    in_to_eliminate = gdf["_dissolve_idx"].isin(to_eliminate["_dissolve_idx"])
    to_dissolve = gdf.loc[in_to_eliminate]
    not_to_dissolve = gdf.loc[~in_to_eliminate].set_index("_dissolve_idx")

    to_eliminate["_to_eliminate"] = 1

    if aggfunc is None:
        concatted = pd.concat(
            [to_dissolve, to_eliminate[["_to_eliminate", "_dissolve_idx", "geometry"]]]
        )
        aggfunc = "first"
    else:
        concatted = pd.concat([to_dissolve, to_eliminate])

    one_hit = concatted.loc[
        lambda x: (x.groupby("_dissolve_idx").transform("size") == 1)
        & (x["_dissolve_idx"].notna())
    ].set_index("_dissolve_idx")

    assert len(one_hit) == 0

    many_hits = concatted.loc[
        lambda x: x.groupby("_dissolve_idx").transform("size") > 1
    ]

    if not len(many_hits):
        return one_hit

    # aggregate all columns except geometry
    kwargs.pop("as_index", None)
    eliminated = (
        many_hits.drop(columns="geometry")
        .groupby("_dissolve_idx", **kwargs)
        .agg(aggfunc)
        .drop(["_area"], axis=1, errors="ignore")
    )

    # aggregate geometry
    if fix_double:
        assert eliminated.index.is_unique

        many_hits = many_hits.set_index("_dissolve_idx")
        many_hits["_row_idx"] = range(len(many_hits))

        # TODO kan dette fikses trygt med .duplicated og ~x.duplicated?
        eliminators: pd.Series = many_hits.loc[
            many_hits["_to_eliminate"] != 1, "geometry"
        ]
        to_be_eliminated = many_hits.loc[many_hits["_to_eliminate"] == 1]

        # all_geoms: pd.Series = gdf.set_index("_dissolve_idx").geometry
        all_geoms: pd.Series = gdf.geometry

        # create DataFrame of intersection pairs
        tree = STRtree(all_geoms.values)
        left, right = tree.query(
            to_be_eliminated.geometry.values, predicate="intersects"
        )

        pairs = pd.Series(right, index=left).to_frame("right")
        pairs["_dissolve_idx"] = pairs.index.map(
            dict(enumerate(to_be_eliminated.index))
        )

        soon_erased = to_be_eliminated.iloc[pairs.index]
        intersecting = all_geoms.iloc[pairs["right"]]

        shoud_not_erase = soon_erased.index != intersecting.index
        soon_erased = soon_erased[shoud_not_erase]
        intersecting = intersecting[shoud_not_erase]

        missing = to_be_eliminated.loc[
            (~to_be_eliminated["_row_idx"].isin(soon_erased["_row_idx"])), "geometry"
        ]

        # allign and aggregate by dissolve index to not get duplicates in difference
        intersecting.index = soon_erased.index

        soon_erased = union_runner.run(soon_erased, level=0, grid_size=grid_size)
        intersecting = union_runner.run(intersecting, level=0, grid_size=grid_size)

        assert soon_erased.index.equals(soon_erased.index)

        soon_erased.loc[:] = overlay_runner.run(
            difference,
            soon_erased.to_numpy(),
            intersecting.to_numpy(),
            grid_size=grid_size,
            geom_type="polygon",
        )

        missing = union_runner.run(missing, level=0, grid_size=grid_size)

        missing = make_all_singlepart(missing).loc[lambda x: x.area > 0]

        soon_erased = make_all_singlepart(soon_erased).loc[lambda x: x.area > 0]

        soon_eliminated = pd.concat([eliminators, soon_erased, missing])
        more_than_one = get_num_geometries(soon_eliminated.values) > 1

        soon_eliminated.loc[more_than_one] = soon_eliminated.loc[more_than_one].apply(
            _unary_union_for_notna
        )

        eliminated["geometry"] = union_runner.run(
            soon_eliminated, level=0, grid_size=grid_size
        )
    else:
        eliminated["geometry"] = union_runner.run(
            many_hits, by="_dissolve_idx", grid_size=grid_size, n_jobs=n_jobs
        )

    # setting crs on the GeometryArrays to avoid warning in concat
    not_to_dissolve.geometry.values.crs = crs
    try:
        eliminated.geometry.values.crs = crs
    except AttributeError:
        pass
    one_hit.geometry.values.crs = crs

    to_concat = [not_to_dissolve, eliminated, one_hit]

    assert all(df.index.name == "_dissolve_idx" for df in to_concat)

    out = pd.concat(to_concat).sort_index()

    duplicated_geoms = _get_intersecting_geometries(
        GeoDataFrame(
            {
                "geometry": out.geometry.values,
                "_range_idx_elim_dups": out["_range_idx_elim"].values,
            },
        ),
        geom_type="polygon",
        keep_geom_type=True,
        n_jobs=n_jobs,
        predicate="intersects",
    ).pipe(clean_geoms)
    duplicated_geoms.geometry = duplicated_geoms.buffer(-PRECISION)
    duplicated_geoms = duplicated_geoms.pipe(clean_geoms)

    if len(duplicated_geoms):
        hits_in_original_df = duplicated_geoms.sjoin(
            GeoDataFrame(
                {
                    "geometry": gdf.geometry.values,
                    "_range_idx_elim": gdf["_range_idx_elim"].values,
                },
            ),
            how="inner",
        )

        should_be_erased = hits_in_original_df.loc[
            lambda x: x["_range_idx_elim"] != x["_range_idx_elim_dups"]
        ]

        should_be_erased_idx = list(
            sorted(should_be_erased["_range_idx_elim_dups"].unique())
        )
        should_erase = (
            should_be_erased.groupby("_range_idx_elim_dups")["geometry"]
            .agg(lambda x: make_valid(union_all(x)))
            .sort_index()
        )

        # aligining out with "should_erase" before rowwise difference
        out = out.sort_values("_range_idx_elim")
        assert out["_range_idx_elim"].is_unique
        to_be_erased_idx = out["_range_idx_elim"].isin(should_be_erased_idx)

        out.loc[to_be_erased_idx, "geometry"] = make_valid(
            difference(
                out.loc[
                    to_be_erased_idx,
                    "geometry",
                ].values,
                should_erase.values,
            )
        )

        from ..maps.maps import explore

        # display(hits_in_original_df)
        # display(should_be_erased.assign(area=lambda x: x.area))

        explore(
            gdf=to_gdf(gdf, 25833),
            out=to_gdf(out, 25833),
            should_be_erased=to_gdf(should_be_erased, 25833),
            duplicated_geoms=duplicated_geoms.set_crs(25833),
            eli=GeoDataFrame(
                {
                    "geometry": out.geometry.values,
                    "_range_idx_elim": out["_range_idx_elim"].values,
                },
                crs=25833,
            ),
            center=_DEBUG_CONFIG["center"],
        )

    _try_debug_print("inni _eliminate")
    _try_debug_print(duplicated_geoms)
    explore_locals(center=_DEBUG_CONFIG["center"])

    return out.drop(columns=["_to_eliminate", "_range_idx_elim"])



[docs]
def clean_dissexp(df: GeoDataFrame, dissolve_func: Callable, **kwargs) -> GeoDataFrame:
    """Experimental."""
    original_points = GeoDataFrame(
        {"geometry": get_parts(extract_unique_points(df.geometry.values))}
    )[lambda x: ~x.geometry.duplicated()]

    dissolved = df.copy()

    try:
        dissolved.geometry = dissolved.buffer(PRECISION, resolution=1, join_style=2)
    except AttributeError as e:
        if isinstance(dissolved, GeoSeries):
            dissolved.loc[:] = dissolved.buffer(PRECISION, resolution=1, join_style=2)
        else:
            raise e

    dissolved = dissolve_func(dissolved, **kwargs)

    try:
        dissolved.geometry = dissolved.buffer(-PRECISION, resolution=1, join_style=2)
    except AttributeError as e:
        if isinstance(dissolved, GeoSeries):
            dissolved.loc[:] = dissolved.buffer(-PRECISION, resolution=1, join_style=2)
        else:
            raise e

    dissolved = dissolved.loc[lambda x: ~x.geometry.is_empty]
    dissolved = dissolved.explode(ignore_index=True)

    original_points = sfilter_inverse(original_points, dissolved.buffer(-PRECISION))

    snapped = (
        PolygonsAsRings(
            dissolved.geometry,
        )
        .apply_numpy_func(
            _snap_points_back,
            kwargs={"snap_to": original_points, "tolerance": PRECISION},
        )
        .to_numpy()
    )

    try:
        dissolved.geometry = snapped
    except AttributeError as e:
        if isinstance(dissolved, GeoSeries):
            dissolved.loc[:] = snapped
        else:
            raise e

    return dissolved



def _snap_points_back(rings, snap_to, tolerance):
    points = GeoDataFrame({"geometry": extract_unique_points(rings)})
    points = points.explode(index_parts=True)

    snap_to["geom_right"] = snap_to.geometry
    nearest = points.sjoin_nearest(snap_to, max_distance=tolerance)
    points.loc[nearest.index, points.geometry.name] = nearest["geom_right"]

    new_rings = points.groupby(level=0)[points.geometry.name].agg(LinearRing)
    return new_rings


def close_thin_holes(gdf: GeoDataFrame, tolerance: int | float) -> GeoDataFrame:
    gdf = make_all_singlepart(gdf)
    holes = get_holes(gdf)
    inside_holes = union_all(sfilter(gdf, holes, predicate="within").geometry.values)

    def to_none_if_thin(geoms):
        if not len(geoms):
            return geoms
        try:
            polys = polygons(geoms)
        except GEOSException:
            polys = make_valid(polygons(make_valid(geoms)))
        if inside_holes is not None:
            polys = difference(polys, inside_holes)
        buffered_in = buffer(polys, -(tolerance / 2))
        return np.where(is_empty(buffered_in), None, geoms)

    if not (gdf.geom_type == "Polygon").all():
        raise ValueError(gdf.geom_type.value_counts())

    return PolygonsAsRings(gdf).apply_numpy_func_to_interiors(to_none_if_thin).to_gdf()



[docs]
def close_all_holes(
    gdf: GeoDataFrame | GeoSeries,
    *,
    ignore_islands: bool = False,
    copy: bool = True,
) -> GeoDataFrame | GeoSeries:
    """Closes all holes in polygons.

    It takes a GeoDataFrame or GeoSeries of polygons and
    returns the outer circle.

    Args:
        gdf: GeoDataFrame or GeoSeries of polygons.
        copy: if True (default), the input GeoDataFrame or GeoSeries is copied.
            Defaults to True.
        ignore_islands: If False (default), polygons inside the holes (islands)
            will be erased from the output geometries. If True, the entire
            holes will be closed and the islands kept, meaning there might be
            duplicate surfaces in the resulting geometries.
            Note that ignoring islands is a lot faster.

    Returns:
        A GeoDataFrame or GeoSeries of polygons with closed holes in the geometry
        column.

    Examples:
    ---------
    Let's create a circle with a hole in it.

    >>> point = sg.to_gdf([260000, 6650000], crs=25833)
    >>> point
                            geometry
    0  POINT (260000.000 6650000.000)
    >>> circle = sg.buff(point, 1000)
    >>> small_circle = sg.buff(point, 500)
    >>> circle_with_hole = circle.overlay(small_circle, how="difference")
    >>> circle_with_hole.area
    0    2.355807e+06
    dtype: float64

    Close the hole.

    >>> holes_closed = sg.close_all_holes(circle_with_hole)
    >>> holes_closed.area
    0    3.141076e+06
    dtype: float64
    """
    if not isinstance(gdf, (GeoDataFrame, GeoSeries)):
        raise ValueError(
            f"'gdf' should be of type GeoDataFrame or GeoSeries. Got {type(gdf)}"
        )

    if not len(gdf):
        return gdf

    if copy:
        gdf = gdf.copy()

    gdf = make_all_singlepart(gdf)

    if ignore_islands:
        geoms = gdf.geometry if isinstance(gdf, GeoDataFrame) else gdf
        holes_closed = make_valid(polygons(get_exterior_ring(geoms)))
        if isinstance(gdf, GeoDataFrame):
            gdf.geometry = holes_closed
            return gdf
        elif isinstance(gdf, GeoSeries):
            return GeoSeries(holes_closed, crs=gdf.crs)
        else:
            return holes_closed

    all_geoms = make_valid(union_all(gdf.geometry.values))
    if isinstance(gdf, GeoDataFrame):
        gdf.geometry = gdf.geometry.map(
            lambda x: _close_all_holes_no_islands(x, all_geoms)
        )
        return gdf
    else:
        return gdf.map(lambda x: _close_all_holes_no_islands(x, all_geoms))




[docs]
def close_small_holes(
    gdf: GeoDataFrame | GeoSeries,
    max_area: int | float,
    *,
    ignore_islands: bool = False,
    copy: bool = True,
) -> GeoDataFrame | GeoSeries:
    """Closes holes in polygons if the area is less than the given maximum.

    It takes a GeoDataFrame or GeoSeries of polygons and
    fills the holes that are smaller than the specified area given in units of
    either square meters ('max_m2') or square kilometers ('max_km2').

    Args:
        gdf: GeoDataFrame or GeoSeries of polygons.
        max_area: The maximum area in the unit of the GeoDataFrame's crs.
        ignore_islands: If False (default), polygons inside the holes (islands)
            will be erased from the "hole" geometries before the area is calculated.
            If True, the entire polygon interiors will be considered, meaning there
            might be duplicate surfaces in the resulting geometries.
            Note that ignoring islands is a lot faster.
        copy: if True (default), the input GeoDataFrame or GeoSeries is copied.
            Defaults to True.

    Returns:
        A GeoDataFrame or GeoSeries of polygons with closed holes in the geometry
        column.

    Raises:
        ValueError: If the coordinate reference system of the GeoDataFrame is not in
            meter units.
        ValueError: If both 'max_m2' and 'max_km2' is given.

    Examples:
    ---------
    Let's create a circle with a hole in it.

    >>> point = sg.to_gdf([260000, 6650000], crs=25833)
    >>> point
                            geometry
    0  POINT (260000.000 6650000.000)
    >>> circle = sg.buff(point, 1000)
    >>> small_circle = sg.buff(point, 500)
    >>> circle_with_hole = circle.overlay(small_circle, how="difference")
    >>> circle_with_hole.area
    0    2.355807e+06
    dtype: float64

    Close holes smaller than 1 square kilometer (1 million square meters).

    >>> holes_closed = sg.close_small_holes(circle_with_hole, max_area=1_000_000)
    >>> holes_closed.area
    0    3.141076e+06
    dtype: float64

    The hole will not be closed if it is larger.

    >>> holes_closed = sg.close_small_holes(circle_with_hole, max_area=1_000)
    >>> holes_closed.area
    0    2.355807e+06
    dtype: float64
    """
    if not isinstance(gdf, (GeoSeries, GeoDataFrame)):
        raise ValueError(
            f"'gdf' should be of type GeoDataFrame or GeoSeries. Got {type(gdf)}"
        )

    if not len(gdf):
        return gdf

    if copy:
        gdf = gdf.copy()

    gdf = make_all_singlepart(gdf)

    if not ignore_islands:
        all_geoms = make_valid(union_all(gdf.geometry.values))

        if isinstance(gdf, GeoDataFrame):
            gdf.geometry = gdf.geometry.map(
                lambda x: _close_small_holes_no_islands(x, max_area, all_geoms)
            )
            return gdf
        else:
            return gdf.map(
                lambda x: _close_small_holes_no_islands(x, max_area, all_geoms)
            )
    else:
        geoms = (
            gdf.geometry.to_numpy() if isinstance(gdf, GeoDataFrame) else gdf.to_numpy()
        )
        exteriors = get_exterior_ring(geoms)
        assert len(exteriors) == len(geoms)

        max_rings = max(get_num_interior_rings(geoms))

        if not max_rings:
            return gdf

        # looping through max for all geoms since arrays must be equal length
        interiors = np.array(
            [[get_interior_ring(geom, i) for i in range(max_rings)] for geom in geoms]
        )
        assert interiors.shape == (len(geoms), max_rings), interiors.shape

        areas = area(polygons(interiors))
        interiors[(areas < max_area) | np.isnan(areas)] = None

        results = polygons(exteriors, interiors)

        if isinstance(gdf, GeoDataFrame):
            gdf.geometry = results
            return gdf
        else:
            return GeoSeries(results, crs=gdf.crs)



def _close_small_holes_no_islands(poly, max_area, all_geoms):
    """Closes small holes within one shapely geometry of polygons."""
    # start with a list containing the polygon,
    # then append all holes smaller than 'max_km2' to the list.
    holes_closed = [poly]
    singlepart = get_parts(poly)
    for part in singlepart:
        n_interior_rings = get_num_interior_rings(part)

        if not (n_interior_rings):
            continue

        for n in range(n_interior_rings):
            hole = polygons(get_interior_ring(part, n))
            try:
                no_islands = union_all(hole.difference(all_geoms))
            except GEOSException:
                no_islands = make_valid(union_all(hole.difference(all_geoms)))

            if area(no_islands) < max_area:
                holes_closed.append(no_islands)

    return make_valid(union_all(holes_closed))


def _close_all_holes_no_islands(poly, all_geoms):
    """Closes all holes within one shapely geometry of polygons."""
    # start with a list containing the polygon,
    # then append all holes smaller than 'max_km2' to the list.
    holes_closed = [poly]
    singlepart = get_parts(poly)
    for part in singlepart:
        n_interior_rings = get_num_interior_rings(part)

        for n in range(n_interior_rings):
            hole = polygons(get_interior_ring(part, n))
            try:
                no_islands = union_all(hole.difference(all_geoms))
            except GEOSException:
                no_islands = make_valid(union_all(hole.difference(all_geoms)))

            holes_closed.append(no_islands)

    return make_valid(union_all(holes_closed))



[docs]
def get_gaps(
    gdf: GeoDataFrame,
    include_interiors: bool = False,
    grid_size: float | int | None = None,
) -> GeoDataFrame:
    """Get the gaps between polygons.

    Args:
        gdf: GeoDataFrame of polygons.
        include_interiors: If False (default), the holes inside individual polygons
            will not be included as gaps.
        grid_size: Rounding of the coordinates.

    Note:
        See get_holes to find holes inside singlepart polygons.

    Returns:
        GeoDataFrame of polygons with only a geometry column.
    """
    if not len(gdf):
        return GeoDataFrame({"geometry": []}, crs=gdf.crs)

    if not include_interiors:
        gdf = close_all_holes(gdf)

    bbox = GeoDataFrame(
        {"geometry": [box(*tuple(gdf.total_bounds)).buffer(1)]}, crs=gdf.crs
    )

    bbox_diff = make_all_singlepart(
        clean_overlay(
            bbox, gdf, how="difference", geom_type="polygon", grid_size=grid_size
        )
    )

    # remove the outer "gap", i.e. the surrounding area
    bbox_ring = get_exterior_ring(bbox.geometry.values)
    without_outer_ring = sfilter_inverse(bbox_diff, bbox_ring)
    return without_outer_ring.reset_index(drop=True)




[docs]
def get_holes(gdf: GeoDataFrame, as_polygons: bool = True) -> GeoDataFrame:
    """Get the holes inside polygons.

    Args:
        gdf: GeoDataFrame of polygons.
        as_polygons: If True (default), the holes will be returned as polygons.
            If False, they will be returned as LinearRings.

    Note:
        See get_gaps to find holes/gaps between undissolved polygons.

    Returns:
        GeoDataFrame of polygons or linearrings with only a geometry column.
    """
    if not len(gdf):
        return GeoDataFrame({"geometry": []}, index=gdf.index, crs=gdf.crs)

    def as_linearring(x):
        return x

    astype = polygons if as_polygons else as_linearring

    geoms = make_all_singlepart(gdf.geometry).to_numpy()

    rings = [
        GeoSeries(astype(get_interior_ring(geoms, i)), crs=gdf.crs)
        for i in range(max(get_num_interior_rings(geoms)))
    ]

    return (
        GeoDataFrame({"geometry": (pd.concat(rings).pipe(clean_geoms).sort_index())})
        if rings
        else GeoDataFrame({"geometry": []}, crs=gdf.crs)
    )



def split_polygons_by_lines(polygons: GeoSeries, lines: GeoSeries) -> GeoSeries:
    idx_mapper = dict(enumerate(polygons.index))
    idx_name = polygons.index.name
    polygons = polygons.copy()
    polygons.index = range(len(polygons))

    # use pandas to explode faster (from list instead of GeoSeries.explode)
    splitted = pd.Series(polygons.geometry.to_numpy())
    lines = to_geoseries(lines)
    lines.index = range(len(lines))

    # find intersection pairs to split relevant polygon for each line
    tree = STRtree(splitted.values)
    left, right = tree.query(lines.values, predicate="intersects")
    pairs = pd.Series(right, index=left)

    lines = lines.loc[lambda x: x.index.isin(pairs.index)]

    for i, line in lines.items():
        intersecting = pairs.loc[[i]].values
        try:
            splitted.loc[intersecting] = splitted.loc[intersecting].apply(
                lambda poly: SplitOp._split_polygon_with_line(poly, line) or poly
            )
        except TypeError:
            # if we got multipolygon
            splitted = splitted.apply(get_parts).explode()
            splitted.loc[intersecting] = splitted.loc[intersecting].apply(
                lambda poly: SplitOp._split_polygon_with_line(poly, line) or poly
            )
        splitted = splitted.explode()

    if isinstance(polygons, GeoDataFrame):
        polygons = polygons.loc[splitted.index]
        polygons.geometry = splitted
        polygons.index = polygons.index.map(idx_mapper)
        polygons.index.name = idx_name
        return polygons
    else:
        splitted.index = splitted.index.map(idx_mapper)
        splitted.index.name = idx_name
        return splitted


def split_by_neighbors(
    df: GeoDataFrame,
    split_by: GeoDataFrame,
    tolerance: int | float,
    grid_size: float | int | None = None,
) -> GeoDataFrame:
    if not len(df):
        return df

    df = make_all_singlepart(df)

    split_by = split_by.copy()

    intersecting_lines = (
        clean_overlay(
            to_lines(split_by),
            df.buffer(tolerance).to_frame("geometry"),
            how="intersection",
            grid_size=grid_size,
        )
        .pipe(get_line_segments)
        .reset_index(drop=True)
    )

    endpoints = intersecting_lines.boundary.explode(index_parts=False)

    lines = extend_lines(
        endpoints.loc[lambda x: ~x.index.duplicated(keep="first")].values,
        endpoints.loc[lambda x: ~x.index.duplicated(keep="last")].values,
        distance=tolerance * 3,
    )

    return split_polygons_by_lines(df, lines)