Source code for sgis.networkanalysis.finding_isolated_networks

"""Functions for Finding network components in a GeoDataFrame of lines."""

import networkx as nx
import pandas as pd
from geopandas import GeoDataFrame

from .nodes import make_node_ids


def get_connected_components(gdf: GeoDataFrame) -> GeoDataFrame:
    """Finds the largest network component.

    It takes a GeoDataFrame of lines and finds the lines that are
    part of the largest connected network component. These lines are given
    the value 1 in the added column 'connected', while isolated network islands
    get the value 0.

    Uses the connected_components function from the networkx package.

    Args:
        gdf: A GeoDataFrame of lines.

    Returns:
        The GeoDataFrame with a new column "connected". An empty input is
        returned as a new frame with an empty "connected" column.

    Examples:
    ---------
    >>> from sgis import read_parquet_url, get_connected_components
    >>> roads = read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_oslo_2022.parquet")

    >>> roads = get_connected_components(roads)
    >>> roads.connected.value_counts()
    1.0    85638
    0.0     7757
    Name: connected, dtype: int64

    Removing the isolated network islands.

    >>> connected_roads = get_connected_components(roads).loc[lambda x: x["connected"] == 1]
    >>> connected_roads.connected.value_counts()
    1.0    85638
    Name: connected, dtype: int64
    """
    if not len(gdf):
        # Without any lines the graph has no components, and max() below
        # would raise ValueError. Return early with an empty column instead.
        return gdf.assign(connected=pd.Series(index=gdf.index, dtype="float64"))

    gdf, _ = make_node_ids(gdf)

    # The graph is keyed on stringified node ids, so the same stringified
    # series is reused for the lookup further down.
    source_ids = gdf["source"].astype(str)
    target_ids = gdf["target"].astype(str)

    graph = nx.Graph()
    graph.add_edges_from(zip(source_ids, target_ids, strict=True))

    largest_component = max(nx.connected_components(graph), key=len)

    # Both end nodes of a line belong to the same component, so checking the
    # source node is enough. Nodes outside the largest component map to NaN,
    # which is filled with 0.
    in_largest = dict.fromkeys(largest_component, 1)
    gdf["connected"] = source_ids.map(in_largest).fillna(0)

    # Remove the helper columns that were added to build the graph.
    return gdf.drop(
        ["source_wkt", "target_wkt", "source", "target", "n_source", "n_target"],
        axis=1,
    )
def get_component_size(gdf: GeoDataFrame) -> GeoDataFrame:
    """Finds the size of each component in the network.

    Takes a GeoDataFrame of lines and creates the columns "component_index"
    and "component_size". "component_index" is the running number of the
    network component the line is a part of, and "component_size" is the
    number of nodes in that component.

    Uses the connected_components function from the networkx package.

    Args:
        gdf: a GeoDataFrame of lines.

    Returns:
        A GeoDataFrame with the new columns "component_index" and
        "component_size". For an empty input, both columns are added
        with no values and the frame is returned as is.

    Examples:
    ---------
    >>> from sgis import read_parquet_url, get_component_size
    >>> roads = read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_oslo_2022.parquet")

    >>> roads = get_component_size(roads)
    >>> roads["component_size"].value_counts().head()
    component_size
    79180    85638
    2         1601
    4          688
    6          406
    3          346
    Name: count, dtype: int64
    """
    if not len(gdf):
        gdf["component_index"] = None
        gdf["component_size"] = None
        return gdf

    gdf, _ = make_node_ids(gdf)

    # The graph is keyed on stringified node ids, so the same stringified
    # series is reused for the lookups further down.
    source_ids = gdf["source"].astype(str)
    target_ids = gdf["target"].astype(str)

    graph = nx.Graph()
    graph.add_edges_from(zip(source_ids, target_ids, strict=True))

    # Plain dict lookups per node; avoids building a DataFrame with one
    # column per node just to transpose it.
    index_by_node: dict[str, int] = {}
    size_by_node: dict[str, int] = {}
    for i, component in enumerate(nx.connected_components(graph)):
        size = len(component)
        for node_id in component:
            index_by_node[node_id] = i
            size_by_node[node_id] = size

    # Both end nodes of a line belong to the same component, so the source
    # node alone identifies the component of the line.
    gdf["component_index"] = source_ids.map(index_by_node)
    gdf["component_size"] = source_ids.map(size_by_node)

    # Remove the helper columns that were added to build the graph.
    return gdf.drop(
        ["source_wkt", "target_wkt", "source", "target", "n_source", "n_target"],
        axis=1,
    )