Source code for sgis.networkanalysis.networkanalysisrules

"""The NetworkAnalysisRules class sets the rules for the network analysis.

The class is to be used as the 'rules' parameter in the NetworkAnalysis
class.
"""

from copy import copy
from copy import deepcopy
from dataclasses import dataclass

from geopandas import GeoDataFrame

from ..helpers import unit_is_meters


[docs] @dataclass class NetworkAnalysisRules: """Sets the rules for the network analysis. To be used as the 'rules' parameter in the NetworkAnalysis class. Args: weight: Either a column in the GeoDataFrame of the Network or 'meters'/'metres'. A 'minutes' column can be created with the 'make_directed_network' or 'make_directed_network_norway' functions. directed: Whether the lines will be considered traversable in both directions. search_tolerance: distance to search for nodes in the network. Origins and destinations further away from the network than the search_tolerance will not find any paths. Defaults to 250. search_factor: number of meters and percent to add to the closest distance to a node when connecting origins and destinations to the network. Defaults to 0, meaning only the closest node is used. If search_factor is 10 and the closest node is 1 meter away, paths will be created from the point and all nodes within 11.1 meters. If the closest node is 100 meters away, paths will be created with all nodes within 120 meters. It can be wise to set a higher search_factor only for the origins and destinations that are causing problems in a separate analysis run. split_lines: If True (default), the closest line to each point will be split in two at the nearest excact point. The weight of the split lines are then adjusted to the new length. If False, the points will be connected to the endpoints of the network lines. nodedist_kmh: When using "minutes" as weight, this sets the speed in kilometers per hour for the edges between origins/destinations and the network nodes that connect them. Defaults to None, meaning 0 weight is added for the edges. nodedist_multiplier: When using "meters" as weight, this sets the weight for the edges between origins/destinations and the network nodes that connect them. Defaults to None, meaning 0 weight is added for these edges. If set to 1, the weight will be equal to the straigt line distance. Examples: --------- Read testdata. >>> import sgis as sg >>> roads = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/roads_oslo_2022.parquet") >>> points = sg.read_parquet_url("https://media.githubusercontent.com/media/statisticsnorway/ssb-sgis/main/tests/testdata/points_oslo.parquet") Let's start by setting the default rules. 'weight' and 'directed' have no default values. >>> rules = sg.NetworkAnalysisRules(weight="minutes", directed=True) >>> directed_roads = sg.get_connected_components(roads).loc[lambda x: x["connected"] == 1].pipe(sg.make_directed_network_norway, dropnegative=True) >>> nwa = sg.NetworkAnalysis(network=directed_roads, rules=rules, detailed_log=True) >>> nwa NetworkAnalysis( network=Network(6364 km, percent_bidirectional=87), rules=NetworkAnalysisRules(weight=minutes, directed=True, search_tolerance=250, search_factor=0, split_lines=True, ...), log=True, detailed_log=True, ) Setting 'split_lines' to True, means the points will be connected to the closest part of the closest network line. If False, the lines are connected to the closest endpoint of the lines. split_lines defaults to False, since splitting lines takes some time and doesn't make a huge difference in most cases. >>> od = nwa.od_cost_matrix(points, points) >>> nwa.rules.split_lines = True >>> od = nwa.od_cost_matrix(points, points) >>> nwa.log[['split_lines', 'percent_missing', 'cost_mean']] split_lines percent_missing cost_mean 0 False 0.9966 15.270462 1 True 0.8973 15.249900 >>> nwa.rules.split_lines = False Setting a high search_tolerance will make faraway points find their way to the network. >>> for i in [100, 250, 500, 1000]: ... nwa.rules.search_tolerance = i ... od = nwa.od_cost_matrix(points, points) >>> nwa.log.iloc[-4:][['percent_missing', 'cost_mean', 'search_tolerance', 'search_factor']] percent_missing cost_mean search_tolerance search_factor 2 2.3840 15.235559 100 0 3 0.9966 15.270462 250 0 4 0.7976 15.273579 500 0 5 0.5984 15.268614 1000 0 High search_tolerance won't affect how the points close to the network are connected to network nodes. Points trapped behind deadend oneway streets, can find their way out with a higher search_factor. >>> nwa.rules.search_tolerance = 250 >>> for i in [0, 10, 35, 100]: ... nwa.rules.search_factor = i ... od = nwa.od_cost_matrix(points, points) >>> nwa.log.iloc[-4:][['percent_missing', 'cost_mean', 'search_tolerance', 'search_factor']] percent_missing cost_mean search_tolerance search_factor 6 0.9966 15.270462 250 0 7 0.5987 15.063283 250 10 8 0.4991 14.636172 250 35 9 0.3994 13.680307 250 100 The remaining 0.4 percent missing are from/to two points, one on an island with no brigde and one at the edge of the road network (would require a larger network). These two points only find themselves, and thus has 999 missing values. >>> n_missing = od.groupby("origin").minutes.agg(lambda x: x.isna().sum()) >>> n_missing.n_largest(3) 59 999 510 999 0 2 Name: minutes, dtype: int64 By default, the distance from origin/destination to the network nodes is given a weight of 0. This means, if the search_tolerance is high, points far away from the network will get unrealisticly low travel times/distances. The weight from origin/ destination to the network nodes can be set with the 'nodedist_kmh' parameter if the weight is 'minutes', and the 'nodedist_multiplier' if the weight is 'meters'. If the weight is 'minutes', setting 'nodedist_kmh' to 5 means a distance of 1000 meters will get a weight of 12 minutes. >>> nwa.rules.search_tolerance = 5000 >>> for i in [3, 10, 50]: ... nwa.rules.nodedist_kmh = i ... od = nwa.od_cost_matrix(points, points) ... >>> nwa.log.iloc[-3:][['nodedist_kmh', 'cost_mean']] nodedist_kmh cost_mean 10 3 15.924197 11 10 14.817717 12 50 13.964457 If the weight is 'meters', setting nodedist_multiplier=1 will make the distance to nodes count as its straight line distance. >>> rules = NetworkAnalysisRules( ... weight="meters", ... search_tolerance=5000, ... ) >>> nwa = NetworkAnalysis(network=directed_roads, rules=rules) >>> od = nwa.od_cost_matrix(points, points) >>> nwa.rules.nodedist_multiplier = 1 >>> od = nwa.od_cost_matrix(points, points) >>> nwa.log[['nodedist_multiplier', 'cost_mean']] nodedist_multiplier cost_mean 0 None 10228.400228 1 1 10277.926186 """ directed: bool weight: str search_tolerance: int = 250 search_factor: int = 0 split_lines: bool = True nodedist_multiplier: int | float | None = None nodedist_kmh: int | float | None = None def _update_rules(self) -> None: """Stores the rules as separate attributes. Used for checking whether the rules have changed and the graph have to be remade. """ self._directed = self.directed self._weight = self.weight self._search_tolerance = self.search_tolerance self._search_factor = self.search_factor self._split_lines = self.split_lines self._nodedist_multiplier = self.nodedist_multiplier self._nodedist_kmh = self.nodedist_kmh def _rules_have_changed(self) -> bool: """Checks if any of the rules have changed since the graph was last created. If no rules have changed, time can be saved by not remaking the graph (the network and the points have to be unchanged as well). """ if self.directed != self._directed: return True if self.weight != self._weight: return True if self.search_factor != self._search_factor: return True if self.search_tolerance != self._search_tolerance: return True if self.split_lines != self._split_lines: return True if self.nodedist_multiplier != self._nodedist_multiplier: return True if self.nodedist_kmh != self._nodedist_kmh: return True def _validate_weight(self, gdf: GeoDataFrame) -> GeoDataFrame: if "meter" in self.weight or "metre" in self.weight and unit_is_meters(gdf): if self.nodedist_kmh: raise ValueError("Cannot set 'nodedist_kmh' when 'weight' is meters.") gdf[self.weight] = gdf.length return gdf # allow abbreviation of 'minutes' to be nice elif ( self.weight == "min" or "minut" in self.weight and "minutes" in gdf.columns ): if self.nodedist_multiplier: raise ValueError( "Cannot set 'nodedist_multiplier' when 'weight' is minutes. " "Set 'nodedist_kmh' instead." ) self.weight = "minutes" gdf["minutes"] = gdf[self.weight] self._check_for_nans(gdf, self.weight) gdf = self._try_to_float(gdf, self.weight) self._check_for_negative_values(gdf, self.weight) return gdf elif self.weight in gdf.columns: self._check_for_nans(gdf, self.weight) gdf = self._try_to_float(gdf, self.weight) self._check_for_negative_values(gdf, self.weight) return gdf # at this point, the weight is wrong. # Now to determine the error message if "meter" in self.weight or "metre" in self.weight: raise ValueError( "the crs of the roads have to have units in 'meters' when the " "weight is 'meters'." ) if self.weight == "minutes": incorrect_weight_column = ( "Cannot find 'weight' column for minutes. " "Try running one of the 'make_directed_network_' methods" ", or set 'weight' to 'meters'" ) else: incorrect_weight_column = f"Cannot find 'weight' column {self.weight}" raise KeyError(incorrect_weight_column) @staticmethod def _check_for_nans(df: GeoDataFrame, col: str) -> None: """Remove NaNs and give warning if there are any.""" if all(df[col].isna()): raise ValueError(f"All values in the {col!r} column are NaN.") nans = sum(df[col].isna()) if nans: raise ValueError( f"{nans} rows have missing values in the {col!r} column. " "Fill these rows with 0 or another number.", ) @staticmethod def _check_for_negative_values(df: GeoDataFrame, col: str) -> None: """Remove negative values and give warning if there are any.""" negative = sum(df[col] < 0) if negative: raise ValueError( f"{negative} negative values found in the {col!r} column. Fill these " "with a number greater than or equal to zero.", ) @staticmethod def _try_to_float(df: GeoDataFrame, col: str) -> GeoDataFrame: """Try to convert weight column to float, raise ValueError if it fails.""" try: df[col] = df[col].astype(float) except ValueError as e: raise ValueError( f"The {col!r} column must be numeric. Got characters that couldn't be " "interpreted as numbers." ) from e return df
[docs] def copy(self) -> "NetworkAnalysisRules": """Return a shallow copy the instance.""" return copy(self)
[docs] def deepcopy(self) -> "NetworkAnalysisRules": """Return a deep copy the instance.""" return deepcopy(self)