diff --git a/LICENSE.rst b/LICENSE.rst index ad052a1..e6ecfa0 100644 --- a/LICENSE.rst +++ b/LICENSE.rst @@ -1,7 +1,7 @@ MIT License =========== -*Copyright (c) 2021 Tariq Shihadah* +*Copyright (c) 2024 Tariq Shihadah* Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.rst b/README.rst index 76f9a26..c9a4a0a 100644 --- a/README.rst +++ b/README.rst @@ -53,13 +53,18 @@ performing an aggregation function over these values:: Version Notes ============= -0.0.11 (TBD) +0.1.0 (2024-01-16) ------------------- -Feeling the algo-rhythm these days. +My heart is in Gaza. -* Initial deployment of synthesis module featuring some tools for generating linear referencing information for chains of linear asset data with geometry but no LRS. +* Initial deployment of synthesis module featuring some tools for generating linear referencing information for chains of linear asset data with geometry but no LRS. These features are currently experimental and outputs should be reviewed for quality and expected outcomes. They will get refined in future versions based on performance in various applications and input from users. +* Addition of ``retain`` parameter to the ``EventsCollection.to_windows()`` method which retains all non-target and non-spatial fields from the original events dataframe when performing the operation. Previously, only newly generated target fields would be present in the output ``EventsCollection``'s events dataframe. +* Fixed implementation of ``EventsCollection.spatials`` property to correctly return a list of spatial column labels (e.g., geometry and route column labels) in the events dataframe. This also corrects ``EventsCollection.others`` which previously incorrectly included these labels. +* Addition of ``EventsCollection.clip()`` method and expansion of the ``EventsCollection.shift()`` method for better parameterization. +* Transition to ``pyproject.toml`` setup with ``setuptools``. * Performance improvements * Various bug fixes, minor features +* Why not push to v0.1 finally? 0.0.10 (2023-05-03) ------------------- diff --git a/linref/__init__.py b/linref/__init__.py index fb07bf4..9b59cdd 100644 --- a/linref/__init__.py +++ b/linref/__init__.py @@ -2,4 +2,4 @@ from linref.events.merge import EventsMerge, EventsMergeAttribute, EventsMergeTrace from linref.events.union import EventsUnion from linref.route import MLSRoute, combine_mpgs -from linref.events.synthesis import generate_linear_events \ No newline at end of file +from linref.events.synthesis import generate_linear_events, find_intersections \ No newline at end of file diff --git a/linref/events/collection.py b/linref/events/collection.py index bed38dd..beb7339 100644 --- a/linref/events/collection.py +++ b/linref/events/collection.py @@ -351,7 +351,21 @@ def targets(self): """ # Define target columns targets = [self.beg, self.end] + self.keys - return targets + return targets + + @property + def spatials(self): + """ + A list of geometry and route columns within the events dataframe, if + defined. + """ + # List defined spatial columns + spatials = [] + if not self.geom is None: + spatials.append(self.geom) + if not self.route is None: + spatials.append(self.route) + return spatials @property def others(self): @@ -360,7 +374,8 @@ def others(self): end, or key columns. 
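+ Geometry and route columns (see the ``spatials`` property) are likewise + excluded. For example, for a collection keyed on a route ID with begin, + end, and geometry columns defined, only the remaining attribute labels + are returned (hypothetical column names):: + + >>> ec.others + ['speed_limit', 'aadt']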
""" # Define other columns - others = [col for col in self.df.columns if not col in self.targets] + exclude = self.targets + self.spatials + others = [col for col in self.df.columns if not col in exclude] return others @property @@ -429,6 +444,13 @@ def end(self, end): self._end = end self._end_loc = self.columns.index(end) + @property + def lengths(self): + """ + Lengths of all event ranges. + """ + return self.ends - self.begs + @property def geom(self): return self._geom @@ -689,7 +711,7 @@ def geometry_from_xy(self, x, y, col_name='geometry', crs=None, return None if inplace else ef def dissolve(self, attr=None, aggs=None, agg_func=None, agg_suffix='_agg', - agg_geometry=False, agg_routes=False, dropna=False, fillna=None, + agg_geometry=True, agg_routes=True, dropna=False, fillna=None, reorder=True, merge_lines=True): """ Dissolve the events dataframe on a selection of event attributes. @@ -718,14 +740,16 @@ def dissolve(self, attr=None, aggs=None, agg_func=None, agg_suffix='_agg', A suffix to be added to the name of aggregated columns. If provided as a list, must correspond to provided lost of aggregation attributes. - agg_geometry : bool, default False + agg_geometry : bool, default True Whether to create an aggregated geometries field, populated with aggregated shapely geometries based on those contained in the - collection's geometry field. - agg_routes : bool, default False + collection's geometry field. If not needed, set to False to reduce + processing time. + agg_routes : bool, default True Whether to create an aggregated routes field, populated with MLSRoute object class instances, created based on aggregated - segment geometries and begin and end mile posts. + segment geometries and begin and end mile posts. If not needed, + set to False to reduce processing time. dropna : bool, default False Whether to drop records with empty values in the attribute fields. This parameter is passed to the df.groupby call. @@ -1083,7 +1107,7 @@ def to_grid(self, dissolve=False, **kwargs): ) return res - def to_windows(self, dissolve=False, endpoint=False, **kwargs): + def to_windows(self, dissolve=False, retain=True, endpoint=False, **kwargs): """ Use the events dataframe to create sliding window events of a fixed length and a fixed number of steps, and which fill the bounds of each @@ -1121,6 +1145,9 @@ def to_windows(self, dissolve=False, endpoint=False, **kwargs): dissolve : bool, default False Whether to dissolve the events dataframe before performing the transformation. + retain : bool, default True + Whether to retain all fields from the original dataframe in the + newly generated dataframe. endpoint : bool, default False Add a point event at the end of each event range. """ @@ -1166,6 +1193,13 @@ def to_windows(self, dissolve=False, endpoint=False, **kwargs): } dtypes = {col: dtypes[col] for col in df.columns} df = df.astype(dtypes, copy=False) + # Retain original fields if requested + if retain: + df = df.merge( + self.df[self.others], left_on='index_parent', + right_index=True, how='left' + ) + # Prepare collection and return res = self.__class__( df, keys=self.keys, @@ -1812,6 +1846,27 @@ def _check_missing_data(self, missing_data='warn'): raise ValueError( "Invalid input missing_data parameter. Must be one of " "('ignore','drop','warn','raise').") + + def drop_missing(self, inplace=False): + """ + Drop records from the events dataframe which do not contain valid + linear referencing information (e.g., empty key values or begin or + end location values). 
+ + Parameters + ---------- + inplace : boolean, default False + Whether to perform the operation in place. If False, will return a + modified copy of the events object. + """ + # Apply update + if inplace: + self._check_missing_data(missing_data='drop') + return + else: + ec = self.copy() + ec._check_missing_data(missing_data='drop') + return ec def from_similar(self, df, **kwargs): """ @@ -1959,6 +2014,204 @@ def _validate_keys(self, keys): keys = tuple(keys) # Return validated keys return keys + + def round(self, decimals=0, factor=1, inplace=False): + """ + Round the bounds of all events to the specified number of decimals + or to a specified rounding factor. + + Parameters + ---------- + decimals : int, default 0 + Number of decimals to round event bound values to. + factor : scalar, default 1 + Rounding factor to apply to the event bound values. For example, + use `factor=0.5` (and `decimals=0`) to round each value to the + nearest 0.5. + inplace : boolean, default False + Whether to perform the operation in place. If False, will return a + modified copy of the events object. + """ + # Copy data + df = self.df.copy() + # Perform rounding + df[self.beg] = \ + np.round(df[self.beg] / factor, decimals=decimals) * factor + df[self.end] = \ + np.round(df[self.end] / factor, decimals=decimals) * factor + + # Apply update + if inplace: + self.df = df + return + else: + ec = self.from_similar(df) + return ec + + def clip(self, lower=None, upper=None, inplace=False): + """ + Clip the bounds of all events at the specified lower and upper values. + + Parameters + ---------- + lower, upper : scalar, default None + The lower and upper threshold values. All values below the lower + threshold will be set to it. All values above the upper threshold + will be set to it. If a threshold value is not provided (e.g., + None), values will not be clipped in that direction. + inplace : boolean, default False + Whether to perform the operation in place. If False, will return a + modified copy of the events object. + """ + # Copy data + df = self.df.copy() + + # Perform clipping + df[self.beg] = np.clip(df[self.beg], lower, upper) + df[self.end] = np.clip(df[self.end], lower, upper) + + # Apply update + if inplace: + self.df = df + return + else: + ec = self.from_similar(df) + return ec + + def shift( + self, + distance=0, + direction='positive', + which='both', + labels=None, + inplace=False + ): + """ + Shift the bounds of all events by the specified value. + + Parameters + ---------- + distance : scalar, default 0 + The amount to shift each event bound by. Negative values will + result in an inversion of the `direction` parameter. + direction : {'positive', 'negative', 'both'}, default 'positive' + Which direction the event bounds should be shifted. + + Options + ------- + 'positive' : Shift event bounds in an increasing direction. + 'negative' : Shift event bounds in a decreasing direction. + 'both' : Shift event begin points in a decreasing direction and + event end points in an increasing direction, expanding the + bounds of each event. + + which : {'begs', 'ends', 'both'}, default 'both' + Which event bounds should be shifted. + labels : tuple, optional + Two-value tuple containing new begin and end column labels for the + modified event bound columns. If provided, must be a two-value + tuple containing two valid pandas column labels. If not provided, + original begin and end labels will be retained. + inplace : boolean, default False + Whether to perform the operation in place. If False, will return a + modified copy of the events object. 
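+ + For example, for an event spanning (10, 20), ``distance=5`` with the + default parameters shifts it to (15, 25), while ``distance=5, + direction='both'`` expands it to (5, 25).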
+ """ + # Validation + _ops_direction = {'positive', 'negative', 'both'} + _ops_which = {'begs', 'ends', 'both'} + if not direction in _ops_direction: + raise ValueError( + f"Input `direction` parameter must be one of {_ops_direction}") + if not which in _ops_which: + raise ValueError( + f"Input `which` parameter must be one of {_ops_which}") + if not labels is None: + try: + assert isinstance(labels, tuple) + assert len(labels) == 2 + except AssertionError: + raise ValueError( + f"If provided, input labels must be two-value tuple.") + + # Copy data + df = self.df.copy() + + # Determine shifting values + if which in ('ends', 'both'): + if direction in ('positive', 'both'): + distance_ends = distance + else: + distance_ends = -distance + else: + distance_ends = 0 + if which in ('begs', 'both'): + if direction in ('negative', 'both'): + distance_begs = -distance + else: + distance_begs = distance + + # Determine shifted column labels + if not labels is None: + label_beg = labels[0] + label_end = labels[1] + else: + label_beg = self.beg + label_end = self.end + + # Perform shifting + df[label_beg] = df[self.beg] + distance_begs + df[label_end] = df[self.end] + distance_ends + + # Apply update + if inplace: + self.df = df + self.beg = label_beg + self.end = label_end + return + else: + ec = self.from_similar(df) + ec.beg = label_beg + ec.end = label_end + return ec + + def separate(self, eliminate_inside=False, inplace=False, **kwargs): + """ + Separate the bounds of all events so that none directly overlap. + This is done using the rangel.RangeCollection.separate() method + on each EventsGroup. This method does not retain the sorting of + the original events dataframe. + + Parameters + ---------- + eliminate_inside : boolean, default False + Whether to automatically eliminate ranges which are entirely + overlapped by other ranges, producing zero-length ranges (e.g., + point events). + inplace : boolean, default False + Whether to perform the operation in place. If False, will return a + modified copy of the events object. + """ + # Iterate through events groups + records = [] + for key, group in self.iter_groups(): + # Separate group ranges + separated = group.rng.separate( + drop_short=False, eliminate_inside=eliminate_inside) + # Apply separated ranges to a copy of the group + updated = group.df.copy() + updated[self.beg] = separated.begs + updated[self.end] = separated.ends + records.append(updated) + + # Prepare new dataframe + df = pd.concat(records) + + # Apply update + if inplace: + self.df = df + return + else: + ec = self.from_similar(df) + return ec def overlay_average(self, other, cols=None, **kwargs): """ @@ -2023,7 +2276,7 @@ def merge(self, other): return em def project_parallel(self, other, samples=3, buffer=100, match='all', - choose=1, sort_locs=True, **kwargs): + choose=1, sort_locs=True, build_routes=True, **kwargs): """ Project an input geodataframe of linear geometries onto parallel events in the events dataframe, producing linearly referenced locations for all @@ -2051,10 +2304,29 @@ def project_parallel(self, other, samples=3, buffer=100, match='all', sort_locs : bool, default True Whether begin and end location values should be sorted, ensuring that all events are increasing and monotonic. + build_routes : bool, default True + Whether to automatically build routes using the build_routes() + method if routes are not already available. **kwargs Keyword arguments to be passed to the EventsCollection constructor upon completion of the projection. 
""" + # Ensure that geometries and routes are available + if self.geom is None: + raise ValueError( + "No geometry found in events dataframe. If valid shapely " + "geometries are available in the dataframe, set this with the " + f"{self.__class__.__name__}'s geom property.") + elif self.route is None: + if build_routes: + self.build_routes() + else: + raise ValueError( + "No routes found in events dataframe. If valid shapely " + "geometries are available in the dataframe, create routes " + "by calling the build_routes() method on the " + f"{self.__class__.__name__} class instance.") + # Create projector pp = ParallelProjector(self, other, samples=samples, buffer=buffer) # Perform match and return results in new events collection diff --git a/linref/events/merge.py b/linref/events/merge.py index 4a9b03d..3f7c972 100644 --- a/linref/events/merge.py +++ b/linref/events/merge.py @@ -725,7 +725,7 @@ def build(self, inplace=True): # Log traces self.traces = traces - def distribute(self, column=None, squeeze=True, **kwargs): + def distribute(self, column=None, **kwargs): """ Intersect and distribute events over the range collection, scaling their values relative to their indexed distance from their intersecting @@ -760,6 +760,21 @@ def distribute(self, column=None, squeeze=True, **kwargs): Created: 2022-10-04 """ + # Validate column choice + squeeze = True + if not column is None: + # Enforce list type + if not isinstance(column, list): + column = [column] + else: + squeeze = False + # Ensure valid labels + mismatched = set(column) - set(self.right.df.columns) + if len(mismatched) > 0: + raise ValueError( + f"Input column labels {mismatched} are not present in " + "the right dataframe.") + # Iterate over EventsGroups in the left collection index = [] res = [] @@ -785,18 +800,21 @@ def distribute(self, column=None, squeeze=True, **kwargs): # Log results index.append(group_left.df.index) res.append(res_i) + # Synthesize into pandas object index = np.concatenate(index) data = np.concatenate(res) + # No column requested, prepare generic results if column is None: - obj = pd.Series(index=index, data=data, name=None) - elif isinstance(column, list): - if len(column)==1 and squeeze: - obj = pd.Series(index=index, data=data, name=column[0]) - else: - obj = pd.DataFrame(index=index, data=data, columns=column) + obj = pd.Series( + index=index, data=data, name=None, dtype=float) + # Column(s) requested, prepare complete results else: - obj = pd.Series(index=index, data=data, name=column) + obj = pd.DataFrame( + index=index, data=data, columns=column, dtype=float) + # Squeeze dataframe if a single column label is provided + if squeeze: + obj = obj.squeeze() return obj def cut(self, **kwargs): diff --git a/linref/events/spatial.py b/linref/events/spatial.py index 88be43d..99bddc8 100644 --- a/linref/events/spatial.py +++ b/linref/events/spatial.py @@ -46,14 +46,58 @@ class ParallelProjector(object): """ Experimental class for performing projections of linear geometries onto linear events collections. + + The methodology used by this class involves the following steps: + + 1. Create sample points along the projected geometries using a fixed + number of samples per geometry. + + 2. Spatially join these sample points to the target EventsCollection's + geometry using the provided buffer distance to identify candidate matches. + + 3. Process all possible matches using the .match() method to produce + linear referencing information for the projected geometries based on that + of the target EventsCollection. 
""" - def __init__(self, target, other, samples=3, buffer=100) -> None: + def __init__(self, target, projected, samples=3, buffer=100) -> None: self.target = target - self.other = other + self.projected = projected self.samples = samples self.buffer = buffer + @property + def target(self): + return self._target + + @target.setter + def target(self, obj): + # Validate object type + if not isinstance(obj, EventsCollection): + raise TypeError( + "Input target must be valid lr.EventsCollection instance.") + # Ensure that geometries and routes are available + if obj.geom is None: + raise ValueError( + "No geometry found in the target events dataframe.") + elif obj.route is None: + raise ValueError( + "No routes found in events dataframe.") + self._target = obj + + @property + def projected(self): + return self._projected + + @projected.setter + def projected(self, obj): + # Validate object type + if not isinstance(obj, gpd.GeoDataFrame): + raise TypeError( + "Input projected object must be valid gpd.GeoDataFrame " + "instance.") + self._projected = obj + @property def samples(self): return self._samples @@ -80,8 +124,8 @@ def buffer(self, buffer): assert buffer >= 0 self._buffer = buffer except: - raise ValueError("Buffer parameter must be a non-negative " - "numeric value.") + raise ValueError( + "Buffer parameter must be a non-negative numeric value.") # Perform spatial join self._buffer_join() @@ -96,7 +140,7 @@ def sample_points(self): @property def projectors(self): - return self.other.geometry.values + return self.projected.geometry.values def _build_sample_points(self): """ @@ -110,8 +154,8 @@ def _build_sample_points(self): points.extend([projector.interpolate(loc) for loc in \ sample_locs * projector.length]) self._sample_points = gpd.GeoDataFrame({ - '__projector': np.repeat(self.other.index.values, self.samples), - 'geometry': points}, geometry='geometry', crs=self.other.crs + '__projector': np.repeat(self.projected.index.values, self.samples), + 'geometry': points}, geometry='geometry', crs=self.projected.crs ) def _buffer_join(self): @@ -134,6 +178,9 @@ def _buffer_join(self): def match(self, match='all', choose=1, sort_locs=True): """ + Perform the actual matching of nearby geometries to one another based + on input analysis parameters, producing a dataframe which has been + applied to the target EventsCollection's linear referencing system. 
""" # Validate matching parameters if match=='all': @@ -161,8 +208,8 @@ def match(self, match='all', choose=1, sort_locs=True): # Test all unique pairs for minimum match count match_mask = pair_counts >= match # Compute mean distances for all unique matched pairs - split = np.array(np.split(self._joined.values[:,2], pair_index)[1:])\ - [match_mask] + all_distances = np.split(self._joined.values[:,2], pair_index)[1:] + split = np.array(all_distances, dtype=object)[match_mask] mean_distances = np.array([np.mean(i) for i in split]) # Group matched targets for all matched projectors proj_unique, proj_index = np.unique( @@ -170,7 +217,8 @@ def match(self, match='all', choose=1, sort_locs=True): axis=0, return_index=True ) - pair_distances = np.array(np.split(mean_distances, proj_index)[1:]) + pair_distances = np.array( + np.split(mean_distances, proj_index)[1:], dtype=object) # Identify the index of the target(s) with the lowest mean distance for # each projector if requested @@ -185,7 +233,8 @@ def match(self, match='all', choose=1, sort_locs=True): for i in pair_distances] # - Produce a projector-target map zipped = zip(pair_groups, pair_select) - targets = np.array([group[index] for group, index in zipped]) + targets = np.array( + [group[index] for group, index in zipped], dtype=object) # - Flatten map if multiple choices if choose != 1: projectors = np.repeat(proj_unique, [len(i) for i in targets]) @@ -198,7 +247,7 @@ def match(self, match='all', choose=1, sort_locs=True): '__projector': projectors, '__target': targets}) # Merge matched records - proj_lines = self.other.geometry.rename('__proj_lines') + proj_lines = self.projected.geometry.rename('__proj_lines') select = matched_pairs \ .merge(proj_lines, left_on='__projector', right_index=True) target_data = self.target.df[self.target.keys + [self.target.route]] @@ -211,7 +260,7 @@ def _project(route, line): try: # Project bounds onto target route boundary = line.boundary - beg, end = boundary[0], boundary[-1] + beg, end = boundary.geoms[0], boundary.geoms[-1] beg_loc, end_loc = route.project(beg), route.project(end) return beg_loc, end_loc except (AttributeError, IndexError): @@ -229,10 +278,17 @@ def _project(route, line): else: select[labels[0]] = proj_bounds[:, 0] select[labels[1]] = proj_bounds[:, 1] - clean = self.other.drop( + clean = self.projected.drop( columns=labels + ['geometry','route'], errors='ignore') select = select \ .merge(clean, how='left', left_on='__projector', right_index=True) \ .drop(columns=['__projector','__target','__proj_lines','route'], errors='ignore') - return select \ No newline at end of file + return select + + +##################### +# LATE DEPENDENCIES # +##################### + +from linref.events.collection import EventsCollection diff --git a/linref/events/synthesis.py b/linref/events/synthesis.py index 1bcbf0f..9ec2960 100644 --- a/linref/events/synthesis.py +++ b/linref/events/synthesis.py @@ -2,7 +2,8 @@ =============================================================================== Module featuring classes and functionality for synthesizing linear referencing -information for existing data that is not LRS-enabled. +information for existing data that is not LRS-enabled and other manipulations +which support linear referencing data engineering and analysis. 
Classes @@ -12,7 +13,7 @@ Dependencies ------------ -pandas, numpy, rangel, copy, warnings, functools +geopandas, shapely, pandas, numpy, rangel, copy, warnings, functools Development @@ -41,6 +42,12 @@ from functools import wraps from rangel import RangeCollection from shapely.geometry import Point +from shapely import unary_union + + +####################### +# SYNTHESIS FUNCTIONS # +####################### def generate_linear_events( df, @@ -58,19 +65,22 @@ def generate_linear_events( Function for generating events information for existing chains of linear geospatial data based on the geographic lengths of chain members. This function is intended to synthesize a new linear referencing system for - chains of adjacent linear data which are oriented in the same direction - and whose end and begin points are coincident or fall within a defined - buffer distance of one another. + chains of adjacent linear data with matching key values, which are + oriented in the same direction, and whose end and begin points are + coincident or fall within a defined buffer distance of one another. Parameters ---------- df : gpd.GeoDataFrame Valid geopandas GeoDataFrame with linear geometries. - keys : list or tuple + keys : list or tuple, optional A list or tuple of dataframe column labels which define the unique groups of events within the events dataframe. Common examples include year or route ID columns which distinguish unrelated sets of events - within the events dataframe. + within the events dataframe. If not provided, it will be assumed that + the whole dataframe represents a single group of events. As a result, + the only key included in the resulting EventsCollection will be the + provided `chain_label`. beg_label, end_label : str or label Column labels to be created within the events dataframe which represent the linearly referenced location of each event. 
@@ -145,36 +155,51 @@ def generate_linear_events( groups = df.groupby(by=keys) else: groups = {np.nan: df}.items() + + # Define function for retrieving boundary points + def _get_boundary_point(line, index=None): + # Use coords approach for multi-part geometries + try: + return Point(line.geoms[index].coords[index]) + # Use coords approach for single-part geometries + except AttributeError: + return Point(line.coords[index]) + # Iterate over all groups and perform analysis - geom = df.geometry.name + geom_column = df.geometry.name record_indexes = [] record_begs = [] record_ends = [] record_chains = [] for key, group in groups: - # Get lengths of all geometries - lengths_all = group.length - - # Get boundaries of all lines - begs = group[geom].apply(lambda x: Point(x.coords[0])) - ends = group[geom].apply(lambda x: Point(x.coords[-1])) + # Get boundary points of all lines for finding matches + begs = group[geom_column].apply(_get_boundary_point, index=0) + ends = group[geom_column].apply(_get_boundary_point, index=-1) + # Buffer the end points if requested if not buffer is None: begs = begs.apply(lambda x: x.buffer(buffer)) ends = ends.apply(lambda x: x.buffer(buffer)) - begs = gpd.GeoDataFrame(begs, geometry=geom) - ends = gpd.GeoDataFrame(ends, geometry=geom) + # Create geodataframes for intersecting + begs = gpd.GeoDataFrame(begs, geometry=geom_column) + ends = gpd.GeoDataFrame(ends, geometry=geom_column) # Intersect boundary geometries intersection = gpd.sjoin(ends, begs) intersection['index_left'] = intersection.index + # Get all unique matches between the line end points and other line + # begin points pairs = intersection[['index_left','index_right']].values - # Remove instances of multiple matches on left or right + # Remove instances of multiple matches on left or right; only the + # first unique value on the left and right are kept based on original + # sorting of data pairs = pairs[np.unique(pairs[:,0], return_index=True)[1]] pairs = pairs[np.unique(pairs[:,1], return_index=True)[1]] - # Identify single-value chains and downstream terminals with null pairs + # Identify single-segment chains with no matches to other segments as + # well as downstream terminals, holding their places with pairs of + # these segments' indices matched with null values missing = set(group.index) - set(pairs[:,0]) missing = np.array([sorted(missing), np.full(len(missing), np.nan)]).T pairs = np.concatenate([pairs, missing]) @@ -183,27 +208,44 @@ def generate_linear_events( chains = [] chain_index = 0 while pairs.shape[0] > 0: - # Initialize pairs filter + # Initialize pairs filter, retaining all pairs pairs_filter = np.ones(pairs.shape[0], dtype=bool) - # Iterate over upstream terminals + # Identify upstream terminals which will be used to begin segment + # chains terminals = sorted(set(pairs[:,0]) - set(pairs[:,1])) + + # Address complete loops by selecting a terminal from remaining + # data if none remain + if len(terminals) == 0: + terminals = [pairs[0,0]] + + # Iterate over upstream terminals, chaining from them to their + # downstream matches for terminal in terminals: - # Iterate through chain members + # Iterate through all subsequent matches to the terminal chains.append([]) member = terminal while True: try: - # Update chain with current member + # Update the chain by adding the current member chains[chain_index].append(member) # Identify pairs matching the selected chain member member_loc = (pairs[:,0] == member) & pairs_filter - assert member_loc.sum() > 0 
# Check for no matches + # Check for no matches; if there are none, end the + # chain and move to the next one + assert member_loc.sum() > 0 # Update pairs filter to remove matches to the member pairs_filter[member_loc] = False - # Update indexed chain member + # Update indexed chain member by selecting the next + # match member = pairs[member_loc.argmax()][1] + # Check for the end of the chain if the new selected + # member is null, or if looping is detected; end the + # chain in either case assert ~np.isnan(member) # Check for end of chain assert member != terminal # Check for looping except AssertionError: @@ -215,6 +257,8 @@ def generate_linear_events( # Filter pairs to remove addressed items pairs = pairs[pairs_filter] + # Get lengths of all geometries + lengths_all = group.length # Iterate over chains and create events information for chain_index, chain in enumerate(chains): # Compute cumulative sum of chain geometry lengths @@ -244,10 +288,56 @@ def generate_linear_events( # Merge with parent table and create EventsCollection ec = EventsCollection( - events, keys=keys + [chain_label], beg=beg_label, end=end_label, - geom=df.geometry.name, **kwargs) + events, + keys=keys + [chain_label] if not keys is None else [chain_label], + beg=beg_label, + end=end_label, + geom=geom_column, + **kwargs + ) return ec +def find_intersections(df, only_points=True, only_single=True): + """ + Generate intersection points for an input geodataframe of linear + geometries. Output will be a geodataframe with a single point + geometry at each location where a line intersects with another + and will have the same CRS. + + Parameters + ---------- + df : gpd.GeoDataFrame + Valid geopandas geodataframe containing linear geometries. + only_points : bool, default True + Whether all non-point geometries resulting from intersections should + be removed from results. + only_single : bool, default True + Whether all multi-part geometries resulting from intersections should + be removed from results. 
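+ + For example, for a geodataframe ``gdf`` of two crossing lines + (hypothetical), a single intersection point is returned:: + + >>> pts = find_intersections(gdf) + >>> len(pts) + 1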
+ """ + # Spatial join with self + joined = df.sjoin(df) + # Remove self-matches + joined = joined[joined['index_right'] != joined.index] + # Find intersections + other_geometries = df.geometry.reindex(joined['index_right']) + intersections = joined.geometry.intersection(other_geometries, align=False) + + # Cast to geodataframe + res = gpd.GeoDataFrame(geometry=intersections, crs=df.crs) + # Drop duplicate points + res = res.drop_duplicates() + # Filter results if requested + if only_points: + if only_single: + choose_types = ['Point'] + else: + choose_types = ['Point', 'MultiPoint'] + res = res[res.geom_type.isin(choose_types)] + # Reset index + res = res.reset_index(drop=True) + return res + ##################### # LATE DEPENDENCIES # diff --git a/pyproject.toml b/pyproject.toml index 58df767..27d734a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,18 +1,43 @@ [build-system] -requires = ["setuptools", "setuptools-scm", "sphinx", "myst-parser", "sphinx-rtd-theme", "geopandas"] +requires = ["setuptools"] build-backend = "setuptools.build_meta" [project] name = "linref" -version = "0.0.10.post1" +version = "0.1.0" description = "Linearly referenced data management, manipulation, and operations" -readme = "README.rst" requires-python = ">=3.8" license = {file = "LICENSE.rst"} keywords=["python", "geospatial", "linear", "data", "event", "dissolve", "overlay", "range", "numeric", "interval"] -authors = [{name = "Tariq Shihadah", email = "tariq.shihadah@gmail.com"}] -dependencies = ["numpy", "matplotlib", "shapely>=1.7", "pandas>=1.1", "geopandas>=0.10.2", "rangel>=0.0.7"] +authors = [ + {name = "Tariq Shihadah", email = "tariq.shihadah@gmail.com"} +] +dependencies = [ + "numpy", + "matplotlib", + "shapely>=1.7", + "pandas>=1.1", + "geopandas>=0.10.2", + "rangel>=0.0.7", +] +classifiers = [ + "Development Status :: 1 - Planning", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: Unix", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows", +] +dynamic = ["readme"] [project.urls] documentation = "https://linref.readthedocs.io/" -repository = "https://github.com/tariqshihadah/linref" \ No newline at end of file +repository = "https://github.com/tariqshihadah/linref" + +[tool.setuptools.packages.find] +include = ["linref*"] +exclude = ["linref.tests*"] + +[tool.setuptools.dynamic] +readme = {file = ["README.rst"]} \ No newline at end of file diff --git a/setup.py b/setup.py deleted file mode 100644 index f706e45..0000000 --- a/setup.py +++ /dev/null @@ -1,32 +0,0 @@ -from setuptools import setup, find_packages -import codecs -import os - -VERSION = '0.0.10.post1' -DESCRIPTION = 'Linearly referenced data management, manipulation, and operations' -with open('README.rst') as f: - LONG_DESCRIPTION = ''.join(f.readlines()) - -# Setting up -setup( - name="linref", - version=VERSION, - author="Tariq Shihadah", - author_email="", - description=DESCRIPTION, -# long_description_content_type="text/x-rst", - long_description=LONG_DESCRIPTION, - packages=find_packages(), - package_data={"": ["*.json"]}, - install_requires=['numpy', 'matplotlib', 'shapely>=1.7', 'pandas>=1.1', 'geopandas>=0.10.2', 'rangel>=0.0.7', 'sphinx', 'myst_parser'], - keywords=['python', 'geospatial', 'linear', 'data', 'event', 'dissolve', 'overlay', 'range', 'numeric', 'interval'], - classifiers=[ - "Development Status :: 1 - Planning", - "Intended Audience :: Developers", - "Programming Language :: 
Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: Unix", - "Operating System :: MacOS :: MacOS X", - "Operating System :: Microsoft :: Windows", - ] -)