Coverage for pygeodesy/simplify.py: 99%
210 statements
« prev ^ index » next coverage.py v7.6.1, created at 2025-04-25 13:15 -0400
« prev ^ index » next coverage.py v7.6.1, created at 2025-04-25 13:15 -0400
2# -*- coding: utf-8 -*-
4u'''Simplify or linearize a path of C{LatLon} points.
6Each of the 4 I{simplify} functions is based on a different algorithm and
7produces different, simplified results in (very) different run times for the
8same path:
10 - Function L{simplify1} eliminates points with edge lengths shorter than
11 the given tolerance.
13 - Function L{simplifyRDP} implements the I{Ramer-Douglas-Peucker} (RDP)
14 algorithm, iteratively finding the point farthest from each path edge.
15 Original RDP exhaustively searches the most distant point in each iteration,
16 I{modified} RDP stops at the first point exceeding the distance tolerance.
18 - Function L{simplifyRW} uses the I{Reumann-Witkam} (RW) method, sliding a
19 "pipe" over each path edge, removing all subsequent points within the pipe
20 radius, up to the first point outside the pipe.
 - Function L{simplifyVW} provides the I{Visvalingam-Whyatt} (VW) method
   using the area of the triangle formed by three neighboring points.  Original
   VW removes only a single point per iteration, I{modified} VW eliminates all
   points with a triangular area not exceeding the tolerance in each iteration.
Keyword argument I{shortest} of functions L{simplifyRDP} and L{simplifyRW}
specifies the kind of distance between a point and a path edge.  If C{True},
use the I{shortest} distance to the path edge or edge points, otherwise use
the I{perpendicular} distance to the (extended) edge through both points.
Keyword argument B{C{radius}} of all functions is set to the mean earth
radius in C{meter}, conventionally.  Other units may be used, provided
that radius and tolerance are specified in the same units.
Use keyword argument C{B{indices}=True} in any function to return a list
of I{indices} of the simplified points instead of the simplified points
themselves.  The first and last index are always the first and last
original index.
40Finally, any additional keyword arguments B{C{options}} to all functions
41are passed thru to function L{pygeodesy.equirectangular4} to specify the
42distance approximation.
To process C{NumPy} arrays containing rows of lat-, longitude and possibly
other values, use class L{Numpy2LatLon} to wrap the C{NumPy} array into
I{on-the-fly-LatLon} points.  Pass the L{Numpy2LatLon} instance to any
I{simplify} function and the returned result will be a C{NumPy} array
containing the simplified subset, a partial copy of the original C{NumPy}
array.  Use keyword argument C{B{indices}=True} to return a list of array
row indices in lieu of the simplified array subset.
52See:
53 - U{https://Bost.Ocks.org/mike/simplify}
54 - U{https://WikiPedia.org/wiki/Ramer-Douglas-Peucker_algorithm}
55 - U{https://www.ScienceDirect.com/science/article/pii/S0098300402000092}
56 - U{https://hydra.Hull.ac.UK/resources/hull:8338}
57 - U{https://psimpl.SourceForge.net/reumann-witkam.html}
58 - U{https://www.CS.UBC.Ca/cgi-bin/tr/1992/TR-92-07.pdf}
59 - U{https://GitHub.com/FlorianWilhelm/gps_data_with_python}
60 - U{https://www.BDCC.co.UK/Gmaps/GDouglasPeuker.js}
61 - U{https://GitHub.com/mourner/simplify-js}
62 - U{https://GitHub.com/OmarEstrella/simplify.py}
63 - U{https://PyPI.org/project/rdp}
64 - U{https://PyPI.org/project/visvalingam}
65 - U{https://PyPI.org/project/simplification}
66'''
67# make sure int/int division yields float quotient, see .basics
68from __future__ import division as _; del _ # PYCHOK semicolon
70# from pygeodesy.basics import len2 # from .fmath
71from pygeodesy.constants import EPS, R_M, _1_0
72from pygeodesy.errors import _AttributeError, _ValueError, _xkwds_pop2
73from pygeodesy.fmath import fdot_, len2, sqrt0
74from pygeodesy.formy import equirectangular4
75from pygeodesy.interns import _small_, _too_
76from pygeodesy.iters import isNumpy2, isTuple2
77# from pygeodesy.lazily import _ALL_LAZY # from .units
78from pygeodesy.units import _ALL_LAZY, _1mm, Radius_
80from math import degrees, fabs, radians
82__all__ = _ALL_LAZY.simplify
83__version__ = '24.12.02'
86# try:
87# from collections import namedtuple
88# _T2 = namedtuple('_T2', 'ix, h2')
89# except ImportError:
90# class _T2(object):
91# ...
92# namedtuple (and .named._NamedTuple) can not be
93# used because (a) values can not be updated and
94# (b) it produces PyChecker warning "<string>:28:
95# self is not first method argument" which can't
96# be suppressed with command line option --stdlib
97class _T2(object):
98 '''(INTERNAL) VW 2-tuple (index, area2).
99 '''
100 # __slots__ are no longer space savers, see
101 # the comments at the class .points.LatLon_
102 # __slots__ = ('ix', 'h2')
104 def __init__(self, ix, h2):
105 self.ix = ix
106 self.h2 = h2
class _Sy(object):
    '''(INTERNAL) Simplify state, shared by all I{simplify} functions.

       Holds the path points, the tolerance converted to degrees
       I{squared}, the set of point indices retained so far and
       the working list of the Visvalingam-Whyatt (VW) method.
    '''
    # class-level defaults, overridden per instance where needed
    d2yxse5  = ()     # 5-tuple (d2, dy, dx, s, e) of the current edge, see d21
    eps      = EPS    # system epsilon
    indices  = False  # return indices instead of points
    ixs      = set()  # set(indices), always replaced in __init__
    n        = 0      # number of points
    options  = {}     # equirectangular4 options, never mutated
    pts      = []     # the points, always replaced in __init__
    radius   = R_M    # mean earth radius
    s2       = EPS    # tolerance squared
    s2e      = EPS    # VW sentinel
    shortest = False  # i.e. perpendicular
    subset   = None   # isNumpy2 or isTuple2
    t2s      = []     # list(_T2s), always replaced in vwn

    def __init__(self, points, tolerance, radius, shortest,
                                          indices, **options):
        '''New C{Simplify} state.

           @arg points: Iterable with the path points (C{LatLon}[]).
           @arg tolerance: Tolerance (same units as B{C{radius}}).
           @arg radius: Mean earth radius (C{meter}, conventionally).
           @arg shortest: If C{True}, use the I{shortest} otherwise
                          the I{perpendicular} distance (C{bool}).
           @arg indices: If C{True}, return B{C{points}} indices
                         instead of the simplified points (C{bool}).
           @kwarg options: Optional keyword arguments passed thru to
                           function C{equirectangular4}, a keyword
                           argument C{modified} is removed.

           @raise ValueError: The B{C{tolerance}} is too small.
        '''
        n, self.pts = len2(points)
        if n > 0:
            self.n = n
            self.ixs = set((0, n - 1))  # always keep first and last
        else:  # never hand out the shared, class-level set
            self.ixs = set()

        if radius is not R_M:
            self.radius = Radius_(radius, low=self.eps)
        # tolerance converted to degrees squared
        self.s2 = s2 = degrees(tolerance / self.radius)**2
        if min(s2, tolerance) < self.eps:
            raise _ValueError(tolerance=tolerance, txt=_too_(_small_))
        self.s2e = s2 + _1_0  # VW sentinel
        # assert self.s2e > s2

        if indices:
            self.indices = True
        if options:
            # drop keyword argument modified, keep all others
            _, self.options = _xkwds_pop2(options, modified=None)
        if shortest:
            self.shortest = True
        if isNumpy2(points) or isTuple2(points):  # NOT self.pts
            self.subset = points.subset

    def d21(self, s, e):
        '''Set path edge or line thru (points[s], -[e]).

           @return: C{True} if the edge length I{squared} exceeds
                    the system epsilon, C{False} otherwise.
        '''
        d21, y21, x21, _ = self.d2yxu4(s, e)
        self.d2yxse5 = d21, y21, x21, s, e
        return d21 > self.eps

    def d2i2(self, m, n, modified):
        '''Find the tallest distance I{squared} from points[s] to any
           of points[m..n-1] exceeding the tolerance, where C{s} is
           the start of the edge set by the last call of C{d21}.

           @return: 2-Tuple C{(d2, i)} with the tallest distance
                    I{squared} and its index or C{(s2, 0)} if no
                    point exceeds the tolerance I{squared} C{s2}.
        '''
        _, _, _, s, _ = self.d2yxse5
        t2, t = self.s2, 0  # tallest
        for i in range(m, n):
            d2, _, _, _ = self.d2yxu4(s, i)
            if d2 > t2:
                t2, t = d2, i
                if modified and d2 > self.eps:
                    break  # modified: stop at the first one
        return t2, t

    def d2ix2(self, m, n, modified):
        '''Find the tallest I{perpendicular B{or} shortest} distance
           among all points[m..n-1] to the path edge or line through
           (points[s], -[e]) exceeding the tolerance, where the edge
           is the one set by the last call of C{d21}.

           @return: 2-Tuple C{(d2, i)} with the tallest distance
                    I{squared} and its index or C{(s2, 0)} if no
                    point exceeds the tolerance I{squared} C{s2}.
        '''
        h = not self.shortest
        # point (x, y) on axis rotated by angle a ccw:
        #   x' = y * sin(a) + x * cos(a)
        #   y' = y * cos(a) - x * sin(a)
        #
        # distance along (w) and perpendicular to (h)
        # a line from the origin to point (dx, dy):
        #   w = (y * dy + x * dx) / hypot(dx, dy)
        #   h = (y * dx - x * dy) / hypot(dx, dy)
        d21, y21, x21, s, e = self.d2yxse5
        t2, t = self.s2, 0  # tallest
        for i in range(m, n):
            # distance points[s] to -[i], ...
            d2, y01, x01, _ = self.d2yxu4(s, i)
            if d2 > self.eps:
                if h:  # perpendicular distance
                    d2 = fdot_(y01, x21, -x01, y21)**2 / d21
                else:
                    w = fdot_(y01, y21, x01, x21)
                    if w > 0:
                        if w < d21:  # ... perpendicular ...
                            d2 = fdot_(y01, x21, -x01, y21)**2 / d21
                        else:  # ... or points[e] to -[i]
                            d2, _, _, _ = self.d2yxu4(e, i)
                    # else: w <= 0, keep distance points[s] to -[i]
                if d2 > t2:
                    t2, t = d2, i
                    if modified:
                        break  # modified: stop at the first one
        return t2, t

    def d2yxu4(self, i, j):
        '''Return the distance I{squared}, the deltas and the
           (longitudinal) unrollment between (points[i], -[j]),
           the 4-tuple from function C{equirectangular4}.
        '''
        p1, p2 = self.pts[i], self.pts[j]
        return equirectangular4(p1.lat, p1.lon,
                                p2.lat, p2.lon, **self.options)

    def h2t(self, i1, i2, i3):
        '''Compute (double) the triangle area, points[i2] is
           the top and edge (points[i1], -[i3]) is the base
           of the triangle.

           @return: Double the triangle area or C{0} if the base
                    or the edge (points[i1], -[i2]) is degenerate,
                    i.e. not longer than the system epsilon.
        '''
        d21, y21, x21, _ = self.d2yxu4(i1, i3)
        if d21 > self.eps:
            d01, y01, x01, _ = self.d2yxu4(i1, i2)
            if d01 > self.eps:
                # triangle height h = h2 / sqrt(d21) and
                # the area = h * sqrt(d21) / 2 == h2 / 2
                return fabs(fdot_(y01, x21, -x01, y21))  # double area
        return 0

    def rdp(self, modified):
        '''Ramer-Douglas-Peucker (RDP) simplification of a
           path of C{LatLon} points.

           @arg modified: Use I{modified} RDP (C{bool}).

           @return: The simplified points or indices, see C{result}.
        '''
        r, n = self.ixs, self.n
        if n > 1:
            s2, se = self.s2, [(0, n - 1)]  # stack of (start, end)
            while se:
                s, e = se.pop()
                s1 = s + 1
                if e > s1:  # at least one point in between
                    if self.d21(s, e):  # points[] to edge [s, e]
                        d2, i = self.d2ix2(s1, e, modified)
                    else:  # points[] to point [s]
                        d2, i = self.d2i2(s1, e, modified)
                    if d2 > s2 and i > 0:  # split at points[i]
                        se.append((i, e))
                        if not modified:
                            se.append((s, i))
                        r.add(i)
                    # s is index 0 or a previous split index,
                    # hence already in r, re-adding is harmless
                    r.add(s)
        return self.result(r)

    def result(self, r):
        '''Return the simplified points or indices.

           @arg r: The indices of the points retained (C{set}).

           @return: The sorted indices if C{indices} is set, the
                    C{subset} of the original points if those are
                    I{Numpy2} or I{Tuple2} or a list of the points.
        '''
        r = sorted(r)  # a new list, in path order
        if self.indices:
            return r
        elif self.subset:
            return self.subset(r)
        else:
            return [self.pts[i] for i in r]

    def rw(self):
        '''Reumann-Witkam simplification.

           @return: The simplified points or indices, see C{result}.
        '''
        r, n = self.ixs, self.n
        if n > 1:
            s, e, s2 = 0, 1, self.s2
            while s < e < n:
                if self.d21(s, e):  # non-degenerate pipe [s, e]
                    # first point outside the pipe, if any
                    d2, i = self.d2ix2(e + 1, n, True)
                    r.add(s)
                    if d2 > s2 and i > 0:
                        r.add(i)
                        s = e = i  # restart the pipe at points[i]
                    else:
                        # r.add(n - 1)
                        break
                e += 1  # next edge end, also if [s, e] is degenerate
        return self.result(r)

    def sy1(self):
        '''Basic simplification, keeping only points farther than
           the tolerance from the previous point kept.

           @return: The simplified points or indices, see C{result}.
        '''
        r, n = self.ixs, self.n
        if n > 1:
            s2, i = self.s2, 0
            for j in range(1, n):
                d2, _, _, _ = self.d2yxu4(i, j)
                if d2 > s2:
                    r.add(j)
                    i = j  # measure from the last point kept
        return self.result(r)

    def vwn(self):
        '''Initialize VW as list of 2-tuples _T2(ix, h2) where
           ix is the points[] index and h2 is the triangular
           area I{(times 2)} of that point.

           @return: The number of C{_T2}s, the same as the number
                    of points.
        '''
        self.t2s = t = []
        n, T2 = self.n, _T2
        if n > 2:  # all interior points
            _h2t = self.h2t
            t[:] = [T2(i, _h2t(i - 1, i, i + 1)) for i in range(1, n - 1)]
        # first and last point get the sentinel area, always
        # exceeding the tolerance and therefore never removed
        if n > 1:
            t.append(T2(n - 1, self.s2e))
        if n > 0:
            t.insert(0, T2(0, self.s2e))
        return len(t)

    def vwr(self, attr):
        '''Return the VW results, optionally including the
           triangular area (in C{meter}) as attribute C{attr}
           to each simplified point.

           @arg attr: Optional name of the points attribute to
                      set to the area value (C{str}) or C{None}.

           @return: The simplified points or indices, see C{result},
                    empty for an empty path.

           @raise AttributeError: An B{C{attr}} is invalid for
                                  I{Numpy2} points.
        '''
        pts, t = self.pts, self.t2s

        if t:  # min() of an empty path would raise a ValueError
            # double check the minimal triangular area
            assert min(t2.h2 for t2 in t) > self.s2 > 0

        if attr:  # return each triangular area (actually
            # the sqrt of double the triangular area)
            # converted back from degrees to meter
            if isNumpy2(pts):
                raise _AttributeError(attr=attr)
            if t:
                t[0].h2 = t[-1].h2 = 0  # zap sentinels
            m = radians(_1_0) * self.radius  # meter
            for t2 in t:  # convert back to meter
                setattr(pts[t2.ix], attr, sqrt0(t2.h2) * m)

        n = len(t)  # double check for duplicates
        r = set(t2.ix for t2 in t)
        assert len(r) == n
        return self.result(r)

    def vwrm(self):
        '''Keep removing the VW point with the smallest triangular
           area until that area exceeds the tolerance.
        '''
        s2, t = self.s2, self.t2s
        while len(t) > 2:
            # find the smallest area among the interior points
            m2, m = t[1].h2, 1
            for i in range(2, len(t) - 1):
                h2 = t[i].h2
                if h2 < m2:
                    m2, m = h2, i
            if m2 > s2:
                break
            self.vwrm1(m, 0)

    def vwrm1(self, m, tol):
        '''Eliminate VW point[m], keep recomputing the triangular
           area of both neighboring points and removing those
           too until the recomputed area exceeds C{tol}.
        '''
        t, _h2t = self.t2s, self.h2t
        t.pop(m)
        for n in (m, m - 1):  # neighbors
            while 0 < n < (len(t) - 1):  # never the first or last
                h2 = _h2t(t[n - 1].ix, t[n].ix, t[n + 1].ix)
                if h2 > tol:
                    t[n].h2 = h2
                    break  # while
                t.pop(n)

    def vwrm2(self, tol):
        '''Eliminate all VW points with a triangular area not
           exceeding C{tol}.
        '''
        t = self.t2s
        m = len(t) - 1
        while m > 1:  # backward over all interior points
            m -= 1
            if t[m].h2 <= tol:
                self.vwrm1(m, tol)
                # the list got shorter, stay within bounds
                m = min(m, len(t) - 1)
def simplify1(points, distance=_1mm, radius=R_M, indices=False, **options):
    '''Basic simplification of a path of C{LatLon} points, dropping
       any points closer together than the I{distance} tolerance.

       @arg points: Iterable with the path points (C{LatLon}[]).
       @kwarg distance: Tolerance (C{meter}, same units as B{C{radius}}).
       @kwarg radius: Mean earth radius (C{meter}, conventionally).
       @kwarg indices: If C{True}, return B{C{points}} indices instead
                       of the simplified points (C{bool}).
       @kwarg options: Optional keyword arguments passed thru to function
                       L{pygeodesy.equirectangular4}.

       @return: Simplified points (C{LatLon}[]) or B{C{points}} indices.

       @raise LimitError: Lat- and/or longitudinal delta exceeds the B{C{limit}},
                          see function L{pygeodesy.equirectangular4}.

       @raise ValueError: Tolerance B{C{distance}} or B{C{radius}} too small.
    '''
    # the 4th argument, shortest is irrelevant for the basic method
    return _Sy(points, distance, radius, True, indices, **options).sy1()
def simplifyRDP(points, distance=_1mm, radius=R_M, shortest=False,
                        indices=False, modified=False, **options):
    '''I{Ramer-Douglas-Peucker} (RDP) simplification of a path of C{LatLon}
       points, dropping any points too close together or closer to an
       edge than the I{distance} tolerance.

       @arg points: Iterable with the path points (C{LatLon}[]).
       @kwarg distance: Tolerance (C{meter}, same units as B{C{radius}}).
       @kwarg radius: Mean earth radius (C{meter}, conventionally).
       @kwarg shortest: If C{True}, use the I{shortest} otherwise the
                        I{perpendicular} distance (C{bool}).
       @kwarg indices: If C{True}, return B{C{points}} indices instead
                       of the simplified points (C{bool}).
       @kwarg modified: If C{True}, use the C{modified RDP} method (C{bool}),
                        see the B{note}.
       @kwarg options: Optional keyword arguments passed thru to function
                       L{pygeodesy.equirectangular4}.

       @return: Simplified points (C{LatLon}[]) or B{C{points}} indices.

       @raise LimitError: Lat- and/or longitudinal delta exceeds the B{C{limit}},
                          see function L{pygeodesy.equirectangular4}.

       @raise ValueError: Tolerance B{C{distance}} or B{C{radius}} too small.

       @note: The original C{RDP} method exhaustively searches for the point
              with the largest distance (resulting in complexity M{O(n**2)}
              where M{n} is the number of points).  The B{C{modified}} C{RDP}
              method stops at the first point farther than the B{C{distance}}
              tolerance, significantly reducing the run time (but producing
              results different from the original C{RDP} method).
    '''
    use_modified = bool(modified)
    state = _Sy(points, distance, radius, shortest, indices, **options)
    return state.rdp(use_modified)
def simplifyRW(points, pipe=_1mm, radius=R_M, shortest=False,
                       indices=False, **options):
    '''I{Reumann-Witkam} (RW) simplification of a path of C{LatLon} points,
       dropping any points too close together or inside the I{pipe}
       tolerance along an edge.

       @arg points: Iterable with the path points (C{LatLon}[]).
       @kwarg pipe: Pipe radius, half-width (C{meter}, same units as
                    B{C{radius}}).
       @kwarg radius: Mean earth radius (C{meter}, conventionally).
       @kwarg shortest: If C{True}, use the I{shortest} otherwise the
                        I{perpendicular} distance (C{bool}).
       @kwarg indices: If C{True}, return B{C{points}} indices instead
                       of the simplified points (C{bool}).
       @kwarg options: Optional keyword arguments passed thru to function
                       L{pygeodesy.equirectangular4}.

       @return: Simplified points (C{LatLon}[]) or B{C{points}} indices.

       @raise LimitError: Lat- and/or longitudinal delta exceeds the B{C{limit}},
                          see function L{pygeodesy.equirectangular4}.

       @raise ValueError: Tolerance B{C{pipe}} or B{C{radius}} too small.
    '''
    # the pipe radius serves as the tolerance of the state
    return _Sy(points, pipe, radius, shortest, indices, **options).rw()
def simplifyVW(points, area=_1mm, radius=R_M, indices=False,
                       attr=None, modified=False, **options):
    '''I{Visvalingam-Whyatt} (VW) simplification of a path of C{LatLon}
       points, dropping any points too close or with a triangular
       area not exceeding the I{area} tolerance I{squared}.

       @arg points: Iterable with the path points (C{LatLon}[]).
       @kwarg area: Tolerance (C{meter}, same units as B{C{radius}}).
       @kwarg radius: Mean earth radius (C{meter}, conventionally).
       @kwarg indices: If C{True}, return B{C{points}} indices instead
                       of the simplified points (C{bool}).
       @kwarg attr: Optional, B{C{points}} attribute to save the area
                    value (C{str}).
       @kwarg modified: If C{True}, use the C{modified VW} method (C{bool}),
                        see the B{note}.
       @kwarg options: Optional keyword arguments passed thru to function
                       L{pygeodesy.equirectangular4}.

       @return: Simplified points (C{LatLon}[]) or B{C{points}} indices.

       @raise AttributeError: An B{C{attr}} is invalid for I{Numpy2} B{C{points}}.

       @raise LimitError: Lat- and/or longitudinal delta exceeds the B{C{limit}},
                          see function L{pygeodesy.equirectangular4}.

       @raise ValueError: Tolerance B{C{area}} or B{C{radius}} too small.

       @note: The original C{VW} method exhaustively searches for the point
              with the smallest triangular I{area} (resulting in complexity
              M{O(n**2)} with M{n} the number of points).  The B{C{modified}}
              C{VW} method removes I{all} points with a triangular I{area}
              below the tolerance in each iteration, significantly reducing
              the run time (but producing results different from the original
              C{VW} method).
    '''
    state = _Sy(points, area, radius, False, indices, **options)
    if state.vwn() > 2:  # at least one interior point
        # modified VW drops everything up to the tolerance in a single
        # sweep, original VW only the zero-area points and then ...
        state.vwrm2(state.s2 if modified else 0)
        if not modified:
            # ... keeps removing the smallest area, one at a time
            state.vwrm()
    return state.vwr(attr)
515# **) MIT License
516#
517# Copyright (C) 2016-2025 -- mrJean1 at Gmail -- All Rights Reserved.
518#
519# Permission is hereby granted, free of charge, to any person obtaining a
520# copy of this software and associated documentation files (the "Software"),
521# to deal in the Software without restriction, including without limitation
522# the rights to use, copy, modify, merge, publish, distribute, sublicense,
523# and/or sell copies of the Software, and to permit persons to whom the
524# Software is furnished to do so, subject to the following conditions:
525#
526# The above copyright notice and this permission notice shall be included
527# in all copies or substantial portions of the Software.
528#
529# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
530# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
531# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
532# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
533# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
534# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
535# OTHER DEALINGS IN THE SOFTWARE.