-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Speed up OSM processing some more #7583
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
09ec844
28fd5a6
f34d682
50af031
078e5e4
f60dfeb
fd19c8e
2ac5c55
e21b492
a89f768
e1c070e
96ea12d
8d3c685
5cd160d
31dc111
17bb76a
741db14
a4cdee8
bb2dc02
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,7 @@ | |
| import com.google.common.collect.ImmutableMultimap; | ||
| import com.google.common.collect.Multimap; | ||
| import gnu.trove.iterator.TLongIterator; | ||
| import gnu.trove.list.array.TDoubleArrayList; | ||
| import java.util.ArrayList; | ||
| import java.util.Collection; | ||
| import java.util.HashMap; | ||
|
|
@@ -372,7 +373,7 @@ private void buildBasicGraph( | |
| IntersectionVertex fromVertex = null; | ||
| IntersectionVertex toVertex = null; | ||
|
|
||
| ArrayList<Coordinate> segmentCoordinates = new ArrayList<>(); | ||
| TDoubleArrayList segmentCoordinates = new TDoubleArrayList(100); | ||
|
|
||
| /* | ||
| * Traverse through all the nodes of this edge. For nodes which are not shared with any other edge, do not create endpoints -- just | ||
|
|
@@ -412,7 +413,8 @@ private void buildBasicGraph( | |
| * the only processing we do on other nodes is to accumulate their geometry | ||
| */ | ||
| if (segmentCoordinates.isEmpty()) { | ||
| segmentCoordinates.add(osmStartNode.getCoordinate()); | ||
| segmentCoordinates.add(osmStartNode.lon); | ||
| segmentCoordinates.add(osmStartNode.lat); | ||
| } | ||
|
|
||
| if ( | ||
|
|
@@ -425,14 +427,14 @@ private void buildBasicGraph( | |
| osmEndNode.isEntrance() || | ||
| vertexGenerator.nodesInBarrierWays().containsKey(osmEndNode) | ||
| ) { | ||
| segmentCoordinates.add(osmEndNode.getCoordinate()); | ||
| segmentCoordinates.add(osmEndNode.lon); | ||
| segmentCoordinates.add(osmEndNode.lat); | ||
|
|
||
| geometry = GeometryUtils.getGeometryFactory().createLineString( | ||
| segmentCoordinates.toArray(new Coordinate[0]) | ||
| ); | ||
| geometry = GeometryUtils.makeLineString(segmentCoordinates.toArray()); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If only Java had compound value types... any day now. |
||
| segmentCoordinates.clear(); | ||
| } else { | ||
| segmentCoordinates.add(osmEndNode.getCoordinate()); | ||
| segmentCoordinates.add(osmEndNode.lon); | ||
| segmentCoordinates.add(osmEndNode.lat); | ||
| continue; | ||
| } | ||
|
|
||
|
|
@@ -554,28 +556,23 @@ private void buildBarrierEdges(VertexGenerator vertexGenerator) { | |
|
|
||
| private Optional<Platform> getPlatform(OsmDatabase osmdb, OsmWay way) { | ||
| var references = way.getMultiTagValues(params.boardingAreaRefTags()); | ||
| if (way.isBoardingLocation() && !references.isEmpty()) { | ||
| var nodeRefs = way.getNodeRefs(); | ||
| var size = nodeRefs.size(); | ||
| var nodes = new Coordinate[size]; | ||
| for (int i = 0; i < size; i++) { | ||
| nodes[i] = osmdb.getNode(nodeRefs.get(i)).getCoordinate(); | ||
| } | ||
| if (!way.isBoardingLocation() || references.isEmpty()) { | ||
| return Optional.empty(); | ||
| } | ||
| var nodeRefs = way.getNodeRefs(); | ||
| var size = nodeRefs.size(); | ||
| var nodes = new Coordinate[size]; | ||
| for (int i = 0; i < size; i++) { | ||
| nodes[i] = osmdb.getNode(nodeRefs.get(i)).getCoordinate(); | ||
| } | ||
|
|
||
| var geometryFactory = GeometryUtils.getGeometryFactory(); | ||
| var geometryFactory = GeometryUtils.getGeometryFactory(); | ||
|
|
||
| var geometry = geometryFactory.createLineString(nodes); | ||
| var geometry = geometryFactory.createLineString(nodes); | ||
|
|
||
| return Optional.of( | ||
| new Platform( | ||
| params.edgeNamer().getName(way, "platform " + way.getId()), | ||
| geometry, | ||
| references | ||
| ) | ||
| ); | ||
| } else { | ||
| return Optional.empty(); | ||
| } | ||
| return Optional.of( | ||
| new Platform(params.edgeNamer().getName(way, "platform " + way.getId()), geometry, references) | ||
| ); | ||
| } | ||
|
|
||
| private void validateBarriers() { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,9 +10,9 @@ | |
|
|
||
| import java.time.Duration; | ||
| import java.time.format.DateTimeParseException; | ||
| import java.util.Arrays; | ||
| import java.util.Collections; | ||
| import java.util.HashMap; | ||
| import java.util.HashSet; | ||
| import java.util.Map; | ||
| import java.util.Objects; | ||
| import java.util.Optional; | ||
|
|
@@ -21,7 +21,6 @@ | |
| import java.util.function.Consumer; | ||
| import java.util.regex.Matcher; | ||
| import java.util.regex.Pattern; | ||
| import java.util.stream.Collectors; | ||
| import javax.annotation.Nullable; | ||
| import org.opentripplanner.core.model.accessibility.Accessibility; | ||
| import org.opentripplanner.core.model.i18n.I18NString; | ||
|
|
@@ -37,6 +36,7 @@ | |
| */ | ||
| public abstract class OsmEntity { | ||
|
|
||
| private static final Pattern I18N_PATTERN = Pattern.compile("\\{(.*?)}"); | ||
| /** | ||
| * highway=* values that we don't want to even consider when building the graph. | ||
| */ | ||
|
|
@@ -133,6 +133,24 @@ public abstract class OsmEntity { | |
| */ | ||
| protected static final Set<String> CHECKED_MODES = Set.of("foot", "bicycle", "motorcar"); | ||
|
|
||
| private static final Set<String> WALK_ONLY_HIGHWAY_VALUES = Set.of("footway", "step", "corridor"); | ||
| private static final Set<String> NO_ACCESS_VALUES = Set.of("no", "license", "dismount"); | ||
| private static final Set<String> HIGHWAY_BOARDING_LOCATION_VALUES = Set.of( | ||
| "platform", | ||
| "bus_stop" | ||
| ); | ||
| private static final Set<String> RAILWAY_BOARDING_LOCATION_VALUES = Set.of( | ||
| "tram_stop", | ||
| "station", | ||
| "halt" | ||
| ); | ||
| private static final Set<String> AMENITY_BOARDING_LOCATION_VALUES = Set.of( | ||
| "bus_station", | ||
| "amenity", | ||
| "ferry_terminal" | ||
| ); | ||
| private static final Set<String> RAILWAY_PLATFORM_VALUES = Set.of("platform", "platform_edge"); | ||
|
|
||
| /** | ||
| * Mapping for the fallback key for checking access restrictions for each access mode in OSM | ||
| * However, access is not included because we are skeptical of access=yes tags. | ||
|
|
@@ -145,8 +163,6 @@ public abstract class OsmEntity { | |
| "bicycle", | ||
| "vehicle" | ||
| ); | ||
| private static final Set<String> WALK_ONLY_HIGHWAYS = Set.of("footway", "step", "corridor"); | ||
| private static final Set<String> NO_ACCESS_TAGS = Set.of("no", "license", "dismount"); | ||
| private static final Map<StreetTraversalPermission, String> OSM_TAGS_FOR_TRAVERSAL_PERMISSION = | ||
| Map.of( | ||
| StreetTraversalPermission.CAR, | ||
|
|
@@ -157,6 +173,11 @@ public abstract class OsmEntity { | |
| "foot" | ||
| ); | ||
|
|
||
| /// This is nullable for performance reasons. | ||
| /// | ||
| /// You could use an empty map, but using null allows you to skip the lower-casing and hash lookup | ||
| /// per tag, which is the hottest path during OSM processing. | ||
| @Nullable | ||
| private final Map<String, String> tags; | ||
|
Comment on lines
+180
to
181
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to make this non-nullable? Constantly calling the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is a micro(?)-optimization. If you use a
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know that using null is not super-idiomatic in our current code base (I'm its biggest hater) but in this case it's worth it for performance.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If it makes sense for performance reasons, I think its fine. Maybe a comment should be added every time it is used as a performance optimization?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Another way would be to create a custom map implementation that null checks behind the scenes. Do whatever you think makes sense |
||
|
|
||
| protected final long id; | ||
|
|
@@ -175,7 +196,7 @@ public abstract class OsmEntity { | |
| /** | ||
| * Constructor for immutable OsmEntity | ||
| */ | ||
| protected OsmEntity(long id, Map<String, String> tags, OsmProvider osmProvider) { | ||
| protected OsmEntity(long id, @Nullable Map<String, String> tags, OsmProvider osmProvider) { | ||
| this.id = id; | ||
| // calling Map.copyOf here costs about 10% of parsing performance, so we use | ||
| // Collections.unmodifiableMap in the getter | ||
|
|
@@ -202,20 +223,31 @@ public long getId() { | |
| * The tags of an entity (immutable). | ||
| */ | ||
| public Map<String, String> getTags() { | ||
| return Collections.unmodifiableMap(tags); | ||
| if (this.isTagless()) { | ||
| return Map.of(); | ||
| } else { | ||
| return Collections.unmodifiableMap(tags); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Is the tag defined? | ||
| */ | ||
| public boolean hasTag(String tag) { | ||
| return getTag(tag) != null; | ||
| public final boolean hasTag(String tag) { | ||
| return !this.isTagless() && getTag(tag) != null; | ||
| } | ||
|
|
||
| /** | ||
| * Does the entity contain any tags at all? | ||
| */ | ||
| final boolean isTagless() { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. My understanding from other review comments is that the nullable tags field and this function exist largely to avoid String.toLowercase calls. As far as I know, OSM tags are case-sensitive and the vast majority of tag keys are lower case. Any non-lower-case version of a typically lower-case key would be considered a new nonstandard key and show up as such in the tag stats. The case insensitivity in OTP appears to be an adaptation to poor data quality in the early era of OTP, and may date all the way back to #500. I suspect OSM data quality has increased significantly in the intervening 15 years. Would it be possible to performance-test a version using a canonical empty map instead of null and eliminating all toLowerCase calls and all null checks? I do wonder if it would be just as fast and significantly simpler.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, this appears to bother both you and @VillePihlava quite a bit, so let me give a long form explanation. For well over a decade null was used in the OSM code to mean "no tags". I started using the empty map in my previous OSM PR #7537 but later noticed that there was a performance problem with it. For this reason, I re-established the previous state of using null. I understand that it's not super-idiomatic to how we use collections nowadays. I don't have exact numbers but I could see a small speed up when switching back to Besides, it's a well-encapsulated variable and the null never escapes this class. Your point about lowercasing is correct, but when I tried switching it off I noticed that a test failed so I never pursued that avenue any further. There are a handful of tags we use like It could probably be removed in the hottest inner loop, but it requires some risk management and careful investigation. In any case, that would be a functional change that I would like to keep separate from a performance optimization that aims to keep everything as it was before. Given all of the above, I politely request that you let me keep the method/null value as-is. If you feel that it ought to be done, I can do that in a follow-up.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is one more aspect to
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, use of null references doesn't particularly bother me. My review was a comment, not a request for changes to your PR. I had the impression complexity was being introduced just to continue supporting case-insensitive behavior in a place that is by specification supposed to be case-sensitive, and if the case-insensitive behavior was unnecessary maybe the problem could be eliminated. Since you've already considered this and decided case-insensitive behavior still needed, no change is necessary. |
||
| return this.tags == null; | ||
| } | ||
|
|
||
| /** | ||
| * Determines if a tag contains a false value. 'no', 'false', and '0' are considered false. | ||
| */ | ||
| public boolean isTagFalse(String tag) { | ||
| public final boolean isTagFalse(String tag) { | ||
| return isFalse(getTag(tag)); | ||
| } | ||
|
|
||
|
|
@@ -235,17 +267,10 @@ public Accessibility explicitWheelchairAccessibility() { | |
| /** | ||
| * Determines if a tag contains a true value. 'yes', 'true', and '1' are considered true. | ||
| */ | ||
| public boolean isTagTrue(String tag) { | ||
| public final boolean isTagTrue(String tag) { | ||
| return isTrue(getTag(tag)); | ||
| } | ||
|
|
||
| /** | ||
| * Returns true if bicycle dismounts are forced. | ||
| */ | ||
| public boolean isBicycleDismountForced() { | ||
| return isTag("bicycle", "dismount"); | ||
| } | ||
|
|
||
| public boolean isSidewalk() { | ||
| return isTag("footway", "sidewalk") && isTag("highway", "footway"); | ||
| } | ||
|
|
@@ -270,6 +295,9 @@ protected Optional<Permission> checkModePermission(String mode) { | |
| * or a parent mode, either with a directional suffix or not, empty if it is not specified. | ||
| */ | ||
| protected Optional<Permission> checkModePermission(String mode, TraverseDirection direction) { | ||
| if (isTagless()) { | ||
| return Optional.empty(); | ||
| } | ||
| // check if the exact directional tag allows or denies access | ||
| if (direction != DIRECTIONLESS) { | ||
| if (isExplicitlyAllowed(mode + direction.tagSuffix())) { | ||
|
|
@@ -309,7 +337,10 @@ protected boolean isExplicitlyAllowed(String key) { | |
| * Returns null if tag is not present. | ||
| */ | ||
| @Nullable | ||
| public String getTag(String tag) { | ||
| public final String getTag(String tag) { | ||
| if (this.isTagless()) { | ||
| return null; | ||
| } | ||
| tag = tag.toLowerCase(); | ||
| return tags.get(tag); | ||
|
VillePihlava marked this conversation as resolved.
|
||
| } | ||
|
|
@@ -470,8 +501,8 @@ public OptionalInt parseIntOrBoolean(String tag, Consumer<String> errorHandler) | |
| /** | ||
| * Checks if a tag contains the specified value. | ||
| */ | ||
| public boolean isTag(String tag, String value) { | ||
| return value != null && value.equals(tags.get(tag.toLowerCase())); | ||
| public final boolean isTag(String tag, String value) { | ||
| return !isTagless() && value != null && value.equals(tags.get(tag.toLowerCase())); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -492,7 +523,7 @@ public boolean isOneOfTags(String key, Set<String> oneOfTags) { | |
| */ | ||
| @Nullable | ||
| public I18NString getAssumedName() { | ||
| if (tags.containsKey("name")) { | ||
| if (hasTag("name")) { | ||
| return TranslatedString.getDeduplicatedI18NString( | ||
| this.generateI18NForPattern("{name}"), | ||
| false | ||
|
|
@@ -506,8 +537,9 @@ public I18NString getAssumedName() { | |
| if (creativeName != null) { | ||
| return this.creativeName; | ||
| } | ||
| if (tags.containsKey("ref")) { | ||
| return new NonLocalizedString(tags.get("ref")); | ||
| var ref = getTag("ref"); | ||
| if (ref != null) { | ||
| return new NonLocalizedString(ref); | ||
| } | ||
| return null; | ||
| } | ||
|
|
@@ -524,7 +556,7 @@ public I18NString getAssumedName() { | |
| public Map<String, String> generateI18NForPattern(String pattern) { | ||
| Map<String, StringBuffer> i18n = new HashMap<>(); | ||
| i18n.put(null, new StringBuffer()); | ||
| Matcher matcher = Pattern.compile("\\{(.*?)}").matcher(pattern); | ||
| Matcher matcher = I18N_PATTERN.matcher(pattern); | ||
|
|
||
| int lastEnd = 0; | ||
| while (matcher.find()) { | ||
|
|
@@ -621,7 +653,7 @@ public Optional<TraverseDirection> isOneWay(@Nullable String mode) { | |
| return Optional.empty(); | ||
| } | ||
|
|
||
| if ("foot".equals(mode) && !isOneOfTags("highway", WALK_ONLY_HIGHWAYS)) { | ||
| if ("foot".equals(mode) && !isOneOfTags("highway", WALK_ONLY_HIGHWAY_VALUES)) { | ||
| return Optional.empty(); | ||
| } | ||
|
|
||
|
|
@@ -705,14 +737,14 @@ public boolean isParkAndRide() { | |
| * @return whether the node is a place used to board a public transport vehicle | ||
| */ | ||
| public boolean isBoardingLocation() { | ||
| // the majority of nodes have no tags at all, so this yields a good speed-up | ||
| if (isTagless()) { | ||
| return false; | ||
| } | ||
| return ( | ||
| isTag("highway", "bus_stop") || | ||
| isTag("railway", "tram_stop") || | ||
| isTag("railway", "station") || | ||
| isTag("railway", "halt") || | ||
| isTag("amenity", "bus_station") || | ||
| isTag("amenity", "ferry_terminal") || | ||
| isTag("highway", "platform") || | ||
| isOneOfTags("highway", HIGHWAY_BOARDING_LOCATION_VALUES) || | ||
| isOneOfTags("railway", RAILWAY_BOARDING_LOCATION_VALUES) || | ||
| isOneOfTags("amenity", AMENITY_BOARDING_LOCATION_VALUES) || | ||
| isPlatform() | ||
| ); | ||
| } | ||
|
|
@@ -726,9 +758,7 @@ public boolean isBoardingLocation() { | |
| **/ | ||
| public boolean isPlatform() { | ||
| var isPlatform = | ||
| isTag("public_transport", "platform") || | ||
| isTag("railway", "platform") || | ||
| isTag("railway", "platform_edge"); | ||
| isTag("public_transport", "platform") || isOneOfTags("railway", RAILWAY_PLATFORM_VALUES); | ||
| return isPlatform && !isTag("usage", "tourism"); | ||
| } | ||
|
|
||
|
|
@@ -775,14 +805,21 @@ public String url() { | |
| * Values are split by semicolons. | ||
| */ | ||
| public Set<String> getMultiTagValues(Set<String> refTags) { | ||
| return refTags | ||
| .stream() | ||
| .map(this::getTag) | ||
| .filter(Objects::nonNull) | ||
| .flatMap(v -> Arrays.stream(v.split(";"))) | ||
| .map(String::strip) | ||
| .filter(v -> !v.isBlank()) | ||
| .collect(Collectors.toUnmodifiableSet()); | ||
| // we try to keep the allocations low, so only allocate a small hash set by default | ||
| Set<String> result = HashSet.newHashSet(2); | ||
| for (var tag : refTags) { | ||
| var value = getTag(tag); | ||
| if (value == null) { | ||
| continue; | ||
| } | ||
| for (var part : value.split(";")) { | ||
| var stripped = part.strip(); | ||
| if (!stripped.isBlank()) { | ||
| result.add(stripped); | ||
| } | ||
| } | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| public OsmProvider getOsmProvider() { | ||
|
|
@@ -842,10 +879,11 @@ public boolean isElevator() { | |
| * of other information. | ||
| */ | ||
| public boolean isWheelchairAccessible() { | ||
| if (isTagTrue("wheelchair")) { | ||
| var wheelchairValue = getTag("wheelchair"); | ||
| if (isTrue(wheelchairValue)) { | ||
| return true; | ||
| } | ||
| if (isTagFalse("wheelchair")) { | ||
| if (isFalse(wheelchairValue)) { | ||
| return false; | ||
| } | ||
| if (isOneOfTags("barrier", WHEELCHAIR_INACCESSIBLE_BARRIERS)) { | ||
|
|
@@ -891,7 +929,7 @@ public boolean isExplicitlyUnnamed() { | |
| * Returns true if this tag is explicitly access to this entity. | ||
| */ | ||
| private boolean isExplicitlyDenied(String key) { | ||
| return isOneOfTags(key, NO_ACCESS_TAGS); | ||
| return isOneOfTags(key, NO_ACCESS_VALUES); | ||
| } | ||
|
|
||
| public StreetTraversalPermission getPermission() { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.