Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Stream downloads to save memory #1021

Merged
merged 3 commits into from
Jul 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 5 additions & 13 deletions osmnx/_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,10 +530,10 @@ def _osm_network_download(polygon, network_type, custom_filter):
custom_filter : string
a custom ways filter to be used instead of the network_type presets

Returns
-------
response_jsons : list
list of JSON responses from the Overpass server
Yields
------
response_json : dict
JSON response from the Overpass server
"""
# create a filter to exclude certain kinds of ways based on the requested
# network_type, if provided, otherwise use custom_filter
Expand All @@ -542,8 +542,6 @@ def _osm_network_download(polygon, network_type, custom_filter):
else:
osm_filter = _get_osm_filter(network_type)

response_jsons = []

# create overpass settings string
overpass_settings = _make_overpass_settings()

Expand All @@ -555,17 +553,11 @@ def _osm_network_download(polygon, network_type, custom_filter):
# time. The '>' makes it recurse so we get ways and the ways' nodes.
for polygon_coord_str in polygon_coord_strs:
query_str = f"{overpass_settings};(way{osm_filter}(poly:{polygon_coord_str!r});>;);out;"
response_json = _overpass_request(data={"data": query_str})
response_jsons.append(response_json)
utils.log(
f"Got all network data within polygon from API in {len(polygon_coord_strs)} request(s)"
)
yield _overpass_request(data={"data": query_str})

if settings.cache_only_mode: # pragma: no cover
raise CacheOnlyModeInterrupt("settings.cache_only_mode=True")

return response_jsons


def _osm_features_download(polygon, tags):
"""
Expand Down
30 changes: 13 additions & 17 deletions osmnx/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,8 +548,8 @@ def _create_graph(response_jsons, retain_all=False, bidirectional=False):

Parameters
----------
response_jsons : list
list of dicts of JSON responses from the Overpass API
response_jsons : iterable
iterable of dicts of JSON responses from the Overpass API
retain_all : bool
if True, return the entire graph even if it is not connected.
otherwise, retain only the largest weakly connected component.
Expand All @@ -562,11 +562,6 @@ def _create_graph(response_jsons, retain_all=False, bidirectional=False):
"""
utils.log("Creating graph from downloaded OSM data...")

# make sure we got data back from the server request(s)
if not any(rj["elements"] for rj in response_jsons): # pragma: no cover
msg = "There are no data elements in the server response. Check log and query location/filters."
raise EmptyOverpassResponse(msg)

# create the graph as a MultiDiGraph and set its meta-attributes
metadata = {
"created_date": utils.ts(),
Expand All @@ -576,19 +571,20 @@ def _create_graph(response_jsons, retain_all=False, bidirectional=False):
G = nx.MultiDiGraph(**metadata)

# extract nodes and paths from the downloaded osm data
nodes = {}
paths = {}
for response_json in response_jsons:
nodes_temp, paths_temp = _parse_nodes_paths(response_json)
nodes.update(nodes_temp)
paths.update(paths_temp)
nodes, paths = _parse_nodes_paths(response_json)

# add each osm node to the graph
for node, data in nodes.items():
G.add_node(node, **data)

# add each osm node to the graph
for node, data in nodes.items():
G.add_node(node, **data)
# add each osm way (ie, a path of edges) to the graph
_add_paths(G, paths.values(), bidirectional)

# add each osm way (ie, a path of edges) to the graph
_add_paths(G, paths.values(), bidirectional)
# make sure we got data back from the server request(s)
if not any(G.nodes()): # pragma: no cover
msg = "There are no data elements in the server response. Check log and query location/filters."
raise EmptyOverpassResponse(msg)

# retain only the largest connected component if retain_all is False
if not retain_all:
Expand Down