Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for CDSE for downloading Sentinel-2 metadata #24

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ format: clean
@python -m black src/ tests/

lint:
@python -m ruff src/ tests/
@python -m ruff check --extend-select I src/ tests/

lint-fix:
@python -m ruff check --extend-select I --fix src/ tests/

static-check:
@python -m mypy src/ tests/
Expand Down
80 changes: 80 additions & 0 deletions geojson/andalucia.geojson
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
{
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"properties": {},
"geometry": {
"coordinates": [
[
[
-6.448626078204512,
38.11926126799699
],
[
-7.008851427056186,
38.30051876820747
],
[
-7.602031221584724,
37.607602679958944
],
[
-7.437259114106922,
37.136198240254075
],
[
-6.525519827237645,
36.741102901379534
],
[
-6.195975504207809,
36.19340245399495
],
[
-5.6357501569643205,
35.93589430235669
],
[
-4.196739962134018,
36.64421279072401
],
[
-2.142580355056708,
36.6442127870708
],
[
-1.5054613680218267,
37.31113781853382
],
[
-1.9118998680995105,
37.85953977278473
],
[
-2.6259126831380684,
38.575851575134806
],
[
-5.064540062442006,
38.80734628618006
],
[
-5.635750169280186,
38.455522733134984
],
[
-5.591810927934972,
38.2487769551752
],
[
-6.448626078204512,
38.11926126799699
]
]
],
"type": "Polygon"
}
}
]
}
8 changes: 5 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ classifiers=[
]
dependencies = [
"fire>=0.5.0",
"sentinelsat>=1.2.1",
"requests",
"geojson >= 2",
"geomet",
"Shapely==2.0.1",
"pandas==2.0.0",
"pyproj>=3.5.0",
Expand All @@ -30,7 +32,7 @@ dependencies = [
dynamic = ["version"]

[project.optional-dependencies]
dev = ["black==23.1.0", "mypy>=1.0.1", "ruff>=0.0.253"]
dev = ["black==23.1.0", "mypy>=1.0.1", "ruff>=0.5.0"]
tests = ["pytest>=7.0.0", "pytest-cov>=4.0.0"]
gcloud = ["google-cloud-storage>=2.5.0"]
complete = ["greensenti[dev]", "greensenti[tests]", "greensenti[gcloud]"]
Expand All @@ -54,7 +56,7 @@ exclude = '''
)/
'''

[tool.ruff]
[tool.ruff.lint]
select = [
"E", # pycodestyle errors
"W", # pycodestyle warnings
Expand Down
2 changes: 1 addition & 1 deletion src/greensenti/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from dotenv import load_dotenv

__version__ = "0.7.0"
__version__ = "0.8.0"

if (env_file := Path(".env")).is_file():
print(f"Loading settings from file {env_file.absolute()}")
Expand Down
137 changes: 70 additions & 67 deletions src/greensenti/dhus.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
import json
import os
import re
import warnings
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Iterator, List, Union

from sentinelsat.exceptions import LTAError, LTATriggered
from sentinelsat.sentinel import SentinelAPI, geojson_to_wkt, read_geojson
import geomet.wkt
import pandas as pd
import requests

import geojson

try:
GCLOUD_DISABLED = False
Expand All @@ -16,6 +21,38 @@
storage = None


def to_wkt(geojson_file: Path, decimals: int = 4) -> str:
    """
    Converts the geometry stored in a GeoJSON file to a compact WKT string.

    The file may contain a FeatureCollection (only the geometry of its first
    feature is used), a single Feature, or a bare geometry object.

    :param geojson_file: Path to the GeoJSON file.
    :param decimals: Number of decimals kept for each coordinate in the WKT output.
    :return: WKT representation of the geometry with optional spaces removed.
    :raises IndexError: If the file holds a FeatureCollection without features.
    """
    # GeoJSON is UTF-8 encoded (RFC 7946); do not rely on the platform default
    with open(geojson_file, encoding="utf-8") as f:
        geojson_ = geojson.load(f)

    # Extract geometry from GeoJSON, whatever the top-level object is
    if "features" in geojson_:
        features = geojson_["features"]
        if not features:
            raise IndexError(f"GeoJSON file {geojson_file} does not contain any feature")
        geometry = features[0]["geometry"]
    elif "geometry" in geojson_:
        geometry = geojson_["geometry"]
    else:
        geometry = geojson_

    wkt = geomet.wkt.dumps(geometry, decimals=decimals)
    # Strip unnecessary spaces: only a space preceded by a digit is kept,
    # i.e. the separator between the two coordinates of a point
    wkt = re.sub(r"(?<!\d) ", "", wkt)
    return wkt


def get_metadata_cdse(footprint: str, from_date: str, to_date: str, *, timeout: float = 60.0) -> pd.DataFrame:
    """
    Downloads metadata from the Copernicus Data Space Ecosystem (CDSE) API.

    Only Sentinel-2 products whose name contains "MSIL2A" are requested. A single
    request is made with `$top=1000`, so at most 1000 products are returned;
    further result pages are not followed.

    :param footprint: Footprint in WKT format.
    :param from_date: From date %Y-%m-%d (begin date).
    :param to_date: To date %Y-%m-%d (end date).
    :param timeout: Seconds to wait for the API before giving up.
    :return: DataFrame with metadata, one row per product. The `title` column
        mirrors `Name`. The DataFrame is empty when no product matches.
    :raises requests.HTTPError: If the API answers with an error status code.
    :raises requests.Timeout: If the API does not answer within `timeout` seconds.
    """
    filter_expr = " and ".join(
        [
            "Collection/Name eq 'SENTINEL-2'",
            "contains(Name,'MSIL2A')",
            f"OData.CSC.Intersects(area=geography'SRID=4326;{footprint}')",
            f"ContentDate/Start gt {from_date}T00:00:00.000Z",
            f"ContentDate/Start lt {to_date}T00:00:00.000Z",
        ]
    )
    url = f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter={filter_expr}&$top=1000"

    # Without a timeout a stalled connection would block the caller forever
    response = requests.get(url, timeout=timeout)
    # Surface API errors (e.g. a malformed filter) as HTTP errors instead of
    # failing later with a KeyError on the missing "value" entry
    response.raise_for_status()

    products = response.json()["value"]
    if not products:
        # An empty result has no columns at all; keep the ones callers rely on
        return pd.DataFrame(columns=["Name", "title"])

    products_df = pd.DataFrame.from_dict(products)
    products_df["title"] = products_df["Name"]

    return products_df


def download_by_title(
text_match: str,
from_date: str | datetime = None,
Expand Down Expand Up @@ -48,7 +85,7 @@ def download_by_title(
:return: Yields an iterator of dictionaries with the product metadata and download status
"""
yield from download(
geojson=None,
geojson_file=None,
text_match=text_match,
from_date=from_date,
to_date=to_date,
Expand All @@ -63,7 +100,7 @@ def download_by_title(


def download_by_geometry(
geojson: Path,
geojson_file: Path,
from_date: Union[str, datetime] = None,
to_date: Union[str, datetime] = None,
*,
Expand All @@ -81,7 +118,7 @@ def download_by_geometry(
To connect to Google Cloud to download Sentinel-2 data, the environment variable GOOGLE_APPLICATION_CREDENTIALS must be set
as defined here https://googleapis.dev/python/google-api-core/latest/auth.html#overview.

:param geojson: GeoJSON file with product geometries.
:param geojson_file: GeoJSON file with product geometries.
:param from_date: From date %Y-%m-%d (begin date).
:param to_date: To date %Y-%m-%d (end date).
:param max_clouds: Max cloud percentage.
Expand All @@ -94,7 +131,7 @@ def download_by_geometry(
:return: Yields an iterator of dictionaries with the product metadata and download status
"""
yield from download(
geojson=geojson,
geojson_file=geojson_file,
text_match=None,
from_date=from_date,
to_date=to_date,
Expand All @@ -109,7 +146,7 @@ def download_by_geometry(


def download(
geojson: Path = None,
geojson_file: Path = None,
text_match: str | None = "*",
from_date: Union[str, datetime] = None,
to_date: Union[str, datetime] = None,
Expand All @@ -128,7 +165,7 @@ def download(
To connect to Google Cloud to download Sentinel-2 data, the environment variable GOOGLE_APPLICATION_CREDENTIALS must be set
as defined here https://googleapis.dev/python/google-api-core/latest/auth.html#overview.

:param geojson: GeoJSON file with product geometries.
:param geojson_file: GeoJSON file with product geometries.
:param text_match: Regular expression to match the product filename.
:param from_date: From date %Y-%m-%d (begin date).
:param to_date: To date %Y-%m-%d (end date).
Expand Down Expand Up @@ -160,50 +197,49 @@ def download(
elif not to_date:
to_date = datetime.now()

if isinstance(output, str):
output = Path(output)

# When using the Data Space (CDSE) API, dates must be strings
from_date = datetime.strftime(from_date, "%Y-%m-%d")
to_date = datetime.strftime(to_date, "%Y-%m-%d")

# Load geojson file* and download products for an interval of dates.
# *see: http://geojson.io/
if geojson:
geojson = read_geojson(geojson)
footprint = geojson_to_wkt(geojson)
if geojson_file:
footprint = to_wkt(geojson_file)
else:
footprint = None

# Text match uses filename, to avoid users having to add unknown extensions,
# add wildcard at the end
if text_match:
warnings.warn(
"""Text matching is still not supported for the new CDSE API and is being worked on.
More detail can be read here: https://dataspace.copernicus.eu/news/2023-9-28-accessing-sentinel-mission-data-new-copernicus-data-space-ecosystem-apis""",
DeprecationWarning,
stacklevel=2,
)

if not text_match.endswith("*"):
text_match += "*"

sentinel_api = SentinelAPI(dhus_username, dhus_password, dhus_host, show_progressbars=False)
products_df = get_metadata_cdse(footprint, from_date, to_date)

print("Searching for products in scene")

# Search is limited to those scenes that intersect with the AOI
# (area of interest) polygon.
products = sentinel_api.query(
area=footprint,
filename=text_match,
producttype="S2MSI2A",
platformname="Sentinel-2",
cloudcoverpercentage=(0, max_clouds),
date=(from_date, to_date),
)

# Get the list of products.
products_df = sentinel_api.to_dataframe(products)
if skip:
products_df = products_df[~products_df["title"].isin(skip)]
ids = products_df.index

print(f"Found {len(ids)} scenes between {from_date} and {to_date}")

if not gcloud:
for product in copernicous_download(ids, sentinel_api, output=output):
product_json_str = products_df[products_df["id"] == product["id"]].to_json(
orient="records", date_format="iso"
)
product_json = json.loads(product_json_str)[0] # Pandas gives a list of elements always
yield {**product_json, **product}
# WIP: add support for new CDSE
warnings.warn(
"""This method is no longer works, support for the new CDSE API is being worked on.
More detail can be read here: https://dataspace.copernicus.eu/news/2023-9-28-accessing-sentinel-mission-data-new-copernicus-data-space-ecosystem-apis""",
DeprecationWarning,
stacklevel=2,
)
else:
gcloud_api = gcloud_bucket()
# Google cloud doesn't utilize ids, only titles
Expand All @@ -216,39 +252,6 @@ def download(
yield {**product_json, **product}


def copernicous_download(ids: List[str], api: SentinelAPI, output: Path = Path(".")) -> Iterator[dict]:
    """
    Downloads and unzips Sentinel-2 products, given their ids, through the provided API object.

    :param ids: Sentinel-2 product ids.
    :param api: Sentinelsat API object.
    :param output: Output folder.
    :return: Yields an iterator of dictionaries with the product status: "ok" when the
        product was downloaded and unzipped, "triggered" when `LTATriggered` was raised
        and "failed" when `LTAError` was raised.
    """
    for product_id in ids:
        try:
            downloaded = api.download(product_id, str(output))
            unzip_product(output, downloaded["title"])
        except LTATriggered:
            status = "triggered"
        except LTAError:
            status = "failed"
        else:
            # Neither the download nor the unzip step raised
            status = "ok"

        yield {
            "uuid": product_id,
            "status": status,
        }


def unzip_product(output_folder: Path, title: str) -> None:
"""
Unzip a downloaded product inside the same folder
Expand Down Expand Up @@ -298,7 +301,7 @@ def gcloud_download(titles: List[str], api: "storage.Client", output: Path = Pat

for title in titles:
try:
gcloud_path = get_gcloud_path(title)
gcloud_path = get_gcloud_path(title.removesuffix(".SAFE"))
product_folder = Path(gcloud_path).name.removesuffix(".SAFE")

blobs = api.list_blobs("gcp-public-data-sentinel-2", prefix=gcloud_path)
Expand Down
Loading
Loading