diff --git a/Makefile b/Makefile index 483601a..4b1dbed 100644 --- a/Makefile +++ b/Makefile @@ -14,7 +14,10 @@ format: clean @python -m black src/ tests/ lint: - @python -m ruff src/ tests/ + @python -m ruff check --extend-select I src/ tests/ + +lint-fix: + @python -m ruff check --extend-select I --fix src/ tests/ static-check: @python -m mypy src/ tests/ diff --git a/geojson/andalucia.geojson b/geojson/andalucia.geojson new file mode 100644 index 0000000..f42d54a --- /dev/null +++ b/geojson/andalucia.geojson @@ -0,0 +1,80 @@ +{ + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "properties": {}, + "geometry": { + "coordinates": [ + [ + [ + -6.448626078204512, + 38.11926126799699 + ], + [ + -7.008851427056186, + 38.30051876820747 + ], + [ + -7.602031221584724, + 37.607602679958944 + ], + [ + -7.437259114106922, + 37.136198240254075 + ], + [ + -6.525519827237645, + 36.741102901379534 + ], + [ + -6.195975504207809, + 36.19340245399495 + ], + [ + -5.6357501569643205, + 35.93589430235669 + ], + [ + -4.196739962134018, + 36.64421279072401 + ], + [ + -2.142580355056708, + 36.6442127870708 + ], + [ + -1.5054613680218267, + 37.31113781853382 + ], + [ + -1.9118998680995105, + 37.85953977278473 + ], + [ + -2.6259126831380684, + 38.575851575134806 + ], + [ + -5.064540062442006, + 38.80734628618006 + ], + [ + -5.635750169280186, + 38.455522733134984 + ], + [ + -5.591810927934972, + 38.2487769551752 + ], + [ + -6.448626078204512, + 38.11926126799699 + ] + ] + ], + "type": "Polygon" + } + } + ] +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index b50bf4b..3ca3456 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,9 @@ classifiers=[ ] dependencies = [ "fire>=0.5.0", - "sentinelsat>=1.2.1", + "requests", + "geojson >= 2", + "geomet", "Shapely==2.0.1", "pandas==2.0.0", "pyproj>=3.5.0", @@ -30,7 +32,7 @@ dependencies = [ dynamic = ["version"] [project.optional-dependencies] -dev = ["black==23.1.0", "mypy>=1.0.1", 
"ruff>=0.0.253"] +dev = ["black==23.1.0", "mypy>=1.0.1", "ruff>=0.5.0"] tests = ["pytest>=7.0.0", "pytest-cov>=4.0.0"] gcloud = ["google-cloud-storage>=2.5.0"] complete = ["greensenti[dev]", "greensenti[tests]", "greensenti[gcloud]"] @@ -54,7 +56,7 @@ exclude = ''' )/ ''' -[tool.ruff] +[tool.ruff.lint] select = [ "E", # pycodestyle errors "W", # pycodestyle warnings diff --git a/src/greensenti/__init__.py b/src/greensenti/__init__.py index 1e3385c..1200b99 100644 --- a/src/greensenti/__init__.py +++ b/src/greensenti/__init__.py @@ -2,7 +2,7 @@ from dotenv import load_dotenv -__version__ = "0.7.0" +__version__ = "0.8.0" if (env_file := Path(".env")).is_file(): print(f"Loading settings from file {env_file.absolute()}") diff --git a/src/greensenti/dhus.py b/src/greensenti/dhus.py index efbe283..c3320c1 100644 --- a/src/greensenti/dhus.py +++ b/src/greensenti/dhus.py @@ -1,12 +1,17 @@ import json import os +import re +import warnings import zipfile from datetime import datetime from pathlib import Path from typing import Iterator, List, Union -from sentinelsat.exceptions import LTAError, LTATriggered -from sentinelsat.sentinel import SentinelAPI, geojson_to_wkt, read_geojson +import geomet.wkt +import pandas as pd +import requests + +import geojson try: GCLOUD_DISABLED = False @@ -16,6 +21,38 @@ storage = None +def to_wkt(geojson_file: Path, decimals: int = 4) -> str: + with open(geojson_file) as f: + geojson_ = geojson.load(f) + + # Extract geometry from GeoJSON + geometry = geojson_["features"][0]["geometry"] + + wkt = geomet.wkt.dumps(geometry, decimals=decimals) + # Strip unnecessary spaces + wkt = re.sub(r"(? pd.DataFrame: + """ + Downloads metadata from the Copernicus Data Space Ecosystem (CDSE) API. + + :param footprint: Footprint in WKT format. + :param from_date: From date %Y-%m-%d (begin date). + :param to_date: To date %Y-%m-%d (end date). 
+ :return: DataFrame with metadata + """ + response = requests.get( + f"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq 'SENTINEL-2' and contains(Name,'MSIL2A') and OData.CSC.Intersects(area=geography'SRID=4326;{footprint}') and ContentDate/Start gt {from_date}T00:00:00.000Z and ContentDate/Start lt {to_date}T00:00:00.000Z&$top=1000" + ).json() + + products_df = pd.DataFrame.from_dict(response["value"]) + products_df["title"] = products_df["Name"] + + return products_df + + def download_by_title( text_match: str, from_date: str | datetime = None, @@ -48,7 +85,7 @@ def download_by_title( :return: Yields an iterator of dictionaries with the product metadata and download status """ yield from download( - geojson=None, + geojson_file=None, text_match=text_match, from_date=from_date, to_date=to_date, @@ -63,7 +100,7 @@ def download_by_title( def download_by_geometry( - geojson: Path, + geojson_file: Path, from_date: Union[str, datetime] = None, to_date: Union[str, datetime] = None, *, @@ -81,7 +118,7 @@ def download_by_geometry( To connect to Google Cloud to download Sentinel-2 data the enviroment variable GOOGLE_APPLICATION_CREDENTIALS to be set as defined here https://googleapis.dev/python/google-api-core/latest/auth.html#overview. - :param geojson: GeoJSON file with product geometries. + :param geojson_file: GeoJSON file with product geometries. :param from_date: From date %Y-%m-%d (begin date). :param to_date: To date %Y-%m-%d (end date). :param max_clouds: Max cloud percentage. 
@@ -94,7 +131,7 @@ def download_by_geometry( :return: Yields an iterator of dictionaries with the product metadata and download status """ yield from download( - geojson=geojson, + geojson_file=geojson_file, text_match=None, from_date=from_date, to_date=to_date, @@ -109,7 +146,7 @@ def download_by_geometry( def download( - geojson: Path = None, + geojson_file: Path = None, text_match: str | None = "*", from_date: Union[str, datetime] = None, to_date: Union[str, datetime] = None, @@ -128,7 +165,7 @@ def download( To connect to Google Cloud to download Sentinel-2 data the enviroment variable GOOGLE_APPLICATION_CREDENTIALS to be set as defined here https://googleapis.dev/python/google-api-core/latest/auth.html#overview. - :param geojson: GeoJSON file with product geometries. + :param geojson_file: GeoJSON file with product geometries. :param text_match: Regular expresion to match the product filename. :param from_date: From date %Y-%m-%d (begin date). :param to_date: To date %Y-%m-%d (end date). @@ -160,37 +197,35 @@ def download( elif not to_date: to_date = datetime.now() + if isinstance(output, str): + output = Path(output) + + # When using dataspace, they must be strings + from_date = datetime.strftime(from_date, "%Y-%m-%d") + to_date = datetime.strftime(to_date, "%Y-%m-%d") + # Load geojson file* and download products for an interval of dates. # *see: http://geojson.io/ - if geojson: - geojson = read_geojson(geojson) - footprint = geojson_to_wkt(geojson) + if geojson_file: + footprint = to_wkt(geojson_file) else: footprint = None # Text match uses filename, to avoid users having to add unknown extensions, # add wildcard at the end if text_match: + warnings.warn( + """Text matching is still not supported for the new CDSE API and is being worked on. 
+More detail can be read here: https://dataspace.copernicus.eu/news/2023-9-28-accessing-sentinel-mission-data-new-copernicus-data-space-ecosystem-apis""", + DeprecationWarning, + stacklevel=2, + ) + if not text_match.endswith("*"): text_match += "*" - sentinel_api = SentinelAPI(dhus_username, dhus_password, dhus_host, show_progressbars=False) + products_df = get_metadata_cdse(footprint, from_date, to_date) - print("Searching for products in scene") - - # Search is limited to those scenes that intersect with the AOI - # (area of interest) polygon. - products = sentinel_api.query( - area=footprint, - filename=text_match, - producttype="S2MSI2A", - platformname="Sentinel-2", - cloudcoverpercentage=(0, max_clouds), - date=(from_date, to_date), - ) - - # Get the list of products. - products_df = sentinel_api.to_dataframe(products) if skip: products_df = products_df[~products_df["title"].isin(skip)] ids = products_df.index @@ -198,12 +233,13 @@ def download( print(f"Found {len(ids)} scenes between {from_date} and {to_date}") if not gcloud: - for product in copernicous_download(ids, sentinel_api, output=output): - product_json_str = products_df[products_df["id"] == product["id"]].to_json( - orient="records", date_format="iso" - ) - product_json = json.loads(product_json_str)[0] # Pandas gives a list of elements always - yield {**product_json, **product} + # WIP: add support for new CDSE + warnings.warn( + """This method no longer works, support for the new CDSE API is being worked on. 
+More detail can be read here: https://dataspace.copernicus.eu/news/2023-9-28-accessing-sentinel-mission-data-new-copernicus-data-space-ecosystem-apis""", + DeprecationWarning, + stacklevel=2, + ) else: gcloud_api = gcloud_bucket() # Google cloud doesn't utilize ids, only titles @@ -216,39 +252,6 @@ def download( yield {**product_json, **product} -def copernicous_download(ids: List[str], api: SentinelAPI, output: Path = Path(".")) -> Iterator[dict]: - """ - Downloads a list of Sentinel-2 products by a list of titles from Google Cloud. - - :param titles: Sentinel-2 product ids. - :param api: Sentinelsat API object. - :param output: Output folder. - :return: Yields an iterator of dictionaries with the product status - """ - - for id_ in ids: - try: - product_info = api.download(id_, str(output)) - - unzip_product(output, product_info["title"]) - - # If there is no error, yield "ok" - yield { - "uuid": id_, - "status": "ok", - } - except LTATriggered: - yield { - "uuid": id_, - "status": "triggered", - } - except LTAError: - yield { - "uuid": id_, - "status": "failed", - } - - def unzip_product(output_folder: Path, title: str) -> None: """ Unzip a downloaded product inside the same folder @@ -298,7 +301,7 @@ def gcloud_download(titles: List[str], api: "storage.Client", output: Path = Pat for title in titles: try: - gcloud_path = get_gcloud_path(title) + gcloud_path = get_gcloud_path(title.removesuffix(".SAFE")) product_folder = Path(gcloud_path).name.removesuffix(".SAFE") blobs = api.list_blobs("gcp-public-data-sentinel-2", prefix=gcloud_path) diff --git a/src/greensenti/raster.py b/src/greensenti/raster.py index ab9ca8c..b0ca125 100644 --- a/src/greensenti/raster.py +++ b/src/greensenti/raster.py @@ -8,10 +8,11 @@ from rasterio import mask from rasterio.plot import adjust_band, reshape_as_image, reshape_as_raster from rasterio.warp import Resampling, reproject -from sentinelsat import read_geojson from shapely.geometry import Polygon, shape from shapely.ops import 
transform +import geojson + def crop_by_shape(filename: Path, geom: Polygon, output: str, override_no_data: float | None = None) -> None: """ @@ -101,7 +102,7 @@ def transform_image(band: Path, color_map: Optional[str], output: Path) -> None: def apply_mask( filename: Path, - geojson: Path, + geojson_file: Path, geojson_crs: str = "epsg:4326", output: Path | None = None, override_no_data: float | None = None, @@ -110,7 +111,7 @@ def apply_mask( Crop image data (jp2 imagery file) by shape. :param filename: Path to input file. - :param geojson: Geometry in GeoJSON format. + :param geojson_file: Geometry in GeoJSON format. :param geojson_crs: Coordinate reference system of the GeoJSON file. If different from the input file, the shape will be projected. :param output: Path to output file. If not provided, the output will be saved in the same directory as the input file. :param override_no_data: Value to fill outside the crop area. Useful to separate no data of fill. Raises `ValueError` if this value is present in the raster. 
@@ -125,11 +126,12 @@ def apply_mask( with rasterio.open(filename) as file: dsc = file.crs - geojson = read_geojson(geojson) + with open(geojson_file) as f: + geojson_ = geojson.load(f) if geojson_crs != dsc: - shp = project_shape(geojson["features"][0]["geometry"]) + shp = project_shape(geojson_["features"][0]["geometry"]) else: - shp = geojson["features"][0]["geometry"] + shp = geojson_["features"][0]["geometry"] crop_by_shape(filename=filename, output=str(output), geom=shp, override_no_data=override_no_data) diff --git a/tests/test_band_arithmetic.py b/tests/test_band_arithmetic.py index d30d7da..854a350 100644 --- a/tests/test_band_arithmetic.py +++ b/tests/test_band_arithmetic.py @@ -2,7 +2,6 @@ import numpy as np import pytest - from greensenti.band_arithmetic import ( bri, bsi, diff --git a/tests/test_dhus.py b/tests/test_dhus.py index cbabfc0..2e4847d 100644 --- a/tests/test_dhus.py +++ b/tests/test_dhus.py @@ -1,5 +1,6 @@ -from unittest.mock import MagicMock +from unittest.mock import MagicMock, patch +import pandas as pd from greensenti import dhus @@ -8,32 +9,29 @@ def test_gcloud_path_is_valid(): assert gcloud_path == "L2/tiles/30/S/UF/S2B_MSIL2A_20221005T105819_N0400_R094_T30SUF_20221005T135951.SAFE" -def test_copernicous_download_returns_correct_dataframe(monkeypatch): - # Mock sentinelsat.sentinel.SentinelAPI.download_all - mock = MagicMock() +def test_copernicous_download_returns_correct_dataframe(): + # Sample data to be returned by the mocked response + mock_response = { + "value": [ + {"Name": "S2A_MSIL2A_20210101T000000_N0214_R000_T00X00_20210101T000000", "OtherField": "value1"}, + {"Name": "S2B_MSIL2A_20210102T000000_N0214_R000_T00X00_20210102T000000", "OtherField": "value2"}, + ] + } # Mock unzip - monkeypatch.setattr(dhus, "unzip_product", lambda *args: None) - - status = list( - dhus.copernicous_download( - ids=[ - "uuid1", - "uuid2", - "uuid3", - "uuid4", - "uuid5", - ], - api=mock, + with patch("requests.get") as mock_get: + # Configure 
the mock to return a response with our mock_response JSON data + mock_get.return_value.json = MagicMock(return_value=mock_response) + + result = dhus.get_metadata_cdse( + footprint="POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))", + from_date="2021-01-01", + to_date="2021-01-02", ) - ) - expected_status = [ - {"uuid": "uuid1", "status": "ok"}, - {"uuid": "uuid2", "status": "ok"}, - {"uuid": "uuid3", "status": "ok"}, - {"uuid": "uuid4", "status": "ok"}, - {"uuid": "uuid5", "status": "ok"}, - ] - - assert status == expected_status + # Verify the response + assert isinstance(result, pd.DataFrame) + assert result.shape[0] == len(mock_response["value"]) + assert "title" in result.columns + assert "OtherField" in result.columns + assert result["title"].to_list() == [x["Name"] for x in mock_response["value"]] diff --git a/tests/test_raster.py b/tests/test_raster.py index f8a77c9..82f989d 100644 --- a/tests/test_raster.py +++ b/tests/test_raster.py @@ -40,7 +40,7 @@ def raster(tmp_path: Path) -> Tuple[Path, np.ndarray]: @pytest.fixture -def geojson(tmp_path: Path) -> Path: +def geojson_file(tmp_path: Path) -> Path: """Create a temporary GeoJSON file for testing.""" filepath = tmp_path / "test.geojson" with open(filepath, "w") as f: @@ -127,10 +127,12 @@ def test_transform_image(tmp_path: Path, raster: Tuple[Path, np.ndarray]): assert output_file.stat().st_size > 0 -def test_apply_mask(tmp_path: Path, geojson: Path, raster: Tuple[Path, np.ndarray]): +def test_apply_mask(tmp_path: Path, geojson_file: Path, raster: Tuple[Path, np.ndarray]): input_file, _ = raster output_file = tmp_path / "masked_image.tif" - output_path = apply_mask(filename=input_file, geojson=geojson, geojson_crs="epsg:4326", output=output_file) + output_path = apply_mask( + filename=input_file, geojson_file=geojson_file, geojson_crs="epsg:4326", output=output_file + ) assert output_path.is_file() assert output_path.stat().st_size > 0