Module eoreader.products.optical.landsat_product
Landsat products
Expand source code
""" Landsat products """
import glob
import logging
import os
import tarfile
from abc import abstractmethod
from datetime import datetime
from enum import unique
from typing import Tuple, Union
import geopandas as gpd
import numpy as np
import pandas as pd
from lxml import etree
from rasterio.enums import Resampling
from eoreader.bands.alias import ALL_CLOUDS, CIRRUS, CLOUDS, RAW_CLOUDS, SHADOWS
from eoreader.bands.bands import BandNames
from eoreader.bands.bands import OpticalBandNames as obn
from eoreader.exceptions import InvalidProductError, InvalidTypeError
from eoreader.products.optical.optical_product import OpticalProduct
from eoreader.utils import DATETIME_FMT, EOREADER_NAME
from sertit import files, rasters
from sertit.misc import ListEnum
from sertit.rasters import XDS_TYPE
# Module-level logger, named after the package-wide EOREADER_NAME constant
LOGGER = logging.getLogger(EOREADER_NAME)
@unique
class LandsatProductType(ListEnum):
    """
    Landsat products types.

    One member per sensor family handled by this module; the member value is the
    sensor acronym stored as a string.
    """

    L1_OLCI = "OLCI"
    """OLCI Product Type, for Landsat-8 platform"""

    L1_ETM = "ETM"
    """ETM Product Type, for Landsat-7 platform"""

    L1_TM = "TM"
    """TM Product Type, for Landsat-5 and 4 platforms"""

    L1_MSS = "MSS"
    """MSS Product Type, for Landsat-5, 4, 3, 2, 1 platforms"""
@unique
class LandsatCollection(ListEnum):
    """
    Landsat collection number.

    The member values are the two-character collection codes that
    `LandsatProduct._set_collection` reads from the second-to-last field of the product name.

    See [here](https://www.usgs.gov/media/files/landsat-collection-1-vs-collection-2-summary) for more information
    """

    COL_1 = "01"
    """Collection 1"""

    COL_2 = "02"
    """Collection 2"""
class LandsatProduct(OpticalProduct):
"""
Super Class of Landsat Products
You can use directly the .tar file in case of collection 2 products.
"""
def __init__(
    self, product_path: str, archive_path: str = None, output_path=None
) -> None:
    """
    Create the product: declare the Landsat-specific private attributes,
    then delegate the rest of the initialization to the super class.

    Args:
        product_path (str): Product path
        archive_path (str): Archive path, forwarded as is to the super class
        output_path: Output path, forwarded as is to the super class
    """
    # Private attributes (declared before the super class initialization, filled by `_post_init`)
    self._collection = self._quality_id = None

    # Everything else is handled by the super class
    super().__init__(product_path, archive_path, output_path)
def _set_collection(self):
    """
    Get the Landsat collection from the product name.

    Despite its name, this method does not set anything: it returns the collection,
    which is read from the second-to-last field of the split product name.

    Returns:
        LandsatCollection: Collection of the product
    """
    collection_code = self.split_name[-2]
    return LandsatCollection.from_value(collection_code)
def _post_init(self) -> None:
    """
    Function used to post_init the products
    (setting tile name, collection, quality band identifiers and so on)
    """
    self.tile_name = self._get_tile_name()
    self._collection = self._set_collection()

    if self._collection == LandsatCollection.COL_1:
        # Collection 1: a single QA band; tar.gz files are too slow to be read directly
        quality_id, nodata_band_id, needs_extraction = "_BQA", "_BQA", True
    else:
        # Other collections: two QA bands; fine to read .tar files
        quality_id, nodata_band_id, needs_extraction = "_QA_RADSAT", "_QA_PIXEL", False

    self._quality_id = quality_id
    self._nodata_band_id = nodata_band_id
    self.needs_extraction = needs_extraction

    # Post init done by the super class
    super()._post_init()
def _get_path(self, band_id: str) -> str:
    """
    Get the path of a TIF file, either inside the archive or inside the product directory.

    Args:
        band_id (str): Band ID (i.e. `_B3`, `_BQA`...)

    Returns:
        str: band path
    """
    if not self.is_archived:
        return files.get_file_in_dir(self.path, band_id, extension="TIF")

    # Landsat-7 archives also contain gap_mask files that share the same name structure:
    # the "RT" prefix is used to tell them apart from the real bands
    prefix = "RT" if self.product_type == LandsatProductType.L1_ETM else ""
    return files.get_archived_rio_path(self.path, f".*{prefix}{band_id}.*")
def footprint(self) -> gpd.GeoDataFrame:
    """
    Get real footprint of the products (without nodata, in french == emprise utile)

    ```python
    >>> from eoreader.reader import Reader
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)
    >>> prod.footprint()
       index                                           geometry
    0      0  POLYGON ((366165.000 4899735.000, 366165.000 4...
    ```

    Overload of the generic function because landsat nodata seems to be different in QA than in regular bands.
    Indeed, nodata pixels vary according to the band sensor footprint,
    whereas QA nodata is where at least one band has nodata.

    We chose to keep QA nodata values for the footprint in order to show where all bands are valid.

    **TL;DR: We use the QA nodata value to determine the product's footprint**.

    Returns:
        gpd.GeoDataFrame: Footprint as a GeoDataFrame
    """
    # Vectorize the pixels set to 1 in the band that carries the nodata information
    qa_nodata_path = self._get_path(self._nodata_band_id)
    nodata_polygons = rasters.vectorize(qa_nodata_path, values=1)

    # Overlay the product extent with the nodata polygons
    return gpd.overlay(self.extent(), nodata_polygons, how="symmetric_difference")
def _get_tile_name(self) -> str:
    """
    Retrieve tile name (third field of the split product name)

    ```python
    >>> from eoreader.reader import Reader
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)
    >>> prod.tile_name
    '023030'
    ```

    Returns:
        str: Tile name
    """
    tile_field_index = 2
    return self.split_name[tile_field_index]
@abstractmethod
def _set_product_type(self) -> None:
    """
    Set the product type: hook that has to be overridden by the child classes.

    Raises:
        NotImplementedError: Whenever this base implementation is reached
    """
    message = "This method should be implemented by a child class"
    raise NotImplementedError(message)
def _set_mss_product_type(self, version: int) -> None:
    """
    Set MSS product type and map corresponding bands

    Args:
        version (int): Landsat platform number. Below 4, the bands are numbered from 4 to 7,
            otherwise from 1 to 4.

    Raises:
        InvalidProductError: If "L1" is not found in the product name
    """
    if "L1" not in self.name:
        raise InvalidProductError("Only Landsat level 1 are managed in EOReader")

    self.product_type = LandsatProductType.L1_MSS

    # Band numbering depends on the platform
    if version < 4:
        green, red, vre, nir = "4", "5", "6", "7"
    else:
        green, red, vre, nir = "1", "2", "3", "4"

    band_map = {
        obn.GREEN: green,
        obn.RED: red,
        obn.VRE_1: vre,
        obn.VRE_2: vre,
        obn.VRE_3: vre,
        obn.NIR: nir,
        obn.NARROW_NIR: nir,
    }
    self.band_names.map_bands(band_map)
def _set_tm_product_type(self) -> None:
    """
    Set TM product type and map corresponding bands

    Raises:
        InvalidProductError: If "L1" is not found in the product name
    """
    if "L1" not in self.name:
        raise InvalidProductError("Only Landsat level 1 are managed in EOReader")

    self.product_type = LandsatProductType.L1_TM

    # Both thermal aliases point to the single band 6
    band_map = {
        obn.BLUE: "1",
        obn.GREEN: "2",
        obn.RED: "3",
        obn.NIR: "4",
        obn.NARROW_NIR: "4",
        obn.SWIR_1: "5",
        obn.SWIR_2: "7",
        obn.TIR_1: "6",
        obn.TIR_2: "6",
    }
    self.band_names.map_bands(band_map)
def _set_etm_product_type(self) -> None:
    """
    Set ETM product type and map corresponding bands

    Raises:
        InvalidProductError: If "L1" is not found in the product name
    """
    if "L1" not in self.name:
        raise InvalidProductError("Only Landsat level 1 are managed in EOReader")

    self.product_type = LandsatProductType.L1_ETM

    # The thermal band 6 is delivered as two files (VCID_1 and VCID_2)
    band_map = {
        obn.BLUE: "1",
        obn.GREEN: "2",
        obn.RED: "3",
        obn.NIR: "4",
        obn.NARROW_NIR: "4",
        obn.SWIR_1: "5",
        obn.SWIR_2: "7",
        obn.PAN: "8",
        obn.TIR_1: "6_VCID_1",
        obn.TIR_2: "6_VCID_2",
    }
    self.band_names.map_bands(band_map)
def _set_olci_product_type(self) -> None:
    """
    Set OLCI product type and map corresponding bands

    Raises:
        InvalidProductError: If "L1" is not found in the product name
    """
    if "L1" not in self.name:
        raise InvalidProductError("Only Landsat level 1 are managed in EOReader")

    self.product_type = LandsatProductType.L1_OLCI

    band_map = {
        obn.CA: "1",
        obn.BLUE: "2",
        obn.GREEN: "3",
        obn.RED: "4",
        obn.NIR: "5",
        obn.NARROW_NIR: "5",
        obn.SWIR_1: "6",
        obn.SWIR_2: "7",
        obn.PAN: "8",
        obn.SWIR_CIRRUS: "9",
        obn.TIR_1: "10",
        obn.TIR_2: "11",
    }
    self.band_names.map_bands(band_map)
def get_datetime(self, as_datetime: bool = False) -> Union[str, datetime]:
    """
    Get the product's acquisition datetime, with format `YYYYMMDDTHHMMSS` <-> `%Y%m%dT%H%M%S`

    The date and time are read from the metadata (`DATE_ACQUIRED` and `SCENE_CENTER_TIME`).
    If the metadata file or one of these fields is missing, the date is taken from the product name.

    ```python
    >>> from eoreader.reader import Reader
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)
    >>> prod.get_datetime(as_datetime=True)
    datetime.datetime(2020, 5, 18, 16, 34, 7)
    >>> prod.get_datetime(as_datetime=False)
    '20200518T163407'
    ```

    Args:
        as_datetime (bool): Return the date as a datetime.datetime. If false, returns a string.

    Returns:
        Union[str, datetime.datetime]: Its acquisition datetime
    """
    try:
        mtd = self.read_mtd(force_pd=True)

        # Fetch both raw fields before parsing anything
        raw_day = mtd["DATE_ACQUIRED"].value  # 1982-09-06

        # "16:47:09.5990000Z": %f accepts 6 digits at most, so the 3 last characters are dropped
        raw_time = mtd["SCENE_CENTER_TIME"].value.replace('"', "")[:-3]

        day_str = datetime.strptime(raw_day, "%Y-%m-%d").strftime("%Y%m%d")
        time_str = datetime.strptime(raw_time, "%H:%M:%S.%f").strftime("%H%M%S")
        date = f"{day_str}T{time_str}"
    except (FileNotFoundError, KeyError):
        # Fallback: acquisition date written in the product name
        name_day = datetime.strptime(self.split_name[3], "%Y%m%d")
        date = name_day.strftime(DATETIME_FMT)

    return datetime.strptime(date, DATETIME_FMT) if as_datetime else date
def get_band_paths(self, band_list: list, resolution: float = None) -> dict:
    """
    Return the paths of required bands.

    ```python
    >>> from eoreader.reader import Reader
    >>> from eoreader.bands.alias import *
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)
    >>> prod.get_band_paths([GREEN, RED])
    {
        <OpticalBandNames.GREEN: 'GREEN'>:
        'LC08_L1GT_023030_20200518_20200527_01_T2\\LC08_L1GT_023030_20200518_20200527_01_T2_B3.TIF',
        <OpticalBandNames.RED: 'RED'>:
        'LC08_L1GT_023030_20200518_20200527_01_T2\\LC08_L1GT_023030_20200518_20200527_01_T2_B4.TIF'
    }
    ```

    Args:
        band_list (list): List of the wanted bands
        resolution (float): Useless here

    Returns:
        dict: Dictionary containing the path of each queried band

    Raises:
        InvalidProductError: If a band does not exist for this product or if its file is not found
    """
    paths_by_band = {}
    for wanted_band in band_list:
        if not self.has_band(wanted_band):
            raise InvalidProductError(
                f"Non existing band ({wanted_band.name}) "
                f"for Landsat-{self.product_type.name} products"
            )

        band_nb = self.band_names[wanted_band]
        file_id = f"_B{band_nb}"
        try:
            paths_by_band[wanted_band] = self._get_path(file_id)
        except FileNotFoundError as ex:
            raise InvalidProductError(
                f"Non existing {wanted_band} ({band_nb}) band for {self.path}"
            ) from ex

    return paths_by_band
def read_mtd(
    self, force_pd=False
) -> Union[pd.DataFrame, Tuple[etree._Element, str]]:
    """
    Read Landsat metadata as:

    - a `pandas.DataFrame` whatever its collection is (by default for collection 1)
    - a XML root + its namespace if the product is retrieved from the 2nd collection (by default for collection 2)

    ```python
    >>> from eoreader.reader import Reader
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)

    >>> # COLLECTION 1 : Open metadata as panda DataFrame
    >>> prod.read_mtd()
    NAME                                     ORIGIN  ...    RESAMPLING_OPTION
    value  "Image courtesy of the U.S. Geological Survey"  ...  "CUBIC_CONVOLUTION"
    [1 rows x 197 columns]

    >>> # COLLECTION 2 : Open metadata as XML
    >>> path = r"LC08_L1TP_200030_20201220_20210310_02_T1"  # Collection 2
    >>> prod = Reader().open(path)
    >>> prod.read_mtd()
    (<Element LANDSAT_METADATA_FILE at 0x19229016048>, '')

    >>> # COLLECTION 2 : Force to pandas.DataFrame
    >>> prod.read_mtd(force_pd=True)
    NAME                                     ORIGIN  ...    RESAMPLING_OPTION
    value  "Image courtesy of the U.S. Geological Survey"  ...  "CUBIC_CONVOLUTION"
    [1 rows x 263 columns]
    ```

    Args:
        force_pd (bool): If collection 2, return a pandas.DataFrame instead of a XML root + namespace

    Returns:
        Union[pd.DataFrame, Tuple[etree._Element, str]]:
            Metadata as a one-row pandas DataFrame (read from `*_MTL.txt`),
            or the XML root and its (empty) namespace (read from `*_MTL.xml`)

    Raises:
        FileNotFoundError: If the `*_MTL.txt` file is missing (in the directory or in the archive)
        InvalidProductError: If the `*_MTL.xml` file is not found in the product directory
    """
    # WARNING: always use force_pd in this class !
    as_pd = (self._collection == LandsatCollection.COL_1) or force_pd

    if as_pd:
        mtd_name = f"{self.name}_MTL.txt"
        if self.is_archived:
            # The file is read in memory to be used with pandas.
            # The context manager closes the archive even if the parsing fails.
            with tarfile.open(self.path, "r") as tar_ds:
                members = [f.name for f in tar_ds.getmembers() if mtd_name in f.name]
                if not members:
                    # Same exception as for a non-archived product
                    raise FileNotFoundError(
                        f"Unable to find the metadata file associated with {self.path}"
                    )
                mtd_data = self._parse_mtl_txt(tar_ds.extractfile(members[0]))
        else:
            # FOR COLLECTION 1 AND 2
            mtd_path = os.path.join(self.path, mtd_name)
            if not os.path.isfile(mtd_path):
                raise FileNotFoundError(
                    f"Unable to find the metadata file associated with {self.path}"
                )
            mtd_data = self._parse_mtl_txt(mtd_path)
    else:
        if self.is_archived:
            root = files.read_archived_xml(self.path, f".*{self.name}_MTL.xml")
        else:
            # ONLY FOR COLLECTION 2
            try:
                mtd_file = glob.glob(
                    os.path.join(self.path, f"{self.name}_MTL.xml")
                )[0]

                # pylint: disable=I1101:
                # Module 'lxml.etree' has no 'parse' member, but source is unavailable.
                xml_tree = etree.parse(mtd_file)
                root = xml_tree.getroot()
            except IndexError as ex:
                raise InvalidProductError(
                    f"Metadata file ({self.name}_MTL.xml) not found in {self.path}"
                ) from ex

        # Get namespace
        namespace = ""  # No namespace here

        mtd_data = (root, namespace)

    return mtd_data

@staticmethod
def _parse_mtl_txt(mtd_src) -> pd.DataFrame:
    """
    Parse a `*_MTL.txt` metadata file into a one-row DataFrame (one column per metadata field).

    Args:
        mtd_src: Path of the file, or readable file object (i.e. a member extracted from the archive)

    Returns:
        pd.DataFrame: Metadata, with the field names as columns and `value` as the only row
    """
    mtd_data = pd.read_table(
        mtd_src,
        sep=r"\s=\s",  # Raw string: "\s" is not a valid escape sequence in a regular string
        names=["NAME", "value"],
        skipinitialspace=True,
        engine="python",
    )

    # Workaround an unexpected behaviour in pandas !
    if any(mtd_data.NAME == "="):
        # A file object has been consumed by the first pass: rewind it before reading it again
        if hasattr(mtd_src, "seek"):
            mtd_src.seek(0)

        mtd_data = pd.read_table(
            mtd_src,
            sep="=",
            names=["NAME", "=", "value"],
            usecols=[0, 2],
            skipinitialspace=True,
        )

    # Remove useless rows
    mtd_data = mtd_data[~mtd_data["NAME"].isin(["GROUP", "END_GROUP", "END"])]

    # Set index
    return mtd_data.set_index("NAME").T
def _read_band(
    self,
    path: str,
    resolution: Union[tuple, list, float] = None,
    size: Union[list, tuple] = None,
) -> XDS_TYPE:
    """
    Read band from a dataset.

    Quality bands are read as they are (nearest resampling, cast to uint16).
    The other bands are cast to float32 and scaled with the `REFLECTANCE_MULT_BAND_x` and
    `REFLECTANCE_ADD_BAND_x` coefficients found in the metadata. When a coefficient is
    missing or not a number, the neutral value (1 or 0) is used instead.

    .. WARNING::
        Invalid pixels are not managed here !

    Args:
        path (str): Band path
        resolution (Union[tuple, list, float]): Resolution of the wanted band, in dataset resolution unit (X, Y)
        size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided.

    Returns:
        XDS_TYPE: Radiometrically coherent band, saved as float 32 and its metadata
    """
    # Get the filename, ie: 'LC08_L1TP_200030_20191218_20191226_01_T1_B1'
    if self.is_archived:
        filename = files.get_filename(path.split("!")[-1])
    else:
        filename = files.get_filename(path)

    if self._quality_id in filename or self._nodata_band_id in filename:
        band = rasters.read(
            path,
            resolution=resolution,
            size=size,
            resampling=Resampling.nearest,  # NEAREST TO KEEP THE FLAGS
            masked=False,
        ).astype(np.uint16)
    else:
        # Read band (call superclass generic method)
        band = rasters.read(
            path, resolution=resolution, size=size, resampling=Resampling.bilinear
        ).astype(np.float32)

        # Band ID: everything after the last "_B" (ie. "1", "10", "6_VCID_1").
        # Taking only the last character would confuse B11 with B1 or B6_VCID_2 with B2.
        _, separator, band_name = filename.rpartition("_B")
        if not separator:
            band_name = filename[-1]

        # Open mtd
        mtd_data = self.read_mtd(force_pd=True)

        # Get coeffs to convert DN to reflectance
        coeffs = []
        for coeff_key, neutral_value in (
            ("REFLECTANCE_MULT_BAND_" + band_name, 1),
            ("REFLECTANCE_ADD_BAND_" + band_name, 0),
        ):
            try:
                coeffs.append(float(mtd_data[coeff_key].value))
            except KeyError:
                # No reflectance coefficient for this band in the metadata
                LOGGER.warning(
                    "%s not found in the metadata of %s: band %s is not converted to reflectance.",
                    coeff_key,
                    self.path,
                    band_name,
                )
                coeffs.append(neutral_value)
            except ValueError:
                # Manage NULL values
                coeffs.append(neutral_value)

        c_mul, c_add = coeffs

        # Compute the correct radiometry of the band
        band = c_mul * band + c_add  # Already in float

    return band
# pylint: disable=R0913
# R0913: Too many arguments (6/5) (too-many-arguments)
def _manage_invalid_pixels(
    self,
    band_arr: XDS_TYPE,
    band: obn,
    resolution: float = None,
    size: Union[list, tuple] = None,
) -> XDS_TYPE:
    """
    Manage invalid pixels (Nodata, saturated, defective...)

    Args:
        band_arr (XDS_TYPE): Band array
        band (obn): Band name as an OpticalBandNames
        resolution (float): Band resolution in meters
        size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided.

    Returns:
        XDS_TYPE: Cleaned band array
    """
    # Open QA band
    landsat_qa_path = self._get_path(self._quality_id)
    qa_arr = self._read_band(
        landsat_qa_path, resolution=resolution, size=size
    ).data  # To np array

    if self._collection == LandsatCollection.COL_1:
        # https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-1-level-1-quality-assessment-band
        # Bit ids
        nodata_id = 0  # Fill value
        dropped_id = 1  # Dropped pixel or terrain occlusion
        # Set nodata to every saturated pixel, even if only 1-2 bands are touched by it
        # -> 01 or 10 or 11
        # -> bit 2 or bit 3
        sat_id_1 = 2
        sat_id_2 = 3
        nodata, dropped, sat_1, sat_2 = rasters.read_bit_array(
            qa_arr, [nodata_id, dropped_id, sat_id_1, sat_id_2]
        )
        mask = nodata | dropped | sat_1 | sat_2
    else:
        # https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-2-quality-assessment-bands
        # SATURATED & OTHER PIXELS
        band_id = str(self.band_names[band])

        # Saturation bit of the band.
        # The two ETM+ thermal files have no integer ID: in QA_RADSAT, bit 5 is band 6L and bit 8 band 6H
        # NOTE(review): assumes VCID_1 is the low gain (6L) and VCID_2 the high gain (6H) - confirm
        etm_thermal_sat_ids = {"6_VCID_1": 5, "6_VCID_2": 8}
        if band_id in etm_thermal_sat_ids:
            sat_id = etm_thermal_sat_ids[band_id]
        else:
            sat_id = int(band_id) - 1  # Saturated pixel

        # In QA_RADSAT, bit 11 (terrain occlusion) is defined for OLI/TIRS products,
        # whereas bit 9 (dropped pixel) is defined for the TM and ETM+ ones
        if self.product_type == LandsatProductType.L1_OLCI:
            other_id = 11  # Terrain occlusion
        else:
            other_id = 9  # Dropped pixels

        sat, other = rasters.read_bit_array(qa_arr, [sat_id, other_id])

        # If collection 2, nodata has to be found in pixel QA file
        landsat_stat_path = self._get_path(self._nodata_band_id)
        pixel_arr = self._read_band(
            landsat_stat_path, resolution=resolution, size=size
        ).data
        nodata = np.where(pixel_arr == 1, 1, 0)

        mask = sat | other | nodata

    return self._set_nodata_mask(band_arr, mask)
def _load_bands(
    self,
    band_list: Union[list, BandNames],
    resolution: float = None,
    size: Union[list, tuple] = None,
) -> dict:
    """
    Load bands as arrays with the same resolution (and same metadata).

    Args:
        band_list (list, BandNames): List of the wanted bands (a single band is accepted)
        resolution (float): Band resolution in meters
        size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided.

    Returns:
        dict: Dictionary {band_name, band_xarray}
    """
    # Nothing asked, nothing loaded
    if not band_list:
        return {}

    # A single band is wrapped into a list
    wanted_bands = band_list if isinstance(band_list, list) else [band_list]

    # Open the band files and get the arrays (resampled if needed)
    paths_by_band = self.get_band_paths(wanted_bands)
    return self._open_bands(paths_by_band, resolution=resolution, size=size)
def get_mean_sun_angles(self) -> (float, float):
    """
    Get Mean Sun angles (Azimuth and Zenith angles)

    The metadata stores the sun **elevation**: the zenith angle is its complement (90 - elevation).

    ```python
    >>> from eoreader.reader import Reader
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)
    >>> prod.get_mean_sun_angles()
    (140.80752656, 28.06934195)
    ```

    Returns:
        (float, float): Mean Azimuth and Zenith angle
    """
    # Retrieve angles
    mtd_data = self.read_mtd(force_pd=True)
    azimuth_angle = float(mtd_data.SUN_AZIMUTH.value)

    # Elevation is measured from the horizon, zenith from the vertical
    zenith_angle = 90.0 - float(mtd_data.SUN_ELEVATION.value)

    return azimuth_angle, zenith_angle
@abstractmethod
def _get_condensed_name(self) -> str:
    """
    Get products condensed name ({date}_{platform}_{tile}_{product_type}).

    Declared abstract, but this implementation can be reused by the child classes.

    Returns:
        str: Condensed Landsat name
    """
    name_fields = (
        self.get_datetime(),
        self.platform.name,
        self.tile_name,
        self.product_type.value,
    )
    return "_".join(f"{field}" for field in name_fields)
def _has_cloud_band(self, band: BandNames) -> bool:
    """
    Does this products has the specified cloud band ?

    - [COL 1](https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band)
    - [COL 2](https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-2-quality-assessment-bands)

    Args:
        band (BandNames): Cloud band to check

    Returns:
        bool: True if the cloud band is available for this product type

    Raises:
        InvalidProductError: If the product type is not a known Landsat one
    """
    product_type = self.product_type

    # Every cloud band is considered available for OLCI products
    if product_type == LandsatProductType.L1_OLCI:
        return True

    if product_type in [LandsatProductType.L1_ETM, LandsatProductType.L1_TM]:
        return self._e_tm_has_cloud_band(band)

    if product_type == LandsatProductType.L1_MSS:
        return self._mss_has_cloud_band(band)

    raise InvalidProductError(f"Invalid product type: {self.product_type}")
@staticmethod
def _mss_has_cloud_band(band: BandNames) -> bool:
    """
    Does a MSS product has the specified cloud band ?

    Args:
        band (BandNames): Cloud band to check

    Returns:
        bool: True for the raw clouds, clouds and all clouds bands, False otherwise
    """
    return band in [RAW_CLOUDS, CLOUDS, ALL_CLOUDS]
@staticmethod
def _e_tm_has_cloud_band(band: BandNames) -> bool:
    """
    Does a (E)TM product has the specified cloud band ?

    Args:
        band (BandNames): Cloud band to check

    Returns:
        bool: True for the raw clouds, clouds, all clouds and shadows bands, False otherwise
    """
    return band in [RAW_CLOUDS, CLOUDS, ALL_CLOUDS, SHADOWS]
def _load_clouds(
    self, bands: list, resolution: float = None, size: Union[list, tuple] = None
) -> dict:
    """
    Load cloud files as arrays with the same resolution (and same metadata).

    Read Landsat clouds from the pixel QA mask: `_BQA` for collection 1, `_QA_PIXEL` for collection 2
    (and not `_QA_RADSAT`, which stores the saturation flags and no cloud information).

    See here for clouds_values:

    - [COL 1](https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band)
    - [COL 2](https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-2-quality-assessment-bands)

    Args:
        bands (list): List of the wanted bands
        resolution (int): Band resolution in meters
        size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided.

    Returns:
        dict: Dictionary {band_name, band_xarray}

    Raises:
        InvalidProductError: If the product type is not a known Landsat one
    """
    band_dict = {}

    if bands:
        # Open the pixel QA band: the cloud, shadow and cirrus bits decoded by the sensor-specific loaders
        # live in this band (which is also the one carrying the nodata for both collections)
        landsat_qa_path = self._get_path(self._nodata_band_id)
        qa_arr = self._read_band(landsat_qa_path, resolution=resolution, size=size)

        if self.product_type == LandsatProductType.L1_OLCI:
            band_dict = self._load_olci_clouds(qa_arr, bands)
        elif self.product_type in [
            LandsatProductType.L1_ETM,
            LandsatProductType.L1_TM,
        ]:
            band_dict = self._load_e_tm_clouds(qa_arr, bands)
        elif self.product_type == LandsatProductType.L1_MSS:
            band_dict = self._load_mss_clouds(qa_arr, bands)
        else:
            raise InvalidProductError(f"Invalid product type: {self.product_type}")

    return band_dict
def _load_mss_clouds(self, qa_arr: XDS_TYPE, band_list: list) -> dict:
    """
    Load cloud files as arrays with the same resolution (and same metadata).

    Read Landsat-MSS clouds from QA mask.
    See here for clouds_values:

    - [COL 1](https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band)
    - [COL 2](https://www.usgs.gov/media/files/landsat-1-5-mss-collection-2-level-1-data-format-control-book)

    Args:
        qa_arr (XDS_TYPE): Quality array
        band_list (list): List of the wanted bands

    Returns:
        dict: Dictionary {band_name, band_array}

    Raises:
        InvalidTypeError: If a wanted band is not a MSS cloud band
    """
    # Bit ids: the cloud bit (clouds with high confidence) depends on the collection
    if self._collection == LandsatCollection.COL_1:
        cloud_bit = 4
    else:
        cloud_bit = 3
    fill_bit = 0

    # The mask is only computed when it is asked for
    cloud_mask = None
    if ALL_CLOUDS in band_list or CLOUDS in band_list:
        fill, cloudy = rasters.read_bit_array(qa_arr, [fill_bit, cloud_bit])
        cloud_mask = self._create_mask(qa_arr, cloudy, fill)

    loaded = {}
    for wanted_band in band_list:
        if wanted_band == ALL_CLOUDS or wanted_band == CLOUDS:
            # Only clouds are flagged for MSS: both bands share the same mask
            loaded[wanted_band] = cloud_mask
        elif wanted_band == RAW_CLOUDS:
            loaded[wanted_band] = qa_arr
        else:
            raise InvalidTypeError(
                f"Non existing cloud band for Landsat-MSS sensor: {wanted_band}"
            )

    return loaded
def _load_e_tm_clouds(
    self, qa_arr: XDS_TYPE, band_list: Union[list, BandNames]
) -> dict:
    """
    Load cloud files as arrays with the same resolution (and same metadata).

    Read Landsat-(E)TM clouds from QA mask.
    See here for clouds_values:

    - [COL 1](https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band)
    - [COL 2 TM](https://www.usgs.gov/media/files/landsat-4-5-tm-collection-2-level-1-data-format-control-book)
    - [COL 2 ETM](https://www.usgs.gov/media/files/landsat-7-etm-collection-2-level-1-data-format-control-book)

    Args:
        qa_arr (XDS_TYPE): Quality array
        band_list (list): List of the wanted bands

    Returns:
        dict: Dictionary {band_name, band_array}

    Raises:
        InvalidTypeError: If a wanted band is not a (E)TM cloud band
    """
    # Binary masks, only decoded if at least one mask band is wanted
    nodata = cld = shd = None
    mask_bands = [ALL_CLOUDS, CLOUDS, SHADOWS]
    decode_bits = any(band in mask_bands for band in band_list)

    if decode_bits and self._collection == LandsatCollection.COL_1:
        # Bit ids: fill, clouds with high confidence, and the two shadow confidence bits
        col_1_bits = [0, 4, 7, 8]
        nodata, cld, shd_bit_1, shd_bit_2 = rasters.read_bit_array(qa_arr, col_1_bits)

        # Shadows are kept only when both confidence bits are set
        shd = shd_bit_1 & shd_bit_2
    elif decode_bits:
        # Bit ids: fill, clouds with high confidence, shadows with high confidence
        col_2_bits = [0, 3, 4]
        nodata, cld, shd = rasters.read_bit_array(qa_arr, col_2_bits)

    loaded = {}
    for wanted_band in band_list:
        if wanted_band == ALL_CLOUDS:
            loaded[wanted_band] = self._create_mask(qa_arr, cld | shd, nodata)
        elif wanted_band == SHADOWS:
            loaded[wanted_band] = self._create_mask(qa_arr, shd, nodata)
        elif wanted_band == CLOUDS:
            loaded[wanted_band] = self._create_mask(qa_arr, cld, nodata)
        elif wanted_band == RAW_CLOUDS:
            loaded[wanted_band] = qa_arr
        else:
            raise InvalidTypeError(
                f"Non existing cloud band for Landsat-(E)TM sensor: {wanted_band}"
            )

    return loaded
def _load_olci_clouds(
    self, qa_arr: XDS_TYPE, band_list: Union[list, BandNames]
) -> dict:
    """
    Load cloud files as arrays with the same resolution (and same metadata).

    Read Landsat-OLCI clouds from QA mask.
    See here for clouds_values:

    - [COL 1](https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band)
    - [COL 2](https://www.usgs.gov/media/files/landsat-8-level-1-data-format-control-book)

    Args:
        qa_arr (XDS_TYPE): Quality array
        band_list (list): List of the wanted bands

    Returns:
        dict: Dictionary {band_name, band_array}

    Raises:
        InvalidTypeError: If a wanted band is not an OLCI cloud band
    """
    bands = {}

    # Get clouds and nodata
    nodata = None
    cld = None
    shd = None
    cir = None

    # CIRRUS also needs the binary masks: without it in this list, asking only for the
    # cirrus band would build its mask from undefined (None) arrays
    if any(band in [ALL_CLOUDS, CLOUDS, SHADOWS, CIRRUS] for band in band_list):
        if self._collection == LandsatCollection.COL_1:
            # Bit ids
            nodata_id = 0
            cloud_id = 4  # Clouds with high confidence
            shd_conf_1_id = 7
            shd_conf_2_id = 8
            cir_conf_1_id = 11
            cir_conf_2_id = 12

            # Read binary mask
            (
                nodata,
                cld,
                shd_conf_1,
                shd_conf_2,
                cir_conf_1,
                cir_conf_2,
            ) = rasters.read_bit_array(
                qa_arr,
                [
                    nodata_id,
                    cloud_id,
                    shd_conf_1_id,
                    shd_conf_2_id,
                    cir_conf_1_id,
                    cir_conf_2_id,
                ],
            )

            # Shadows and cirrus are kept only when both confidence bits are set
            shd = shd_conf_1 & shd_conf_2
            cir = cir_conf_1 & cir_conf_2
        else:
            # Bit ids
            nodata_id = 0
            cloud_id = 3  # Clouds with high confidence
            shd_id = 4  # Shadows with high confidence
            cir_id = 2  # Cirrus with high confidence
            nodata, cld, shd, cir = rasters.read_bit_array(
                qa_arr, [nodata_id, cloud_id, shd_id, cir_id]
            )

    for band in band_list:
        if band == ALL_CLOUDS:
            bands[band] = self._create_mask(qa_arr, cld | shd | cir, nodata)
        elif band == SHADOWS:
            bands[band] = self._create_mask(qa_arr, shd, nodata)
        elif band == CLOUDS:
            bands[band] = self._create_mask(qa_arr, cld, nodata)
        elif band == CIRRUS:
            bands[band] = self._create_mask(qa_arr, cir, nodata)
        elif band == RAW_CLOUDS:
            bands[band] = qa_arr
        else:
            raise InvalidTypeError(
                f"Non existing cloud band for Landsat-OLCI sensor: {band}"
            )

    return bands
Classes
class LandsatProductType (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
Landsat products types
Expand source code
class LandsatProductType(ListEnum):
    """
    Landsat products types.

    One member per sensor family; the member value is the sensor acronym stored as a string.
    """

    L1_OLCI = "OLCI"
    """OLCI Product Type, for Landsat-8 platform"""

    L1_ETM = "ETM"
    """ETM Product Type, for Landsat-7 platform"""

    L1_TM = "TM"
    """TM Product Type, for Landsat-5 and 4 platforms"""

    L1_MSS = "MSS"
    """MSS Product Type, for Landsat-5, 4, 3, 2, 1 platforms"""
Ancestors
- sertit.misc.ListEnum
- enum.Enum
Class variables
var L1_OLCI
-
OLCI Product Type, for Landsat-8 platform
var L1_ETM
-
ETM Product Type, for Landsat-7 platform
var L1_TM
-
TM Product Type, for Landsat-5 and 4 platforms
var L1_MSS
-
MSS Product Type, for Landsat-5, 4, 3, 2, 1 platforms
class LandsatCollection (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
Landsat collection number. See here for more information
Expand source code
class LandsatCollection(ListEnum):
    """
    Landsat collection number.

    The member values are the two-character collection codes ("01", "02").

    See [here](https://www.usgs.gov/media/files/landsat-collection-1-vs-collection-2-summary) for more information
    """

    COL_1 = "01"
    """Collection 1"""

    COL_2 = "02"
    """Collection 2"""
Ancestors
- sertit.misc.ListEnum
- enum.Enum
Class variables
var COL_1
-
Collection 1
var COL_2
-
Collection 2
class LandsatProduct (product_path, archive_path=None, output_path=None)
-
Super Class of Landsat Products
You can use directly the .tar file in case of collection 2 products.
Expand source code
class LandsatProduct(OpticalProduct): """ Super Class of Landsat Products You can use directly the .tar file in case of collection 2 products. """ def __init__( self, product_path: str, archive_path: str = None, output_path=None ) -> None: # Private self._collection = None self._quality_id = None # Initialization from the super class super().__init__(product_path, archive_path, output_path) def _set_collection(self): """Set Landsat collection""" return LandsatCollection.from_value(self.split_name[-2]) def _post_init(self) -> None: """ Function used to post_init the products (setting sensor type, band names and so on) """ self.tile_name = self._get_tile_name() self._collection = self._set_collection() if self._collection == LandsatCollection.COL_1: self._quality_id = "_BQA" self._nodata_band_id = "_BQA" self.needs_extraction = True # Too slow to read directly tar.gz files else: self._quality_id = "_QA_RADSAT" self._nodata_band_id = "_QA_PIXEL" self.needs_extraction = False # Fine to read .tar files # Post init done by the super class super()._post_init() def _get_path(self, band_id: str) -> str: """ Get either the archived path of the normal path of a tif file Args: band_id (str): Band ID Returns: str: band path """ if self.is_archived: # Because of gap_mask files that have the same name structure and exists only for L7 if self.product_type == LandsatProductType.L1_ETM: regex = f".*RT{band_id}.*" else: regex = f".*{band_id}.*" path = files.get_archived_rio_path(self.path, regex) else: path = files.get_file_in_dir(self.path, band_id, extension="TIF") return path def footprint(self) -> gpd.GeoDataFrame: """ Get real footprint of the products (without nodata, in french == emprise utile) ```python >>> from eoreader.reader import Reader >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2" >>> prod = Reader().open(path) >>> prod.footprint() index geometry 0 0 POLYGON ((366165.000 4899735.000, 366165.000 4... 
``` Overload of the generic function because landsat nodata seems to be different in QA than in regular bands. Indeed, nodata pixels vary according to the band sensor footprint, whereas QA nodata is where at least one band has nodata. We chose to keep QA nodata values for the footprint in order to show where all bands are valid. **TL;DR: We use the QA nodata value to determine the product's footprint**. Returns: gpd.GeoDataFrame: Footprint as a GeoDataFrame """ nodata_band = self._get_path(self._nodata_band_id) # Vectorize the nodata band nodata = rasters.vectorize(nodata_band, values=1) # Clip the extent with the nodata footprint = gpd.overlay(self.extent(), nodata, how="symmetric_difference") return footprint def _get_tile_name(self) -> str: """ Retrieve tile name ```python >>> from eoreader.reader import Reader >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2" >>> prod = Reader().open(path) >>> prod.get_tile_name() '023030' ``` Returns: str: Tile name """ return self.split_name[2] @abstractmethod def _set_product_type(self) -> None: """Get products type""" raise NotImplementedError("This method should be implemented by a child class") def _set_mss_product_type(self, version: int) -> None: """Set MSS product type and map corresponding bands""" if "L1" in self.name: self.product_type = LandsatProductType.L1_MSS self.band_names.map_bands( { obn.GREEN: "4" if version < 4 else "1", obn.RED: "5" if version < 4 else "2", obn.VRE_1: "6" if version < 4 else "3", obn.VRE_2: "6" if version < 4 else "3", obn.VRE_3: "6" if version < 4 else "3", obn.NIR: "7" if version < 4 else "4", obn.NARROW_NIR: "7" if version < 4 else "4", } ) else: raise InvalidProductError("Only Landsat level 1 are managed in EOReader") def _set_tm_product_type(self) -> None: """Set TM product type and map corresponding bands""" if "L1" in self.name: self.product_type = LandsatProductType.L1_TM self.band_names.map_bands( { obn.BLUE: "1", obn.GREEN: "2", obn.RED: "3", obn.NIR: "4", obn.NARROW_NIR: 
"4", obn.SWIR_1: "5", obn.SWIR_2: "7", obn.TIR_1: "6", obn.TIR_2: "6", } ) else: raise InvalidProductError("Only Landsat level 1 are managed in EOReader") def _set_etm_product_type(self) -> None: """Set ETM product type and map corresponding bands""" if "L1" in self.name: self.product_type = LandsatProductType.L1_ETM self.band_names.map_bands( { obn.BLUE: "1", obn.GREEN: "2", obn.RED: "3", obn.NIR: "4", obn.NARROW_NIR: "4", obn.SWIR_1: "5", obn.SWIR_2: "7", obn.PAN: "8", obn.TIR_1: "6_VCID_1", obn.TIR_2: "6_VCID_2", } ) else: raise InvalidProductError("Only Landsat level 1 are managed in EOReader") def _set_olci_product_type(self) -> None: """Set OLCI product type and map corresponding bands""" if "L1" in self.name: self.product_type = LandsatProductType.L1_OLCI self.band_names.map_bands( { obn.CA: "1", obn.BLUE: "2", obn.GREEN: "3", obn.RED: "4", obn.NIR: "5", obn.NARROW_NIR: "5", obn.SWIR_1: "6", obn.SWIR_2: "7", obn.PAN: "8", obn.SWIR_CIRRUS: "9", obn.TIR_1: "10", obn.TIR_2: "11", } ) else: raise InvalidProductError("Only Landsat level 1 are managed in EOReader") def get_datetime(self, as_datetime: bool = False) -> Union[str, datetime]: """ Get the product's acquisition datetime, with format `YYYYMMDDTHHMMSS` <-> `%Y%m%dT%H%M%S` ```python >>> from eoreader.reader import Reader >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2" >>> prod = Reader().open(path) >>> prod.get_datetime(as_datetime=True) datetime.datetime(2020, 5, 18, 16, 34, 7) >>> prod.get_datetime(as_datetime=False) '20200518T163407' ``` Args: as_datetime (bool): Return the date as a datetime.datetime. If false, returns a string. 
Returns: Union[str, datetime.datetime]: Its acquisition datetime """ try: mtd = self.read_mtd(force_pd=True) date = mtd["DATE_ACQUIRED"].value # 1982-09-06 # "16:47:09.5990000Z": needs max 6 digits for ms hours = mtd["SCENE_CENTER_TIME"].value.replace('"', "")[:-3] date = ( f"{datetime.strptime(date, '%Y-%m-%d').strftime('%Y%m%d')}" f"T{datetime.strptime(hours, '%H:%M:%S.%f').strftime('%H%M%S')}" ) except (FileNotFoundError, KeyError): date = datetime.strptime(self.split_name[3], "%Y%m%d").strftime( DATETIME_FMT ) if as_datetime: date = datetime.strptime(date, DATETIME_FMT) return date def get_band_paths(self, band_list: list, resolution: float = None) -> dict: """ Return the paths of required bands. ```python >>> from eoreader.reader import Reader >>> from eoreader.bands.alias import * >>> path = r"S2A_MSIL1C_20200824T110631_N0209_R137_T30TTK_20200824T150432.SAFE.zip" >>> prod = Reader().open(path) >>> prod.get_band_paths([GREEN, RED]) { <OpticalBandNames.GREEN: 'GREEN'>: 'LC08_L1GT_023030_20200518_20200527_01_T2\\LC08_L1GT_023030_20200518_20200527_01_T2_B3.TIF', <OpticalBandNames.RED: 'RED'>: 'LC08_L1GT_023030_20200518_20200527_01_T2\\LC08_L1GT_023030_20200518_20200527_01_T2_B4.TIF' } ``` Args: band_list (list): List of the wanted bands resolution (float): Useless here Returns: dict: Dictionary containing the path of each queried band """ band_paths = {} for band in band_list: if not self.has_band(band): raise InvalidProductError( f"Non existing band ({band.name}) " f"for Landsat-{self.product_type.name} products" ) band_nb = self.band_names[band] try: band_paths[band] = self._get_path(f"_B{band_nb}") except FileNotFoundError as ex: raise InvalidProductError( f"Non existing {band} ({band_nb}) band for {self.path}" ) from ex return band_paths def read_mtd( self, force_pd=False ) -> Union[pd.DataFrame, Tuple[etree._Element, str]]: """ Read Landsat metadata as: - a `pandas.DataFrame` whatever its collection is (by default for collection 1) - a XML root + its 
namespace if the product is retrieved from the 2nd collection (by default for collection 2) ```python >>> from eoreader.reader import Reader >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2" >>> prod = Reader().open(path) >>> # COLLECTION 1 : Open metadata as panda DataFrame >>> prod.read_mtd() NAME ORIGIN ... RESAMPLING_OPTION value "Image courtesy of the U.S. Geological Survey" ... "CUBIC_CONVOLUTION" [1 rows x 197 columns] >>> # COLLECTION 2 : Open metadata as XML >>> path = r"LC08_L1TP_200030_20201220_20210310_02_T1" # Collection 2 >>> prod = Reader().open(path) >>> prod.read_mtd() (<Element LANDSAT_METADATA_FILE at 0x19229016048>, '') >>> # COLLECTION 2 : Force to pandas.DataFrame >>> prod.read_mtd(force_pd=True) NAME ORIGIN ... RESAMPLING_OPTION value "Image courtesy of the U.S. Geological Survey" ... "CUBIC_CONVOLUTION" [1 rows x 263 columns] ``` Args: force_pd (bool): If collection 2, return a pandas.DataFrame instead of a XML root + namespace Returns: pd.DataFrame: Metadata as a Pandas DataFrame """ # WARNING: always use force_pd in this class ! as_pd = (self._collection == LandsatCollection.COL_1) or force_pd if as_pd: mtd_name = f"{self.name}_MTL.txt" if self.is_archived: # We need to extract the file in memry to be used with pandas tar_ds = tarfile.open(self.path, "r") info = [f.name for f in tar_ds.getmembers() if mtd_name in f.name][0] mtd_path = tar_ds.extractfile(info) else: # FOR COLLECTION 1 AND 2 tar_ds = None mtd_path = os.path.join(self.path, mtd_name) if not os.path.isfile(mtd_path): raise FileNotFoundError( f"Unable to find the metadata file associated with {self.path}" ) # Parse mtd_data = pd.read_table( mtd_path, sep="\s=\s", names=["NAME", "value"], skipinitialspace=True, engine="python", ) # Workaround an unexpected behaviour in pandas ! 
if any(mtd_data.NAME == "="): mtd_data = pd.read_table( mtd_path, sep="=", names=["NAME", "=", "value"], usecols=[0, 2], skipinitialspace=True, ) # Remove useless rows mtd_data = mtd_data[~mtd_data["NAME"].isin(["GROUP", "END_GROUP", "END"])] # Set index mtd_data = mtd_data.set_index("NAME").T # Close if needed if tar_ds: tar_ds.close() else: if self.is_archived: root = files.read_archived_xml(self.path, f".*{self.name}_MTL.xml") else: # ONLY FOR COLLECTION 2 try: mtd_file = glob.glob( os.path.join(self.path, f"{self.name}_MTL.xml") )[0] # pylint: disable=I1101: # Module 'lxml.etree' has no 'parse' member, but source is unavailable. xml_tree = etree.parse(mtd_file) root = xml_tree.getroot() except IndexError as ex: raise InvalidProductError( f"Metadata file ({self.name}.xml) not found in {self.path}" ) from ex # Get namespace namespace = "" # No namespace here mtd_data = (root, namespace) return mtd_data def _read_band( self, path: str, resolution: Union[tuple, list, float] = None, size: Union[list, tuple] = None, ) -> XDS_TYPE: """ Read band from a dataset. .. WARNING:: Invalid pixels are not managed here ! Args: path (str): Band path resolution (Union[tuple, list, float]): Resolution of the wanted band, in dataset resolution unit (X, Y) size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided. 
Returns: XDS_TYPE: Radiometrically coherent band, saved as float 32 and its metadata """ # Get band name: the last number of the filename: # ie: 'LC08_L1TP_200030_20191218_20191226_01_T1_B1' if self.is_archived: filename = files.get_filename(path.split("!")[-1]) else: filename = files.get_filename(path) band_name = filename[-1] if self._quality_id in filename or self._nodata_band_id in filename: band = rasters.read( path, resolution=resolution, size=size, resampling=Resampling.nearest, # NEAREST TO KEEP THE FLAGS masked=False, ).astype(np.uint16) else: # Read band (call superclass generic method) band = rasters.read( path, resolution=resolution, size=size, resampling=Resampling.bilinear ).astype(np.float32) # Open mtd mtd_data = self.read_mtd(force_pd=True) # Get band nb and corresponding coeff c_mul_str = "REFLECTANCE_MULT_BAND_" + band_name c_add_str = "REFLECTANCE_ADD_BAND_" + band_name # Get coeffs to convert DN to reflectance c_mul = mtd_data[c_mul_str].value c_add = mtd_data[c_add_str].value # Manage NULL values try: c_mul = float(c_mul) except ValueError: c_mul = 1 try: c_add = float(c_add) except ValueError: c_add = 0 # Compute the correct radiometry of the band and set no data to 0 band = c_mul * band + c_add # Already in float return band # pylint: disable=R0913 # R0913: Too many arguments (6/5) (too-many-arguments) def _manage_invalid_pixels( self, band_arr: XDS_TYPE, band: obn, resolution: float = None, size: Union[list, tuple] = None, ) -> XDS_TYPE: """ Manage invalid pixels (Nodata, saturated, defective...) Args: band_arr (XDS_TYPE): Band array band (obn): Band name as an OpticalBandNames resolution (float): Band resolution in meters size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided. 
Returns: XDS_TYPE: Cleaned band array """ # Open QA band landsat_qa_path = self._get_path(self._quality_id) qa_arr = self._read_band( landsat_qa_path, resolution=resolution, size=size ).data # To np array if self._collection == LandsatCollection.COL_1: # https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-1-level-1-quality-assessment-band # Bit ids nodata_id = 0 # Fill value dropped_id = 1 # Dropped pixel or terrain occlusion # Set nodata to every saturated pixel, even if only 1-2 bands are touched by it # -> 01 or 10 or 11 # -> bit 2 or bit 3 sat_id_1 = 2 sat_id_2 = 3 nodata, dropped, sat_1, sat_2 = rasters.read_bit_array( qa_arr, [nodata_id, dropped_id, sat_id_1, sat_id_2] ) mask = nodata | dropped | sat_1 | sat_2 else: # https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-2-quality-assessment-bands # SATURATED & OTHER PIXELS band_nb = int(self.band_names[band]) # Bit ids sat_id = band_nb - 1 # Saturated pixel if self.product_type != LandsatProductType.L1_OLCI: other_id = 11 # Terrain occlusion else: other_id = 9 # Dropped pixels sat, other = rasters.read_bit_array(qa_arr, [sat_id, other_id]) # If collection 2, nodata has to be found in pixel QA file landsat_stat_path = self._get_path(self._nodata_band_id) pixel_arr = self._read_band( landsat_stat_path, resolution=resolution, size=size ).data nodata = np.where(pixel_arr == 1, 1, 0) mask = sat | other | nodata return self._set_nodata_mask(band_arr, mask) def _load_bands( self, band_list: Union[list, BandNames], resolution: float = None, size: Union[list, tuple] = None, ) -> dict: """ Load bands as numpy arrays with the same resolution (and same metadata). Args: band_list (list, BandNames): List of the wanted bands resolution (float): Band resolution in meters size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided. 
Returns: dict: Dictionary {band_name, band_xarray} """ # Return empty if no band are specified if not band_list: return {} # Get band paths if not isinstance(band_list, list): band_list = [band_list] band_paths = self.get_band_paths(band_list) # Open bands and get array (resampled if needed) band_arrays = self._open_bands(band_paths, resolution=resolution, size=size) return band_arrays def get_mean_sun_angles(self) -> (float, float): """ Get Mean Sun angles (Azimuth and Zenith angles) ```python >>> from eoreader.reader import Reader >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2.SAFE.zip" >>> prod = Reader().open(path) >>> prod.get_mean_sun_angles() (140.80752656, 61.93065805) ``` Returns: (float, float): Mean Azimuth and Zenith angle """ # Retrieve angles mtd_data = self.read_mtd(force_pd=True) azimuth_angle = float(mtd_data.SUN_AZIMUTH.value) zenith_angle = float(mtd_data.SUN_ELEVATION.value) return azimuth_angle, zenith_angle @abstractmethod def _get_condensed_name(self) -> str: """ Get products condensed name ({date}_Lx_{tile}_{product_type}). Returns: str: Condensed Landsat name """ return f"{self.get_datetime()}_{self.platform.name}_{self.tile_name}_{self.product_type.value}" def _has_cloud_band(self, band: BandNames) -> bool: """ Does this products has the specified cloud band ? 
- (COL 1)[https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band] - (COL 2)[https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-2-quality-assessment-bands] True ``` """ if self.product_type == LandsatProductType.L1_OLCI: has_band = True elif self.product_type in [LandsatProductType.L1_ETM, LandsatProductType.L1_TM]: has_band = self._e_tm_has_cloud_band(band) elif self.product_type == LandsatProductType.L1_MSS: has_band = self._mss_has_cloud_band(band) else: raise InvalidProductError(f"Invalid product type: {self.product_type}") return has_band @staticmethod def _mss_has_cloud_band(band: BandNames) -> bool: """ Does this products has the specified cloud band ? ``` """ if band in [RAW_CLOUDS, CLOUDS, ALL_CLOUDS]: has_band = True else: has_band = False return has_band @staticmethod def _e_tm_has_cloud_band(band: BandNames) -> bool: """ Does this products has the specified cloud band ? ``` """ if band in [RAW_CLOUDS, CLOUDS, ALL_CLOUDS, SHADOWS]: has_band = True else: has_band = False return has_band def _load_clouds( self, bands: list, resolution: float = None, size: Union[list, tuple] = None ) -> dict: """ Load cloud files as numpy arrays with the same resolution (and same metadata). Read Landsat clouds from QA mask. See here for clouds_values: - (COL 1)[https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band] - (COL 2)[https://www.usgs.gov/core-science-systems/nli/landsat/landsat-collection-2-quality-assessment-bands] Args: bands (list): List of the wanted bands resolution (int): Band resolution in meters size (Union[tuple, list]): Size of the array (width, height). Not used if resolution is provided. 
Returns: dict: Dictionary {band_name, band_xarray} """ band_dict = {} if bands: # Open QA band landsat_qa_path = self._get_path(self._quality_id) qa_arr = self._read_band(landsat_qa_path, resolution=resolution, size=size) if self.product_type == LandsatProductType.L1_OLCI: band_dict = self._load_olci_clouds(qa_arr, bands) elif self.product_type in [ LandsatProductType.L1_ETM, LandsatProductType.L1_TM, ]: band_dict = self._load_e_tm_clouds(qa_arr, bands) elif self.product_type == LandsatProductType.L1_MSS: band_dict = self._load_mss_clouds(qa_arr, bands) else: raise InvalidProductError(f"Invalid product type: {self.product_type}") return band_dict def _load_mss_clouds(self, qa_arr: XDS_TYPE, band_list: list) -> dict: """ Load cloud files as numpy arrays with the same resolution (and same metadata). Read Landsat-MSS clouds from QA mask. See here for clouds_values: - (COL 1)[https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band] - (COL 2)[https://www.usgs.gov/media/files/landsat-1-5-mss-collection-2-level-1-data-format-control-book] Args: qa_arr (XDS_TYPE): Quality array band_list (list): List of the wanted bands Returns: dict, dict: Dictionary {band_name, band_array} """ bands = {} # Get clouds and nodata nodata_id = 0 cloud_id = ( 4 if self._collection == LandsatCollection.COL_1 else 3 ) # Clouds with high confidence clouds = None if ALL_CLOUDS in band_list or CLOUDS in band_list: nodata, cld = rasters.read_bit_array(qa_arr, [nodata_id, cloud_id]) clouds = self._create_mask(qa_arr, cld, nodata) for band in band_list: if band == ALL_CLOUDS: bands[band] = clouds elif band == CLOUDS: bands[band] = clouds elif band == RAW_CLOUDS: bands[band] = qa_arr else: raise InvalidTypeError( f"Non existing cloud band for Landsat-MSS sensor: {band}" ) return bands def _load_e_tm_clouds( self, qa_arr: XDS_TYPE, band_list: Union[list, BandNames] ) -> dict: """ Load cloud files as numpy arrays with the same resolution (and same metadata). 
Read Landsat-(E)TM clouds from QA mask. See here for clouds_values: - (COL 1)[https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band] - (COL 2 TM)[https://www.usgs.gov/media/files/landsat-4-5-tm-collection-2-level-1-data-format-control-book] - (COL 2 ETM)[https://www.usgs.gov/media/files/landsat-7-etm-collection-2-level-1-data-format-control-book] Args: qa_arr (XDS_TYPE): Quality array band_list (list): List of the wanted bands Returns: dict, dict: Dictionary {band_name, band_array} """ bands = {} # Get clouds and nodata nodata = None cld = None shd = None if any(band in [ALL_CLOUDS, CLOUDS, SHADOWS] for band in band_list): if self._collection == LandsatCollection.COL_1: # Bit id nodata_id = 0 cloud_id = 4 # Clouds with high confidence shd_conf_1_id = 7 shd_conf_2_id = 8 nodata, cld, shd_conf_1, shd_conf_2 = rasters.read_bit_array( qa_arr, [nodata_id, cloud_id, shd_conf_1_id, shd_conf_2_id] ) shd = shd_conf_1 & shd_conf_2 else: # Bit ids nodata_id = 0 cloud_id = 3 # Clouds with high confidence shd_id = 4 # Shadows with high confidence nodata, cld, shd = rasters.read_bit_array( qa_arr, [nodata_id, cloud_id, shd_id] ) for band in band_list: if band == ALL_CLOUDS: bands[band] = self._create_mask(qa_arr, cld | shd, nodata) elif band == SHADOWS: bands[band] = self._create_mask(qa_arr, shd, nodata) elif band == CLOUDS: bands[band] = self._create_mask(qa_arr, cld, nodata) elif band == RAW_CLOUDS: bands[band] = qa_arr else: raise InvalidTypeError( f"Non existing cloud band for Landsat-(E)TM sensor: {band}" ) return bands def _load_olci_clouds( self, qa_arr: XDS_TYPE, band_list: Union[list, BandNames] ) -> dict: """ Load cloud files as numpy arrays with the same resolution (and same metadata). Read Landsat-OLCI clouds from QA mask. 
See here for clouds_values: - (COL 1)[https://www.usgs.gov/land-resources/nli/landsat/landsat-collection-1-level-1-quality-assessment-band] - (COL 2)[https://www.usgs.gov/media/files/landsat-8-level-1-data-format-control-book] Args: qa_arr (XDS_TYPE): Quality array band_list (list): List of the wanted bands Returns: dict, dict: Dictionary {band_name, band_array} """ bands = {} # Get clouds and nodata nodata = None cld = None shd = None cir = None if any(band in [ALL_CLOUDS, CLOUDS, SHADOWS] for band in band_list): if self._collection == LandsatCollection.COL_1: # Bit ids nodata_id = 0 cloud_id = 4 # Clouds with high confidence shd_conf_1_id = 7 shd_conf_2_id = 8 cir_conf_1_id = 11 cir_conf_2_id = 12 # Read binary mask ( nodata, cld, shd_conf_1, shd_conf_2, cir_conf_1, cir_conf_2, ) = rasters.read_bit_array( qa_arr, [ nodata_id, cloud_id, shd_conf_1_id, shd_conf_2_id, cir_conf_1_id, cir_conf_2_id, ], ) shd = shd_conf_1 & shd_conf_2 cir = cir_conf_1 & cir_conf_2 else: # Bit ids nodata_id = 0 cloud_id = 3 # Clouds with high confidence shd_id = 4 # Shadows with high confidence cir_id = 2 # Cirrus with high confidence nodata, cld, shd, cir = rasters.read_bit_array( qa_arr, [nodata_id, cloud_id, shd_id, cir_id] ) for band in band_list: if band == ALL_CLOUDS: bands[band] = self._create_mask(qa_arr, cld | shd | cir, nodata) elif band == SHADOWS: bands[band] = self._create_mask(qa_arr, shd, nodata) elif band == CLOUDS: bands[band] = self._create_mask(qa_arr, cld, nodata) elif band == CIRRUS: bands[band] = self._create_mask(qa_arr, cir, nodata) elif band == RAW_CLOUDS: bands[band] = qa_arr else: raise InvalidTypeError( f"Non existing cloud band for Landsat-OLCI sensor: {band}" ) return bands
Ancestors
Subclasses
Instance variables
var output
-
Inherited from:
OpticalProduct
.output
Output directory of the product, to write orthorectified data for example.
var name
-
Inherited from:
OpticalProduct
.name
Product name (its filename without any extension).
var split_name
-
Inherited from:
OpticalProduct
.split_name
Split name, to retrieve every information from its filename (dates, tile, product type…).
var archive_path
-
Inherited from:
OpticalProduct
.archive_path
Archive path, same as the product path if not specified. Useful when you want to know where both the extracted and archived version of your product …
var path
-
Inherited from:
OpticalProduct
.path
Usable path to the product, either extracted or archived path, according to the satellite.
var is_archived
-
Inherited from:
OpticalProduct
.is_archived
Whether the archived product is processed (a product is considered as archived if its product path is a directory).
var needs_extraction
-
Inherited from:
OpticalProduct
.needs_extraction
Does this product need to be extracted to be processed? (
True
by default). var date
-
Inherited from:
OpticalProduct
.date
Acquisition date.
var datetime
-
Inherited from:
OpticalProduct
.datetime
Acquisition datetime.
var tile_name
-
Inherited from:
OpticalProduct
.tile_name
Tile if possible (for data that can be piled, for example S2 and Landsats).
var sensor_type
-
Inherited from:
OpticalProduct
.sensor_type
Sensor type, SAR or optical.
var product_type
-
Inherited from:
OpticalProduct
.product_type
Product type, satellite-related field, such as L1C or L2A for Sentinel-2 data.
var band_names
-
Inherited from:
OpticalProduct
.band_names
Band mapping between band wrapping names such as
GREEN
and real band numbers such as 03
for Sentinel-2. var is_reference
-
Inherited from:
OpticalProduct
.is_reference
If the product is a reference, used for algorithms that need pre and post data, such as fire detection.
var corresponding_ref
-
Inherited from:
OpticalProduct
.corresponding_ref
The corresponding reference products to the current one (if the product is not a reference but has a reference data corresponding to it). A list …
var nodata
-
Inherited from:
OpticalProduct
.nodata
Product nodata, set to 0 by default. Please do not touch this or all indices will fail.
var platform
-
Inherited from:
OpticalProduct
.platform
Product platform, such as Sentinel-2
var resolution
-
Inherited from:
OpticalProduct
.resolution
Default resolution in meters of the current product. For SAR product, we use Ground Range resolution as we will automatically orthorectify the tiles.
var condensed_name
-
Inherited from:
OpticalProduct
.condensed_name
Condensed name, the filename with only useful data to keep the name unique (ie.
20191215T110441_S2_30TXP_L2A_122756
). Used to shorten names and paths. var sat_id
-
Inherited from:
OpticalProduct
.sat_id
Satellite ID, i.e.
S2
for Sentinel-2
Methods
def footprint(
self)
-
Get the real footprint of the product (without nodata; in French: emprise utile)
>>> from eoreader.reader import Reader >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2" >>> prod = Reader().open(path) >>> prod.footprint() index geometry 0 0 POLYGON ((366165.000 4899735.000, 366165.000 4...
Overload of the generic function because landsat nodata seems to be different in QA than in regular bands. Indeed, nodata pixels vary according to the band sensor footprint, whereas QA nodata is where at least one band has nodata.
We chose to keep QA nodata values for the footprint in order to show where all bands are valid.
TL;DR: We use the QA nodata value to determine the product's footprint.
Returns
gpd.GeoDataFrame
- Footprint as a GeoDataFrame
Expand source code
def footprint(self) -> gpd.GeoDataFrame:
    """
    Get the real footprint of the product (without nodata, in French: *emprise utile*).

    ```python
    >>> from eoreader.reader import Reader
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)
    >>> prod.footprint()
       index                                           geometry
    0      0  POLYGON ((366165.000 4899735.000, 366165.000 4...
    ```

    This overloads the generic function because Landsat nodata seems to differ
    between the QA band and the regular bands: nodata pixels vary according to
    each band sensor footprint, whereas QA nodata is set wherever at least one
    band has nodata.

    The QA nodata is kept for the footprint, in order to show where all the
    bands are valid.

    **TL;DR: the QA nodata value is used to determine the product's footprint**.

    Returns:
        gpd.GeoDataFrame: Footprint as a GeoDataFrame
    """
    # Vectorize the nodata band, restricted to the value 1
    nodata_vect = rasters.vectorize(self._get_path(self._nodata_band_id), values=1)

    # Combine the product extent and the nodata vector (symmetric difference)
    return gpd.overlay(self.extent(), nodata_vect, how="symmetric_difference")
def get_datetime(
self,
as_datetime=False)-
Get the product's acquisition datetime, with format
YYYYMMDDTHHMMSS
<->%Y%m%dT%H%M%S
>>> from eoreader.reader import Reader >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2" >>> prod = Reader().open(path) >>> prod.get_datetime(as_datetime=True) datetime.datetime(2020, 5, 18, 16, 34, 7) >>> prod.get_datetime(as_datetime=False) '20200518T163407'
Args
as_datetime
:bool
- Return the date as a datetime.datetime. If false, returns a string.
Returns
Union[str, datetime.datetime]
- Its acquisition datetime
Expand source code
def get_datetime(self, as_datetime: bool = False) -> Union[str, datetime]:
    """
    Get the product's acquisition datetime, with format `YYYYMMDDTHHMMSS` <-> `%Y%m%dT%H%M%S`

    The datetime is built from the `DATE_ACQUIRED` and `SCENE_CENTER_TIME`
    metadata fields. If the metadata file or one of these fields is missing,
    the date found in the product name (`split_name[3]`) is used instead and
    formatted with `DATETIME_FMT` (in that case the time of day is 00:00:00).

    ```python
    >>> from eoreader.reader import Reader
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)
    >>> prod.get_datetime(as_datetime=True)
    datetime.datetime(2020, 5, 18, 16, 34, 7)
    >>> prod.get_datetime(as_datetime=False)
    '20200518T163407'
    ```

    Args:
        as_datetime (bool): Return the date as a datetime.datetime. If false, returns a string.

    Returns:
        Union[str, datetime.datetime]: Its acquisition datetime
    """
    try:
        mtd = self.read_mtd(force_pd=True)
        acq_date = mtd["DATE_ACQUIRED"].value  # 1982-09-06

        # '"16:47:09.5990000Z"' -> '16:47:09'
        # Only the HH:MM:SS part ends up in the output, so drop the quotes and
        # everything from the fractional seconds on (incl. the trailing 'Z')
        # rather than slicing a fixed number of characters: this stays valid
        # whatever the sub-second precision written in the metadata.
        center_time = mtd["SCENE_CENTER_TIME"].value.replace('"', "").strip()
        center_time = center_time.split(".")[0].rstrip("Zz")

        date = (
            f"{datetime.strptime(acq_date, '%Y-%m-%d').strftime('%Y%m%d')}"
            f"T{datetime.strptime(center_time, '%H:%M:%S').strftime('%H%M%S')}"
        )
    except (FileNotFoundError, KeyError):
        # No usable metadata: fall back on the date written in the product name
        date = datetime.strptime(self.split_name[3], "%Y%m%d").strftime(
            DATETIME_FMT
        )

    if as_datetime:
        date = datetime.strptime(date, DATETIME_FMT)

    return date
def get_band_paths(
self,
band_list,
resolution=None)-
Return the paths of required bands.
>>> from eoreader.reader import Reader >>> from eoreader.bands.alias import * >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2" >>> prod = Reader().open(path) >>> prod.get_band_paths([GREEN, RED]) { <OpticalBandNames.GREEN: 'GREEN'>: 'LC08_L1GT_023030_20200518_20200527_01_T2\LC08_L1GT_023030_20200518_20200527_01_T2_B3.TIF', <OpticalBandNames.RED: 'RED'>: 'LC08_L1GT_023030_20200518_20200527_01_T2\LC08_L1GT_023030_20200518_20200527_01_T2_B4.TIF' }
Args
band_list
:list
- List of the wanted bands
resolution
:float
- Useless here
Returns
dict
- Dictionary containing the path of each queried band
Expand source code
def get_band_paths(self, band_list: list, resolution: float = None) -> dict:
    """
    Return the paths of the required bands.

    ```python
    >>> from eoreader.reader import Reader
    >>> from eoreader.bands.alias import *
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)
    >>> prod.get_band_paths([GREEN, RED])
    {
        <OpticalBandNames.GREEN: 'GREEN'>:
        'LC08_L1GT_023030_20200518_20200527_01_T2\\LC08_L1GT_023030_20200518_20200527_01_T2_B3.TIF',
        <OpticalBandNames.RED: 'RED'>:
        'LC08_L1GT_023030_20200518_20200527_01_T2\\LC08_L1GT_023030_20200518_20200527_01_T2_B4.TIF'
    }
    ```

    Args:
        band_list (list): List of the wanted bands
        resolution (float): Not used by this implementation

    Returns:
        dict: Dictionary containing the path of each queried band

    Raises:
        InvalidProductError: If a band does not exist for this product type,
            or if its file cannot be found in the product
    """
    paths = {}

    for band in band_list:
        # Reject the bands this product does not have before looking for files
        if not self.has_band(band):
            raise InvalidProductError(
                f"Non existing band ({band.name}) "
                f"for Landsat-{self.product_type.name} products"
            )

        # Band files are looked up through their "_B<number>" suffix
        band_id = self.band_names[band]
        try:
            found_path = self._get_path(f"_B{band_id}")
        except FileNotFoundError as ex:
            raise InvalidProductError(
                f"Non existing {band} ({band_id}) band for {self.path}"
            ) from ex
        else:
            paths[band] = found_path

    return paths
def read_mtd(
self,
force_pd=False)-
Read Landsat metadata as:
- a
pandas.DataFrame
whatever its collection is (by default for collection 1) - a XML root + its namespace if the product is retrieved from the 2nd collection (by default for collection 2)
>>> from eoreader.reader import Reader >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2" >>> prod = Reader().open(path) >>> # COLLECTION 1 : Open metadata as panda DataFrame >>> prod.read_mtd() NAME ORIGIN ... RESAMPLING_OPTION value "Image courtesy of the U.S. Geological Survey" ... "CUBIC_CONVOLUTION" [1 rows x 197 columns] >>> # COLLECTION 2 : Open metadata as XML >>> path = r"LC08_L1TP_200030_20201220_20210310_02_T1" # Collection 2 >>> prod = Reader().open(path) >>> prod.read_mtd() (<Element LANDSAT_METADATA_FILE at 0x19229016048>, '') >>> # COLLECTION 2 : Force to pandas.DataFrame >>> prod.read_mtd(force_pd=True) NAME ORIGIN ... RESAMPLING_OPTION value "Image courtesy of the U.S. Geological Survey" ... "CUBIC_CONVOLUTION" [1 rows x 263 columns]
Args
force_pd
:bool
- If collection 2, return a pandas.DataFrame instead of a XML root + namespace
Returns
pd.DataFrame
- Metadata as a Pandas DataFrame
Expand source code
def read_mtd(
    self, force_pd=False
) -> "Union[pd.DataFrame, Tuple[etree._Element, str]]":
    """
    Read Landsat metadata as:

    - a `pandas.DataFrame` whatever its collection is (by default for collection 1)
    - a XML root + its namespace if the product is retrieved from the 2nd collection
      (by default for collection 2)

    ```python
    >>> from eoreader.reader import Reader
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)

    >>> # COLLECTION 1 : Open metadata as panda DataFrame
    >>> prod.read_mtd()
    NAME                                            ORIGIN  ...    RESAMPLING_OPTION
    value  "Image courtesy of the U.S. Geological Survey"  ...  "CUBIC_CONVOLUTION"
    [1 rows x 197 columns]

    >>> # COLLECTION 2 : Open metadata as XML
    >>> path = r"LC08_L1TP_200030_20201220_20210310_02_T1"  # Collection 2
    >>> prod = Reader().open(path)
    >>> prod.read_mtd()
    (<Element LANDSAT_METADATA_FILE at 0x19229016048>, '')

    >>> # COLLECTION 2 : Force to pandas.DataFrame
    >>> prod.read_mtd(force_pd=True)
    NAME                                            ORIGIN  ...    RESAMPLING_OPTION
    value  "Image courtesy of the U.S. Geological Survey"  ...  "CUBIC_CONVOLUTION"
    [1 rows x 263 columns]
    ```

    Args:
        force_pd (bool): If collection 2, return a pandas.DataFrame instead of a XML root + namespace

    Returns:
        Union[pd.DataFrame, Tuple[etree._Element, str]]: Metadata as a one-row
        DataFrame (row `value`, one column per metadata field), or the XML root
        with its (empty) namespace

    Raises:
        FileNotFoundError: If the `*_MTL.txt` file cannot be found (in the
            product folder or in its archive) when reading as a DataFrame
        InvalidProductError: If the `*_MTL.xml` file cannot be found in an
            extracted product when reading as XML
    """
    # Function-scope import: only needed to re-read the archived metadata
    import io

    # WARNING: always use force_pd in this class !
    as_pd = force_pd or (self._collection == LandsatCollection.COL_1)

    if as_pd:
        mtd_name = f"{self.name}_MTL.txt"
        not_found_msg = (
            f"Unable to find the metadata file associated with {self.path}"
        )

        if self.is_archived:
            # Load the whole file in memory: the archive is closed right away
            # (even on error) and the content can be parsed more than once.
            with tarfile.open(self.path, "r") as tar_ds:
                mtd_members = [
                    member
                    for member in tar_ds.getmembers()
                    if mtd_name in member.name
                ]
                mtd_file = (
                    tar_ds.extractfile(mtd_members[0]) if mtd_members else None
                )
                if mtd_file is None:
                    raise FileNotFoundError(not_found_msg)
                mtd_content = mtd_file.read()

            def open_mtd():
                # A fresh buffer for every parsing attempt
                return io.BytesIO(mtd_content)

        else:
            # FOR COLLECTION 1 AND 2
            mtd_path = os.path.join(self.path, mtd_name)
            if not os.path.isfile(mtd_path):
                raise FileNotFoundError(not_found_msg)

            def open_mtd():
                return mtd_path

        # Parse the "NAME = value" lines
        mtd_data = pd.read_table(
            open_mtd(),
            sep=r"\s=\s",
            names=["NAME", "value"],
            skipinitialspace=True,
            engine="python",
        )

        # Workaround an unexpected behaviour in pandas !
        if any(mtd_data.NAME == "="):
            mtd_data = pd.read_table(
                open_mtd(),
                sep="=",
                names=["NAME", "=", "value"],
                usecols=[0, 2],
                skipinitialspace=True,
            )

        # Remove useless rows
        mtd_data = mtd_data[~mtd_data["NAME"].isin(["GROUP", "END_GROUP", "END"])]

        # Set index
        mtd_data = mtd_data.set_index("NAME").T

    else:
        if self.is_archived:
            root = files.read_archived_xml(self.path, f".*{self.name}_MTL.xml")
        else:
            # ONLY FOR COLLECTION 2
            try:
                mtd_file = glob.glob(
                    os.path.join(self.path, f"{self.name}_MTL.xml")
                )[0]

                # pylint: disable=I1101:
                # Module 'lxml.etree' has no 'parse' member, but source is unavailable.
                xml_tree = etree.parse(mtd_file)
                root = xml_tree.getroot()
            except IndexError as ex:
                raise InvalidProductError(
                    f"Metadata file ({self.name}_MTL.xml) not found in {self.path}"
                ) from ex

        # Get namespace
        namespace = ""  # No namespace here

        mtd_data = (root, namespace)

    return mtd_data
- a
def get_mean_sun_angles(
self)
-
Get Mean Sun angles (Azimuth and Zenith angles)
>>> from eoreader.reader import Reader >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2.SAFE.zip" >>> prod = Reader().open(path) >>> prod.get_mean_sun_angles() (140.80752656, 61.93065805)
Returns
(float, float): Mean Azimuth and Zenith angle
Expand source code
def get_mean_sun_angles(self) -> (float, float):
    """
    Get Mean Sun angles (Azimuth and Zenith angles)

    The metadata provides the sun azimuth and the sun **elevation**; the zenith
    angle is derived from the latter (`zenith = 90 - elevation`).

    ```python
    >>> from eoreader.reader import Reader
    >>> path = r"LC08_L1GT_023030_20200518_20200527_01_T2"
    >>> prod = Reader().open(path)
    >>> prod.get_mean_sun_angles()
    (140.80752656, 28.06934195)
    ```

    Returns:
        (float, float): Mean Azimuth and Zenith angle
    """
    # Retrieve angles
    mtd_data = self.read_mtd(force_pd=True)
    azimuth_angle = float(mtd_data.SUN_AZIMUTH.value)

    # SUN_ELEVATION is measured from the horizon, the zenith angle from the vertical
    zenith_angle = 90.0 - float(mtd_data.SUN_ELEVATION.value)

    return azimuth_angle, zenith_angle
def get_default_band(
self)
-
Inherited from:
OpticalProduct
.get_default_band
Get default band:
GREEN
for optical data as every optical satellite has a GREEN band … def get_default_band_path(
self)
-
Inherited from:
OpticalProduct
.get_default_band_path
Get default band (
GREEN
for optical data) path … def crs(
self)
-
Inherited from:
OpticalProduct
.crs
Get UTM projection of the tile …
def extent(
self)
-
Inherited from:
OpticalProduct
.extent
Get UTM extent of the tile …
def get_existing_bands(
self)
-
Inherited from:
OpticalProduct
.get_existing_bands
Return the existing band paths …
def get_existing_band_paths(
self)
-
Inherited from:
OpticalProduct
.get_existing_band_paths
Return the existing band paths …
def get_date(
self,
as_date=False)-
Inherited from:
OpticalProduct
.get_date
Get the product's acquisition date …
def load(
self,
bands,
resolution=None,
size=None)-
Inherited from:
OpticalProduct
.load
Open the bands and compute the wanted index …
def has_band(
self,
band)-
Inherited from:
OpticalProduct
.has_band
Does this product have the specified band? …
def stack(
self,
bands,
resolution=None,
stack_path=None,
save_as_int=False)-
Inherited from:
OpticalProduct
.stack
Stack bands and indices of a product …