# Source code for disdrodb.l0.check_metadata

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import os
import yaml
from typing import Union
from disdrodb.l0.io import (
    get_disdrodb_dir,
    get_data_source,
    get_campaign_name,
)
from disdrodb.api.metadata import read_station_metadata
from disdrodb.l0.l0_reader import _check_metadata_reader
from disdrodb.l0.metadata import (
    _check_metadata_keys,
    _check_metadata_data_source,
    _check_metadata_campaign_name,
    _check_metadata_station_name,
    _check_metadata_sensor_name,
    check_metadata_compliance,
)

from disdrodb.api.metadata import get_list_metadata

#### --------------------------------------------------------------------------.


def read_yaml(fpath: str) -> dict:
    """Read a YAML file.

    The file is decoded explicitly as UTF-8 so that the result does not
    depend on the platform default text encoding.

    Parameters
    ----------
    fpath : str
        Input YAML file path.

    Returns
    -------
    dict
        Attributes read from the YAML file (whatever ``yaml.safe_load``
        returns for the file content).
    """
    with open(fpath, "r", encoding="utf-8") as f:
        attrs = yaml.safe_load(f)
    return attrs
#### --------------------------------------------------------------------------.
#### Metadata Archive Missing Information
def check_metadata_geolocation(metadata) -> None:
    """Identify metadata with missing or wrong geolocation.

    Parameters
    ----------
    metadata : dict
        Station metadata. The keys ``longitude``, ``latitude`` and
        ``platform_type`` are read with ``dict.get``.

    Raises
    ------
    TypeError
        If longitude or latitude is defined as a string.
    ValueError
        If longitude or latitude is unspecified (None), if a mobile platform
        does not use -9999 for both coordinates, or if a non-mobile platform
        uses -9999 or has coordinates outside [-180, 180] / [-90, 90]
        (NaN included).
    """
    # Get longitude, latitude and platform type
    longitude = metadata.get("longitude")
    latitude = metadata.get("latitude")
    platform_type = metadata.get("platform_type")

    # Check type validity
    if isinstance(longitude, str):
        raise TypeError("longitude is not defined as numeric.")
    if isinstance(latitude, str):
        raise TypeError("latitude is not defined as numeric.")

    # Check is not none
    if longitude is None or latitude is None:
        raise ValueError("Unspecified longitude and latitude coordinates.")

    # Check value validity
    # - If mobile platform, both coordinates must be the -9999 placeholder
    if platform_type == "mobile":
        if longitude != -9999 or latitude != -9999:
            raise ValueError("For mobile platform_type, specify latitude and longitude -9999")
        return None

    # - If fixed platform, the -9999 placeholder is not acceptable
    if longitude == -9999 or latitude == -9999:
        raise ValueError("Missing lat lon coordinates (-9999).")
    # Inclusive range checks are used instead of `x > hi or x < lo` so that
    # NaN (for which every comparison is False) is rejected as well.
    if not (-180 <= longitude <= 180):
        raise ValueError("Unvalid longitude (outside [-180, 180])")
    if not (-90 <= latitude <= 90):
        raise ValueError("Unvalid latitude (outside [-90, 90])")
    return None
def identify_missing_metadata_coords(metadata_fpaths: Union[str, list]) -> None:
    """Identify metadata files with missing or invalid coordinates.

    Each file is read with ``read_yaml`` and validated with
    ``check_metadata_geolocation``; the first failing file raises.

    Parameters
    ----------
    metadata_fpaths : str or list
        Path(s) of the YAML metadata files to check. A single path passed as
        a string is treated as a one-element list (instead of being iterated
        character by character).

    Raises
    ------
    TypeError
        If latitude or longitude is defined as a string.
    ValueError
        If latitude or longitude is unspecified or fails the value checks
        performed by ``check_metadata_geolocation``.
    """
    if isinstance(metadata_fpaths, str):
        metadata_fpaths = [metadata_fpaths]
    for fpath in metadata_fpaths:
        metadata = read_yaml(fpath)
        check_metadata_geolocation(metadata)
    return None
def identify_empty_metadata_keys(metadata_fpaths: list, keys: Union[str, list]) -> None:
    """Identify empty metadata keys.

    For every file and every requested key, a message is printed when the
    key is missing, set to null, or has an empty string representation.

    Parameters
    ----------
    metadata_fpaths : list
        Input YAML file paths.
    keys : Union[str,list]
        Attributes to verify the presence.
    """
    if isinstance(keys, str):
        keys = [keys]

    for fpath in metadata_fpaths:
        # Read each file once rather than once per key.
        # An empty YAML file is loaded as None: treat it as having no keys.
        metadata = read_yaml(fpath) or {}
        for key in keys:
            value = metadata.get(key, "")
            # A null value is as empty as a missing key; str() guards against
            # calling len() on non-sized values such as numbers.
            if value is None or len(str(value)) == 0:
                print(f"Empty {key} at: ", fpath)
    return None
def get_archive_metadata_key_value(disdrodb_dir: str, key: str, return_tuple: bool = True):
    """Collect the value of a metadata key for every station of the archive.

    Parameters
    ----------
    disdrodb_dir : str
        Path to the disdrodb directory.
    key : str
        Metadata key to look up in each station metadata.
    return_tuple : bool, optional
        If True (the default), each element of the returned list is a tuple
        ``(data_source, campaign_name, station_name, value)``.
        If False, each element is only the value.

    Returns
    -------
    list
        One entry per metadata file returned by ``get_list_metadata``.
    """
    metadata_fpaths = get_list_metadata(
        disdrodb_dir,
        data_sources=None,
        campaign_names=None,
        station_names=None,
        with_stations_data=False,
    )

    records = []
    for metadata_fpath in metadata_fpaths:
        # Derive the station identifiers from the metadata file path
        archive_dir = get_disdrodb_dir(metadata_fpath)
        source = get_data_source(metadata_fpath)
        campaign = get_campaign_name(metadata_fpath)
        station = os.path.basename(metadata_fpath).replace(".yml", "")
        station_metadata = read_station_metadata(
            disdrodb_dir=archive_dir,
            product_level="RAW",
            data_source=source,
            campaign_name=campaign,
            station_name=station,
        )
        records.append((source, campaign, station, station_metadata[key]))

    if return_tuple:
        return records
    # Keep only the metadata value (last element of each record)
    return [record[3] for record in records]
#### --------------------------------------------------------------------------.
#### Metadata Archive Checks
def check_archive_metadata_keys(disdrodb_dir: str) -> bool:
    """Check that all metadata files have valid keys.

    Every metadata file listed by ``get_list_metadata`` is read and passed to
    ``_check_metadata_keys``. A failure is printed and the scan continues.

    Parameters
    ----------
    disdrodb_dir : str
        Path to the disdrodb directory.

    Returns
    -------
    bool
        True if no metadata file raised an error, False otherwise.
    """
    all_valid = True
    metadata_fpaths = get_list_metadata(
        disdrodb_dir,
        data_sources=None,
        campaign_names=None,
        station_names=None,
        with_stations_data=False,
    )
    for metadata_fpath in metadata_fpaths:
        # Derive the station identifiers from the metadata file path
        archive_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        station_name = os.path.basename(metadata_fpath).replace(".yml", "")

        metadata = read_station_metadata(
            disdrodb_dir=archive_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_keys(metadata)
        except Exception as e:
            # Report the failing station and keep scanning the archive
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
            all_valid = False
    return all_valid
def check_archive_metadata_campaign_name(disdrodb_dir) -> bool:
    """Check the campaign_name of every metadata file of the archive.

    Each metadata is passed to ``_check_metadata_campaign_name`` with the
    campaign name derived from the metadata file path as expected name.
    A failure is printed and the scan continues.

    Parameters
    ----------
    disdrodb_dir : str
        Path to the disdrodb directory.

    Returns
    -------
    bool
        True if no metadata file raised an error, False otherwise.
    """
    all_valid = True
    metadata_fpaths = get_list_metadata(
        disdrodb_dir,
        data_sources=None,
        campaign_names=None,
        station_names=None,
        with_stations_data=False,
    )
    for metadata_fpath in metadata_fpaths:
        # Derive the station identifiers from the metadata file path
        archive_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        station_name = os.path.basename(metadata_fpath).replace(".yml", "")

        metadata = read_station_metadata(
            disdrodb_dir=archive_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_campaign_name(metadata, expected_name=campaign_name)
        except Exception as e:
            # Report the failing station and keep scanning the archive
            all_valid = False
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
    return all_valid
def check_archive_metadata_data_source(disdrodb_dir) -> bool:
    """Check the data_source of every metadata file of the archive.

    Each metadata is passed to ``_check_metadata_data_source`` with the data
    source derived from the metadata file path as expected name.
    A failure is printed and the scan continues.

    Parameters
    ----------
    disdrodb_dir : str
        Path to the disdrodb directory.

    Returns
    -------
    bool
        True if no metadata file raised an error, False otherwise.
    """
    all_valid = True
    metadata_fpaths = get_list_metadata(
        disdrodb_dir,
        data_sources=None,
        campaign_names=None,
        station_names=None,
        with_stations_data=False,
    )
    for metadata_fpath in metadata_fpaths:
        # Derive the station identifiers from the metadata file path
        archive_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        station_name = os.path.basename(metadata_fpath).replace(".yml", "")

        metadata = read_station_metadata(
            disdrodb_dir=archive_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_data_source(metadata, expected_name=data_source)
        except Exception as e:
            # Report the failing station and keep scanning the archive
            all_valid = False
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
    return all_valid
def check_archive_metadata_sensor_name(disdrodb_dir) -> bool:
    """Check the sensor name of every metadata file of the archive.

    Each metadata is passed to ``_check_metadata_sensor_name``.
    A failure is printed and the scan continues.

    Parameters
    ----------
    disdrodb_dir : str
        Path to the disdrodb directory.

    Returns
    -------
    bool
        True if no metadata file raised an error, False otherwise.
    """
    all_valid = True
    metadata_fpaths = get_list_metadata(
        disdrodb_dir,
        data_sources=None,
        campaign_names=None,
        station_names=None,
        with_stations_data=False,
    )
    for metadata_fpath in metadata_fpaths:
        # Derive the station identifiers from the metadata file path
        archive_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        station_name = os.path.basename(metadata_fpath).replace(".yml", "")

        metadata = read_station_metadata(
            disdrodb_dir=archive_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_sensor_name(metadata)
        except Exception as e:
            # Report the failing station and keep scanning the archive
            all_valid = False
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
    return all_valid
def check_archive_metadata_station_name(disdrodb_dir) -> bool:
    """Check the station name of every metadata file of the archive.

    Each metadata is passed to ``_check_metadata_station_name`` with the
    metadata file name (without ``.yml``) as expected name.
    A failure is printed and the scan continues.

    Parameters
    ----------
    disdrodb_dir : str
        Path to the disdrodb directory.

    Returns
    -------
    bool
        True if no metadata file raised an error, False otherwise.
    """
    all_valid = True
    metadata_fpaths = get_list_metadata(
        disdrodb_dir,
        data_sources=None,
        campaign_names=None,
        station_names=None,
        with_stations_data=False,
    )
    for metadata_fpath in metadata_fpaths:
        # Derive the station identifiers from the metadata file path
        archive_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        station_name = os.path.basename(metadata_fpath).replace(".yml", "")

        metadata = read_station_metadata(
            disdrodb_dir=archive_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_station_name(metadata, expected_name=station_name)
        except Exception as e:
            # Report the failing station and keep scanning the archive
            all_valid = False
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
    return all_valid
def check_archive_metadata_reader(disdrodb_dir: str) -> bool:
    """Check if the reader key is available and there is the associated reader.

    Each metadata is passed to ``_check_metadata_reader``.
    A failure is printed and the scan continues.

    Parameters
    ----------
    disdrodb_dir : str
        Path to the disdrodb directory.

    Returns
    -------
    bool
        True if no metadata file raised an error, False otherwise.
    """
    all_valid = True
    metadata_fpaths = get_list_metadata(
        disdrodb_dir,
        data_sources=None,
        campaign_names=None,
        station_names=None,
        with_stations_data=False,
    )
    for metadata_fpath in metadata_fpaths:
        # Derive the station identifiers from the metadata file path
        archive_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        station_name = os.path.basename(metadata_fpath).replace(".yml", "")

        metadata = read_station_metadata(
            disdrodb_dir=archive_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            _check_metadata_reader(metadata)
        except Exception as e:
            # Report the failing station and keep scanning the archive
            all_valid = False
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
    return all_valid
def check_archive_metadata_compliance(disdrodb_dir):
    """Run ``check_metadata_compliance`` on every station of the archive.

    The station identifiers are derived from each metadata file path and
    passed to ``check_metadata_compliance``. A failure is printed and the
    scan continues.

    Parameters
    ----------
    disdrodb_dir : str
        Path to the disdrodb directory.

    Returns
    -------
    bool
        True if no station raised an error, False otherwise.
    """
    all_valid = True
    metadata_fpaths = get_list_metadata(
        disdrodb_dir,
        data_sources=None,
        campaign_names=None,
        station_names=None,
        with_stations_data=False,
    )
    for metadata_fpath in metadata_fpaths:
        # Derive the station identifiers from the metadata file path
        archive_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        station_name = os.path.basename(metadata_fpath).replace(".yml", "")
        try:
            check_metadata_compliance(
                disdrodb_dir=archive_dir,
                data_source=data_source,
                campaign_name=campaign_name,
                station_name=station_name,
            )
        except Exception as e:
            # Report the failing station and keep scanning the archive
            all_valid = False
            print(f"Error for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
    return all_valid
def check_archive_metadata_geolocation(disdrodb_dir):
    """Check whether the metadata files have missing or wrong geolocation.

    Each metadata is passed to ``check_metadata_geolocation``.
    A failure is printed and the scan continues.

    Parameters
    ----------
    disdrodb_dir : str
        Path to the disdrodb directory.

    Returns
    -------
    bool
        True if no metadata file raised an error, False otherwise.
    """
    all_valid = True
    metadata_fpaths = get_list_metadata(
        disdrodb_dir,
        data_sources=None,
        campaign_names=None,
        station_names=None,
        with_stations_data=False,
    )
    for metadata_fpath in metadata_fpaths:
        # Derive the station identifiers from the metadata file path
        archive_dir = get_disdrodb_dir(metadata_fpath)
        data_source = get_data_source(metadata_fpath)
        campaign_name = get_campaign_name(metadata_fpath)
        station_name = os.path.basename(metadata_fpath).replace(".yml", "")

        metadata = read_station_metadata(
            disdrodb_dir=archive_dir,
            product_level="RAW",
            data_source=data_source,
            campaign_name=campaign_name,
            station_name=station_name,
        )
        try:
            check_metadata_geolocation(metadata)
        except Exception as e:
            # Report the failing station and keep scanning the archive
            all_valid = False
            print(f"Missing information for {data_source} {campaign_name} {station_name}.")
            print(f"The error is: {e}.")
    return all_valid