#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#*******************************************************************************
# @Author: Anne Fouilloux (University of Oslo)
#
# @Date: October 2014
#
# @Change History:
#
# November 2015 - Leopold Haimberger (University of Vienna):
# - optimized display_info
# - optimized data_retrieve and separated between python and shell
# script call
#
# February 2018 - Anne Philipp (University of Vienna):
# - applied PEP8 style guide
# - added documentation
# - applied some minor modifications in programming style/structure
# - added writing of mars request attributes to a csv file
#
# @License:
# (C) Copyright 2014-2020.
# Anne Philipp, Leopold Haimberger
#
# SPDX-License-Identifier: CC-BY-4.0
#
# This work is licensed under the Creative Commons Attribution 4.0
# International License. To view a copy of this license, visit
# http://creativecommons.org/licenses/by/4.0/ or send a letter to
# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
#*******************************************************************************
# ------------------------------------------------------------------------------
# MODULES
# ------------------------------------------------------------------------------
from __future__ import print_function
import os
import sys
import subprocess
import traceback
# software specific classes and modules from flex_extract
#pylint: disable=wrong-import-position
sys.path.append('../')
import _config
#pylint: disable=invalid-name
try:
ec_api = True
import ecmwfapi
except ImportError:
ec_api = False
try:
cds_api = True
import cdsapi
except ImportError:
cds_api = False
#pylint: enable=invalid-name
#pylint: enable=wrong-import-position
# ------------------------------------------------------------------------------
# CLASS
# ------------------------------------------------------------------------------
class MarsRetrieval(object):
    '''Specific syntax and content for submission of MARS retrievals.

    A MARS retrieval has a specific syntax with a selection of keywords and
    their corresponding values. This class provides the necessary functions
    by displaying the selected parameters and their values and the actual
    retrieval of the data through a mars request or a Python web api
    interface. The initialization already expects all the keyword values.

    A description of MARS keywords/arguments and examples of their
    values can be found here:
    https://software.ecmwf.int/wiki/display/UDOC/Identification+keywords#Identificationkeywords-class

    Attributes
    ----------
    server : ECMWFService or ECMWFDataServer
        This is the connection to the ECMWF data servers.

    public : int
        Decides which Web API Server version is used.

    marsclass : str, optional
        Characterisation of dataset.

    dataset : str, optional
        For public datasets there is the specific naming and parameter
        dataset which has to be used to characterize the type of
        data.

    type : str, optional
        Determines the type of fields to be retrieved.

    levtype : str, optional
        Denotes type of level.

    levelist : str, optional
        Specifies the required levels.

    repres : str, optional
        Selects the representation of the archived data.

    date : str, optional
        Specifies the Analysis date, the Forecast base date or
        Observations date.

    resol : str, optional
        Specifies the desired triangular truncation of retrieved data,
        before carrying out any other selected post-processing.

    stream : str, optional
        Identifies the forecasting system used to generate the data.

    area : str, optional
        Specifies the desired sub-area of data to be extracted.

    time : str, optional
        Specifies the time of the data in hours and minutes.

    step : str, optional
        Specifies the forecast time step from forecast base time.

    expver : str, optional
        The version of the dataset.

    number : str, optional
        Selects the member in ensemble forecast run.

    accuracy : str, optional
        Specifies the number of bits per value to be used in the
        generated GRIB coded fields.

    grid : str, optional
        Specifies the output grid which can be either a Gaussian grid
        or a Latitude/Longitude grid.

    gaussian : str, optional
        This parameter is deprecated and should no longer be used.
        Specifies the desired type of Gaussian grid for the output.

    target : str, optional
        Specifies a file into which data is to be written after
        retrieval or manipulation.

    param : str, optional
        Specifies the meteorological parameter.
    '''

    # NOTE: the parameter name ``type`` shadows the builtin, but it is part
    # of the public keyword interface and therefore must not be renamed.
    def __init__(self, server, public, marsclass="EA", dataset="", type="",
                 levtype="", levelist="", repres="", date="", resol="",
                 stream="", area="", time="", step="", expver="1",
                 number="", accuracy="", grid="", gaussian="", target="",
                 param=""):
        '''Initialises the instance of the MarsRetrieval class and
        defines and assigns a set of the necessary retrieval parameters
        for the FLEXPART input data.

        A description of MARS keywords/arguments, their dependencies
        on each other and examples of their values can be found here:
        https://software.ecmwf.int/wiki/display/UDOC/MARS+keywords

        Parameters
        ----------
        server : ECMWFService or ECMWFDataServer
            This is the connection to the ECMWF data servers.
            It is needed for the pythonic access of ECMWF data.
            A false value selects the retrieval through the ``mars``
            command in the shell (see data_retrieve).

        public : int
            Decides which Web API version is used:
            0: member-state users and full archive access
            1: public access and limited access to the public server and
            datasets. Needs the parameter dataset.
            This parameter has no default and must be passed explicitly.

        marsclass : str, optional
            Characterisation of dataset. E.g. EI (ERA-Interim),
            E4 (ERA40), OD (Operational archive), EA (ERA5).
            Default is the ERA5 dataset "EA".

        dataset : str, optional
            For public datasets there is the specific naming and parameter
            dataset which has to be used to characterize the type of
            data. Usually there is less data available, either in times,
            domain or parameter.
            Default is an empty string.

        type : str, optional
            Determines the type of fields to be retrieved.
            Selects between observations, images or fields.
            Examples for fields: Analysis (an), Forecast (fc),
            Perturbed Forecast (pf), Control Forecast (cf) and so on.
            Default is an empty string.

        levtype : str, optional
            Denotes type of level. Has a direct implication on valid
            levelist values!
            E.g. model level (ml), pressure level (pl), surface (sfc),
            potential vorticity (pv), potential temperature (pt)
            and depth (dp).
            Default is an empty string.

        levelist : str, optional
            Specifies the required levels. It has to have a valid
            correspondence to the selected levtype.
            Examples: model level: 1/to/137, pressure levels: 500/to/1000
            Default is an empty string.

        repres : str, optional
            Selects the representation of the archived data.
            E.g. sh - spherical harmonics, gg - Gaussian grid,
            ll - latitude/longitude, ...
            Default is an empty string.

        date : str, optional
            Specifies the Analysis date, the Forecast base date or
            Observations date. Valid formats are:
            Absolute as YYYY-MM-DD or YYYYMMDD.
            Default is an empty string.

        resol : str, optional
            Specifies the desired triangular truncation of retrieved data,
            before carrying out any other selected post-processing.
            The default is automatic truncation (auto), by which the lowest
            resolution compatible with the value specified in grid is
            automatically selected for the retrieval.
            Users wanting to perform post-processing from full spectral
            resolution should specify Archived Value (av).
            The following are examples of existing resolutions found in
            the archive: 63, 106, 159, 213, 255, 319, 399, 511, 799 or 1279.
            This keyword has no meaning/effect if the archived data is
            not in spherical harmonics representation.
            The best selection can be found here:
            https://software.ecmwf.int/wiki/display/UDOC/Retrieve#Retrieve-Truncationbeforeinterpolation
            Default is an empty string.

        stream : str, optional
            Identifies the forecasting system used to generate the data.
            E.g. oper (Atmospheric model), enfo (Ensemble forecasts), ...
            Default is an empty string.

        area : str, optional
            Specifies the desired sub-area of data to be extracted.
            Areas can be defined to wrap around the globe.

            Latitude values must be given as signed numbers, with:
                north latitudes (i.e. north of the equator)
                    being positive (e.g: 40.5)
                south latitudes (i.e. south of the equator)
                    being negative (e.g: -50.5)
            Longitude values must be given as signed numbers, with:
                east longitudes (i.e. east of the 0 degree meridian)
                    being positive (e.g: 35.0)
                west longitudes (i.e. west of the 0 degree meridian)
                    being negative (e.g: -20.5)

            E.g.: North/West/South/East
            Default is an empty string.

        time : str, optional
            Specifies the time of the data in hours and minutes.
            Valid values depend on the type of data: Analysis time,
            Forecast base time or First guess verification time
            (all usually at synoptic hours: 00, 06, 12 and 18 ).
            Observation time (any combination in hours and minutes is valid,
            subject to data availability in the archive).
            The syntax is HHMM or HH:MM. If MM is omitted it defaults to 00.
            Default is an empty string.

        step : str, optional
            Specifies the forecast time step from forecast base time.
            Valid values are hours (HH) from forecast base time. It also
            specifies the length of the forecast which verifies at
            First Guess time.
            E.g. 1/3/6-hourly
            Default is an empty string.

        expver : str, optional
            The version of the dataset. Each experiment is assigned a
            unique code (version). Production data is assigned 1 or 2,
            and experimental data in Operations 11, 12 ,...
            Research or Member State's experiments have a four letter
            experiment identifier.
            Default is "1".

        number : str, optional
            Selects the member in ensemble forecast run. (Only then it
            is necessary.) It has a different meaning depending on
            the type of data.
            E.g. Perturbed Forecasts: specifies the Ensemble forecast member
            Default is an empty string.

        accuracy : str, optional
            Specifies the number of bits per value to be used in the
            generated GRIB coded fields.
            A positive integer may be given to specify the preferred number
            of bits per packed value. This must not be greater than the
            number of bits normally used for a Fortran integer on the
            processor handling the request (typically 32 or 64 bit).
            Within a compute request the accuracy of the original fields
            can be passed to the result field by specifying accuracy=av.
            Default is an empty string.

        grid : str, optional
            Specifies the output grid which can be either a Gaussian grid
            or a Latitude/Longitude grid. MARS requests specifying
            grid=av will return the archived model grid.

            Lat/Lon grid: The grid spacing needs to be an integer
            fraction of 90 degrees e.g. grid = 0.5/0.5

            Gaussian grid: specified by a letter denoting the type of
            Gaussian grid followed by an integer (the grid number)
            representing the number of lines between the Pole and Equator,
            e.g.
            grid = F160 - full (or regular) Gaussian grid with
            160 latitude lines between the pole and equator
            grid = N320 - ECMWF original reduced Gaussian grid with
            320 latitude lines between the pole and equator,
            see Reduced Gaussian Grids for grid numbers used at ECMWF
            grid = O640 - ECMWF octahedral (reduced) Gaussian grid with
            640 latitude lines between the pole and equator
            Default is an empty string.

        gaussian : str, optional
            This parameter is deprecated and should no longer be used.
            Specifies the desired type of Gaussian grid for the output.
            Valid Gaussian grids are quasi-regular (reduced) or regular.
            Keyword gaussian can only be specified together with
            keyword grid. Gaussian without grid has no effect.
            Default is an empty string.

        target : str, optional
            Specifies a file into which data is to be written after
            retrieval or manipulation. Path names should always be
            enclosed in double quotes. The MARS client supports automatic
            generation of multiple target files using MARS keywords
            enclosed in square brackets [ ]. If the environment variable
            MARS_MULTITARGET_STRICT_FORMAT is set to 1 before calling mars,
            the keyword values will be used in the filename as shown by
            the ecCodes GRIB tool grib_ls -m, e.g. with
            MARS_MULTITARGET_STRICT_FORMAT set to 1 the keywords time,
            expver and param will be formatted as 0600, 0001 and 129.128
            rather than 600, 1 and 129.
            Default is an empty string.

        param : str, optional
            Specifies the meteorological parameter.
            The list of meteorological parameters in MARS is extensive.
            Their availability is directly related to their meteorological
            meaning and, therefore, the rest of directives specified
            in the MARS request.
            Meteorological parameters can be specified by their
            GRIB code (param=130), their mnemonic (param=t) or
            full name (param=temperature).
            The list of parameters should be separated by a "/"-sign.
            E.g. 130/131/133
            Default is an empty string.

        Return
        ------
        None
        '''
        # The attribute names (apart from server/public and marsclass) are
        # used verbatim as MARS keywords by the other methods via vars(self),
        # so no additional instance attributes should be introduced here.
        self.server = server
        self.public = public
        self.marsclass = marsclass
        self.dataset = dataset
        self.type = type
        self.levtype = levtype
        self.levelist = levelist
        self.repres = repres
        self.date = date
        self.resol = resol
        self.stream = stream
        self.area = area
        self.time = time
        self.step = step
        self.expver = expver
        self.number = number
        self.accuracy = accuracy
        self.grid = grid
        self.gaussian = gaussian
        self.target = target
        self.param = param

    def display_info(self):
        '''Prints all request attributes and their values to the
        standard output.

        The connection attributes ``server`` and ``public`` are skipped.

        Parameters
        ----------

        Return
        ------
        None
        '''
        for name, value in vars(self).items():
            if name not in ('server', 'public'):
                print(name + ': ' + str(value))

    def print_infodata_csv(self, inputdir, request_number):
        '''Appends all request parameters in alphabetical order of their
        names as one line to a "csv" file.

        The line starts with the request number; the file name is taken
        from ``_config.FILE_MARS_REQUESTS``.

        Parameters
        ----------
        inputdir : str
            The path where all data from the retrievals are stored.

        request_number : int
            Number of mars requests for flux and non-flux data.

        Return
        ------
        None
        '''
        # Get all instance attributes without the connection settings
        attrs = vars(self).copy()
        del attrs['server']
        del attrs['public']

        csv_path = os.path.join(inputdir, _config.FILE_MARS_REQUESTS)
        # append mode: one line per request is collected in the same file
        with open(csv_path, 'a') as f:
            f.write(str(request_number) + ', ')
            f.write(', '.join(str(attrs[key]) for key in sorted(attrs)))
            f.write('\n')

    def _convert_to_cdsera5_sfc_request(self, attrs):
        '''Converts MARS request keywords to the ones of the CDS API.

        The keywords and values for the single level download
        with CDS API is different from MARS. This function
        converts the old request keywords to the new ones.

        Example request for single level downloads in CDS API

        retrieve(
            'reanalysis-era5-single-levels',
            {
                'product_type': 'reanalysis',
                'variable': 'total_precipitation',
                'year': '2019',
                'month': '01',
                'day': '01',
                'time': '00:00',
                'format': 'grib',
                'grid':[1.0, 1.0],
                'area': [
                    45, 0, 43,
                    12,
                ],
            },
            'download.grib')

        Parameters
        ----------
        attrs : dict
            Dictionary of the mars request parameters. The keys
            'date', 'area', 'grid' and 'param' are required.

        Return
        ------
        newattrs : dict
            Request dictionary with the keys 'year', 'month', 'day',
            'product_type', 'area', 'grid', 'param', 'time' and 'format'.
        '''
        from datetime import datetime, timedelta

        newattrs = {}
        if '/' in attrs['date']:
            # Every second element is taken so that the keyword between
            # start and end date ("start/to/end") is skipped.
            start, end = attrs['date'].split('/')[::2]
            current = datetime.strptime(start, '%Y%m%d')
            last = datetime.strptime(end, '%Y%m%d')
            years, months, days = set(), set(), set()
            while current <= last:
                years.add(current.year)
                months.add(current.month)
                days.add(current.day)
                current += timedelta(days=1)
            # Sorted so that the generated request is deterministic.
            # NOTE(review): the three lists are independent of each other;
            # for ranges crossing a month boundary they no longer describe
            # exactly the requested days - confirm how CDS combines them.
            newattrs['year'] = sorted(years)
            newattrs['month'] = sorted(months)
            newattrs['day'] = sorted(days)
        else:
            single = datetime.strptime(attrs['date'], '%Y%m%d')
            newattrs['year'] = single.year
            newattrs['month'] = single.month
            newattrs['day'] = single.day

        newattrs['product_type'] = 'reanalysis'
        newattrs['area'] = attrs['area'].split('/')
        newattrs['grid'] = [float(res) for res in attrs['grid'].split('/')]
        newattrs['param'] = attrs['param'].split('/')
        # NOTE(review): the times are fixed to 3-hourly steps here and the
        # 'time' entry of the incoming request is not used - confirm intent.
        newattrs['time'] = [str(hour) for hour in range(0, 24, 3)]
        newattrs['format'] = 'grib'

        return newattrs

    def data_retrieve(self):
        '''Submits a MARS retrieval. Depending on the existence of
        ECMWF Web-API or CDS API it is submitted via Python or a
        subprocess in the Shell. The parameters for the mars retrieval
        are taken from the defined class attributes.

        Parameters
        ----------

        Return
        ------
        None

        Raises
        ------
        SystemExit
            With exit status 1 if a retrieval through one of the Python
            APIs fails or the server instance is of an unknown type.

        IOError
            If the ``mars`` shell command reports errors or the target
            file is empty after the retrieval.
        '''
        # Get all instance attributes and their values as a dictionary
        attrs = vars(self).copy()

        # eliminate attributes which are not MARS keywords
        del attrs['server']
        del attrs['public']

        # exchange parameter name for marsclass
        attrs['class'] = attrs.pop('marsclass')

        # prepare target variable as needed for the Web API or CDS API mode
        #     within the dictionary for public access
        #     as a single variable for full access
        target = attrs.get('target')
        if not int(self.public):
            del attrs['target']
        print('target: ' + target)

        # drop all keywords without a value and convert all other values
        # to strings
        attrs = {key: str(value)
                 for key, value in attrs.items() if value != ''}

        # MARS request via Python script
        if self.server:
            try:
                if cds_api and isinstance(self.server, cdsapi.Client):
                    # distinguish between model (ECMWF MARS access)
                    # and surface level (CS3 online access)
                    if attrs['levtype'].lower() == 'ml':
                        dataset = _config.CDS_DATASET_ML
                    else:
                        dataset = _config.CDS_DATASET_SFC
                        attrs = self._convert_to_cdsera5_sfc_request(attrs)
                    print('RETRIEVE ERA5 WITH CDS API!')
                    self.server.retrieve(dataset, attrs, target)
                elif ec_api and isinstance(self.server,
                                           ecmwfapi.ECMWFDataServer):
                    print('RETRIEVE PUBLIC DATA (NOT ERA5)!')
                    self.server.retrieve(attrs)
                elif ec_api and isinstance(self.server,
                                           ecmwfapi.ECMWFService):
                    print('EXECUTE NON-PUBLIC RETRIEVAL (NOT ERA5)!')
                    self.server.execute(attrs, target)
                else:
                    print('ERROR:')
                    print('No match for Web API instance!')
                    raise IOError('No match for Web API instance!')
            except Exception as e:
                # Top-level boundary of the retrieval: report and terminate.
                print('\n\nMARS Request failed!')
                print(e)
                print(traceback.format_exc())
                # A non-zero status is required so that calling scripts can
                # detect the failure; a plain sys.exit() reports success.
                sys.exit(1)

        # MARS request via call in shell
        else:
            # The target is appended separately in double quotes, hence it
            # is skipped here to avoid a duplicated keyword in public mode.
            request_str = 'ret' + ''.join(
                ',' + key + '=' + value
                for key, value in attrs.items() if key != 'target')
            request_str += ',target="' + target + '"'

            # No line buffering (bufsize=1): it is not supported for binary
            # pipes and communicate() transfers the request in one go.
            process = subprocess.Popen(['mars'],
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            pout = process.communicate(input=request_str.encode())[0].decode()
            print(pout)

            if 'Some errors reported' in pout:
                print('MARS Request failed - please check request')
                raise IOError('MARS Request failed - please check request')
            elif os.stat(target).st_size == 0:
                print('MARS Request returned no data - please check request')
                raise IOError('MARS Request returned no data - '
                              'please check request')