Skip to content

Reference

This reference manual details functions, modules, and objects included in joseki, describing what they are and what they do.

Accessor

Accessor module.

JosekiAccessor

Joseki accessor.

Source code in src/joseki/accessor.py
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
@xr.register_dataset_accessor("joseki")
class JosekiAccessor:  # pragma: no cover
    """Joseki accessor."""

    def __init__(self, xarray_obj):
        self._obj = xarray_obj

    @property
    def molecules(self) -> t.List[str]:
        """Return list of molecules."""
        return [c[2:] for c in self._obj.data_vars if c.startswith("x_")]

    @property
    def column_number_density(
        self,
    ) -> t.Dict[str, pint.Quantity]:
        r"""Compute column number density.

        Returns:
            A mapping of molecule and column number density.

        Notes:
            The column number density is given by:

            $$
            N_{\mathrm{M}} = \int n_{\mathrm{M}}(z) \, \mathrm{d} z
            $$

            with

            $$
            n_{\mathrm{M}}(z) = x_{\mathrm{M}}(z) \, n(z)
            $$

            where

            * $z$ is the altitude,
            * $x_{\mathrm{M}}(z)$ is the mole fraction of molecule M
            at altitude $z$,
            * $n(z)$ is the air number density at altitude $z$,
            * $n_{\mathrm{M}}(z)$ is the number density of molecule M at
            altitude $z$.

            The  integration is performed using the trapezoidal rule.
        """
        ds = self._obj

        logger.debug(
            "Computing column number density using the trapezoidal rule."
        )

        _column_number_density = {}
        for m in self.molecules:
            integral = (ds[f"x_{m}"] * ds.n).integrate(
                coord="z"
            )  # integrate using the trapezoidal rule
            units = " ".join(
                [ds[var].attrs["units"] for var in [f"x_{m}", "n", "z"]]
            )
            _column_number_density[m] = (
                integral.values * ureg.Unit(units)
            ).to_base_units()

        return _column_number_density

    @property
    def column_mass_density(
        self,
    ) -> t.Dict[str, pint.Quantity]:
        r"""Compute column mass density.

        Returns:
            A mapping of molecule and column mass density.

        Notes:
            The column mass density is given by:

            $$
            \sigma_{\mathrm{M}} = N_{\mathrm{M}} \, m_{\mathrm{M}}
            $$

            where

            * $N_{\mathrm{M}}$ is the column number density of molecule M,
            * $m_{\mathrm{M}}$ is the molecular mass of molecule M.
        """
        _column_number_density = self.column_number_density
        return {
            m: (molecular_mass(m) * _column_number_density[m]).to("kg/m^2")
            for m in self.molecules
        }

    @property
    def number_density_at_sea_level(
        self,
    ) -> t.Dict[str, pint.Quantity]:
        """Compute number density at sea level.

        Returns:
            A mapping of molecule and number density at sea level.
        """
        ds = self._obj
        n = to_quantity(ds.n.isel(z=0))
        return {
            m: (to_quantity(ds[f"x_{m}"].isel(z=0)) * n)
            for m in self.molecules
        }

    @property
    def mass_density_at_sea_level(
        self,
    ) -> t.Dict[str, pint.Quantity]:
        """Compute mass density at sea level.

        Returns:
            A mapping of molecule and mass density at sea level.
        """
        _number_density_at_sea_level = self.number_density_at_sea_level
        return {
            m: (molecular_mass(m) * _number_density_at_sea_level[m]).to("kg/m^3")
            for m in self.molecules
        }

    @property
    def mole_fraction_at_sea_level(
        self,
    ) -> t.Dict[str, pint.Quantity]:
        """Compute mole fraction at sea level.

        Returns:
            A mapping of molecule and mole fraction at sea level.
        """
        ds = self._obj
        return {
            m: to_quantity(ds[f"x_{m}"].isel(z=0)).item()
            for m in self.molecules
        }

    @property
    def mole_fraction(self) -> xr.DataArray:
        """Extract mole fraction and tabulate as a function of (m, z).

        Returns:
            Mole fraction.
        """
        ds = self._obj
        molecules = self.molecules
        concatenated = xr.concat([ds[f"x_{m}"] for m in molecules], dim="m")
        concatenated["m"] = ("m", molecules, {"long_name": "molecule"})
        concatenated.attrs.update(
            {
                "standard_name": "mole_fraction",
                "long_name": "mole fraction",
                "units": "dimensionless",
            }
        )
        concatenated.name = "x"
        return concatenated

    @property
    def mass_fraction(self) -> xr.DataArray:
        """Extract mass fraction and tabulate as a function of (m, z).

        Returns:
            Mass fraction.
        """
        x = self.mole_fraction
        m_air = self.air_molar_mass
        m = molar_mass(molecules=self.molecules)
        y = (x * m / m_air).rename("y")
        y.attrs.update(
            {
                "standard_name": "mass_fraction_in_air",
                "long_name": "mass fraction",
                "units": "kg * kg^-1",
            }
        )
        return y

    @property
    def air_molar_mass(self) -> xr.DataArray:
        r"""
        Compute air molar mass as a function of altitude.

        Returns:
            Air molar mass.

        Notes:
            The air molar mass is given by:

            $$
            M_{\mathrm{air}} = 
            \frac{
                \sum_{\mathrm{M}} x_{\mathrm{M}} \, m_{\mathrm{M}}
            }{
                \sum_{\mathrm{M}} x_{\mathrm{M}}
            }
            $$

            where
            * $x_{\mathrm{M}}$ is the mole fraction of molecule M,
            * $m_{\mathrm{M}}$ is the molar mass of molecule M.

            To compute the air molar mass accurately, the mole fraction of
            molecular nitrogen (N2), molecular oxygen (O2), and argon (Ar) are
            required. If these are not present in the dataset, they are
            computed using the assumption that the mole fraction of these
            molecules are constant with altitude and set to the following
            values:

            * molecular nitrogen (N2): 0.78084
            * molecular oxygen (O2): 0.209476
            * argon (Ar): 0.00934

            are independent of altitude.

            Since nothing garantees that the mole fraction sum is equal to
            one, the air molar mass is computed as the sum of the mole
            fraction weighted molar mass divided by the sum of the mole
            fraction.
        """
        ds = self._obj

        # for molar mass computation to be accurate, main air constituents
        # must be present in the dataset
        ds_copy = ds.copy(deep=True)
        for m in AIR_MAIN_CONSTITUENTS_MOLAR_FRACTION:
            if f"x_{m}" not in ds:
                value = AIR_MAIN_CONSTITUENTS_MOLAR_FRACTION[m]
                ds_copy[f"x_{m}"] = ("z", np.full_like(ds.n, value))
                ds_copy[f"x_{m}"].attrs.update({"units": "dimensionless"})

        # compute air molar mass
        x = ds_copy.joseki.mole_fraction
        molecules = x.m.values
        mm = xr.DataArray(
            data=np.array([MM[m] for m in molecules]),
            coords={"m": ("m", molecules)},
            attrs={"units": "dimensionless"}
        )

        mm_average = (x * mm).sum(dim="m") / (x.sum(dim="m"))

        mm_average.attrs.update(
            {
                "long_name": "air molar mass",
                "units": "kg/mol",
            }
        )

        return mm_average

    def scaling_factors(
        self, target: t.MutableMapping[str, pint.Quantity | dict | xr.DataArray]
    ) -> t.MutableMapping[str, float]:
        """Compute scaling factor(s) to reach specific target amount(s).

        Args:
            target: Mapping of molecule and target amount.

        Raises:
            ValueError: If a target amount has dimensions that are not supported.

        Returns:
            Mapping of molecule and scaling factors.

        Notes:
            For each molecule in the ``target`` mapping, the target amount is
            interpreted, depending on its dimensions (indicated in square 
            brackets), as:

            * a column number density [`length^-2`],
            * a column mass density [`mass * length^-2`],
            * a number density at sea level [`length^-3`],
            * a mass density at sea level [`mass * length^-3`],
            * a mole fraction at sea level [`dimensionless`]

            The scaling factor is then evaluated as the ratio of the target
            amount with the original amount, for each molecule.

        See Also:
            `rescale`
        """
        compute_initial_amount = {
            "[length]^-2": self.column_number_density,
            "[mass] * [length]^-2": self.column_mass_density,
            "[length]^-3": self.number_density_at_sea_level,
            "[mass] * [length]^-3": self.mass_density_at_sea_level,
            "": self.mole_fraction_at_sea_level,
        }
        factors = {}
        for m, target_amount in target.items():
            target_amount = to_quantity(target_amount)
            initial_amount = None
            for dim in compute_initial_amount.keys():
                if target_amount.check(dim):
                    initial_amount = compute_initial_amount[dim][m]
            if initial_amount is None:
                raise ValueError
            factors[m] = _scaling_factor(
                initial_amount=initial_amount, target_amount=target_amount
            )
        return factors

    def rescale(
        self,
        factors: t.MutableMapping[str, float],
        check_x_sum: bool = False
    ) -> xr.Dataset:
        """Rescale molecules concentration in atmospheric profile.

        Args:
            factors: A mapping of molecule and scaling factor.
            check_x_sum: if True, check that mole fraction sums
                are never larger than one.
        Raises:
            ValueError: if `check_x_sum` is `True` and the 
                dataset is not valid.

        Returns:
            Rescaled dataset (new object).
        """
        ds = self._obj

        # update mole fraction
        x_new = {}
        for m in factors:
            with xr.set_options(keep_attrs=True):
                x_new[f"x_{m}"] = ds[f"x_{m}"] * factors[m]

        ds = ds.assign(x_new)

        # validate rescaled dataset
        try:
            ds.joseki.validate(check_x_sum=check_x_sum)
        except ValueError as e:
            raise ValueError("Cannot rescale") from e

        # update history attribute
        now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
        for m in factors.keys():
            ds.attrs["history"] += (
                f"\n{now} - rescaled {m}'s mole fraction using a scaling "
                f"factor of {factors[m]:.3f} - joseki, version {__version__}"
            )

        return ds

    def rescale_to(
        self,
        target: t.Mapping[str, pint.Quantity | dict | xr.DataArray],
        check_x_sum: bool = False,
    ) -> xr.Dataset:
        """
        Rescale mole fractions to match target molecular total column 
        densities.

        Args:
            target: Mapping of molecule and target total column density. 
                Total column must be either a column number density 
                [`length^-2`], a column mass density [`mass * length^-2`], a 
                number density at sea level [`length^-3`], a mass density at 
                sea level [`mass * length^-3`], a mole fraction at 
                sea level [`dimensionless`].
            check_x_sum: if True, check that mole fraction sums are never
                larger than one.

        Returns:
            Rescaled dataset (new object).    
        """
        return self.rescale(
            factors=self.scaling_factors(target=target),
            check_x_sum=check_x_sum,
        )

    def drop_molecules(
        self,
        molecules: t.List[str],
    ) -> xr.Dataset:
        """Drop molecules from dataset.

        Args:
            molecules: List of molecules to drop.

        Returns:
            Dataset with molecules dropped.
        """
        ds = self._obj

        # update history attribute
        now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()

        ds.attrs["history"] += (
            f"\n{now} - dropped mole fraction data for molecules "
            f"{', '.join(molecules)} - joseki, version {__version__}"
        )

        return ds.drop_vars([f"x_{m}" for m in molecules])

    def validate(
        self,
        check_x_sum: bool = False,
        ret_true_if_valid: bool = False,
    ) -> bool:
        """Validate atmosphere thermophysical profile dataset schema.

        Returns:
            `True` if the dataset complies with the schema, else `False`.
        """
        return schema.validate(
            ds=self._obj,
            check_x_sum=check_x_sum,
            ret_true_if_valid=ret_true_if_valid,
        )

    @property
    def is_valid(self):
        return self.validate(ret_true_if_valid=True)

air_molar_mass: xr.DataArray property

Compute air molar mass as a function of altitude.

Returns:

Type Description
xr.DataArray

Air molar mass.

Notes

The air molar mass is given by:

\[ M_{\mathrm{air}} = \frac{ \sum_{\mathrm{M}} x_{\mathrm{M}} \, m_{\mathrm{M}} }{ \sum_{\mathrm{M}} x_{\mathrm{M}} } \]

where * \(x_{\mathrm{M}}\) is the mole fraction of molecule M, * \(m_{\mathrm{M}}\) is the molar mass of molecule M.

To compute the air molar mass accurately, the mole fraction of molecular nitrogen (N2), molecular oxygen (O2), and argon (Ar) are required. If these are not present in the dataset, they are computed using the assumption that the mole fraction of these molecules are constant with altitude and set to the following values:

  • molecular nitrogen (N2): 0.78084
  • molecular oxygen (O2): 0.209476
  • argon (Ar): 0.00934

are independent of altitude.

Since nothing garantees that the mole fraction sum is equal to one, the air molar mass is computed as the sum of the mole fraction weighted molar mass divided by the sum of the mole fraction.

column_mass_density: t.Dict[str, pint.Quantity] property

Compute column mass density.

Returns:

Type Description
t.Dict[str, pint.Quantity]

A mapping of molecule and column mass density.

Notes

The column mass density is given by:

\[ \sigma_{\mathrm{M}} = N_{\mathrm{M}} \, m_{\mathrm{M}} \]

where

  • \(N_{\mathrm{M}}\) is the column number density of molecule M,
  • \(m_{\mathrm{M}}\) is the molecular mass of molecule M.

column_number_density: t.Dict[str, pint.Quantity] property

Compute column number density.

Returns:

Type Description
t.Dict[str, pint.Quantity]

A mapping of molecule and column number density.

Notes

The column number density is given by:

\[ N_{\mathrm{M}} = \int n_{\mathrm{M}}(z) \, \mathrm{d} z \]

with

\[ n_{\mathrm{M}}(z) = x_{\mathrm{M}}(z) \, n(z) \]

where

  • \(z\) is the altitude,
  • \(x_{\mathrm{M}}(z)\) is the mole fraction of molecule M at altitude \(z\),
  • \(n(z)\) is the air number density at altitude \(z\),
  • \(n_{\mathrm{M}}(z)\) is the number density of molecule M at altitude \(z\).

The integration is performed using the trapezoidal rule.

mass_density_at_sea_level: t.Dict[str, pint.Quantity] property

Compute mass density at sea level.

Returns:

Type Description
t.Dict[str, pint.Quantity]

A mapping of molecule and mass density at sea level.

mass_fraction: xr.DataArray property

Extract mass fraction and tabulate as a function of (m, z).

Returns:

Type Description
xr.DataArray

Mass fraction.

mole_fraction: xr.DataArray property

Extract mole fraction and tabulate as a function of (m, z).

Returns:

Type Description
xr.DataArray

Mole fraction.

mole_fraction_at_sea_level: t.Dict[str, pint.Quantity] property

Compute mole fraction at sea level.

Returns:

Type Description
t.Dict[str, pint.Quantity]

A mapping of molecule and mole fraction at sea level.

molecules: t.List[str] property

Return list of molecules.

number_density_at_sea_level: t.Dict[str, pint.Quantity] property

Compute number density at sea level.

Returns:

Type Description
t.Dict[str, pint.Quantity]

A mapping of molecule and number density at sea level.

drop_molecules(molecules)

Drop molecules from dataset.

Parameters:

Name Type Description Default
molecules t.List[str]

List of molecules to drop.

required

Returns:

Type Description
xr.Dataset

Dataset with molecules dropped.

Source code in src/joseki/accessor.py
def drop_molecules(
    self,
    molecules: t.List[str],
) -> xr.Dataset:
    """Drop molecules from dataset.

    Args:
        molecules: List of molecules to drop.

    Returns:
        Dataset with molecules dropped.
    """
    ds = self._obj

    # update history attribute
    now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()

    ds.attrs["history"] += (
        f"\n{now} - dropped mole fraction data for molecules "
        f"{', '.join(molecules)} - joseki, version {__version__}"
    )

    return ds.drop_vars([f"x_{m}" for m in molecules])

rescale(factors, check_x_sum=False)

Rescale molecules concentration in atmospheric profile.

Parameters:

Name Type Description Default
factors t.MutableMapping[str, float]

A mapping of molecule and scaling factor.

required
check_x_sum bool

if True, check that mole fraction sums are never larger than one.

False

Raises:

Type Description
ValueError

if check_x_sum is True and the dataset is not valid.

Returns:

Type Description
xr.Dataset

Rescaled dataset (new object).

Source code in src/joseki/accessor.py
def rescale(
    self,
    factors: t.MutableMapping[str, float],
    check_x_sum: bool = False
) -> xr.Dataset:
    """Rescale molecules concentration in atmospheric profile.

    Args:
        factors: A mapping of molecule and scaling factor.
        check_x_sum: if True, check that mole fraction sums
            are never larger than one.
    Raises:
        ValueError: if `check_x_sum` is `True` and the 
            dataset is not valid.

    Returns:
        Rescaled dataset (new object).
    """
    ds = self._obj

    # update mole fraction
    x_new = {}
    for m in factors:
        with xr.set_options(keep_attrs=True):
            x_new[f"x_{m}"] = ds[f"x_{m}"] * factors[m]

    ds = ds.assign(x_new)

    # validate rescaled dataset
    try:
        ds.joseki.validate(check_x_sum=check_x_sum)
    except ValueError as e:
        raise ValueError("Cannot rescale") from e

    # update history attribute
    now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()
    for m in factors.keys():
        ds.attrs["history"] += (
            f"\n{now} - rescaled {m}'s mole fraction using a scaling "
            f"factor of {factors[m]:.3f} - joseki, version {__version__}"
        )

    return ds

rescale_to(target, check_x_sum=False)

Rescale mole fractions to match target molecular total column densities.

Parameters:

Name Type Description Default
target t.Mapping[str, pint.Quantity | dict | xr.DataArray]

Mapping of molecule and target total column density. Total column must be either a column number density [length^-2], a column mass density [mass * length^-2], a number density at sea level [length^-3], a mass density at sea level [mass * length^-3], a mole fraction at sea level [dimensionless].

required
check_x_sum bool

if True, check that mole fraction sums are never larger than one.

False

Returns:

Type Description
xr.Dataset

Rescaled dataset (new object).

Source code in src/joseki/accessor.py
def rescale_to(
    self,
    target: t.Mapping[str, pint.Quantity | dict | xr.DataArray],
    check_x_sum: bool = False,
) -> xr.Dataset:
    """
    Rescale mole fractions to match target molecular total column 
    densities.

    Args:
        target: Mapping of molecule and target total column density. 
            Total column must be either a column number density 
            [`length^-2`], a column mass density [`mass * length^-2`], a 
            number density at sea level [`length^-3`], a mass density at 
            sea level [`mass * length^-3`], a mole fraction at 
            sea level [`dimensionless`].
        check_x_sum: if True, check that mole fraction sums are never
            larger than one.

    Returns:
        Rescaled dataset (new object).    
    """
    return self.rescale(
        factors=self.scaling_factors(target=target),
        check_x_sum=check_x_sum,
    )

scaling_factors(target)

Compute scaling factor(s) to reach specific target amount(s).

Parameters:

Name Type Description Default
target t.MutableMapping[str, pint.Quantity | dict | xr.DataArray]

Mapping of molecule and target amount.

required

Raises:

Type Description
ValueError

If a target amount has dimensions that are not supported.

Returns:

Type Description
t.MutableMapping[str, float]

Mapping of molecule and scaling factors.

Notes

For each molecule in the target mapping, the target amount is interpreted, depending on its dimensions (indicated in square brackets), as:

  • a column number density [length^-2],
  • a column mass density [mass * length^-2],
  • a number density at sea level [length^-3],
  • a mass density at sea level [mass * length^-3],
  • a mole fraction at sea level [dimensionless]

The scaling factor is then evaluated as the ratio of the target amount with the original amount, for each molecule.

See Also

rescale

Source code in src/joseki/accessor.py
def scaling_factors(
    self, target: t.MutableMapping[str, pint.Quantity | dict | xr.DataArray]
) -> t.MutableMapping[str, float]:
    """Compute scaling factor(s) to reach specific target amount(s).

    Args:
        target: Mapping of molecule and target amount.

    Raises:
        ValueError: If a target amount has dimensions that are not supported.

    Returns:
        Mapping of molecule and scaling factors.

    Notes:
        For each molecule in the ``target`` mapping, the target amount is
        interpreted, depending on its dimensions (indicated in square 
        brackets), as:

        * a column number density [`length^-2`],
        * a column mass density [`mass * length^-2`],
        * a number density at sea level [`length^-3`],
        * a mass density at sea level [`mass * length^-3`],
        * a mole fraction at sea level [`dimensionless`]

        The scaling factor is then evaluated as the ratio of the target
        amount with the original amount, for each molecule.

    See Also:
        `rescale`
    """
    compute_initial_amount = {
        "[length]^-2": self.column_number_density,
        "[mass] * [length]^-2": self.column_mass_density,
        "[length]^-3": self.number_density_at_sea_level,
        "[mass] * [length]^-3": self.mass_density_at_sea_level,
        "": self.mole_fraction_at_sea_level,
    }
    factors = {}
    for m, target_amount in target.items():
        target_amount = to_quantity(target_amount)
        initial_amount = None
        for dim in compute_initial_amount.keys():
            if target_amount.check(dim):
                initial_amount = compute_initial_amount[dim][m]
        if initial_amount is None:
            raise ValueError
        factors[m] = _scaling_factor(
            initial_amount=initial_amount, target_amount=target_amount
        )
    return factors

validate(check_x_sum=False, ret_true_if_valid=False)

Validate atmosphere thermophysical profile dataset schema.

Returns:

Type Description
bool

True if the dataset complies with the schema, else False.

Source code in src/joseki/accessor.py
def validate(
    self,
    check_x_sum: bool = False,
    ret_true_if_valid: bool = False,
) -> bool:
    """Validate atmosphere thermophysical profile dataset schema.

    Returns:
        `True` if the dataset complies with the schema, else `False`.
    """
    return schema.validate(
        ds=self._obj,
        check_x_sum=check_x_sum,
        ret_true_if_valid=ret_true_if_valid,
    )

molecular_mass(m)

Return the average molecular mass of a molecule.

Parameters:

Name Type Description Default
m str

Molecule formula.

required

Returns:

Type Description
pint.Quantity

Average molecular mass.

Source code in src/joseki/accessor.py
def molecular_mass(m: str) -> pint.Quantity:
    """Return the average molecular mass of a molecule.

    Args:
        m: Molecule formula.

    Returns:
        Average molecular mass.
    """
    return MM[m] * ureg("dalton")

Data

Raw data files.

Profiles

Factory

Profile factory module.

ProfileFactory

Profile factory class.

Source code in src/joseki/profiles/factory.py
@define
class ProfileFactory:
    """
    Profile factory class.
    """

    """Profile registry."""
    registry: t.Dict[str, Profile] = field(factory=dict)

    @property
    def registered_identifiers(self) -> t.List[str]:
        """
        Registered profile identifiers.

        Returns:
            List of registered profile identifiers.
        """
        return list(self.registry.keys())

    def register(
        self,
        identifier: str,
    ) -> t.Callable:
        """
        Register a profile class.

        Args:
            identifier: Profile identifier.

        Returns:
            Decorator function.
        """

        def inner_wrapper(wrapped_class: Profile) -> t.Callable:
            logger.info("Registering profile %s", identifier)
            if identifier in self.registry:
                logger.warning(  # pragma: no cover
                    "Profile %s already exists. Will replace it",
                    identifier,
                )
            self.registry[identifier] = wrapped_class
            return wrapped_class

        return inner_wrapper

    def create(self, identifier: str, **kwargs) -> Profile:
        """
        Create a profile instance.

        Args:
            identifier: Profile identifier.

        Returns:
            Profile instance.
        """
        if identifier not in self.registry:
            logger.fatal("Profile %s does not exist in the registry", identifier)
            raise ValueError(f"Profile {identifier} does not exist in the registry")

        logger.debug("Creating profile %s", identifier)
        profile_cls = self.registry[identifier]
        profile = profile_cls(**kwargs)
        return profile

registered_identifiers: t.List[str] property

Registered profile identifiers.

Returns:

Type Description
t.List[str]

List of registered profile identifiers.

create(identifier, **kwargs)

Create a profile instance.

Parameters:

Name Type Description Default
identifier str

Profile identifier.

required

Returns:

Type Description
Profile

Profile instance.

Source code in src/joseki/profiles/factory.py
def create(self, identifier: str, **kwargs) -> Profile:
    """
    Create a profile instance.

    Args:
        identifier: Profile identifier.

    Returns:
        Profile instance.
    """
    if identifier not in self.registry:
        logger.fatal("Profile %s does not exist in the registry", identifier)
        raise ValueError(f"Profile {identifier} does not exist in the registry")

    logger.debug("Creating profile %s", identifier)
    profile_cls = self.registry[identifier]
    profile = profile_cls(**kwargs)
    return profile

register(identifier)

Register a profile class.

Parameters:

Name Type Description Default
identifier str

Profile identifier.

required

Returns:

Type Description
t.Callable

Decorator function.

Source code in src/joseki/profiles/factory.py
def register(
    self,
    identifier: str,
) -> t.Callable:
    """
    Register a profile class.

    Args:
        identifier: Profile identifier.

    Returns:
        Decorator function.
    """

    def inner_wrapper(wrapped_class: Profile) -> t.Callable:
        logger.info("Registering profile %s", identifier)
        if identifier in self.registry:
            logger.warning(  # pragma: no cover
                "Profile %s already exists. Will replace it",
                identifier,
            )
        self.registry[identifier] = wrapped_class
        return wrapped_class

    return inner_wrapper

Core

Core module for atmosphere thermophysical profiles.

The Profile abstract class defines the interface for atmosphere thermophysical profiles. The interp function is used to interpolate an atmosphere thermophysical profile on new altitude values.

Profile

Bases: ABC

Abstract class for atmosphere thermophysical profiles.

Source code in src/joseki/profiles/core.py
@define
class Profile(ABC):
    """
    Abstract class for atmosphere thermophysical profiles.
    """

    @abstractmethod
    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        """
        Return the profile as a dataset.

        Args:
            z: Altitude grid.
                If the profile can be evaluated at arbitrary altitudes, this
                parameter is passed to the evaluating method for that profile.
                If the profile is defined on a fixed altitude grid, this parameter
                is used to interpolate the profile on the specified altitude grid.
            interp_method: Interpolation method for each variable.
                If ``None``, the default interpolation method is used.
                Interpolation may be required if the profile is defined on a fixed
                altitude grid, and the altitude grid is not the same as the one
                used to define the profile.
                Interpolation may also not be required, e.g. if the profile is
                defined by analytical function(s) of the altitude variable.
            conserve_column: If `True`, ensure that column densities are conserved
                during interpolation.
            kwargs: Parameters passed to lower-level methods.

        Returns:
            Atmospheric profile.
        """
        pass  # pragma: no cover

to_dataset(z=None, interp_method=None, conserve_column=False, **kwargs) abstractmethod

Return the profile as a dataset.

Parameters:

Name Type Description Default
z t.Optional[pint.Quantity]

Altitude grid. If the profile can be evaluated at arbitrary altitudes, this parameter is passed to the evaluating method for that profile. If the profile is defined on a fixed altitude grid, this parameter is used to interpolate the profile on the specified altitude grid.

None
interp_method t.Optional[t.Mapping[str, str]]

Interpolation method for each variable. If None, the default interpolation method is used. Interpolation may be required if the profile is defined on a fixed altitude grid, and the altitude grid is not the same as the one used to define the profile. Interpolation may also not be required, e.g. if the profile is defined by analytical function(s) of the altitude variable.

None
conserve_column bool

If True, ensure that column densities are conserved during interpolation.

False
kwargs t.Any

Parameters passed to lower-level methods.

{}

Returns:

Type Description
xr.Dataset

Atmospheric profile.

Source code in src/joseki/profiles/core.py
@abstractmethod
def to_dataset(
    self,
    z: t.Optional[pint.Quantity] = None,
    interp_method: t.Optional[t.Mapping[str, str]] = None,
    conserve_column: bool = False,
    **kwargs: t.Any,
) -> xr.Dataset:
    """
    Return the profile as a dataset.

    Args:
        z: Altitude grid.
            If the profile can be evaluated at arbitrary altitudes, this
            parameter is passed to the evaluating method for that profile.
            If the profile is defined on a fixed altitude grid, this parameter
            is used to interpolate the profile on the specified altitude grid.
        interp_method: Interpolation method for each variable.
            If ``None``, the default interpolation method is used.
            Interpolation may be required if the profile is defined on a fixed
            altitude grid, and the altitude grid is not the same as the one
            used to define the profile.
            Interpolation may also not be required, e.g. if the profile is
            defined by analytical function(s) of the altitude variable.
        conserve_column: If `True`, ensure that column densities are conserved
            during interpolation.
        kwargs: Parameters passed to lower-level methods.

    Returns:
        Atmospheric profile.
    """
    pass  # pragma: no cover

extrapolate(ds, z_extra, direction, method=DEFAULT_METHOD, conserve_column=False)

Extrapolate an atmospheric profile to new altitude(s).

Parameters:

Name Type Description Default
ds xr.Dataset

Initial atmospheric profile.

required
z_extra pint.Quantity

Altitude(s) to extrapolate to.

required
direction str

Direction of the extrapolation, either "up" or "down".

required
method t.Dict[str, str]

Mapping of variable and interpolation method. If a variable is not in the mapping, the linear interpolation is used. By default, linear interpolation is used for all variables.

DEFAULT_METHOD
conserve_column bool

If True, ensure that column densities are conserved.

False

Raises:

Type Description
ValueError

If the extrapolation direction is not "up" or "down".

Returns:

Type Description
xr.Dataset

Extrapolated atmospheric profile.

Source code in src/joseki/profiles/core.py
def extrapolate(
    ds: xr.Dataset,
    z_extra: pint.Quantity,
    direction: str,
    method: t.Dict[str, str] = DEFAULT_METHOD,
    conserve_column: bool = False,
) -> xr.Dataset:
    """
    Extrapolate an atmospheric profile to new altitude(s).

    Args:
        ds: Initial atmospheric profile.
        z_extra: Altitude(s) to extrapolate to.
        direction: Direction of the extrapolation, either "up" or "down".
        method: Mapping of variable and interpolation method.
            If a variable is not in the mapping, the linear interpolation is used.
            By default, linear interpolation is used for all variables.
        conserve_column: If True, ensure that column densities are conserved.

    Raises:
        ValueError: If the extrapolation direction is not "up" or "down".

    Returns:
        Extrapolated atmospheric profile.
    """
    if direction not in ["up", "down"]:
        msg = (
            f"Extrapolation direction must be either 'up' or 'down', got "
            f"{direction}."
        )
        logger.critical(msg)
        raise ValueError(msg)

    z = to_quantity(ds.z)

    if direction == "down" and np.any(z_extra >= z.min()):
        msg = (
            f"Cannot extrapolate down to {z_extra:~P}, "
            f"minimum altitude is {z.min():~P}."
        )
        logger.critical(msg)
        raise ValueError(msg)

    elif direction == "up" and np.any(z_extra <= z.max()):
        msg = (
            f"Cannot extrapolate up to {z_extra:~P}, "
            f"maximum altitude is {z.max():~P}."
        )
        logger.critical(msg)
        raise ValueError(msg)

    else:
        extrapolated = interp(
            ds=ds,
            z_new=np.concatenate([np.atleast_1d(z_extra), z]),
            method=method,
            conserve_column=conserve_column,
            fill_value="extrapolate",
        )
        extrapolated.attrs.update(
            history=extrapolated.history + f"\n{utcnow()} "
            f"- extrapolate - joseki, version {__version__}"
        )
        return extrapolated

interp(ds, z_new, method=DEFAULT_METHOD, conserve_column=False, **kwargs)

Interpolate atmospheric profile on new altitudes.

Parameters:

Name Type Description Default
ds xr.Dataset

Atmospheric profile to interpolate.

required
z_new pint.Quantity

Altitudes values at which to interpolate the atmospheric profile.

required
method t.Dict[str, str]

Mapping of variable and interpolation method. If a variable is not in the mapping, the linear interpolation is used. By default, linear interpolation is used for all variables.

DEFAULT_METHOD
conserve_column bool

If True, ensure that column densities are conserved.

False
kwargs t.Any

Parameters passed to scipy.interpolate.interp1d (except 'kind' and 'bounds_error').

{}

Returns:

Type Description
xr.Dataset

Interpolated atmospheric profile.

Source code in src/joseki/profiles/core.py
def interp(
    ds: xr.Dataset,
    z_new: pint.Quantity,
    method: t.Dict[str, str] = DEFAULT_METHOD,
    conserve_column: bool = False,
    **kwargs: t.Any,
) -> xr.Dataset:
    """Interpolate atmospheric profile on new altitudes.

    Args:
        ds: Atmospheric profile to interpolate.
        z_new: Altitudes values at which to interpolate the atmospheric profile.
        method: Mapping of variable and interpolation method.
            If a variable is not in the mapping, the linear interpolation is used.
            By default, linear interpolation is used for all variables.
        conserve_column: If True, ensure that column densities are conserved.
        kwargs: Parameters passed to `scipy.interpolate.interp1d` (except 
            'kind' and 'bounds_error').

    Returns:
        Interpolated atmospheric profile.
    """
    # sort altitude values
    z_new = np.sort(z_new)

    z_units = ds.z.attrs["units"]
    z_new_values = z_new.m_as(z_units)

    coords = {"z": z_new.to(z_units)}

    # kwargs cannot contain 'kind' and 'bounds_error'
    kwargs.pop("kind", None)
    kwargs.pop("bounds_error", None)

    try:
        if kwargs["fill_value"] == "extrapolate":  # pragma: no cover
            bounds_error = None
    except KeyError:
        bounds_error = True


    # Interpolate pressure, temperature and density
    data_vars = {}
    for var in ["p", "t", "n"]:
        f = interpolate.interp1d(
            x=ds.z.values,
            y=ds[var].values,
            kind=method.get(var, method["default"]),
            bounds_error=bounds_error,
            **kwargs,
        )
        data_vars[var] = ureg.Quantity(f(z_new_values), ds[var].attrs["units"])

    # Interpolate mole fraction
    for m in ds.joseki.molecules:
        var = f"x_{m}"
        f = interpolate.interp1d(
            x=ds.z.values,
            y=ds[var].values,
            kind=method.get(var, method["default"]),
            bounds_error=bounds_error,
            **kwargs,
        )
        data_vars[var] = ureg.Quantity(f(z_new_values), ds[var].attrs["units"])

    # Attributes
    attrs = ds.attrs
    author = f"joseki, version {__version__}"
    attrs.update(
        {
            "history": f"{utcnow()} - dataset interpolation by {author}.",
        }
    )

    # Convert to dataset
    logger.debug("convert interpolated data to dataset")
    interpolated = schema.convert(
        data_vars=data_vars,
        coords=coords,
        attrs=attrs,
    )

    # Compute scaling factors to conserve column densities
    if conserve_column:
        return rescale_to_column(reference=ds, ds=interpolated)

    return interpolated

regularize(ds, method=DEFAULT_METHOD, conserve_column=False, options=DEFAULT_OPTIONS, **kwargs)

Regularize the profile's altitude grid.

Parameters:

Name Type Description Default
ds xr.Dataset

Initial atmospheric profile.

required
method t.Dict[str, str]

Mapping of variable and interpolation method. If a variable is not in the mapping, the linear interpolation is used. By default, linear interpolation is used for all variables.

DEFAULT_METHOD
conserve_column bool

If True, ensure that column densities are conserved.

False
options t.Dict[str, t.Union[int, str, pint.Quantity]]

Options for the regularization. Mapping with possible keys: - "num": Number of points in the new altitude grid. - "zstep": Altitude step in the new altitude grid. If "auto", the minimum altitude step is used.

DEFAULT_OPTIONS
kwargs t.Any

Keyword arguments passed to the interpolation function.

{}

Returns:

Type Description
xr.Dataset

Regularized atmospheric profile.

Source code in src/joseki/profiles/core.py
def regularize(
    ds: xr.Dataset,
    method: t.Dict[str, str] = DEFAULT_METHOD,
    conserve_column: bool = False,
    options: t.Dict[str, t.Union[int, str, pint.Quantity]] = DEFAULT_OPTIONS,
    **kwargs: t.Any,
) -> xr.Dataset:
    """Regularize the profile's altitude grid.

    Args:
        ds: Initial atmospheric profile.
        method: Mapping of variable and interpolation method.
            If a variable is not in the mapping, the linear interpolation is used.
            By default, linear interpolation is used for all variables.
        conserve_column: If True, ensure that column densities are conserved.
        options: Options for the regularization.
            Mapping with possible keys:
                - "num": Number of points in the new altitude grid.
                - "zstep": Altitude step in the new altitude grid.
                    If "auto", the minimum altitude step is used.
        kwargs: Keyword arguments passed to the interpolation function.

    Returns:
        Regularized atmospheric profile.
    """
    z = to_quantity(ds.z)
    if options.get("num", None):
        z_new = np.linspace(
            z.min(),
            z.max(),
            options["num"],
        )
    elif options.get("zstep", None):
        zstep = options["zstep"]
        zunits = z.units
        if isinstance(zstep, ureg.Quantity):
            pass
        elif isinstance(zstep, str):
            if zstep == "auto":
                zstep = np.diff(z).min()
            else:
                raise ValueError(f"Invalid zstep value: {zstep}")
        else:
            raise ValueError(f"Invalid zstep value: {zstep}")
        z_new = np.arange(
            z.min().m_as(zunits),
            z.max().m_as(zunits),
            zstep.m_as(zunits),
        ) * zunits

    else:
        raise ValueError("options must contain either 'num' or 'zstep' key.")

    return interp(
        ds=ds,
        z_new=z_new,
        method=method,
        conserve_column=conserve_column,
        **kwargs,
    )

rescale_to_column(reference, ds)

Rescale mole fraction to ensure that column densities are conserved.

Parameters:

Name Type Description Default
reference xr.Dataset

Reference profile.

required
ds xr.Dataset

Profile to rescale.

required

Returns:

Type Description
xr.Dataset

Rescaled profile.

Source code in src/joseki/profiles/core.py
def rescale_to_column(reference: xr.Dataset, ds: xr.Dataset) -> xr.Dataset:
    """Rescale mole fraction to ensure that column densities are conserved.

    Args:
        reference: Reference profile.
        ds: Profile to rescale.

    Returns:
        Rescaled profile.
    """
    desired = reference.joseki.column_number_density
    actual = ds.joseki.column_number_density
    factors = {}
    for m in reference.joseki.molecules:
        if desired[m].m == 0.0:
            factors[m] = 0.0
        elif actual[m].m == 0.0:
            msg = (
                f"Actual column number density of {m} is zero but the reference "
                f"column number density is not ({desired[m]:~P}): rescaling "
                f"is impossible."
            )
            logger.critical(msg)
            raise ValueError(msg)
        else:
            factors[m] = (desired[m] / actual[m]).m_as("dimensionless")

    return ds.joseki.rescale(factors=factors)

select_molecules(ds, molecules)

Select specified molecules in the profile.

Parameters:

Name Type Description Default
ds xr.Dataset

Initial atmospheric profile.

required
molecules t.List[str]

List of molecules to select.

required

Returns:

Type Description
xr.Dataset

Atmospheric profile with exactly the specified molecules.

Source code in src/joseki/profiles/core.py
def select_molecules(
    ds: xr.Dataset,
    molecules: t.List[str],
) -> xr.Dataset:
    """
    Select specified molecules in the profile.

    Args:
        ds: Initial atmospheric profile.
        molecules: List of molecules to select.

    Returns:
        Atmospheric profile with exactly the specified molecules.
    """
    drop_molecules = [m for m in ds.joseki.molecules if m not in molecules]
    ds_dropped = ds.joseki.drop_molecules(drop_molecules)

    if all([m in ds_dropped.joseki.molecules for m in molecules]):
        return ds_dropped
    else:
        raise ValueError(
            f"Could not select molecules {molecules}, "
            f"available molecules are {ds.joseki.molecules}."
        )

Dataset schema

Dataset schema for atmosphere thermophysical profiles.

The dataset schema defines the variables, coordinates and attributes that are expected in a dataset representing an atmosphere thermophysical profile.

Schema

Dataset schema for atmosphere thermophysical profiles.

Source code in src/joseki/profiles/schema.py
@define(frozen=True)
class Schema:
    """Dataset schema for atmosphere thermophysical profiles."""

    data_vars = {
        "p": (["z"], npt.NDArray[np.float64], "Pa", "air_pressure"),
        "t": (["z"], npt.NDArray[np.float64], "K", "air_temperature"),
        "n": (["z"], npt.NDArray[np.float64], "m^-3", "air_number_density"),
    }

    coords = {
        "z": ("z", npt.NDArray[np.float64], "km", "altitude"),
    }

    attrs = {
        "Conventions": str,
        "title": str,
        "institution": str,
        "source": str,
        "history": str,
        "references": str,
        "url": str,
        "urldate": str,
    }

    def validate(
        self,
        ds: xr.Dataset,
        check_x_sum: bool = False,
        ret_true_if_valid: bool = False,
    ) -> t.Optional[bool]:
        """Validate dataset.

        Args:
            ds: Dataset to validate.
            check_x_sum: if True, check that mole fraction sums
                are never larger than one.
            ret_true_if_valid: make this method return True if the dataset is
                valid.

        Raises:
            ValueError: If the dataset does not match the schema.

        Returns:
            None or bool: If `ret_true_if_valid` is True, returns True if the 
                dataset is valid, otherwise returns None.
        """
        logger.debug("Validating dataset")

        logger.debug("Checking that all data variables are present")
        for var in self.data_vars:
            if var not in ds.data_vars:
                raise ValueError(f"missing data variable: {var}")  # pragma: no cover

        logger.debug("Checking that 'x_*' data variable(s) are present")
        if not any([name.startswith("x_") for name in ds.data_vars]):
            raise ValueError("missing data variable starting with x_")  # pragma: no cover

        logger.debug("Checking that all coordinates are present")
        for coord in self.coords:
            if coord not in ds.coords:
                raise ValueError(f"missing coordinate: {coord}")  # pragma: no cover

        logger.debug("Checking that all attributes are present")
        for attr in self.attrs:
            if attr not in ds.attrs:
                raise ValueError(f"missing attribute: {attr}")  # pragma: no cover

        logger.debug("Checking that data variables have the correct dimensions")
        for var, (dims, _, _, _) in self.data_vars.items():
            if set(ds[var].dims) != set(dims):
                raise ValueError(  # pragma: no cover
                    f"incorrect dimensions for {var}. Expected {dims}, "
                    f"got {ds[var].dims}"
                )

        logger.debug("Checking that coordinates have the correct dimensions")
        for coord, (dims, _, _, _) in self.coords.items():
            if set(ds[coord].dims) != set(dims):
                raise ValueError(  # pragma: no cover
                    f"incorrect dimensions for {coord}. Expected {dims}, "
                    f"got {ds[coord].dims}"
                )

        logger.debug("Checking that data variables have the correct units")
        for var, (_, _, units, _) in self.data_vars.items():
            if ds[var].units != units:
                raise ValueError(  # pragma: no cover
                    f"incorrect units for {var}. Expected {units}, "
                    f"got {ds[var].units}"
                )

        logger.debug("Checking that coordinates have the correct units")
        for coord, (_, _, units, _) in self.coords.items():
            if ds[coord].units != units:
                raise ValueError(  # pragma: no cover
                    f"incorrect units for {coord}. Expected {units}, "
                    f"got {ds[coord].units}"
                )

        logger.debug("Checking that attributes have the correct types")
        for attr, typ in self.attrs.items():
            if not isinstance(ds.attrs[attr], typ):
                raise ValueError(  # pragma: no cover
                    f"incorrect type for {attr}. Expected {typ}, "
                    f"got {type(ds.attrs[attr])}"
                )

        logger.debug("Checking that data variables have the correct standard names")
        for var, (_, _, _, standard_name) in self.data_vars.items():
            if ds[var].attrs["standard_name"] != standard_name:
                raise ValueError(  # pragma: no cover
                    f"incorrect standard name for {var}. Expected "
                    f"{standard_name}, got "
                    f"{ds[var].attrs['standard_name']}"
                )

        logger.debug(
            "Checking that all x_* data variables have the correct units and "
            "standard names"
        )
        for var in ds.data_vars:
            if var.startswith("x_"):
                m = var[2:]
                if ds[var].attrs["units"] != "dimensionless":
                    raise ValueError(  # pragma: no cover
                        f"incorrect units for {var}. Expected dimensionless, "
                        f"got {ds[var].attrs['units']}"
                    )
                if ds[var].attrs["standard_name"] != f"{m}_mole_fraction":
                    raise ValueError(  # pragma: no cover
                        f"incorrect standard name for {var}. Expected "
                        f"{m}_mole_fraction, got "
                        f"{ds[var].attrs['standard_name']}"
                    )

        if check_x_sum:
            logger.debug(
                "Checking that mole fraction sums are never larger than one"
            )
            vfs = mole_fraction_sum(ds)
            if np.any(vfs.m > 1):
                raise ValueError(  # pragma: no cover
                    "The rescaling factors lead to a profile where the mole "
                    "fraction sum is larger than 1."
                )

        logger.info("Dataset is valid")

        if ret_true_if_valid:  # pragma: no cover
            return True

    def convert(
        self,
        data_vars: t.Mapping[str, pint.Quantity],
        coords: t.Mapping[str, pint.Quantity],
        attrs: t.Mapping[str, str],
    ) -> xr.Dataset:
        """Convert input to schema-compliant dataset.

        Args:
            data_vars: Mapping of data variable names to quantities.
            coords: Mapping of coordinate names to quantities.
            attrs: Mapping of attribute names to values.

        Returns:
            Dataset with schema-compliant data variables, coordinates, and
            attributes.
        """
        logger.debug("converting input to schema-compliant dataset")

        logger.debug("checking that all data variables are present")
        for var in self.data_vars:
            if var == "n" not in data_vars:
                n = number_density(
                    p=data_vars["p"],
                    t=data_vars["t"],
                )
                data_vars["n"] = n
            else:
                if var not in data_vars:
                    raise ValueError(f"missing data variable: {var}")  # pragma: no cover


        logger.debug("checking that there is at least one x_ data variable")
        if not any([name.startswith("x_") for name in data_vars]):
            raise ValueError("missing data variable starting with x_")  # pragma: no cover

        logger.debug("checking that all coordinates are present")
        for coord in self.coords:
            if coord not in coords:
                raise ValueError(f"missing coordinate: {coord}")  # pragma: no cover

        logger.debug("checking that all attributes are present")
        for attr in self.attrs:
            if attr not in attrs:
                raise ValueError(f"missing attribute: {attr}")  # pragma: no cover

        logger.debug("converting data variables to xarray data array tuples")
        for var, (dims, _, units, standard_name) in self.data_vars.items():
            data_vars[var] = (
                dims,
                data_vars[var].m_as(units),
                {
                    "standard_name": standard_name,
                    "long_name": standard_name.replace("_", " "),
                    "units": units,
                },
            )

        logger.debug("converting x_ data variables")
        for var in data_vars:
            if var.startswith("x_"):
                m = var[2:]
                data_vars[var] = (
                    "z",
                    data_vars[var].m_as("dimensionless"),
                    {
                        "standard_name": f"{m}_mole_fraction",
                        "long_name": f"{m} mole fraction",
                        "units": "dimensionless",
                    },
                )

        logger.debug("converting coordinates")
        for attr, (_, _, units, standard_name) in self.coords.items():
            coords[attr] = (
                attr,
                coords[attr].m_as(units),
                {
                    "standard_name": standard_name,
                    "long_name": standard_name.replace("_", " "),
                    "units": units,
                },
            )

        logger.debug("checking that all attributes are present")
        for attr in self.attrs:
            if attr not in attrs:
                raise ValueError(f"missing attribute: {attr}")  # pragma: no cover

        logger.debug("creating dataset")
        return xr.Dataset(
            data_vars=data_vars,
            coords=coords,
            attrs=attrs,
        )

convert(data_vars, coords, attrs)

Convert input to schema-compliant dataset.

Parameters:

Name Type Description Default
data_vars t.Mapping[str, pint.Quantity]

Mapping of data variable names to quantities.

required
coords t.Mapping[str, pint.Quantity]

Mapping of coordinate names to quantities.

required
attrs t.Mapping[str, str]

Mapping of attribute names to values.

required

Returns:

Type Description
xr.Dataset

Dataset with schema-compliant data variables, coordinates, and

xr.Dataset

attributes.

Source code in src/joseki/profiles/schema.py
def convert(
    self,
    data_vars: t.Mapping[str, pint.Quantity],
    coords: t.Mapping[str, pint.Quantity],
    attrs: t.Mapping[str, str],
) -> xr.Dataset:
    """Convert input to schema-compliant dataset.

    Args:
        data_vars: Mapping of data variable names to quantities.
        coords: Mapping of coordinate names to quantities.
        attrs: Mapping of attribute names to values.

    Returns:
        Dataset with schema-compliant data variables, coordinates, and
        attributes.
    """
    logger.debug("converting input to schema-compliant dataset")

    logger.debug("checking that all data variables are present")
    for var in self.data_vars:
        if var == "n" not in data_vars:
            n = number_density(
                p=data_vars["p"],
                t=data_vars["t"],
            )
            data_vars["n"] = n
        else:
            if var not in data_vars:
                raise ValueError(f"missing data variable: {var}")  # pragma: no cover


    logger.debug("checking that there is at least one x_ data variable")
    if not any([name.startswith("x_") for name in data_vars]):
        raise ValueError("missing data variable starting with x_")  # pragma: no cover

    logger.debug("checking that all coordinates are present")
    for coord in self.coords:
        if coord not in coords:
            raise ValueError(f"missing coordinate: {coord}")  # pragma: no cover

    logger.debug("checking that all attributes are present")
    for attr in self.attrs:
        if attr not in attrs:
            raise ValueError(f"missing attribute: {attr}")  # pragma: no cover

    logger.debug("converting data variables to xarray data array tuples")
    for var, (dims, _, units, standard_name) in self.data_vars.items():
        data_vars[var] = (
            dims,
            data_vars[var].m_as(units),
            {
                "standard_name": standard_name,
                "long_name": standard_name.replace("_", " "),
                "units": units,
            },
        )

    logger.debug("converting x_ data variables")
    for var in data_vars:
        if var.startswith("x_"):
            m = var[2:]
            data_vars[var] = (
                "z",
                data_vars[var].m_as("dimensionless"),
                {
                    "standard_name": f"{m}_mole_fraction",
                    "long_name": f"{m} mole fraction",
                    "units": "dimensionless",
                },
            )

    logger.debug("converting coordinates")
    for attr, (_, _, units, standard_name) in self.coords.items():
        coords[attr] = (
            attr,
            coords[attr].m_as(units),
            {
                "standard_name": standard_name,
                "long_name": standard_name.replace("_", " "),
                "units": units,
            },
        )

    logger.debug("checking that all attributes are present")
    for attr in self.attrs:
        if attr not in attrs:
            raise ValueError(f"missing attribute: {attr}")  # pragma: no cover

    logger.debug("creating dataset")
    return xr.Dataset(
        data_vars=data_vars,
        coords=coords,
        attrs=attrs,
    )

validate(ds, check_x_sum=False, ret_true_if_valid=False)

Validate dataset.

Parameters:

Name Type Description Default
ds xr.Dataset

Dataset to validate.

required
check_x_sum bool

if True, check that mole fraction sums are never larger than one.

False
ret_true_if_valid bool

make this method return True if the dataset is valid.

False

Raises:

Type Description
ValueError

If the dataset does not match the schema.

Returns:

Type Description
t.Optional[bool]

None or bool: If ret_true_if_valid is True, returns True if the dataset is valid, otherwise returns None.

Source code in src/joseki/profiles/schema.py
def validate(
    self,
    ds: xr.Dataset,
    check_x_sum: bool = False,
    ret_true_if_valid: bool = False,
) -> t.Optional[bool]:
    """Validate dataset.

    Args:
        ds: Dataset to validate.
        check_x_sum: if True, check that mole fraction sums
            are never larger than one.
        ret_true_if_valid: make this method return True if the dataset is
            valid.

    Raises:
        ValueError: If the dataset does not match the schema.

    Returns:
        None or bool: If `ret_true_if_valid` is True, returns True if the 
            dataset is valid, otherwise returns None.
    """
    logger.debug("Validating dataset")

    logger.debug("Checking that all data variables are present")
    for var in self.data_vars:
        if var not in ds.data_vars:
            raise ValueError(f"missing data variable: {var}")  # pragma: no cover

    logger.debug("Checking that 'x_*' data variable(s) are present")
    if not any([name.startswith("x_") for name in ds.data_vars]):
        raise ValueError("missing data variable starting with x_")  # pragma: no cover

    logger.debug("Checking that all coordinates are present")
    for coord in self.coords:
        if coord not in ds.coords:
            raise ValueError(f"missing coordinate: {coord}")  # pragma: no cover

    logger.debug("Checking that all attributes are present")
    for attr in self.attrs:
        if attr not in ds.attrs:
            raise ValueError(f"missing attribute: {attr}")  # pragma: no cover

    logger.debug("Checking that data variables have the correct dimensions")
    for var, (dims, _, _, _) in self.data_vars.items():
        if set(ds[var].dims) != set(dims):
            raise ValueError(  # pragma: no cover
                f"incorrect dimensions for {var}. Expected {dims}, "
                f"got {ds[var].dims}"
            )

    logger.debug("Checking that coordinates have the correct dimensions")
    for coord, (dims, _, _, _) in self.coords.items():
        if set(ds[coord].dims) != set(dims):
            raise ValueError(  # pragma: no cover
                f"incorrect dimensions for {coord}. Expected {dims}, "
                f"got {ds[coord].dims}"
            )

    logger.debug("Checking that data variables have the correct units")
    for var, (_, _, units, _) in self.data_vars.items():
        if ds[var].units != units:
            raise ValueError(  # pragma: no cover
                f"incorrect units for {var}. Expected {units}, "
                f"got {ds[var].units}"
            )

    logger.debug("Checking that coordinates have the correct units")
    for coord, (_, _, units, _) in self.coords.items():
        if ds[coord].units != units:
            raise ValueError(  # pragma: no cover
                f"incorrect units for {coord}. Expected {units}, "
                f"got {ds[coord].units}"
            )

    logger.debug("Checking that attributes have the correct types")
    for attr, typ in self.attrs.items():
        if not isinstance(ds.attrs[attr], typ):
            raise ValueError(  # pragma: no cover
                f"incorrect type for {attr}. Expected {typ}, "
                f"got {type(ds.attrs[attr])}"
            )

    logger.debug("Checking that data variables have the correct standard names")
    for var, (_, _, _, standard_name) in self.data_vars.items():
        if ds[var].attrs["standard_name"] != standard_name:
            raise ValueError(  # pragma: no cover
                f"incorrect standard name for {var}. Expected "
                f"{standard_name}, got "
                f"{ds[var].attrs['standard_name']}"
            )

    logger.debug(
        "Checking that all x_* data variables have the correct units and "
        "standard names"
    )
    for var in ds.data_vars:
        if var.startswith("x_"):
            m = var[2:]
            if ds[var].attrs["units"] != "dimensionless":
                raise ValueError(  # pragma: no cover
                    f"incorrect units for {var}. Expected dimensionless, "
                    f"got {ds[var].attrs['units']}"
                )
            if ds[var].attrs["standard_name"] != f"{m}_mole_fraction":
                raise ValueError(  # pragma: no cover
                    f"incorrect standard name for {var}. Expected "
                    f"{m}_mole_fraction, got "
                    f"{ds[var].attrs['standard_name']}"
                )

    if check_x_sum:
        logger.debug(
            "Checking that mole fraction sums are never larger than one"
        )
        vfs = mole_fraction_sum(ds)
        if np.any(vfs.m > 1):
            raise ValueError(  # pragma: no cover
                "The rescaling factors lead to a profile where the mole "
                "fraction sum is larger than 1."
            )

    logger.info("Dataset is valid")

    if ret_true_if_valid:  # pragma: no cover
        return True

mole_fraction_sum(ds)

Compute the sum of mole fractions.

Parameters:

Name Type Description Default
ds xr.Dataset

Dataset.

required

Returns:

Type Description
pint.Quantity

The sum of mole fractions.

Source code in src/joseki/profiles/schema.py
def mole_fraction_sum(ds: xr.Dataset) -> pint.Quantity:
    """Compute the sum of mole fractions.

    Args:
        ds: Dataset.

    Returns:
        The sum of mole fractions.
    """
    return (
        sum([ds[c] for c in ds.data_vars if c.startswith("x_")]).values
        * ureg.dimensionless
    )

AFGL (1986)

AFGL 1986 atmosphere's thermophysical profiles.

The profiles are generated from data files stored in joseki/data/afgl_1986. These data files correspond to tables 1a-f and 2a-d of the technical report Anderson+1986.

AFGL1986MidlatitudeSummer

Bases: Profile

AFGL 1986 midlatitude summer atmosphere thermophysical profile.

Source code in src/joseki/profiles/afgl_1986.py
@factory.register(identifier="afgl_1986-midlatitude_summer")
@define
class AFGL1986MidlatitudeSummer(Profile):
    """AFGL 1986 midlatitude summer atmosphere thermophysical profile."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        logger.debug(
            "creating AFGL 1986 midlatitude summer atmosphere thermophysical "
            "profile dataset."
        )
        return to_dataset(
            identifier=Identifier.MIDLATITUDE_SUMMER,
            z=z,
            interp_method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

AFGL1986MidlatitudeWinter

Bases: Profile

AFGL 1986 midlatitude winter atmosphere thermophysical profile.

Source code in src/joseki/profiles/afgl_1986.py
@factory.register(identifier="afgl_1986-midlatitude_winter")
@define
class AFGL1986MidlatitudeWinter(Profile):
    """AFGL 1986 midlatitude winter atmosphere thermophysical profile."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        logger.debug(
            "creating AFGL 1986 midlatitude winter atmosphere thermophysical "
            "profile dataset."
        )
        return to_dataset(
            identifier=Identifier.MIDLATITUDE_WINTER,
            z=z,
            interp_method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

AFGL1986SubarcticSummer

Bases: Profile

AFGL 1986 subarctic summer atmosphere thermophysical profile.

Source code in src/joseki/profiles/afgl_1986.py
@factory.register(identifier="afgl_1986-subarctic_summer")
@define
class AFGL1986SubarcticSummer(Profile):
    """AFGL 1986 subarctic summer atmosphere thermophysical profile."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        logger.debug(
            "creating AFGL 1986 subarctic summer atmosphere thermophysical "
            "profile dataset."
        )
        return to_dataset(
            identifier=Identifier.SUBARCTIC_SUMMER,
            z=z,
            interp_method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

AFGL1986SubarcticWinter

Bases: Profile

AFGL 1986 subarctic winter atmosphere thermophysical profile.

Source code in src/joseki/profiles/afgl_1986.py
@factory.register(identifier="afgl_1986-subarctic_winter")
@define
class AFGL1986SubarcticWinter(Profile):
    """AFGL 1986 subarctic winter atmosphere thermophysical profile."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        logger.debug(
            "creating AFGL 1986 subarctic winter atmosphere thermophysical "
            "profile dataset."
        )
        return to_dataset(
            identifier=Identifier.SUBARCTIC_WINTER,
            z=z,
            interp_method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

AFGL1986Tropical

Bases: Profile

AFGL 1986 tropical atmosphere thermophysical profile.

Source code in src/joseki/profiles/afgl_1986.py
@factory.register(identifier="afgl_1986-tropical")
@define
class AFGL1986Tropical(Profile):
    """AFGL 1986 tropical atmosphere thermophysical profile."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        logger.debug(
            "creating AFGL 1986 tropical atmosphere thermophysical profile dataset."
        )
        return to_dataset(
            identifier=Identifier.TROPICAL,
            z=z,
            interp_method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

AFGL1986USStandard

Bases: Profile

AFGL 1986 US Standard atmosphere thermophysical profile.

Source code in src/joseki/profiles/afgl_1986.py
@factory.register(identifier="afgl_1986-us_standard")
@define
class AFGL1986USStandard(Profile):
    """AFGL 1986 US Standard atmosphere thermophysical profile."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        logger.debug(
            "creating AFGL 1986 US Standard atmosphere thermophysical profile dataset."
        )
        return to_dataset(
            identifier=Identifier.US_STANDARD,
            z=z,
            interp_method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

Identifier

Bases: enum.Enum

AFGL 1986 atmospheric profile identifier enumeration.

Source code in src/joseki/profiles/afgl_1986.py
class Identifier(enum.Enum):
    """AFGL 1986 atmospheric profile identifier enumeration."""

    TROPICAL = "tropical"
    MIDLATITUDE_SUMMER = "midlatitude_summer"
    MIDLATITUDE_WINTER = "midlatitude_winter"
    SUBARCTIC_SUMMER = "subarctic_summer"
    SUBARCTIC_WINTER = "subarctic_winter"
    US_STANDARD = "us_standard"

dataframe_to_dataset(df, identifier, additional_molecules=True)

Convert the output of the parse method to a xarray.Dataset.

Parameters:

Name Type Description Default
df pd.DataFrame

Atmospheric profile data.

required
identifier Identifier

Atmospheric profile identifier.

required
additional_molecules bool

If True, include molecules 8-28 as numbered in Anderson+1986. Else, discard molecules 8-28.

True

Returns:

Type Description
xr.Dataset

Atmospheric profile dataset.

Notes

Use the z column of the output pandas.DataFrame of read_raw_data as data coordinate and all other columns as data variables. All data variables and coordinates of the returned xarray.Dataset are associated metadata (standard name, long name and units). Raw data units are documented in the technical report AFGL Atmospheric Constituent Profiles (0-120 km), Anderson et al., 1986 Anderson+1986. dataset attributes are added.

Source code in src/joseki/profiles/afgl_1986.py
def dataframe_to_dataset(
    df: pd.DataFrame,
    identifier: Identifier,
    additional_molecules: bool = True,
) -> xr.Dataset:
    """Convert the output of the `parse` method to a `xarray.Dataset`.

    Args:
        df: Atmospheric profile data.
        identifier: Atmospheric profile identifier.
        additional_molecules: If ``True``, include molecules 8-28 as numbered 
            in [Anderson+1986](bibliography.md#Anderson+1986).
            Else, discard molecules 8-28.

    Returns:
        Atmospheric profile dataset.

    Notes:
        Use the ``z`` column of the output pandas.DataFrame of read_raw_data
        as data coordinate and all other columns as data variables.
        All data variables and coordinates of the returned xarray.Dataset are
        associated metadata (standard name, long name and units).
        Raw data units are documented in the technical report *AFGL Atmospheric
        Constituent Profiles (0-120 km)*, Anderson et al., 1986
        [Anderson+1986](bibliography.md#Anderson+1986).
        dataset attributes are added.
    """
    # list molecules
    # molecules labels correspond to column with upper case first letter in
    # raw data DataFrames
    molecules = []
    for column in df.columns:
        if column[0].isupper():
            molecules.append(column)

    if additional_molecules:
        pass
    else:
        molecules = molecules[:7]

    # coordinates
    coords = {"z": ureg.Quantity(df.z.values, "km")}

    # data variables
    data_vars = {}
    data_vars["p"] = ureg.Quantity(df.p.values, "millibar").to("Pa")
    data_vars["t"] = ureg.Quantity(df.t.values, "K")
    data_vars["n"] = ureg.Quantity(df.n.values, "cm^-3").to("m^-3")

    for s in molecules:
        data_vars[f"x_{s}"] = (
            df[s].values * ureg.ppm
        )  # raw data mole fraction are given in ppmv

    # attributes
    pretty_identifier = f"AFGL (1986) {identifier.value.replace('_', '-')}"
    pretty_title = f"{pretty_identifier} atmosphere thermophysical profile"

    attrs = {
        "Conventions": "CF-1.10",
        "title": pretty_title,
        "institution": INSTITUION,
        "source": SOURCE,
        "history": history(),
        "references": REFERENCE,
        "url": URL,
        "urldate": URLDATE,
    }

    return schema.convert(
        data_vars=data_vars,
        coords=coords,
        attrs=attrs,
    )

get_dataset(identifier, additional_molecules=True)

Read data files for a given atmospheric profile.

Parameters:

Name Type Description Default
identifier Identifier

Atmospheric profile identifier. See Identifier for possible values.

required
additional_molecules bool

If True, include molecules 8-28 as numbered in Anderson+1986. Else, discard molecules 8-28.

True

Returns:

Type Description
xr.Dataset

Atmospheric profile dataset.

Notes

Chain calls to parse and dataframe_to_dataset.

Source code in src/joseki/profiles/afgl_1986.py
def get_dataset(
    identifier: Identifier,
    additional_molecules: bool = True,
) -> xr.Dataset:
    """Read data files for a given atmospheric profile.

    Args:
        identifier: Atmospheric profile identifier.
            See 
            [`Identifier`](reference.md#src.joseki.profiles.afgl_1986.Identifier) 
            for possible values.
        additional_molecules: If ``True``, include molecules 8-28 as numbered in
            [Anderson+1986](bibliography.md#Anderson+1986).
            Else, discard molecules 8-28.

    Returns:
        Atmospheric profile dataset.

    Notes:
        Chain calls to 
        [`parse`](reference.md#src.joseki.profiles.afgl_1986.parse) and
        [`dataframe_to_dataset`](reference.md#src.joseki.profiles.afgl_1986.dataframe_to_dataset).

    """
    df = parse(identifier=identifier)
    return dataframe_to_dataset(
        df=df,
        identifier=identifier,
        additional_molecules=additional_molecules,
    )

parse(identifier)

Parse table data files for a given atmospheric profile.

Parameters:

Name Type Description Default
identifier Identifier

Atmospheric profile identifier.

required

Returns:

Type Description
pd.DataFrame

Atmospheric profile dataset.

Notes

Read the relevant raw data files corresponding to the atmospheric profile. These raw data files correspond to tables 1 and 2 from the technical report AFGL Atmospheric Constituent Profiles (0-120 km), Anderson et al., 1986. Each atmospheric profile has 5 tables, i.e. 5 raw data files, associated to it. Only the first of these tables is specific to each atmospheric profile. All 5 raw data files are read into pandas.DataFrame objects and then concatenated after dropping the duplicate columns.

Source code in src/joseki/profiles/afgl_1986.py
def parse(identifier: Identifier) -> pd.DataFrame:
    """Parse table data files for a given atmospheric profile.

    Args:
        identifier: Atmospheric profile identifier.

    Returns:
        Atmospheric profile dataset.

    Notes:
        Read the relevant raw data files corresponding to the atmospheric profile.
        These raw data files correspond to tables 1 and 2 from the
        technical report [*AFGL Atmospheric Constituent Profiles (0-120 km)*,
        Anderson et al., 1986](bibliography.md#Anderson+1986).
        Each atmospheric profile has 5 tables, i.e. 5 raw data files, associated
        to it.
        Only the first of these tables is specific to each atmospheric profile.
        All 5 raw data files are read into `pandas.DataFrame` objects and
        then concatenated after dropping the duplicate columns.
    """
    package = "joseki.data.afgl_1986"
    files = DATA_FILES[identifier]
    dataframes = []
    for file in files:
        csvfile = importlib_resources.files(package).joinpath(file)
        df = pd.read_csv(csvfile)
        dataframes.append(df)
    dataframes[1] = dataframes[1].drop(["H2O", "O3", "N2O", "CO", "CH4"], axis=1)
    for i in range(1, 5):
        dataframes[i] = dataframes[i].drop("z", axis=1)

    return pd.concat(dataframes, axis=1)

to_dataset(identifier, z=None, interp_method=None, conserve_column=False, **kwargs)

Helper Profile.to_dataset() method.

Parameters:

Name Type Description Default
identifier Identifier

AFGL 1986 atmosphere thermophysical profile identifier. See Identifier for possible values.

required
z t.Optional[pint.Quantity]

New level altitudes. If None, return the original dataset Else, interpolate the dataset to the new level altitudes. Default is None.

None
interp_method t.Mapping[str, str]

Interpolation method for each data variable. Default is None.

None
conserve_column bool

If True, ensure that column densities are conserved during interpolation.

False
kwargs t.Any

Additional arguments passed to get_dataset.

{}

Returns:

Type Description
xr.Dataset

Atmosphere thermophysical profile dataset.

Source code in src/joseki/profiles/afgl_1986.py
def to_dataset(
    identifier: Identifier,
    z: t.Optional[pint.Quantity] = None,
    interp_method: t.Mapping[str, str] = None,
    conserve_column: bool = False,
    **kwargs: t.Any,
) -> xr.Dataset:
    """
    Helper Profile.to_dataset() method.

    Args:
        identifier: AFGL 1986 atmosphere thermophysical profile identifier.
            See 
            [`Identifier`](reference.md#src.joseki.profiles.afgl_1986.Identifier) 
            for possible values.
        z: New level altitudes.
            If ``None``, return the original dataset
            Else, interpolate the dataset to the new level altitudes.
            Default is ``None``.
        interp_method: Interpolation method for each data variable. Default is 
            ``None``.
        conserve_column: If `True`, ensure that column densities are conserved
            during interpolation.
        kwargs: Additional arguments passed to 
            [`get_dataset`](reference.md#src.joseki.profiles.afgl_1986.get_dataset).

    Returns:
        Atmosphere thermophysical profile dataset.
    """
    # Get additional_molecules from kwargs
    additional_molecules = kwargs.get("additional_molecules", True)

    # kwargs different than 'additional_molecules' are ignored
    if len([x for x in kwargs.keys() if x != "additional_molecules"]) > 0:
        logger.warning(
            "Ignoring kwargs different than 'additional_molecules'. "
            "(got %s)"
            "Use 'additional_molecules' to include molecules 8-28 "
            "as numbered in Anderson et al. (1986).",
            kwargs,
        )

    # Get the original dataset
    ds = get_dataset(
        identifier=identifier,
        additional_molecules=additional_molecules,
    )

    # Interpolate if necessary
    if z is not None:
        method = interp_method if interp_method is not None else DEFAULT_METHOD
        ds = interp(
            ds=ds,
            z_new=z,
            method=method,
            conserve_column=conserve_column,
        )
        return ds
    else:
        return ds

MIPAS (2007)

MIPAS atmosphere thermophysical profiles.

Remedios et al. (2007) define a set of 5 "standard atmospheres" representing the atmosphere at different latitudes and seasons or times of day:

  • midlatitude day
  • midlatitude night
  • polar winter
  • polar summer
  • tropical

MIPAS standard atmospheres were intended to provide an updated set of pro- files for characteristic atmospheric states such as the AFGL Atmospheric constituent profiles.

Identifier

Bases: enum.Enum

MIPAS atmosphere thermophysical profile identifier enumeration.

Source code in src/joseki/profiles/mipas_2007.py
class Identifier(enum.Enum):
    """MIPAS atmosphere thermophysical profile identifier enumeration."""

    MIDLATITUDE_DAY = "midlatitude_day"
    MIDLATITUDE_NIGHT = "midlatitude_night"
    POLAR_WINTER = "polar_winter"
    POLAR_SUMMER = "polar_summer"
    TROPICAL = "tropical"

MIPASMidlatitudeDay

Bases: Profile

MIPAS midlatitude day reference atmosphere.

Source code in src/joseki/profiles/mipas_2007.py
@factory.register("mipas_2007-midlatitude_day")
@define
class MIPASMidlatitudeDay(Profile):
    """MIPAS midlatitude day reference atmosphere."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        return to_dataset(
            identifier=Identifier.MIDLATITUDE_DAY,
            z=z,
            method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

MIPASMidlatitudeNight

Bases: Profile

MIPAS Midlatitude night reference atmosphere.

Source code in src/joseki/profiles/mipas_2007.py
@factory.register("mipas_2007-midlatitude_night")
@define
class MIPASMidlatitudeNight(Profile):
    """MIPAS Midlatitude night reference atmosphere."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        return to_dataset(
            identifier=Identifier.MIDLATITUDE_NIGHT,
            z=z,
            method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

MIPASPolarSummer

Bases: Profile

MIPAS Polar summer reference atmosphere.

Source code in src/joseki/profiles/mipas_2007.py
@factory.register("mipas_2007-polar_summer")
@define
class MIPASPolarSummer(Profile):
    """MIPAS Polar summer reference atmosphere."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        return to_dataset(
            identifier=Identifier.POLAR_SUMMER,
            z=z,
            method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

MIPASPolarWinter

Bases: Profile

MIPAS Polar winter reference atmosphere.

Source code in src/joseki/profiles/mipas_2007.py
@factory.register("mipas_2007-polar_winter")
@define
class MIPASPolarWinter(Profile):
    """MIPAS Polar winter reference atmosphere."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        return to_dataset(
            identifier=Identifier.POLAR_WINTER,
            z=z,
            method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

MIPASTropical

Bases: Profile

MIPAS Tropical reference atmosphere.

Source code in src/joseki/profiles/mipas_2007.py
@factory.register("mipas_2007-tropical")
@define
class MIPASTropical(Profile):
    """MIPAS Tropical reference atmosphere."""

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        return to_dataset(
            identifier=Identifier.TROPICAL,
            z=z,
            method=interp_method,
            conserve_column=conserve_column,
            **kwargs,
        )

get_dataset(identifier)

Read MIPAS reference atmosphere data files into an xarray.Dataset.

Parameters:

Name Type Description Default
identifier Identifier

Atmospheric profile identifier. See Identifier for possible values.

required

Returns:

Type Description
xr.Dataset

Atmospheric profile.

Source code in src/joseki/profiles/mipas_2007.py
def get_dataset(identifier: Identifier) -> xr.Dataset:
    """Read MIPAS reference atmosphere data files into an xarray.Dataset.

    Args:
        identifier: Atmospheric profile identifier.
            See 
            [`Identifier`](reference.md#src.joseki.profiles.mipas_2007.Identifier) 
            for possible values.

    Returns:
        Atmospheric profile.
    """
    content = read_file_content(identifier=identifier)
    quantities = parse_content(content.splitlines())

    # Coordinates
    coords = {"z": quantities.pop("z")}

    # Data variables
    data_vars = {}
    p = quantities.pop("p")
    data_vars["p"] = p
    t = quantities.pop("t")
    data_vars["t"] = t
    n = p / (K * t)  # perfect gas equation
    data_vars["n"] = n
    data_vars.update(quantities)

    logger.debug("data variables: %s", data_vars.keys())

    # Attributes
    pretty_id = identifier.value.replace("_", " ")
    pretty_title = f"MIPAS {pretty_id} Reference Atmosphere"
    attrs = {
        "Conventions": "CF-1.10",
        "history": history(),
        "title": pretty_title,
        "source": SOURCE,
        "institution": INSTITUTION,
        "references": REFERENCE,
        "url": URL,
        "urldate": URL_DATE,
    }

    # Dataset
    ds = schema.convert(
        data_vars=data_vars,
        coords=coords,
        attrs=attrs,
    )

    return ds

parse_content(lines)

Parse lines.

Source code in src/joseki/profiles/mipas_2007.py
def parse_content(lines: t.List[str]) -> t.Dict[str, pint.Quantity]:
    """Parse lines."""
    logger.debug("Parsing file content")
    iterator = iter(lines)
    line = next(iterator)

    quantities: t.Dict[str, pint.Quantity] = {}

    def _add_to_quantities(quantity: pint.Quantity, name: str) -> None:
        if name not in ["z", "p", "t", "n"]:
            name = f"x_{name}"

        if quantity.check(""):
            quantities[name] = quantity.to("dimensionless")
        else:
            quantities[name] = quantity

    var: str = ""
    units: str = ""
    values: t.List[str] = []
    while line != "*END":
        if line.startswith("!"):
            pass  # this is a comment, ignore the line
        elif line.startswith("*"):
            # convert previously read values (if any) and units to quantity
            if len(values) > 0:
                quantity = ureg.Quantity(
                    np.array(values, dtype=float),
                    units,
                )
                _add_to_quantities(quantity=quantity, name=var)

            # this is a variable line, parse variable name and units
            var, units = parse_var_line(line)

            # following lines are the variables values so prepare a variable
            # to store the values
            values = []
        else:
            if "!" in line:
                # this the line with the number of profile levels, ignore it
                pass
            else:
                # this line contains variable values
                values += parse_values_line(line)
        line = next(iterator)

    # include last array of values before the '*END' line
    quantity = ureg.Quantity(np.array(values, dtype=float), units)
    _add_to_quantities(quantity=quantity, name=var)

    return quantities

parse_units(s)

Parse units.

Source code in src/joseki/profiles/mipas_2007.py
def parse_units(s: str) -> str:
    """Parse units."""
    if s.startswith("[") and s.endswith("]"):
        units = s[1:-1]
        if units == "mb":
            return "millibar"
        else:
            return units
    else:
        raise ValueError(f"Cannot parse units '{s}'")

parse_values_line(s)

Parse a line with numeric values.

Source code in src/joseki/profiles/mipas_2007.py
def parse_values_line(s: str) -> t.List[str]:
    """Parse a line with numeric values."""
    if "," in s:  # delimiter is comma and whitespace combined
        s_strip = s.strip()
        if s_strip[-1] == ",":
            s_strip = s_strip[:-1]
        return [x.strip() for x in s_strip.split(",")]
    else:  # delimiter is whitespace
        return s.split()

parse_var_line(s)

Parse a line with the declaration of a variable and its units.

Source code in src/joseki/profiles/mipas_2007.py
def parse_var_line(s: str) -> t.Tuple[str, str]:
    """Parse a line with the declaration of a variable and its units."""
    parts = s[1:].strip().split()
    if len(parts) == 2:
        var_name, units_s = parts
    elif len(parts) == 3:
        var_name, _, units_s = parts
    else:
        raise ValueError(f"Invalid line format: {s}")
    var = parse_var_name(var_name)
    units = parse_units(units_s)
    return var, units

parse_var_name(n)

Parse variable name.

Source code in src/joseki/profiles/mipas_2007.py
def parse_var_name(n: str) -> str:
    """Parse variable name."""
    translate = {"HGT": "z", "PRE": "p", "TEM": "t"}
    if n in translate.keys():
        return translate[n]
    else:
        return to_chemical_formula(n)

read_file_content(identifier)

Read data file content.

Parameters:

Name Type Description Default
identifier Identifier

Atmospheric profile identifier. See Identifier for possible values.

required

Returns:

Type Description
str

file content, URL, URL date.

Source code in src/joseki/profiles/mipas_2007.py
def read_file_content(identifier: Identifier) -> str:
    """
    Read data file content.

    Args:
        identifier: Atmospheric profile identifier.
            See 
            [`Identifier`](reference.md#src.joseki.profiles.mipas_2007.Identifier) 
            for possible values.

    Returns:
        file content, URL, URL date.
    """
    package = "joseki.data.mipas_2007"
    file = f"{identifier.value}.atm"
    logger.debug(f"Reading file {file}")
    return importlib_resources.files(package).joinpath(file).read_text()

to_chemical_formula(name)

Convert to chemical formula.

Parameters:

Name Type Description Default
name str

Molecule name.

required

Returns:

Type Description
str

Molecule formula.

Notes

If molecule name is unknown, returns name unchanged.

Source code in src/joseki/profiles/mipas_2007.py
def to_chemical_formula(name: str) -> str:
    """Convert to chemical formula.

    Args:
        name: Molecule name.

    Returns:
        Molecule formula.

    Notes:
        If molecule name is unknown, returns name unchanged.
    """
    try:
        return translate_cfc(name)
    except ValueError:
        return name

to_dataset(identifier, z=None, method=None, conserve_column=False, **kwargs)

Helper for Profile.to_dataset method

Source code in src/joseki/profiles/mipas_2007.py
def to_dataset(
    identifier: Identifier,
    z: t.Optional[pint.Quantity] = None,
    method: t.Optional[t.Mapping[str, str]] = None,
    conserve_column: bool = False,
    **kwargs: t.Any,
) -> xr.Dataset:
    """Helper for `Profile.to_dataset` method"""
    # no kwargs are expected
    if len(kwargs) > 0:  # pragma: no cover
        logger.warning("Unexpected keyword arguments: %s", kwargs)

    # get original MIPAS midlatitude day reference atmosphere
    logger.debug("Get original MIPAS midlatitude day reference atmosphere")
    ds = get_dataset(identifier=identifier)

    # Interpolate to new vertical grid if necessary
    if z is not None:
        method = DEFAULT_METHOD if method is None else method
        ds = interp(
            ds=ds,
            z_new=z,
            method=method,
            conserve_column=conserve_column,
        )
        return ds
    else:
        return ds

translate_cfc(name)

Convert chlorofulorocarbon name to corresponding chemical formula.

Parameters:

Name Type Description Default
name str

Chlorofulorocarbon name.

required

Returns:

Type Description
str

Chlorofulorocarbon chemical formula.

Raises:

Type Description
ValueError

If the name does not match a known chlorofulorocarbon.

Source code in src/joseki/profiles/mipas_2007.py
def translate_cfc(name: str) -> str:
    """Convert chlorofulorocarbon name to corresponding chemical formula.

    Args:
        name: Chlorofulorocarbon name.

    Returns:
        Chlorofulorocarbon chemical formula.

    Raises:
        ValueError: If the name does not match a known chlorofulorocarbon.
    """
    for formula, names in CFC_FORMULAE.items():
        if name in names:
            return formula
    raise ValueError("Unknown chlorofulorocarbon {name}")

US Standard Atmosphere (1976)

Module to compute the U.S. Standard Atmosphere 1976.

The U.S. Standard Atmosphere 1976 is a Earth atmosphere thermophysical model described in the technical report NOAA+1976.

USSA1976

Bases: Profile

Class to compute the U.S. Standard Atmosphere 1976.

The U.S. Standard Atmosphere 1976 is a Earth atmosphere thermophysical model described in the technical report NOAA+1976.

Source code in src/joseki/profiles/ussa_1976.py
@factory.register(identifier="ussa_1976")
@define
class USSA1976(Profile):
    """
    Class to compute the U.S. Standard Atmosphere 1976.

    The U.S. Standard Atmosphere 1976 is a Earth atmosphere thermophysical model
    described in the technical report [NOAA+1976](bibliography.md#NOAA+1976).
    """

    def to_dataset(
        self,
        z: t.Optional[pint.Quantity] = None,
        interp_method: t.Optional[t.Mapping[str, str]] = None,
        conserve_column: bool = False,
        **kwargs: t.Any,
    ) -> xr.Dataset:
        # Since the ussa_1976 model can be evaluated at any altitude, both
        # interp_method and conserve_column are ignored.

        # kwargs are ignored
        if kwargs:
            logger.warning(  # pragma: no cover
                "value of the 'kwargs' parameter will be ignored."
            )

        # variable to compute with the ussa1976 package
        variables = [
            "p",
            "t",
            "n_tot",
            "n",
        ]
        # compute profile
        if z is None:
            logging.debug("Computing profile with ussa1976 package")
            ds = ussa1976.compute(variables=variables)
        else:
            logging.debug("Computing profile with ussa1976 package")
            logging.debug("z=%s", z)
            ds = ussa1976.compute(z=z.m_as("m"), variables=variables)

        # extract data
        coords = {"z": to_quantity(ds["z"]).to("km")}

        data_vars = {}
        data_vars["p"] = to_quantity(ds["p"]).to("Pa")
        data_vars["t"] = to_quantity(ds["t"]).to("K")
        data_vars["n"] = to_quantity(ds["n_tot"]).to("m^-3")

        # compute mole fraction
        for s in ds["s"].values:
            nx = to_quantity(ds["n"].sel(s=s))
            n_tot = to_quantity(ds["n_tot"])
            data_vars[f"x_{s}"] = (nx / n_tot).to("dimensionless")

        attrs = {
            "Conventions": "CF-1.10",
            "title": "U.S. Standard Atmosphere 1976",
            "institution": "NOAA",
            "source": ds.attrs["source"],
            "history": ds.attrs["history"] + "\n" + history(),
            "references": ds.attrs["references"],
            "url": "https://ntrs.nasa.gov/citations/19770009539",
            "urldate": "2022-12-08",
        }

        ds = schema.convert(
            data_vars,
            coords,
            attrs,
        )

        return ds

Utilities

Utility module.

air_molar_mass_from_mass_fraction(y)

Compute the air molar mass from the of air constituents mass fractions.

Parameters:

Name Type Description Default
y xr.DataArray

Mass fraction as a function of molecule (m) and altitude (z).

required

Returns:

Type Description
xr.DataArray

Air molar mass as a function of altitude (z).

Notes

The air molar mass is computed according to the following equation:

\[ m_{\mathrm{air}} (z) = \left( \sum_{\mathrm{M}} \frac{ y_{\mathrm{M}} (z) }{ m_{\mathrm{M}} } \right)^{-1} \]

where:

  • \(y_{\mathrm{M}} (z)\) is the mass fraction of molecule M at altitude \(z\),
  • \(m_{\mathrm{M}}\) is the molar mass of molecule M.
Source code in src/joseki/profiles/util.py
def air_molar_mass_from_mass_fraction(
    y: xr.DataArray
) -> xr.DataArray:
    r"""
    Compute the air molar mass from the of air constituents mass fractions.

    Args:
        y: Mass fraction as a function of molecule (`m`) and altitude (`z`).

    Returns:
        Air molar mass as a function of altitude (`z`).

    Notes:
        The air molar mass is computed according to the following equation:

        $$
        m_{\mathrm{air}} (z) = \left(
            \sum_{\mathrm{M}} \frac{
                y_{\mathrm{M}} (z)
            }{
                m_{\mathrm{M}}
            }
        \right)^{-1}
        $$

        where:

        * $y_{\mathrm{M}} (z)$ is the mass fraction of molecule M at altitude
          $z$,
        * $m_{\mathrm{M}}$ is the molar mass of molecule M.
    """
    # compute molar masses along molecular species
    molecules = y.m.values
    mm = xr.DataArray(
        data=[MM[m] for m in molecules],
        coords={"m": ("m", molecules)},
    )

    # compute air molar mass
    mm_inv = 1 / mm
    weighted_mean_mm_inv = (mm_inv * y).sum(dim="m") / y.sum(dim="m")
    mm_air = 1 / weighted_mean_mm_inv
    mm_air.attrs.update({"units": "g/mol"})
    return mm_air

mass_fraction_to_mole_fraction3(y, m_air=28.9644 * ureg.g / ureg.mole)

Convert mass fractions to mole fractions.

Parameters:

Name Type Description Default
y xr.DataArray

Mass fraction as a function of molecule (m) and altitude (z).

required
m_air pint.Quantity

Molar mass of air. Defaults to 28.9644 g/mol.

28.9644 * ureg.g / ureg.mole

Returns:

Type Description
xr.DataArray

Volume fraction as a function of molecule (m) and altitude (z).

Notes

The mole fraction of molecule M at altitude \(z\) is computed according to the following equation:

\[ x_{\mathrm{M}} (z) = \frac{ y_{\mathrm{M}} (z) }{ m_{\mathrm{M}} } m_{\mathrm{air}} (z) \]

where:

  • \(y_{\mathrm{M}} (z)\) is the mass fraction of molecule \(M\) at altitude \(z\),
  • \(m_{\mathrm{M}}\) is the molar mass of molecule \(M\),
  • \(m_{\mathrm{air}}\) is the air molar mass (m_air).
Source code in src/joseki/profiles/util.py
def mass_fraction_to_mole_fraction3(
    y: xr.DataArray,
    m_air: pint.Quantity = 28.9644 * ureg.g / ureg.mole,
) -> xr.DataArray:
    r"""
    Convert mass fractions to mole fractions.

    Args:
        y: Mass fraction as a function of molecule (`m`) and altitude (`z`).
        m_air: Molar mass of air. Defaults to 28.9644 g/mol.

    Returns:
        Volume fraction as a function of molecule (`m`) and altitude (`z`).

    Notes:
        The mole fraction of molecule M at altitude $z$ is computed according 
        to the following equation:

        $$
        x_{\mathrm{M}} (z) = \frac{
            y_{\mathrm{M}} (z)
        }{
            m_{\mathrm{M}}
        }
        m_{\mathrm{air}} (z)
        $$

        where:

        * $y_{\mathrm{M}} (z)$ is the mass fraction of molecule $M$ at 
          altitude $z$,
        * $m_{\mathrm{M}}$ is the molar mass of molecule $M$,
        * $m_{\mathrm{air}}$ is the air molar mass (`m_air`).
    """
    # compute molar masses of each molecular species
    mm = molar_mass(molecules=y.m.values.tolist())

    # compute mole fraction
    x = (m_air.m_as("g/mol") * y / mm).rename("x")
    x.attrs.update(
        {
            "long_name": "Mole fraction",
            "standard_name": "mole_fraction",
            "units": "dimensionless"
        }
    )

    return x

number_density(p, t)

Compute air number density from air pressure and air temperature.

Parameters:

Name Type Description Default
p pint.Quantity

Air pressure.

required
t pint.Quantity

Air temperature.

required

Returns:

Type Description
pint.Quantity

Number density.

Notes

The air number density is computed according to the ideal gas law:

\[ n = \frac{p}{k_B T} \]

where \(p\) is the air pressure, \(k_B\) is the Boltzmann constant, and \(T\) is the air temperature.

Source code in src/joseki/profiles/util.py
def number_density(p: pint.Quantity, t: pint.Quantity) -> pint.Quantity:
    """Compute air number density from air pressure and air temperature.

    Args:
        p: Air pressure.
        t: Air temperature.

    Returns:
        Number density.

    Notes:
        The air number density is computed according to the ideal gas law:

        $$
        n = \\frac{p}{k_B T}
        $$

        where $p$ is the air pressure, $k_B$ is the Boltzmann constant, and 
        $T$ is the air temperature.
    """
    return (p / (K * t)).to_base_units()

to_m_suffixed_data(da)

Takes a data array with a m coordinate and returns a dataset with a data variable for each molecule.

Parameters:

Name Type Description Default
da xr.DataArray

Data array with a m coordinate. Must be named.

required

Returns:

Type Description
xr.Dataset

Dataset with a data variable for each molecule.

Source code in src/joseki/profiles/util.py
def to_m_suffixed_data(da: xr.DataArray) -> xr.Dataset:
    """
    Takes a data array with a `m` coordinate and returns a dataset with
    a data variable for each molecule.

    Args:
        da: Data array with a `m` coordinate. Must be named.

    Returns:
        Dataset with a data variable for each molecule.
    """
    data_var_name = da.name
    data_var_attrs = da.attrs
    molecules = da.m.values
    ds = xr.Dataset()
    for m in molecules:
        ds[f"{data_var_name}_{m}"] = da.sel(m=m, drop=True)
        ds[f"{data_var_name}_{m}"].attrs = {
            "standard_name": f"{m}_mole_fraction",
            "long_name": f"{m} mole fraction",
            "units": data_var_attrs["units"],
        }
    return ds

utcnow()

Get current UTC time.

Returns:

Type Description
str

ISO 8601 formatted UTC timestamp.

Source code in src/joseki/profiles/util.py
def utcnow() -> str:
    """Get current UTC time.

    Returns:
        ISO 8601 formatted UTC timestamp.
    """
    return datetime.datetime.utcnow().replace(microsecond=0).isoformat()

Command line interface

Command-line interface.

main(file_name, identifier, altitudes, altitude_units, conserve_column, p_interp_method, t_interp_method, n_interp_method, x_interp_method)

Joseki command-line interface.

Source code in src/joseki/__main__.py
@click.command()
@click.option(
    "--file-name",
    "-f",
    help="Output file name.",
    default="ds.nc",
    show_default=True,
    type=click.Path(writable=True),
)
@click.option(
    "--identifier",
    "-i",
    help="Atmospheric profile identifier.",
    required=True,
    type=click.Choice(
        choices=IDENTIFIER_CHOICES,
        case_sensitive=True,
    ),
)
@click.option(
    "--altitudes",
    "-a",
    help=(
        "Path to level altitudes data file. The data file is read with "
        "pandas.read_csv. The data file must be a column named 'z'."
    ),
    default=None,
    show_default=True,
)
@click.option(
    "--altitude-units",
    "-u",
    help="Altitude units",
    default="km",
    show_default=True,
)
@click.option(
    "--conserve-column",
    "-c",
    help=(
        "Ensure that column densities are conserved during interpolation."
    ),
    is_flag=True,
)
@click.option(
    "--p-interp-method",
    "-p",
    help="Pressure interpolation method.",
    type=click.Choice(
        INTERPOLATION_METHOD_CHOICES,
        case_sensitive=True,
    ),
    default="linear",
    show_default=True,
)
@click.option(
    "--t-interp-method",
    "-t",
    help="Temperature interpolation method.",
    type=click.Choice(
        INTERPOLATION_METHOD_CHOICES,
        case_sensitive=True,
    ),
    default="linear",
    show_default=True,
)
@click.option(
    "--n-interp-method",
    "-n",
    help="Number density interpolation method.",
    type=click.Choice(
        INTERPOLATION_METHOD_CHOICES,
        case_sensitive=True,
    ),
    default="linear",
    show_default=True,
)
@click.option(
    "--x-interp-method",
    "-x",
    help="Volume mixing ratios interpolation method.",
    type=click.Choice(
        INTERPOLATION_METHOD_CHOICES,
        case_sensitive=True,
    ),
    default="linear",
    show_default=True,
)
@click.version_option()
def main(
    file_name: str,
    identifier: str,
    altitudes: t.Optional[str],
    altitude_units: str,
    conserve_column: bool,
    p_interp_method: str,
    t_interp_method: str,
    n_interp_method: str,
    x_interp_method: str,
) -> None:
    """Joseki command-line interface."""
    # read altitude grid
    if altitudes is not None:
        df = pd.read_csv(pathlib.Path(altitudes))
        z = df["z"].values * ureg(altitude_units)
    else:
        z = None

    interp_method = {
        "p": p_interp_method,
        "t": t_interp_method,
        "n": n_interp_method,
        "x": x_interp_method,
        "default": "linear",
    }

    # make dataset
    ds = make(
        identifier=identifier,
        z=z,
        interp_method=interp_method,
        conserve_column=conserve_column,
    )

    # write dataset
    ds.to_netcdf(file_name)

Core

Core module.

identifiers()

List all registered profile identifiers.

Returns:

Type Description
t.List[str]

List of all registered profile identifiers.

Source code in src/joseki/core.py
def identifiers() -> t.List[str]:
    """
    List all registered profile identifiers.

    Returns:
        List of all registered profile identifiers.
    """
    return factory.registered_identifiers

load_dataset(path, *args, **kwargs)

Thin wrapper around xarray.load_dataset.

Parameters:

Name Type Description Default
path os.PathLike

Path to the dataset.

required

Returns:

Type Description
xr.Dataset

Profile.

Source code in src/joseki/core.py
def load_dataset(path: os.PathLike, *args, **kwargs) -> xr.Dataset:
    """
    Thin wrapper around `xarray.load_dataset`.

    Args:
        path: Path to the dataset.

    Returns:
        Profile.
    """
    return xr.load_dataset(path, *args, **kwargs)

make(identifier, z=None, interp_method=DEFAULT_METHOD, conserve_column=False, molecules=None, regularize=None, rescale_to=None, check_x_sum=False, **kwargs)

Create a profile with the specified identifier.

Parameters:

Name Type Description Default
identifier str

Profile identifier.

required
z pint.Quantity | dict | xr.DataArray | None

Altitude values.

None
interp_method t.Mapping[str, str] | None

Mapping of variable and interpolation method.

DEFAULT_METHOD
conserve_column bool

If True, ensure that column densities are conserved during interpolation.

False
molecules t.List[str] | None

List of molecules to include in the profile.

None
regularize bool | dict | None

Regularize the altitude grid with specified options which are passed to regularize.

None
rescale_to dict | None

Rescale molecular concentrations to the specified target values which are passed to rescale_to.

None
check_x_sum bool

If True, check that the mole fraction sums are less or equal to 1.

False
kwargs t.Any

Additional keyword arguments passed to the profile constructor.

{}

Returns:

Type Description
xr.Dataset

Profile as xarray.Dataset.

See Also

regularize rescale_to

Source code in src/joseki/core.py
def make(
    identifier: str,
    z: pint.Quantity | dict | xr.DataArray | None = None,
    interp_method: t.Mapping[str, str] | None = DEFAULT_METHOD,
    conserve_column: bool = False,
    molecules: t.List[str] | None = None,
    regularize: bool | dict | None = None,
    rescale_to: dict | None = None,
    check_x_sum: bool = False,
    **kwargs: t.Any,
) -> xr.Dataset:
    """
    Create a profile with the specified identifier.

    Args:
        identifier: Profile identifier.
        z: Altitude values.
        interp_method: Mapping of variable and interpolation method.
        conserve_column: If `True`, ensure that column densities are conserved
            during interpolation.
        molecules: List of molecules to include in the profile.
        regularize: Regularize the altitude grid with specified options which
            are passed to 
            [regularize](reference.md#src.joseki.profiles.core.regularize).
        rescale_to: Rescale molecular concentrations to the specified target 
            values which are passed to 
            [rescale_to](reference.md#src.joseki.accessor.JosekiAccessor.rescale_to).  
        check_x_sum: If `True`, check that the mole fraction sums are less or 
            equal to 1.
        kwargs: Additional keyword arguments passed to the profile constructor.

    Returns:
        Profile as xarray.Dataset.

    See Also:
        [regularize](reference.md#src.joseki.profiles.core.regularize)
        [rescale_to](reference.md#src.joseki.accessor.JosekiAccessor.rescale_to)
    """
    logger.info("Creating profile %s", identifier)
    logger.debug("z: %s", z)
    logger.debug("interp_method: %s", interp_method)
    logger.debug("conserve_column: %s", conserve_column)
    logger.debug("molecules: %s", molecules)
    logger.debug("regularize: %s", regularize)
    logger.debug("rescale_to: %s", rescale_to)
    logger.debug("kwargs: %s", kwargs)

    # Convert z to pint.Quantity
    z = to_quantity(z) if z is not None else None

    profile = factory.create(identifier)

    logger.debug("exporting profile to xarray.Dataset")
    ds = profile.to_dataset(
        z=z,
        interp_method=interp_method,
        conserve_column=conserve_column,
        **kwargs,
    )

    # Molecules selection
    if molecules is not None:
        ds = select_molecules(ds, molecules)

    # Altitude grid regularization
    if regularize:
        z = to_quantity(ds.z)
        default_num = int((z.max() - z.min()) // np.diff(z).min()) + 1
        if isinstance(regularize, bool):
            regularize = {}
        ds = _regularize(
            ds=ds,
            method=regularize.get("method", DEFAULT_METHOD),
            conserve_column=regularize.get("conserve_column", False),
            options=regularize.get('options', {"num": default_num}),
        )

    # Molecular concentration rescaling
    if rescale_to:
        ds = ds.joseki.rescale_to(
            target=rescale_to,
            check_x_sum=check_x_sum,
        )

    return ds

merge(datasets, new_title=None)

Merge multiple profiles into a single profile.

Parameters:

Name Type Description Default
datasets t.Iterable[xr.Dataset]

Iterable of profiles.

required
new_title str | None

New title for the merged profile. If None, the title of the first profile is used.

None

Returns:

Type Description
xr.Dataset

Merged profile.

Notes

The first profile in the iterable is used as the base profile; when variables with the same name are encountered in subsequent profiles, the variable from the first profile is used.

Source code in src/joseki/core.py
def merge(
    datasets: t.Iterable[xr.Dataset],
    new_title: str | None = None,
) -> xr.Dataset:
    """
    Merge multiple profiles into a single profile.

    Args:
        datasets: Iterable of profiles.
        new_title: New title for the merged profile. If `None`, the title of
            the first profile is used.

    Returns:
        Merged profile.

    Notes:
        The first profile in the iterable is used as the base profile; when
        variables with the same name are encountered in subsequent profiles,
        the variable from the first profile is used.
    """
    merged = xr.merge(
        datasets,
        compat="override",  # pick variable from first dataset
        combine_attrs="override",  # copy attrs from the first dataset to the result
    )

    # update attributes
    now = datetime.datetime.utcnow().replace(microsecond=0).isoformat()

    institutions = set([ds.attrs["institution"] for ds in datasets])
    sources = set([ds.attrs["source"] for ds in datasets])
    references = set([ds.attrs["references"] for ds in datasets])
    urls = set([ds.attrs["url"] for ds in datasets])
    urldates = set([ds.attrs["urldate"] for ds in datasets])
    conventions = set([ds.attrs["Conventions"] for ds in datasets])

    with_profiles = "with profile " if len(datasets) == 2 else "with profiles "
    with_profiles += ", ".join([f"'{ds.title}'" for ds in datasets[1:]])
    merged.attrs["history"] += (
        f"\n{now} - merged profile '{datasets[0].title}' {with_profiles} "
        f"joseki, version {__version__}"
    )

    merged.attrs["institution"] = "\n".join(institutions)
    merged.attrs["source"] = "\n".join(sources)
    merged.attrs["references"] = "\n".join(references)
    merged.attrs["url"] = "\n".join(urls)
    merged.attrs["urldate"] = "\n".join(urldates)
    merged.attrs["Conventions"] = "\n".join(conventions)

    if new_title is not None:  # pragma: no cover
        merged.attrs["title"] = new_title
    else:  # pragma: no cover
        merged.attrs["title"] = datasets[0].title

    return merged

open_dataset(path, *args, **kwargs)

Thin wrapper around xarray.open_dataset.

Parameters:

Name Type Description Default
path os.PathLike

Path to the dataset.

required

Returns:

Type Description
xr.Dataset

Profile.

Source code in src/joseki/core.py
def open_dataset(path: os.PathLike, *args, **kwargs) -> xr.Dataset:
    """
    Thin wrapper around `xarray.open_dataset`.

    Args:
        path: Path to the dataset.

    Returns:
        Profile.
    """
    return xr.open_dataset(path, *args, **kwargs)

Units

Units module.

to_quantity(value, units=None)

Convert to a pint.Quantity.

Parameters:

Name Type Description Default
value pint.Quantity | dict | int | float | list | np.ndarray | xr.DataArray

Value which will be converted. If value is an ArrayLike, it is assumed to be dimensionless (unless units is set). If a xarray.DataArray is passed and units_error is True, it is assumed to have a units key in its attrs field; otherwise, it is assumed to be dimensionless.

required
units None | str

Units to assign. If None, the units are inferred from the value argument.

None

Returns:

Type Description
pint.Quantity

The corresponding quantity.

Notes

This function can also be used on DataArray and Dataset coordinate variables.

Source code in src/joseki/units.py
@singledispatch
def to_quantity(
    value: pint.Quantity | dict | int | float | list | np.ndarray | xr.DataArray,
    units: None | str = None,
) -> pint.Quantity:
    """Convert to a `pint.Quantity`.

    Args:
        value: Value which will be converted. If value is an `ArrayLike`, it is
            assumed to be dimensionless (unless `units` is set).
            If a `xarray.DataArray` is passed and 
            `units_error` is `True`, it is assumed to have a `units` key in
            its `attrs` field; otherwise, it is assumed to be dimensionless.
        units: Units to assign. If `None`, the units are inferred from the
            `value` argument.

    Returns:
        The corresponding quantity.

    Notes:
        This function can also be used on DataArray and Dataset coordinate 
        variables.
    """
    raise NotImplementedError

Util

Utility module.

air_molar_mass_from_mass_fraction(y)

Compute the air molar mass from the of air constituents mass fractions.

Parameters:

Name Type Description Default
y xr.DataArray

Mass fraction as a function of molecule (m) and altitude (z).

required

Returns:

Type Description
xr.DataArray

Air molar mass as a function of altitude (z).

Notes

The air molar mass is computed according to the following equation:

\[ m_{\mathrm{air}} (z) = \left( \sum_{\mathrm{M}} \frac{ y_{\mathrm{M}} (z) }{ m_{\mathrm{M}} } \right)^{-1} \]

where:

  • \(y_{\mathrm{M}} (z)\) is the mass fraction of molecule M at altitude \(z\),
  • \(m_{\mathrm{M}}\) is the molar mass of molecule M.
Source code in src/joseki/profiles/util.py
def air_molar_mass_from_mass_fraction(
    y: xr.DataArray
) -> xr.DataArray:
    r"""
    Compute the air molar mass from the of air constituents mass fractions.

    Args:
        y: Mass fraction as a function of molecule (`m`) and altitude (`z`).

    Returns:
        Air molar mass as a function of altitude (`z`).

    Notes:
        The air molar mass is computed according to the following equation:

        $$
        m_{\mathrm{air}} (z) = \left(
            \sum_{\mathrm{M}} \frac{
                y_{\mathrm{M}} (z)
            }{
                m_{\mathrm{M}}
            }
        \right)^{-1}
        $$

        where:

        * $y_{\mathrm{M}} (z)$ is the mass fraction of molecule M at altitude
          $z$,
        * $m_{\mathrm{M}}$ is the molar mass of molecule M.
    """
    # compute molar masses along molecular species
    molecules = y.m.values
    mm = xr.DataArray(
        data=[MM[m] for m in molecules],
        coords={"m": ("m", molecules)},
    )

    # compute air molar mass
    mm_inv = 1 / mm
    weighted_mean_mm_inv = (mm_inv * y).sum(dim="m") / y.sum(dim="m")
    mm_air = 1 / weighted_mean_mm_inv
    mm_air.attrs.update({"units": "g/mol"})
    return mm_air

mass_fraction_to_mole_fraction3(y, m_air=28.9644 * ureg.g / ureg.mole)

Convert mass fractions to mole fractions.

Parameters:

Name Type Description Default
y xr.DataArray

Mass fraction as a function of molecule (m) and altitude (z).

required
m_air pint.Quantity

Molar mass of air. Defaults to 28.9644 g/mol.

28.9644 * ureg.g / ureg.mole

Returns:

Type Description
xr.DataArray

Volume fraction as a function of molecule (m) and altitude (z).

Notes

The mole fraction of molecule M at altitude \(z\) is computed according to the following equation:

\[ x_{\mathrm{M}} (z) = \frac{ y_{\mathrm{M}} (z) }{ m_{\mathrm{M}} } m_{\mathrm{air}} (z) \]

where:

  • \(y_{\mathrm{M}} (z)\) is the mass fraction of molecule \(M\) at altitude \(z\),
  • \(m_{\mathrm{M}}\) is the molar mass of molecule \(M\),
  • \(m_{\mathrm{air}}\) is the air molar mass (m_air).
Source code in src/joseki/profiles/util.py
def mass_fraction_to_mole_fraction3(
    y: xr.DataArray,
    m_air: pint.Quantity = 28.9644 * ureg.g / ureg.mole,
) -> xr.DataArray:
    r"""
    Convert mass fractions to mole fractions.

    Args:
        y: Mass fraction as a function of molecule (`m`) and altitude (`z`).
        m_air: Molar mass of air. Defaults to 28.9644 g/mol.

    Returns:
        Volume fraction as a function of molecule (`m`) and altitude (`z`).

    Notes:
        The mole fraction of molecule M at altitude $z$ is computed according 
        to the following equation:

        $$
        x_{\mathrm{M}} (z) = \frac{
            y_{\mathrm{M}} (z)
        }{
            m_{\mathrm{M}}
        }
        m_{\mathrm{air}} (z)
        $$

        where:

        * $y_{\mathrm{M}} (z)$ is the mass fraction of molecule $M$ at 
          altitude $z$,
        * $m_{\mathrm{M}}$ is the molar mass of molecule $M$,
        * $m_{\mathrm{air}}$ is the air molar mass (`m_air`).
    """
    # compute molar masses of each molecular species
    mm = molar_mass(molecules=y.m.values.tolist())

    # compute mole fraction
    x = (m_air.m_as("g/mol") * y / mm).rename("x")
    x.attrs.update(
        {
            "long_name": "Mole fraction",
            "standard_name": "mole_fraction",
            "units": "dimensionless"
        }
    )

    return x

number_density(p, t)

Compute air number density from air pressure and air temperature.

Parameters:

Name Type Description Default
p pint.Quantity

Air pressure.

required
t pint.Quantity

Air temperature.

required

Returns:

Type Description
pint.Quantity

Number density.

Notes

The air number density is computed according to the ideal gas law:

\[ n = \frac{p}{k_B T} \]

where \(p\) is the air pressure, \(k_B\) is the Boltzmann constant, and \(T\) is the air temperature.

Source code in src/joseki/profiles/util.py
def number_density(p: pint.Quantity, t: pint.Quantity) -> pint.Quantity:
    """Compute air number density from air pressure and air temperature.

    Args:
        p: Air pressure.
        t: Air temperature.

    Returns:
        Number density.

    Notes:
        The air number density is computed according to the ideal gas law:

        $$
        n = \\frac{p}{k_B T}
        $$

        where $p$ is the air pressure, $k_B$ is the Boltzmann constant, and 
        $T$ is the air temperature.
    """
    return (p / (K * t)).to_base_units()

to_m_suffixed_data(da)

Takes a data array with a m coordinate and returns a dataset with a data variable for each molecule.

Parameters:

Name Type Description Default
da xr.DataArray

Data array with a m coordinate. Must be named.

required

Returns:

Type Description
xr.Dataset

Dataset with a data variable for each molecule.

Source code in src/joseki/profiles/util.py
def to_m_suffixed_data(da: xr.DataArray) -> xr.Dataset:
    """
    Takes a data array with a `m` coordinate and returns a dataset with
    a data variable for each molecule.

    Args:
        da: Data array with a `m` coordinate. Must be named.

    Returns:
        Dataset with a data variable for each molecule.
    """
    data_var_name = da.name
    data_var_attrs = da.attrs
    molecules = da.m.values
    ds = xr.Dataset()
    for m in molecules:
        ds[f"{data_var_name}_{m}"] = da.sel(m=m, drop=True)
        ds[f"{data_var_name}_{m}"].attrs = {
            "standard_name": f"{m}_mole_fraction",
            "long_name": f"{m} mole fraction",
            "units": data_var_attrs["units"],
        }
    return ds

utcnow()

Get current UTC time.

Returns:

Type Description
str

ISO 8601 formatted UTC timestamp.

Source code in src/joseki/profiles/util.py
def utcnow() -> str:
    """Get current UTC time.

    Returns:
        ISO 8601 formatted UTC timestamp.
    """
    return datetime.datetime.utcnow().replace(microsecond=0).isoformat()