Skip to content
This repository was archived by the owner on Jun 14, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add loss function to estimator, and Antoine equation for vapor pressure estimation
  • Loading branch information
g-bauer committed Feb 25, 2022
commit 07fc8b286c5fcafe70e50cc1fa64b780d323604c
51 changes: 47 additions & 4 deletions src/python/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,46 @@ impl From<FitError> for PyErr {
#[macro_export]
macro_rules! impl_estimator {
($eos:ty, $py_eos:ty) => {
// Python-facing newtype wrapper around the Rust `Loss` enum.
// NOTE: this item is expanded inside the `impl_estimator!` macro (see the
// surrounding `macro_rules!` definition), so one `Loss` Python class is
// generated per equation-of-state binding.
#[pyclass(name = "Loss", unsendable)]
#[derive(Clone)]
pub struct PyLoss(Loss);

// The `///` doc comments below double as Python docstrings via pyo3 —
// they are user-visible API documentation, not just source comments.
#[pymethods]
impl PyLoss {
/// Create a linear loss function.
///
/// `loss = s**2 * rho(f**2 / s**2)`
/// where `rho(z) = z` and `s = 1`.
///
/// Returns
/// -------
/// Loss
#[staticmethod]
pub fn linear() -> Self {
Self(Loss::Linear)
}

/// Create a loss function according to Huber's method.
///
/// `loss = s**2 * rho(f**2 / s**2)`
/// where `rho(z) = z if z <= 1 else 2*z**0.5 - 1`.
/// `s` is the scaling factor.
///
/// Parameters
/// ----------
/// scaling_factor : f64
/// Scaling factor for Huber loss function.
///
/// Returns
/// -------
/// Loss
#[staticmethod]
#[pyo3(text_signature = "(scaling_factor)")]
pub fn huber(scaling_factor: f64) -> Self {
// Delegates to the plain Rust enum; the wrapper stores it by value.
Self(Loss::Huber(scaling_factor))
}
}

/// A collection of experimental data that can be used to compute
/// cost functions and make predictions using an equation of state.
#[pyclass(name = "DataSet", unsendable)]
Expand Down Expand Up @@ -268,18 +308,21 @@ macro_rules! impl_estimator {
#[pymethods]
impl PyEstimator {
#[new]
fn new(data: Vec<PyDataSet>, weights: Vec<f64>) -> Self {
fn new(data: Vec<PyDataSet>, loss: Vec<PyLoss>, weights: Vec<f64>) -> Self {
Self(Estimator::new(
data.iter().map(|d| d.0.clone()).collect(),
loss.iter().map(|l| l.0.clone()).collect(),
weights,
))
}

/// Compute the cost function for each ``DataSet``.
///
/// The cost function that is used depends on the
/// property. See the class constructors for the ``DataSet``
/// to learn about the cost functions of the properties.
/// The cost function is:
/// - The relative difference between prediction and target value,
/// - to which a loss function is applied,
/// - and which is weighted according to the number of datapoints,
/// - and the relative weights as defined in the Estimator object.
///
/// Parameters
/// ----------
Expand Down
32 changes: 19 additions & 13 deletions src/utils/dataset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use crate::equation_of_state::{EquationOfState, MolarWeight};
use crate::phase_equilibria::{PhaseEquilibrium, VLEOptions};
use crate::state::{DensityInitialization, State};
use crate::utils::estimator::FitError;
use crate::EosUnit;
use crate::{Contributions, EosUnit};
use ndarray::{arr1, Array1};
use quantity::{QuantityArray1, QuantityScalar};
use std::collections::HashMap;
Expand Down Expand Up @@ -147,25 +147,31 @@ impl<U: EosUnit, E: EquationOfState> DataSet<U, E> for VaporPressure<U> {
where
QuantityScalar<U>: std::fmt::Display + std::fmt::LowerExp,
{
let tc =
let critical_point =
State::critical_point(eos, None, Some(self.max_temperature), VLEOptions::default())
.unwrap()
.temperature;
.unwrap();
let tc = critical_point.temperature;
let pc = critical_point.pressure(Contributions::Total);

let t0 = 0.9 * tc;
let p0 = PhaseEquilibrium::vapor_pressure(eos, t0)[0].unwrap();

let b = pc.to_reduced(p0).unwrap().ln() / (1.0 / tc - 1.0 / t0);
let a = pc.to_reduced(U::reference_pressure()).unwrap() - b.to_reduced(tc).unwrap();

let unit = self.target.get(0);
let mut prediction = Array1::zeros(self.datapoints) * unit;
for i in 0..self.datapoints {
let t = self.temperature.get(i);
if t < tc {
if let Some(pvap) =
PhaseEquilibrium::vapor_pressure(eos, self.temperature.get(i))[0]
{
prediction.try_set(i, pvap).unwrap();
} else {
prediction.try_set(i, f64::NAN * unit).unwrap();
}
if let Some(pvap) = PhaseEquilibrium::vapor_pressure(eos, t)[0] {
prediction.try_set(i, pvap).unwrap();
} else {
prediction.try_set(i, f64::NAN * unit).unwrap();
prediction
.try_set(
i,
pc * (a + (b * (1.0 / t - 1.0 / tc)).into_value().unwrap()).exp(),
)
.unwrap();
}
}
Ok(prediction)
Expand Down
15 changes: 10 additions & 5 deletions src/utils/estimator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! The [`Estimator`] struct can be used to store multiple [`DataSet`]s for convenient parameter
//! optimization.
use super::dataset::*;
use super::Loss;
use crate::equation_of_state::EquationOfState;
use crate::EosUnit;
use ndarray::{arr1, concatenate, Array1, ArrayView1, Axis};
Expand Down Expand Up @@ -31,6 +32,7 @@ pub enum FitError {
/// evaluate an equation of state versus experimental data.
pub struct Estimator<U: EosUnit, E: EquationOfState> {
data: Vec<Rc<dyn DataSet<U, E>>>,
loss: Vec<Loss>,
weights: Vec<f64>,
}

Expand All @@ -42,8 +44,8 @@ where
///
/// The weights are normalized and used as multiplicator when the
/// cost function across all `DataSet`s is evaluated.
pub fn new(data: Vec<Rc<dyn DataSet<U, E>>>, weights: Vec<f64>) -> Self {
Self { data, weights }
pub fn new(data: Vec<Rc<dyn DataSet<U, E>>>, loss: Vec<Loss>, weights: Vec<f64>) -> Self {
Self { data, loss, weights }
}

/// Add a `DataSet` and its weight.
Expand All @@ -56,14 +58,17 @@ where
///
/// Each cost contains the inverse weight.
pub fn cost(&self, eos: &Rc<E>) -> Result<Array1<f64>, FitError> {
let w_sum = self.weights.iter().sum::<f64>();
let w = arr1(&self.weights) / w_sum;

let predictions: Result<Vec<Array1<f64>>, FitError> = self
.data
.iter()
.enumerate()
.map(|(i, d)| {
let w_sum = self.weights.iter().sum::<f64>();
let w = arr1(&self.weights) / w_sum;
Ok(d.cost(eos)? * w[i])
let mut res = d.relative_difference(eos).unwrap();
self.loss[i].apply(&mut res.view_mut());
Ok(res * w[i] / d.datapoints() as f64)
})
.collect();
if let Ok(p) = predictions {
Expand Down
30 changes: 30 additions & 0 deletions src/utils/loss.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
use ndarray::ArrayViewMut1;

/// Loss function applied to the residuals of a `DataSet` before they enter
/// the cost function.
///
/// Following the Python-side documentation, the loss is
/// `loss = s**2 * rho(f**2 / s**2)` with
/// - `Linear`: `rho(z) = z` and `s = 1` (plain least squares),
/// - `Huber(s)`: `rho(z) = z if z <= 1 else 2*z**0.5 - 1` with scaling
///   factor `s`, which damps the influence of outliers.
//
// `Copy` and `PartialEq` are derived in addition to `Clone`/`Debug`: the enum
// is a plain value type (at most one `f64` payload), so copying is cheap and
// equality comparison is useful in tests. Backward compatible — existing
// `.clone()` call sites keep working.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Loss {
    /// Identity loss; residuals are passed through unchanged.
    Linear,
    /// Huber loss with the given scaling factor.
    Huber(f64),
}

impl Loss {
pub fn huber(scaling_factor: f64) -> Self {
Self::Huber(scaling_factor)
}

pub fn apply(&self, res: &mut ArrayViewMut1<f64>) {
match self {
Self::Linear => (),
Self::Huber(s) => {
let s2 = s * s;
let s2_inv = 1.0 / s2;
res.mapv_inplace(|ri| {
if ri * ri * s2_inv <= 1.0 {
ri
} else {
2.0 * (ri * ri * s2_inv).sqrt() - 1.0
}
})
}
}
}
}
2 changes: 2 additions & 0 deletions src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,7 @@
//! - [`Estimator`]: stores multiple `DataSet`
mod dataset;
mod estimator;
mod loss;
pub use dataset::*;
pub use estimator::{Estimator, FitError};
pub use loss::*;