pymc-labs · drbenvincent · May 6, 2024 · Mar 16, 2024 · Mar 17, 2024 · Mar 17, 2024
diff --git a/README.md b/README.md
@@ -188,6 +188,17 @@ Instrumental Variable regression is an appropriate technique when you wish to es
 
 ![](https://raw.githubusercontent.com/pymc-labs/CausalPy/main/docs/source/_static/iv_reg1.png)
 
+
+### Inverse Propensity Score Weighting
+
+Propensity scores are often used to address the risks of bias or confounding introduced in an observational study by
+selection effects into the treatment condition. Propensity scores can be used in a number of ways, but here we demonstrate
+their use within corrective weighting schemes that aim to recover an as-if random allocation of subjects to the treatment condition.
+The technique "up-weights" or "down-weights" individual observations to better estimate a causal estimand such as the average treatment
+effect.
+
+![](https://raw.githubusercontent.com/pymc-labs/CausalPy/main/docs/source/_static/propensity_weight.png)
+
 ## Learning resources
 
 Here are some general resources about causal inference:

diff --git a/causalpy/data/datasets.py b/causalpy/data/datasets.py
@@ -34,6 +34,7 @@
     "anova1": {"filename": "ancova_generated.csv"},
     "geolift1": {"filename": "geolift1.csv"},
     "risk": {"filename": "AJR2001.csv"},
+    "nhefs": {"filename": "nhefs.csv"},
 }
 
 

diff --git a/causalpy/data/nhefs.csv b/causalpy/data/nhefs.csv
diff --git a/causalpy/data_validation.py b/causalpy/data_validation.py
@@ -146,3 +146,29 @@ def _input_validation(self):
                 the assumption of a simple IV experiment.
                 The coefficients should be interpreted appropriately."""
             )
+
+
+class PropensityDataValidator:
+    """Mixin class for validating the input data and model formula for Propensity Weighting experiments."""
+
+    def _input_validation(self):
+        """Raise DataException unless the treatment (left of ``~`` in ``self.formula``)
+        and ``self.outcome_variable`` are data columns and the treatment is 0/1 coded."""
+        treatment = self.formula.split("~")[0].strip()
+        columns = self.data.columns
+        if treatment not in columns or self.outcome_variable not in columns:
+            raise DataException(
+                f"""
+                The treatment variable:
+                {treatment} must appear in the data to be used
+                as an outcome variable. And {self.outcome_variable}
+                must also be available in the data to be re-weighted
+                """
+            )
+        # Compare the values themselves: a two-level column such as {1, 2} is
+        # not 0-1 coded, so counting distinct levels alone is not sufficient.
+        if not set(np.unique(self.data[treatment])) <= {0, 1}:
+            raise DataException(
+                """Warning. The treatment variable is not 0-1 Binary.
+                """
+            )