pandas-dev · jreback · Nov 8, 2019 · Nov 6, 2019 · Nov 6, 2019 · Nov 6, 2019
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -4964,6 +4964,21 @@ def sample(
         dog          4          0                  2
         fish         0          0                  8
 
+
+        An upsampled sample of the ``DataFrame`` with replacement.
+        Note that the `replace` parameter has to be `True` when `frac` > 1.
+
+        >>> df.sample(frac=2, replace=True, random_state=1)
+                num_legs  num_wings  num_specimen_seen
+        dog            4          0                  2
+        fish           0          0                  8
+        falcon         2          2                 10
+        falcon         2          2                 10
+        fish           0          0                  8
+        dog            4          0                  2
+        fish           0          0                  8
+        dog            4          0                  2
+
         Using a DataFrame column as weights. Rows with larger value in the
         `num_specimen_seen` column are more likely to be sampled.
 
@@ -5039,6 +5054,10 @@ def sample(
         # If no frac or n, default to n=1.
         if n is None and frac is None:
             n = 1
+        elif frac is not None and frac > 1 and not replace:
+            raise ValueError(
+                "Replace has to be set to `True` when upsampling the population `frac` > 1"
+            )
         elif n is not None and frac is None and n % 1 != 0:
             raise ValueError("Only integers accepted as `n` values")
         elif n is None and frac is not None:

diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
@@ -322,6 +322,7 @@ def test_sample(self):
             self._compare(
                 o.sample(n=4, random_state=seed), o.sample(n=4, random_state=seed)
             )
+
             self._compare(
                 o.sample(frac=0.7, random_state=seed),
                 o.sample(frac=0.7, random_state=seed),
@@ -337,6 +338,15 @@ def test_sample(self):
                 o.sample(frac=0.7, random_state=np.random.RandomState(test)),
             )
 
+            self._compare(
+                o.sample(
+                    frac=2, replace=True, random_state=np.random.RandomState(test)
+                ),
+                o.sample(
+                    frac=2, replace=True, random_state=np.random.RandomState(test)
+                ),
+            )
+
             os1, os2 = [], []
             for _ in range(2):
                 np.random.seed(test)
@@ -424,6 +434,14 @@ def test_sample(self):
         weights_with_None[5] = 0.5
         self._compare(o.sample(n=1, axis=0, weights=weights_with_None), o.iloc[5:6])
 
+    def test_sample_upsampling_without_replacement(self):
+        # sample() rejects frac > 1 without replacement by raising ValueError.
+        msg = (
+            "Replace has to be set to `True` when upsampling the population `frac` > 1"
+        )
+        with pytest.raises(ValueError, match=msg):
+            pd.DataFrame({"A": list("abc")}).sample(frac=2, replace=False)
+
     def test_size_compat(self):
         # GH8846
         # size property should be defined