mongodb-labs · blink1073 · May 28, 2024 · May 22, 2024 · May 22, 2024
@@ -383,16 +383,22 @@ def _transform_bwe(bwe, offset):
     }
 
 
-def _tabular_generator(tabular):
+def _tabular_generator(tabular, *, exclude_none=False):
     if isinstance(tabular, Table):
         for i in tabular.to_batches():
             for row in i.to_pylist():
-                yield row
+                if exclude_none:
+                    yield {k: v for k, v in row.items() if v is not None}
+                else:
+                    yield row
     elif isinstance(tabular, pd.DataFrame):
         for row in tabular.to_dict("records"):
-            yield row
+            if exclude_none:
+                yield {k: v for k, v in row.items() if not np.isnan(v)}
+            else:
+                yield row
     elif pl is not None and isinstance(tabular, pl.DataFrame):
-        yield from _tabular_generator(tabular.to_arrow())
+        yield from _tabular_generator(tabular.to_arrow(), exclude_none=exclude_none)
     elif isinstance(tabular, dict):
         iter_dict = {k: np.nditer(v) for k, v in tabular.items()}
         try:
@@ -414,13 +420,14 @@ def transform_python(self, _):
         return
 
 
-def write(collection, tabular):
+def write(collection, tabular, *, exclude_none: bool = False):
     """Write data from `tabular` into the given MongoDB `collection`.
 
     :Parameters:
       - `collection`: Instance of :class:`~pymongo.collection.Collection`.
         against which to run the operation.
       - `tabular`: A tabular data store to use for the write operation.
+      - `exclude_none`: Whether to skip writing `null` fields in documents.
 
     :Returns:
       An instance of :class:`result.ArrowWriteResult`.
@@ -464,7 +471,7 @@ def write(collection, tabular):
         )
         raise ValueError(msg)
 
-    tabular_gen = _tabular_generator(tabular)
+    tabular_gen = _tabular_generator(tabular, exclude_none=exclude_none)
 
     # Handle Pandas NA objects.
     codec_options = collection.codec_options

@@ -137,7 +137,7 @@ def test_find_with_session(self):
 
         with self.client.start_session() as session:
             self.assertIsNone(session.operation_time)
-            _ = session._server_session.last_use
+            _ = getattr(session._server_session, "last_use", None)
             expected = Table.from_pydict(
                 {"_id": [1, 2, 3, 4], "data": [10, 20, 30, None]},
                 ArrowSchema([("_id", int32()), ("data", int64())]),
@@ -787,6 +787,27 @@ def test_binary_types(self):
                 self.assertTrue(table_out_schema.schema == table_in.schema)
                 self.assertTrue(table_out_none.equals(table_out_schema))
 
+    def test_exclude_none(self):
+        """Check that `write` omits a null field only when `exclude_none=True`."""
+        schema = {"a": int32(), "b": int32()}
+        b_data = list(range(10)) * 2
+        b_data[2] = None  # Row 2 holds the only null cell in the table.
+        data = Table.from_pydict(
+            {"a": list(range(10)) * 2, "b": b_data},
+            ArrowSchema(schema),
+        )
+        # Default call: the document for row 2 is expected to keep its "b" key.
+        self.coll.drop()
+        write(self.coll, data)
+        col_data = list(self.coll.find({}))
+        self.assertIn("b", col_data[2])
+
+        # With exclude_none=True the same document must not contain "b".
+        self.coll.drop()
+        write(self.coll, data, exclude_none=True)
+        col_data = list(self.coll.find({}))
+        self.assertNotIn("b", col_data[2])
+
 
 class TestArrowExplicitApi(ArrowApiTestMixin, unittest.TestCase):
     def run_find(self, *args, **kwargs):

@@ -329,6 +329,18 @@ def test_csv(self):
             out = pd.read_csv(f.name)
             self._assert_frames_equal(data, out)
 
+    def test_exclude_none(self):
+        """Verify `exclude_none` controls whether a missing cell is written."""
+        frame = pd.DataFrame(data={"a": [1, 2, 3, 4], "b": [20, 40, 60, None]})
+        self.coll.drop()
+        write(self.coll, frame)
+        assert "b" in list(self.coll.find({}))[3]
+
+        # Same frame again; the missing cell in row 3 must now be left out.
+        self.coll.drop()
+        write(self.coll, frame, exclude_none=True)
+        assert "b" not in list(self.coll.find({}))[3]
+
 
 class TestBSONTypes(PandasTestBase):
     @classmethod