jupyter-server · Zsailer · Aug 31, 2022 · Aug 29, 2022 · Aug 29, 2022 · Aug 29, 2022
diff --git a/jupyter_server/__init__.py b/jupyter_server/__init__.py
@@ -1,5 +1,6 @@
 """The Jupyter Server"""
 import os
+import pathlib
 import subprocess
 import sys
 
@@ -10,6 +11,8 @@
 ]
 
 DEFAULT_JUPYTER_SERVER_PORT = 8888
+JUPYTER_SERVER_EVENTS_URI = "https://events.jupyter.org/jupyter_server"  # core schema ID prefix
+DEFAULT_EVENTS_SCHEMA_PATH = pathlib.Path(__file__).parent / "event_schemas"  # packaged schema dir
 
 del os
 

diff --git a/jupyter_server/event_schemas/contents_service/v1.yaml b/jupyter_server/event_schemas/contents_service/v1.yaml
@@ -0,0 +1,73 @@
+"$id": https://events.jupyter.org/jupyter_server/contents_service/v1
+version: 1
+title: Contents Manager activities
+personal-data: true
+description: |
+  Record actions on files via the ContentsManager.
+
+  The notebook ContentsManager REST API is used by all frontends to retrieve,
+  save, list, delete and perform other actions on notebooks, directories,
+  and other files through the UI. This is pluggable - the default acts on
+  the file system, but can be replaced with a different ContentsManager
+  implementation - to work on S3, Postgres, other object stores, etc.
+  The events get recorded regardless of the ContentsManager implementation
+  being used.
+
+  Limitations:
+
+  1. This does not record all filesystem access, just the ones that happen
+     explicitly via the notebook server's REST API. Users can (and often do)
+     trivially access the filesystem in many other ways (such as `open()` calls
+     in their code), so this is usually not a complete record.
+  2. As with all events recorded by the notebook server, users most likely
+     have the ability to modify the code of the notebook server. Unless other
+     security measures are in place, these events should be treated as user
+     controlled and not used in high security areas.
+  3. Events are only recorded when an action succeeds.
+type: object
+required:
+  - action
+  - path
+properties:
+  action:
+    enum:
+      - get
+      - create
+      - save
+      - upload
+      - rename
+      - copy
+      - delete
+    description: |
+      Action performed by the ContentsManager API.
+
+      This is a required field.
+
+      Possible values:
+
+      1. get
+         Get contents of a particular file, or list contents of a directory.
+
+      2. save
+         Save a file at path with contents from the client
+
+      3. rename
+         Rename a file or directory from value in source_path to
+         value in path.
+
+      4. copy
+         Copy a file or directory from value in source_path to
+         value in path.
+
+      5. delete
+         Delete a file or empty directory at given path
+  path:
+    type: string
+    description: |
+      Logical path on which the operation was performed.
+
+      This is a required field.
+  source_path:
+    type: string
+    description: |
+      Source path of an operation when action is 'copy' or 'rename'
diff --git a/jupyter_server/serverapp.py b/jupyter_server/serverapp.py
@@ -83,9 +83,11 @@
 from traitlets.config.application import boolean_flag, catch_config_error
 
 from jupyter_server import (
+    DEFAULT_EVENTS_SCHEMA_PATH,
     DEFAULT_JUPYTER_SERVER_PORT,
     DEFAULT_STATIC_FILES_PATH,
     DEFAULT_TEMPLATE_PATH_LIST,
+    JUPYTER_SERVER_EVENTS_URI,
     __version__,
 )
 from jupyter_server._sysinfo import get_sys_info
@@ -1951,6 +1953,19 @@ def init_logging(self):
     def init_event_logger(self):
         """Initialize the Event Bus."""
         self.event_logger = EventLogger(parent=self)
+        # Load the core Jupyter Server event schemas
+        # All event schemas must start with Jupyter Server's
+        # events URI, `JUPYTER_SERVER_EVENTS_URI`.
+        schema_ids = [
+            "https://events.jupyter.org/jupyter_server/contents_service/v1",
+        ]
+        for schema_id in schema_ids:
+            # Get the schema path from the schema ID.
+            rel_schema_path = schema_id.lstrip(JUPYTER_SERVER_EVENTS_URI) + ".yaml"
+            schema_path = DEFAULT_EVENTS_SCHEMA_PATH / rel_schema_path
+            # Use this pathlib object to register the schema
+            # breakpoint()
+            self.event_logger.register_event_schema(schema_path)
 
     def init_webapp(self):
         """initialize tornado webapp"""

diff --git a/jupyter_server/services/contents/filemanager.py b/jupyter_server/services/contents/filemanager.py
@@ -395,6 +395,7 @@ def get(self, path, content=True, type=None, format=None):
             if type == "directory":
                 raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
             model = self._file_model(path, content=content, format=format)
+        self.emit(data={"action": "get", "path": path})
         return model
 
     def _save_directory(self, os_path, model, path=""):
@@ -459,7 +460,7 @@ def save(self, model, path=""):
             model["message"] = validation_message
 
         self.run_post_save_hooks(model=model, os_path=os_path)
-
+        self.emit(data={"action": "save", "path": path})
         return model
 
     def delete_file(self, path):
@@ -735,6 +736,7 @@ async def get(self, path, content=True, type=None, format=None):
             if type == "directory":
                 raise web.HTTPError(400, "%s is not a directory" % path, reason="bad type")
             model = await self._file_model(path, content=content, format=format)
+        self.emit(data={"action": "get", "path": path})
         return model
 
     async def _save_directory(self, os_path, model, path=""):
@@ -795,7 +797,7 @@ async def save(self, model, path=""):
             model["message"] = validation_message
 
         self.run_post_save_hooks(model=model, os_path=os_path)
-
+        self.emit(data={"action": "save", "path": path})
         return model
 
     async def delete_file(self, path):

diff --git a/jupyter_server/services/contents/largefilemanager.py b/jupyter_server/services/contents/largefilemanager.py
@@ -54,6 +54,7 @@ def save(self, model, path=""):
             # Last chunk
             if chunk == -1:
                 self.run_post_save_hooks(model=model, os_path=os_path)
+            self.emit(data={"action": "save", "path": path})
             return model
         else:
             return super().save(model, path)
@@ -125,6 +126,8 @@ async def save(self, model, path=""):
             # Last chunk
             if chunk == -1:
                 self.run_post_save_hooks(model=model, os_path=os_path)
+
+            self.emit(data={"action": "save", "path": path})
             return model
         else:
             return await super().save(model, path)

diff --git a/jupyter_server/services/contents/manager.py b/jupyter_server/services/contents/manager.py
@@ -7,6 +7,7 @@
 import warnings
 from fnmatch import fnmatch
 
+from jupyter_events import EventLogger
 from nbformat import ValidationError, sign
 from nbformat import validate as validate_nb
 from nbformat.v4 import new_notebook
@@ -25,6 +26,7 @@
 )
 from traitlets.config.configurable import LoggingConfigurable
 
+from jupyter_server import DEFAULT_EVENTS_SCHEMA_PATH, JUPYTER_SERVER_EVENTS_URI
 from jupyter_server.transutils import _i18n
 from jupyter_server.utils import ensure_async, import_item
 
@@ -53,6 +55,24 @@ class ContentsManager(LoggingConfigurable):
 
     """
 
+    event_schema_id = JUPYTER_SERVER_EVENTS_URI + "/contents_service/v1"  # used by emit()
+    event_logger = Instance(EventLogger).tag(config=True)  # default: _default_event_logger()
+
+    @default("event_logger")
+    def _default_event_logger(self):
+        if self.parent and hasattr(self.parent, "event_logger"):
+            return self.parent.event_logger
+        else:
+            # If parent does not have an event logger, create one.
+            logger = EventLogger()
+            schema_path = DEFAULT_EVENTS_SCHEMA_PATH / "contents_service" / "v1.yaml"
+            logger.register_event_schema(schema_path)
+            return logger
+
+    def emit(self, data):
+        """Emit event using the core event schema from Jupyter Server's Contents Manager."""
+        self.event_logger.emit(schema_id=self.event_schema_id, data=data)
+
     root_dir = Unicode("/", config=True)
 
     allow_hidden = Bool(False, config=True, help="Allow access to hidden files")
@@ -416,11 +436,13 @@ def delete(self, path):
             raise HTTPError(400, "Can't delete root")
         self.delete_file(path)
         self.checkpoints.delete_all_checkpoints(path)
+        self.emit(data={"action": "delete", "path": path})
 
     def rename(self, old_path, new_path):
         """Rename a file and any checkpoints associated with that file."""
         self.rename_file(old_path, new_path)
         self.checkpoints.rename_all_checkpoints(old_path, new_path)
+        self.emit(data={"action": "rename", "path": new_path, "source_path": old_path})
 
     def update(self, model, path):
         """Update the file's path
@@ -616,6 +638,7 @@ def copy(self, from_path, to_path=None):
             raise HTTPError(404, "No such directory: %s" % to_path)
 
         model = self.save(model, to_path)
+        self.emit(data={"action": "copy", "path": to_path, "source_path": from_path})
         return model
 
     def log_info(self):
@@ -819,11 +842,13 @@ async def delete(self, path):
 
         await self.delete_file(path)
         await self.checkpoints.delete_all_checkpoints(path)
+        self.emit(data={"action": "delete", "path": path})
 
     async def rename(self, old_path, new_path):
         """Rename a file and any checkpoints associated with that file."""
         await self.rename_file(old_path, new_path)
         await self.checkpoints.rename_all_checkpoints(old_path, new_path)
+        self.emit(data={"action": "rename", "path": new_path, "source_path": old_path})
 
     async def update(self, model, path):
         """Update the file's path
@@ -985,6 +1010,7 @@ async def copy(self, from_path, to_path=None):
             raise HTTPError(404, "No such directory: %s" % to_path)
 
         model = await self.save(model, to_path)
+        self.emit(data={"action": "copy", "path": to_path, "source_path": from_path})
         return model
 
     async def trust_notebook(self, path):