Skip to content

Commit cf3df67

Browse files
authored
TorchIO download data to MONAI_DATA_DIRECTORY (#686)
1 parent db40736 commit cf3df67

File tree

2 files changed

+60
-32
lines changed

2 files changed

+60
-32
lines changed

.gitignore

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,3 +138,13 @@ tests/testing_data/*Hippocampus*
138138
# Ignore torch saves
139139
*/torch/runs
140140
logs
141+
*/runs
142+
lightning_logs
143+
144+
# Ignore automatically created files
145+
*.ts
146+
nohup.out
147+
deepgrow/ignite/_image.nii.gz
148+
*.zip
149+
deployment/bentoml/mednist_classifier_bentoml.py
150+
deployment/ray/mednist_classifier_start.py

modules/TorchIO_MONAI_PyTorch_Lightning.ipynb

Lines changed: 50 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
},
5454
{
5555
"cell_type": "code",
56-
"execution_count": null,
56+
"execution_count": 1,
5757
"metadata": {},
5858
"outputs": [],
5959
"source": [
@@ -69,7 +69,7 @@
6969
},
7070
{
7171
"cell_type": "code",
72-
"execution_count": null,
72+
"execution_count": 2,
7373
"metadata": {},
7474
"outputs": [],
7575
"source": [
@@ -78,7 +78,7 @@
7878
},
7979
{
8080
"cell_type": "code",
81-
"execution_count": null,
81+
"execution_count": 3,
8282
"metadata": {
8383
"id": "KvbbZuhmquRR"
8484
},
@@ -92,14 +92,16 @@
9292
},
9393
{
9494
"cell_type": "code",
95-
"execution_count": null,
95+
"execution_count": 4,
9696
"metadata": {
9797
"id": "gduPdIturUIB"
9898
},
9999
"outputs": [],
100100
"source": [
101-
"from pathlib import Path\n",
102101
"from datetime import datetime\n",
102+
"import os\n",
103+
"import tempfile\n",
104+
"from glob import glob\n",
103105
"\n",
104106
"import torch\n",
105107
"from torch.utils.data import random_split, DataLoader\n",
@@ -117,6 +119,36 @@
117119
"%load_ext tensorboard"
118120
]
119121
},
122+
{
123+
"cell_type": "markdown",
124+
"metadata": {},
125+
"source": [
126+
"## Setup data directory\n",
127+
"\n",
128+
"You can specify a directory with the `MONAI_DATA_DIRECTORY` environment variable. \n",
129+
"This allows you to save results and reuse downloads. \n",
130+
"If not specified, a temporary directory will be used."
131+
]
132+
},
133+
{
134+
"cell_type": "code",
135+
"execution_count": 5,
136+
"metadata": {},
137+
"outputs": [
138+
{
139+
"name": "stdout",
140+
"output_type": "stream",
141+
"text": [
142+
"/mnt/data/rbrown/Documents/Data/MONAI\n"
143+
]
144+
}
145+
],
146+
"source": [
147+
"directory = os.environ.get(\"MONAI_DATA_DIRECTORY\")\n",
148+
"root_dir = tempfile.mkdtemp() if directory is None else directory\n",
149+
"print(root_dir)"
150+
]
151+
},
120152
{
121153
"cell_type": "markdown",
122154
"metadata": {
@@ -145,20 +177,19 @@
145177
},
146178
{
147179
"cell_type": "code",
148-
"execution_count": null,
180+
"execution_count": 6,
149181
"metadata": {
150182
"id": "KuhTaRl3vf37"
151183
},
152184
"outputs": [],
153185
"source": [
154-
"\n",
155-
"\n",
156186
"class MedicalDecathlonDataModule(pl.LightningDataModule):\n",
157187
" def __init__(self, task, batch_size, train_val_ratio):\n",
158188
" super().__init__()\n",
159189
" self.task = task\n",
160190
" self.batch_size = batch_size\n",
161-
" self.dataset_dir = Path(task)\n",
191+
" self.base_dir = root_dir\n",
192+
" self.dataset_dir = os.path.join(root_dir, task)\n",
162193
" self.train_val_ratio = train_val_ratio\n",
163194
" self.subjects = None\n",
164195
" self.test_subjects = None\n",
@@ -175,16 +206,13 @@
175206
" return shapes.max(axis=0)\n",
176207
"\n",
177208
" def download_data(self):\n",
178-
" if not self.dataset_dir.is_dir():\n",
179-
" url = 'https://msd-for-monai.s3-us-west-2.amazonaws.com/Task04_Hippocampus.tar'\n",
180-
" monai.apps.download_and_extract(url=url, output_dir=\".\")\n",
209+
" if not os.path.isdir(self.dataset_dir):\n",
210+
" url = f'https://msd-for-monai.s3-us-west-2.amazonaws.com/{self.task}.tar'\n",
211+
" monai.apps.download_and_extract(url=url, output_dir=self.base_dir)\n",
181212
"\n",
182-
" def get_niis(d):\n",
183-
" return sorted(p for p in d.glob('*.nii*') if not p.name.startswith('.'))\n",
184-
"\n",
185-
" image_training_paths = get_niis(self.dataset_dir / 'imagesTr')\n",
186-
" label_training_paths = get_niis(self.dataset_dir / 'labelsTr')\n",
187-
" image_test_paths = get_niis(self.dataset_dir / 'imagesTs')\n",
213+
" image_training_paths = sorted(glob(os.path.join(self.dataset_dir, 'imagesTr', \"*.nii*\")))\n",
214+
" label_training_paths = sorted(glob(os.path.join(self.dataset_dir, 'labelsTr', \"*.nii*\")))\n",
215+
" image_test_paths = sorted(glob(os.path.join(self.dataset_dir, 'imagesTs', \"*.nii*\")))\n",
188216
" return image_training_paths, label_training_paths, image_test_paths\n",
189217
"\n",
190218
" def prepare_data(self):\n",
@@ -260,7 +288,7 @@
260288
},
261289
{
262290
"cell_type": "code",
263-
"execution_count": null,
291+
"execution_count": 7,
264292
"metadata": {
265293
"id": "hcHf9w2nLfyC"
266294
},
@@ -284,7 +312,7 @@
284312
},
285313
{
286314
"cell_type": "code",
287-
"execution_count": null,
315+
"execution_count": 8,
288316
"metadata": {
289317
"colab": {
290318
"base_uri": "https://localhost:8080/"
@@ -293,16 +321,6 @@
293321
"outputId": "7cb39051-4c26-4811-b838-8a5e938e53a3"
294322
},
295323
"outputs": [
296-
{
297-
"name": "stderr",
298-
"output_type": "stream",
299-
"text": [
300-
"Downloading...\n",
301-
"From: https://drive.google.com/uc?id=1RzPB1_bqzQhlWvU-YGvZzhx2omcDh38C\n",
302-
"To: /content/Task04_Hippocampus.tar\n",
303-
"28.4MB [00:00, 82.8MB/s]\n"
304-
]
305-
},
306324
{
307325
"name": "stdout",
308326
"output_type": "stream",
@@ -341,7 +359,7 @@
341359
},
342360
{
343361
"cell_type": "code",
344-
"execution_count": null,
362+
"execution_count": 9,
345363
"metadata": {
346364
"id": "1Ov3H12p6Qx1"
347365
},
@@ -395,7 +413,7 @@
395413
},
396414
{
397415
"cell_type": "code",
398-
"execution_count": null,
416+
"execution_count": 10,
399417
"metadata": {
400418
"colab": {
401419
"base_uri": "https://localhost:8080/"

0 commit comments

Comments
 (0)