Add weights_only=True to torch.load #3012

Merged: 4 commits, Aug 27, 2024
3 changes: 2 additions & 1 deletion advanced_source/dynamic_quantization_tutorial.py
@@ -151,7 +151,8 @@ def tokenize(self, path):
model.load_state_dict(
    torch.load(
        model_data_filepath + 'word_language_model_quantize.pth',
-        map_location=torch.device('cpu')
+        map_location=torch.device('cpu'),
+        weights_only=True
    )
)

2 changes: 1 addition & 1 deletion advanced_source/static_quantization_tutorial.rst
@@ -286,7 +286,7 @@ We next define several helper functions to help with model evaluation.

def load_model(model_file):
    model = MobileNetV2()
-    state_dict = torch.load(model_file)
+    state_dict = torch.load(model_file, weights_only=True)
    model.load_state_dict(state_dict)
    model.to('cpu')
    return model
2 changes: 1 addition & 1 deletion beginner_source/basics/quickstart_tutorial.py
@@ -216,7 +216,7 @@ def test(dataloader, model, loss_fn):
# the state dictionary into it.

model = NeuralNetwork().to(device)
-model.load_state_dict(torch.load("model.pth"))
+model.load_state_dict(torch.load("model.pth", weights_only=True))

#############################################################
# This model can now be used to make predictions.
16 changes: 13 additions & 3 deletions beginner_source/basics/saveloadrun_tutorial.py
@@ -32,9 +32,14 @@
##########################
# To load model weights, you need to create an instance of the same model first, and then load the parameters
# using the ``load_state_dict()`` method.
+#
+# In the code below, we set ``weights_only=True`` to limit the
+# functions executed during unpickling to only those necessary for
+# loading weights. Using ``weights_only=True`` is considered
+# a best practice when loading weights.

model = models.vgg16() # we do not specify ``weights``, i.e. create untrained model
-model.load_state_dict(torch.load('model_weights.pth'))
+model.load_state_dict(torch.load('model_weights.pth', weights_only=True))
Review comment (Contributor): nit: since this is the main landing page for save load, a comment along these lines would be great:

"Setting ``weights_only=True`` restricts the functions that can be executed during unpickling to those required to load weights; using this flag is a best practice for loading weights."

model.eval()

###########################
@@ -50,9 +55,14 @@
torch.save(model, 'model.pth')

########################
-# We can then load the model like this:
+# We can then load the model as demonstrated below.
+#
+# As described in `Saving and loading torch.nn.Modules <https://pytorch.org/docs/main/notes/serialization.html#saving-and-loading-torch-nn-modules>`__,
+# saving ``state_dict``s is considered the best practice. However,
+# below we use ``weights_only=False`` because this involves loading the
+# model, which is a legacy use case for ``torch.save``.

-model = torch.load('model.pth')
+model = torch.load('model.pth', weights_only=False)
Review comment (Contributor): nit: I would add a comment here noting that, as described in https://pytorch.org/docs/main/notes/serialization.html#saving-and-loading-torch-nn-modules, saving state_dicts is the best practice, and that we use weights_only=False here because this is loading a model, which is a legacy use case for torch.save.


########################
# .. note:: This approach uses the Python `pickle <https://docs.python.org/3/library/pickle.html>`_ module when serializing the model, so it relies on the actual class definition being available when loading the model.
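
For context on what the flag actually changes, here is a minimal, self-contained sketch; the file and class names are illustrative, not part of the diff. With ``weights_only=True``, ``torch.load`` uses a restricted unpickler that admits tensors, state dicts, and other primitive containers, while arbitrary pickled Python objects are rejected (recent releases raise an ``UnpicklingError``):

import torch

class NotAWeight:
    # Stands in for an arbitrary Python object inside a checkpoint.
    pass

# Tensors and plain containers pass the restricted unpickler.
torch.save({"w": torch.randn(2, 2)}, "weights_demo.pth")
state = torch.load("weights_demo.pth", weights_only=True)  # loads fine

# Arbitrary objects are rejected under weights_only=True ...
torch.save(NotAWeight(), "object_demo.pth")
try:
    torch.load("object_demo.pth", weights_only=True)
except Exception as err:
    print(f"rejected: {err}")

# ... and load only via weights_only=False, which should be reserved
# for checkpoints from a trusted source.
obj = torch.load("object_demo.pth", weights_only=False)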
2 changes: 1 addition & 1 deletion beginner_source/blitz/cifar10_tutorial.py
@@ -221,7 +221,7 @@ def forward(self, x):
# wasn't necessary here, we only did it to illustrate how to do so):

net = Net()
-net.load_state_dict(torch.load(PATH))
+net.load_state_dict(torch.load(PATH, weights_only=True))

########################################################################
# Okay, now let us see what the neural network thinks these examples above are:
2 changes: 1 addition & 1 deletion beginner_source/fgsm_tutorial.py
@@ -192,7 +192,7 @@ def forward(self, x):
model = Net().to(device)

# Load the pretrained model
-model.load_state_dict(torch.load(pretrained_model, map_location=device))
+model.load_state_dict(torch.load(pretrained_model, map_location=device, weights_only=True))

# Set the model in evaluation mode. In this case this is for the Dropout layers
model.eval()
16 changes: 8 additions & 8 deletions beginner_source/saving_loading_models.py
@@ -153,7 +153,7 @@
# .. code:: python
#
# model = TheModelClass(*args, **kwargs)
-# model.load_state_dict(torch.load(PATH))
+# model.load_state_dict(torch.load(PATH, weights_only=True))
# model.eval()
#
# .. note::
@@ -206,7 +206,7 @@
# .. code:: python
#
# # Model class must be defined somewhere
-# model = torch.load(PATH)
+# model = torch.load(PATH, weights_only=False)
# model.eval()
#
# This save/load process uses the most intuitive syntax and involves the
@@ -290,7 +290,7 @@
# model = TheModelClass(*args, **kwargs)
# optimizer = TheOptimizerClass(*args, **kwargs)
#
-# checkpoint = torch.load(PATH)
+# checkpoint = torch.load(PATH, weights_only=True)
# model.load_state_dict(checkpoint['model_state_dict'])
# optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
# epoch = checkpoint['epoch']
@@ -354,7 +354,7 @@
# optimizerA = TheOptimizerAClass(*args, **kwargs)
# optimizerB = TheOptimizerBClass(*args, **kwargs)
#
-# checkpoint = torch.load(PATH)
+# checkpoint = torch.load(PATH, weights_only=True)
# modelA.load_state_dict(checkpoint['modelA_state_dict'])
# modelB.load_state_dict(checkpoint['modelB_state_dict'])
# optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
@@ -407,7 +407,7 @@
# .. code:: python
#
# modelB = TheModelBClass(*args, **kwargs)
-# modelB.load_state_dict(torch.load(PATH), strict=False)
+# modelB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)
#
# Partially loading a model or loading a partial model are common
# scenarios when transfer learning or training a new complex model.
@@ -446,7 +446,7 @@
#
# device = torch.device('cpu')
# model = TheModelClass(*args, **kwargs)
-# model.load_state_dict(torch.load(PATH, map_location=device))
+# model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
#
# When loading a model on a CPU that was trained with a GPU, pass
# ``torch.device('cpu')`` to the ``map_location`` argument in the
@@ -469,7 +469,7 @@
#
# device = torch.device("cuda")
# model = TheModelClass(*args, **kwargs)
-# model.load_state_dict(torch.load(PATH))
+# model.load_state_dict(torch.load(PATH, weights_only=True))
# model.to(device)
# # Make sure to call input = input.to(device) on any input tensors that you feed to the model
#
@@ -497,7 +497,7 @@
#
# device = torch.device("cuda")
# model = TheModelClass(*args, **kwargs)
-# model.load_state_dict(torch.load(PATH, weights_only=True, map_location="cuda:0")) # Choose whatever GPU device number you want
+# model.load_state_dict(torch.load(PATH, weights_only=True, map_location="cuda:0")) # Choose whatever GPU device number you want
# model.to(device)
# # Make sure to call input = input.to(device) on any input tensors that you feed to the model
#
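
The snippets above are fragments, so a short end-to-end sketch of the general-checkpoint pattern may help; the toy model and file name are illustrative. ``weights_only=True`` works here because the checkpoint dict holds only tensors, containers, and primitives:

import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Save model and optimizer state together with scalar metadata.
torch.save({
    'epoch': 5,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': 0.42,
}, 'checkpoint.pth')

# Everything restores through the restricted unpickler.
checkpoint = torch.load('checkpoint.pth', weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']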
2 changes: 1 addition & 1 deletion beginner_source/transfer_learning_tutorial.py
@@ -209,7 +209,7 @@ def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
print(f'Best val Acc: {best_acc:4f}')

# load best model weights
-model.load_state_dict(torch.load(best_model_params_path))
+model.load_state_dict(torch.load(best_model_params_path, weights_only=True))
return model


6 changes: 3 additions & 3 deletions intermediate_source/autograd_saved_tensors_hooks_tutorial.py
@@ -397,7 +397,7 @@ def pack_hook(tensor):
    return name

def unpack_hook(name):
-    return torch.load(name)
+    return torch.load(name, weights_only=True)


######################################################################
@@ -420,7 +420,7 @@ def pack_hook(tensor):
    return name

def unpack_hook(name):
-    tensor = torch.load(name)
+    tensor = torch.load(name, weights_only=True)
    os.remove(name)
    return tensor

@@ -462,7 +462,7 @@ def pack_hook(tensor):
    return temp_file

def unpack_hook(temp_file):
-    return torch.load(temp_file.name)
+    return torch.load(temp_file.name, weights_only=True)


######################################################################
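
Since the hooks above appear piecemeal across hunks, here is a self-contained sketch of the disk-offload pattern, assuming temporary files for storage:

import os
import tempfile

import torch

def pack_hook(tensor):
    # Offload the saved activation to disk; keep only the file path.
    fd, path = tempfile.mkstemp(suffix='.pt')
    os.close(fd)
    torch.save(tensor, path)
    return path

def unpack_hook(path):
    # Reload the activation when backward needs it, then clean up.
    tensor = torch.load(path, weights_only=True)
    os.remove(path)
    return tensor

x = torch.randn(5, requires_grad=True)
with torch.autograd.graph.saved_tensors_hooks(pack_hook, unpack_hook):
    y = (x * x).sum()
y.backward()  # unpack_hook runs here to recover the saved tensors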
2 changes: 1 addition & 1 deletion intermediate_source/ddp_tutorial.rst
@@ -214,7 +214,7 @@ and elasticity support, please refer to TorchElastic
# configure map_location properly
map_location = {'cuda:%d' % 0: 'cuda:%d' % rank}
ddp_model.load_state_dict(
-    torch.load(CHECKPOINT_PATH, map_location=map_location))
+    torch.load(CHECKPOINT_PATH, map_location=map_location, weights_only=True))

loss_fn = nn.MSELoss()
optimizer = optim.SGD(ddp_model.parameters(), lr=0.001)
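
As a worked example of the ``map_location`` dict above: assuming rank 0 saved the checkpoint from ``cuda:0``, each process remaps those storages onto its own GPU before loading. This is a sketch only, reusing ``CHECKPOINT_PATH`` and ``ddp_model`` from the snippet above; real DDP code gets ``rank`` from the launcher and needs an initialized process group:

import torch

rank = 1  # supplied by the launcher in real DDP code
map_location = {'cuda:0': f'cuda:{rank}'}
state_dict = torch.load(CHECKPOINT_PATH,
                        map_location=map_location,
                        weights_only=True)
ddp_model.load_state_dict(state_dict)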
2 changes: 1 addition & 1 deletion intermediate_source/tiatoolbox_tutorial.rst
@@ -368,7 +368,7 @@ The PatchPredictor class runs a CNN-based classifier written in PyTorch.

# Users can load any PyTorch model architecture instead using the following script
model = vanilla.CNNModel(backbone="resnet18", num_classes=9) # Importing model from torchvision.models.resnet18
-model.load_state_dict(torch.load(weights_path, map_location="cpu"), strict=True)
+model.load_state_dict(torch.load(weights_path, map_location="cpu", weights_only=True), strict=True)
def preproc_func(img):
img = PIL.Image.fromarray(img)
img = transforms.ToTensor()(img)
3 changes: 2 additions & 1 deletion prototype_source/fx_graph_mode_ptq_dynamic.py
@@ -171,7 +171,8 @@ def tokenize(self, path):
model.load_state_dict(
    torch.load(
        model_data_filepath + 'word_language_model_quantize.pth',
-        map_location=torch.device('cpu')
+        map_location=torch.device('cpu'),
+        weights_only=True
    )
)

6 changes: 3 additions & 3 deletions prototype_source/fx_graph_mode_ptq_static.rst
@@ -157,7 +157,7 @@ Download the torchvision resnet18 model

def load_model(model_file):
    model = resnet18(pretrained=False)
-    state_dict = torch.load(model_file)
+    state_dict = torch.load(model_file, weights_only=True)
    model.load_state_dict(state_dict)
    model.to("cpu")
    return model
@@ -320,15 +320,15 @@ We can now print the size and accuracy of the quantized model.
# ModuleAttributeError: 'ConvReLU2d' object has no attribute '_modules'
# save the whole model directly
# torch.save(quantized_model, fx_graph_mode_model_file_path)
-# loaded_quantized_model = torch.load(fx_graph_mode_model_file_path)
+# loaded_quantized_model = torch.load(fx_graph_mode_model_file_path, weights_only=False)

# save with state_dict
# torch.save(quantized_model.state_dict(), fx_graph_mode_model_file_path)
# import copy
# model_to_quantize = copy.deepcopy(float_model)
# prepared_model = prepare_fx(model_to_quantize, {"": qconfig})
# loaded_quantized_model = convert_fx(prepared_model)
-# loaded_quantized_model.load_state_dict(torch.load(fx_graph_mode_model_file_path))
+# loaded_quantized_model.load_state_dict(torch.load(fx_graph_mode_model_file_path, weights_only=True))

# save with script
torch.jit.save(torch.jit.script(quantized_model), fx_graph_mode_model_file_path)
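
For completeness, the TorchScript path in the last line pairs with ``torch.jit.load``, which avoids pickle entirely; a brief sketch reusing the names above:

# Save the quantized model as TorchScript, then reload it without pickle.
torch.jit.save(torch.jit.script(quantized_model), fx_graph_mode_model_file_path)
loaded_quantized_model = torch.jit.load(fx_graph_mode_model_file_path)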
2 changes: 1 addition & 1 deletion prototype_source/pt2e_quant_ptq.rst
@@ -274,7 +274,7 @@ and rename it to ``data/resnet18_pretrained_float.pth``.

def load_model(model_file):
    model = resnet18(pretrained=False)
-    state_dict = torch.load(model_file)
+    state_dict = torch.load(model_file, weights_only=True)
    model.load_state_dict(state_dict)
    model.to("cpu")
    return model
2 changes: 1 addition & 1 deletion prototype_source/pt2e_quant_qat.rst
@@ -172,7 +172,7 @@ prepare the data. These steps are very similar to the ones defined in the

def load_model(model_file):
    model = resnet18(pretrained=False)
-    state_dict = torch.load(model_file)
+    state_dict = torch.load(model_file, weights_only=True)
    model.load_state_dict(state_dict)
    return model

2 changes: 1 addition & 1 deletion recipes_source/intel_neural_compressor_for_pytorch.rst
@@ -115,7 +115,7 @@ In this tutorial, the LeNet model is used to demonstrate
        return F.log_softmax(x, dim=1)

model = Net()
-model.load_state_dict(torch.load('./lenet_mnist_model.pth'))
+model.load_state_dict(torch.load('./lenet_mnist_model.pth', weights_only=True))

The pretrained model weights `lenet_mnist_model.pth` come from
`here <https://drive.google.com/drive/folders/1fn83DF14tWmit0RTKWRhPq5uVXt73e0h?usp=sharing>`_.
8 changes: 4 additions & 4 deletions recipes_source/recipes/module_load_state_dict_tips.py
@@ -39,15 +39,15 @@ def forward(self, x):
# to ``torch.load``, the ``torch.device()`` context manager and the ``assign``
# keyword argument to ``nn.Module.load_state_dict()``.

-state_dict = torch.load('checkpoint.pth', mmap=True)
+state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True)
with torch.device('meta'):
meta_m = SomeModule(1000)
meta_m.load_state_dict(state_dict, assign=True)

#############################################################################
# Compare the snippet below to the one above:

-state_dict = torch.load('checkpoint.pth')
+state_dict = torch.load('checkpoint.pth', weights_only=True)
m = SomeModule(1000)
m.load_state_dict(state_dict)

@@ -71,7 +71,7 @@ def forward(self, x):
# * Waiting for the entire checkpoint to be loaded into RAM before performing, for example, some per-tensor processing.

start_time = time.time()
-state_dict = torch.load('checkpoint.pth')
+state_dict = torch.load('checkpoint.pth', weights_only=True)
end_time = time.time()
print(f"loading time without mmap={end_time - start_time}")

@@ -84,7 +84,7 @@ def forward(self, x):
# storages will be memory-mapped.

start_time = time.time()
-state_dict = torch.load('checkpoint.pth', mmap=True)
+state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True)
end_time = time.time()
print(f"loading time with mmap={end_time - start_time}")

2 changes: 1 addition & 1 deletion recipes_source/recipes/save_load_across_devices.py
@@ -97,7 +97,7 @@ def forward(self, x):
# Load
device = torch.device('cpu')
model = Net()
-model.load_state_dict(torch.load(PATH, map_location=device))
+model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))


######################################################################
@@ -131,7 +131,7 @@ def forward(self, x):
model = Net()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

-checkpoint = torch.load(PATH)
+checkpoint = torch.load(PATH, weights_only=True)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
@@ -117,7 +117,7 @@ def forward(self, x):

# Load
model = Net()
-model.load_state_dict(torch.load(PATH))
+model.load_state_dict(torch.load(PATH, weights_only=True))
model.eval()


@@ -128,7 +128,7 @@ def forward(self, x):
optimModelA = optim.SGD(modelA.parameters(), lr=0.001, momentum=0.9)
optimModelB = optim.SGD(modelB.parameters(), lr=0.001, momentum=0.9)

-checkpoint = torch.load(PATH)
+checkpoint = torch.load(PATH, weights_only=True)
modelA.load_state_dict(checkpoint['modelA_state_dict'])
modelB.load_state_dict(checkpoint['modelB_state_dict'])
optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
@@ -124,7 +124,7 @@ def forward(self, x):
# are loading into.
#

-netB.load_state_dict(torch.load(PATH), strict=False)
+netB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)


######################################################################