huggingface
diff --git a/‎clean_checkpoint.py
Lines changed: 8 additions & 4 deletions b/‎clean_checkpoint.py
Lines changed: 8 additions & 4 deletions
diff --git a/‎requirements.txt
Lines changed: 2 additions & 2 deletions b/‎requirements.txt
Lines changed: 2 additions & 2 deletions
diff --git a/‎sotabench.py
Lines changed: 37 additions & 1 deletion b/‎sotabench.py
Lines changed: 37 additions & 1 deletion
diff --git a/‎timm/models/__init__.py
Lines changed: 3 additions & 1 deletion b/‎timm/models/__init__.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎timm/models/activations.py
Lines changed: 155 additions & 0 deletions b/‎timm/models/activations.py
Lines changed: 155 additions & 0 deletions
diff --git a/‎timm/models/conv2d_helpers.py
Lines changed: 0 additions & 120 deletions b/‎timm/models/conv2d_helpers.py
Lines changed: 0 additions & 120 deletions
@@ -2,6 +2,7 @@
 import argparse
 import os
 import hashlib
+import shutil
 from collections import OrderedDict
 
 parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation')
@@ -31,10 +32,9 @@ def main():
             if state_dict_key in checkpoint:
                 state_dict = checkpoint[state_dict_key]
             else:
-                print("Error: No state_dict found in checkpoint {}.".format(args.checkpoint))
-                exit(1)
+                state_dict = checkpoint
         else:
-            state_dict = checkpoint
+            assert False
         for k, v in state_dict.items():
             name = k[7:] if k.startswith('module') else k
             new_state_dict[name] = v
@@ -43,7 +43,11 @@ def main():
         torch.save(new_state_dict, args.output)
         with open(args.output, 'rb') as f:
             sha_hash = hashlib.sha256(f.read()).hexdigest()
-        print("=> Saved state_dict to '{}, SHA256: {}'".format(args.output, sha_hash))
+
+        checkpoint_base = os.path.splitext(args.checkpoint)[0]
+        final_filename = '-'.join([checkpoint_base, sha_hash[:8]]) + '.pth'
+        shutil.move(args.output, final_filename)
+        print("=> Saved state_dict to '{}, SHA256: {}'".format(final_filename, sha_hash))
     else:
         print("Error: Checkpoint ({}) doesn't exist".format(args.checkpoint))
 
 
@@ -1,3 +1,3 @@
-torch>=1.1.0
-torchvision>=0.3.0
+torch>=1.2.0
+torchvision>=0.4.0
 pyyaml
@@ -78,7 +78,7 @@ def _entry(model_name, paper_model_name, paper_arxiv_id, batch_size=BATCH_SIZE,
     _entry('mixnet_m', 'MixNet-M', '1907.09595'),
     _entry('mixnet_s', 'MixNet-S', '1907.09595'),
     _entry('mnasnet_100', 'MnasNet-B1', '1807.11626'),
-    _entry('mobilenetv3_100', 'MobileNet V3-Large 1.0', '1905.02244',
+    _entry('mobilenetv3_rw', 'MobileNet V3-Large 1.0', '1905.02244',
            model_desc='Trained in PyTorch with RMSProp, exponential LR decay, and hyper-params matching '
                       'paper as closely as possible.'),
     _entry('resnet18', 'ResNet-18', '1812.01187'),
@@ -114,6 +114,30 @@ def _entry(model_name, paper_model_name, paper_arxiv_id, batch_size=BATCH_SIZE,
            model_desc='Ported from official Google AI Tensorflow weights'),
     _entry('tf_efficientnet_b7', 'EfficientNet-B7 (RandAugment)', '1905.11946', batch_size=BATCH_SIZE//8,
            model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_b0_ap', 'EfficientNet-B0 (AdvProp)', '1911.09665',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_b1_ap', 'EfficientNet-B1 (AdvProp)', '1911.09665',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_b2_ap', 'EfficientNet-B2 (AdvProp)', '1911.09665',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_b3_ap', 'EfficientNet-B3 (AdvProp)', '1911.09665', batch_size=BATCH_SIZE // 2,
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_b4_ap', 'EfficientNet-B4 (AdvProp)', '1911.09665', batch_size=BATCH_SIZE // 2,
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_b5_ap', 'EfficientNet-B5 (AdvProp)', '1911.09665', batch_size=BATCH_SIZE // 4,
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_b6_ap', 'EfficientNet-B6 (AdvProp)', '1911.09665', batch_size=BATCH_SIZE // 8,
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_b7_ap', 'EfficientNet-B7 (AdvProp)', '1911.09665', batch_size=BATCH_SIZE // 8,
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_b8_ap', 'EfficientNet-B8 (AdvProp)', '1911.09665', batch_size=BATCH_SIZE // 8,
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_cc_b0_4e', 'EfficientNet-CondConv-B0 4 experts', '1904.04971',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_cc_b0_8e', 'EfficientNet-CondConv-B0 8 experts', '1904.04971',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_efficientnet_cc_b1_8e', 'EfficientNet-CondConv-B1 8 experts', '1904.04971',
+           model_desc='Ported from official Google AI Tensorflow weights'),
     _entry('tf_efficientnet_es', 'EfficientNet-EdgeTPU-S', '1905.11946',
            model_desc='Ported from official Google AI Tensorflow weights'),
     _entry('tf_efficientnet_em', 'EfficientNet-EdgeTPU-M', '1905.11946',
@@ -124,6 +148,18 @@ def _entry(model_name, paper_model_name, paper_arxiv_id, batch_size=BATCH_SIZE,
     _entry('tf_mixnet_l', 'MixNet-L', '1907.09595', model_desc='Ported from official Google AI Tensorflow weights'),
     _entry('tf_mixnet_m', 'MixNet-M', '1907.09595', model_desc='Ported from official Google AI Tensorflow weights'),
     _entry('tf_mixnet_s', 'MixNet-S', '1907.09595', model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_mobilenetv3_large_100', 'MobileNet V3-Large 1.0', '1905.02244',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_mobilenetv3_large_075', 'MobileNet V3-Large 0.75', '1905.02244',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_mobilenetv3_large_minimal_100', 'MobileNet V3-Large Minimal 1.0', '1905.02244',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_mobilenetv3_small_100', 'MobileNet V3-Small 1.0', '1905.02244',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_mobilenetv3_small_075', 'MobileNet V3-Small 0.75', '1905.02244',
+           model_desc='Ported from official Google AI Tensorflow weights'),
+    _entry('tf_mobilenetv3_small_minimal_100', 'MobileNet V3-Small Minimal 1.0', '1905.02244',
+           model_desc='Ported from official Google AI Tensorflow weights'),
 
     ## Cadene ported weights (to remove if Cadene adds sotabench)
     _entry('inception_resnet_v2', 'Inception ResNet V2', '1602.07261'),
 
@@ -7,12 +7,14 @@
 from .xception import *
 from .nasnet import *
 from .pnasnet import *
-from .gen_efficientnet import *
+from .efficientnet import *
+from .mobilenetv3 import *
 from .inception_v3 import *
 from .gluon_resnet import *
 from .gluon_xception import *
 from .res2net import *
 from .dla import *
+from .hrnet import *
 
 from .registry import *
 from .factory import create_model
 
@@ -0,0 +1,155 @@
+import torch
+from torch import nn as nn
+from torch.nn import functional as F
+
+
+_USE_MEM_EFFICIENT_ISH = True
+if _USE_MEM_EFFICIENT_ISH:
+    # This version reduces memory overhead of Swish during training by
+    # recomputing torch.sigmoid(x) in backward instead of saving it.
+    @torch.jit.script
+    def swish_jit_fwd(x):
+        return x.mul(torch.sigmoid(x))
+
+
+    @torch.jit.script
+    def swish_jit_bwd(x, grad_output):
+        x_sigmoid = torch.sigmoid(x)
+        return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid)))
+
+
+    class SwishJitAutoFn(torch.autograd.Function):
+        """ torch.jit.script optimised Swish
+        Inspired by a conversation between Jeremy Howard & Adam Paszke
+        https://twitter.com/jeremyphoward/status/1188251041835315200
+        """
+
+        @staticmethod
+        def forward(ctx, x):
+            ctx.save_for_backward(x)
+            return swish_jit_fwd(x)
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            x = ctx.saved_tensors[0]
+            return swish_jit_bwd(x, grad_output)
+
+
+    def swish(x, _inplace=False):
+        return SwishJitAutoFn.apply(x)
+
+
+    @torch.jit.script
+    def mish_jit_fwd(x):
+        return x.mul(torch.tanh(F.softplus(x)))
+
+
+    @torch.jit.script
+    def mish_jit_bwd(x, grad_output):
+        x_sigmoid = torch.sigmoid(x)
+        x_tanh_sp = F.softplus(x).tanh()
+        return grad_output.mul(x_tanh_sp + x * x_sigmoid * (1 - x_tanh_sp * x_tanh_sp))
+
+
+    class MishJitAutoFn(torch.autograd.Function):
+        @staticmethod
+        def forward(ctx, x):
+            ctx.save_for_backward(x)
+            return mish_jit_fwd(x)
+
+        @staticmethod
+        def backward(ctx, grad_output):
+            x = ctx.saved_tensors[0]
+            return mish_jit_bwd(x, grad_output)
+
+    def mish(x, _inplace=False):
+        return MishJitAutoFn.apply(x)
+
+else:
+    def swish(x, inplace=False):
+        """Swish - Described in: https://arxiv.org/abs/1710.05941
+        """
+        return x.mul_(x.sigmoid()) if inplace else x.mul(x.sigmoid())
+
+
+    def mish(x, _inplace=False):
+        """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681
+        """
+        return x.mul(F.softplus(x).tanh())
+
+
+class Swish(nn.Module):
+    def __init__(self, inplace=False):
+        super(Swish, self).__init__()
+        self.inplace = inplace
+
+    def forward(self, x):
+        return swish(x, self.inplace)
+
+
+class Mish(nn.Module):
+    def __init__(self, inplace=False):
+        super(Mish, self).__init__()
+        self.inplace = inplace
+
+    def forward(self, x):
+        return mish(x, self.inplace)
+
+
+def sigmoid(x, inplace=False):
+    return x.sigmoid_() if inplace else x.sigmoid()
+
+
+# PyTorch has this, but not with a consistent inplace argument interface
+class Sigmoid(nn.Module):
+    def __init__(self, inplace=False):
+        super(Sigmoid, self).__init__()
+        self.inplace = inplace
+
+    def forward(self, x):
+        return x.sigmoid_() if self.inplace else x.sigmoid()
+
+
+def tanh(x, inplace=False):
+    return x.tanh_() if inplace else x.tanh()
+
+
+# PyTorch has this, but not with a consistent inplace argument interface
+class Tanh(nn.Module):
+    def __init__(self, inplace=False):
+        super(Tanh, self).__init__()
+        self.inplace = inplace
+
+    def forward(self, x):
+        return x.tanh_() if self.inplace else x.tanh()
+
+
+def hard_swish(x, inplace=False):
+    inner = F.relu6(x + 3.).div_(6.)
+    return x.mul_(inner) if inplace else x.mul(inner)
+
+
+class HardSwish(nn.Module):
+    def __init__(self, inplace=False):
+        super(HardSwish, self).__init__()
+        self.inplace = inplace
+
+    def forward(self, x):
+        return hard_swish(x, self.inplace)
+
+
+def hard_sigmoid(x, inplace=False):
+    if inplace:
+        return x.add_(3.).clamp_(0., 6.).div_(6.)
+    else:
+        return F.relu6(x + 3.) / 6.
+
+
+class HardSigmoid(nn.Module):
+    def __init__(self, inplace=False):
+        super(HardSigmoid, self).__init__()
+        self.inplace = inplace
+
+    def forward(self, x):
+        return hard_sigmoid(x, self.inplace)
+