import warnings
from typing import Any, Callable, Dict, Optional, Sequence, Union

from segmentation_models_pytorch.base import (
    ClassificationHead,
    SegmentationHead,
    SegmentationModel,
)
from segmentation_models_pytorch.encoders import get_encoder
from segmentation_models_pytorch.base.hub_mixin import supports_config_loading

from .decoder import MAnetDecoder


class MAnet(SegmentationModel):
"""MAnet_ : Multi-scale Attention Net. The MA-Net can capture rich contextual dependencies based on
the attention mechanism, using two blocks:
- Position-wise Attention Block (PAB), which captures the spatial dependencies between pixels in a global view
- Multi-scale Fusion Attention Block (MFAB), which captures the channel dependencies between any feature map by
multi-scale semantic feature fusion
Args:
encoder_name: Name of the classification model that will be used as an encoder (a.k.a backbone)
to extract features of different spatial resolution
encoder_depth: A number of stages used in encoder in range [3, 5]. Each stage generate features
two times smaller in spatial dimensions than previous one (e.g. for depth 0 we will have features
with shapes [(N, C, H, W),], for depth 1 - [(N, C, H, W), (N, C, H // 2, W // 2)] and so on).
Default is 5
encoder_weights: One of **None** (random initialization), **"imagenet"** (pre-training on ImageNet) and
other pretrained weights (see table with available weights for each encoder_name)
decoder_channels: List of integers which specify **in_channels** parameter for convolutions used in decoder.
Length of the list should be the same as **encoder_depth**
decoder_use_norm: Specifies normalization between Conv2D and activation.
Accepts the following types:
- **True**: Defaults to `"batchnorm"`.
- **False**: No normalization (`nn.Identity`).
- **str**: Specifies normalization type using default parameters. Available values:
`"batchnorm"`, `"identity"`, `"layernorm"`, `"instancenorm"`, `"inplace"`.
- **dict**: Fully customizable normalization settings. Structure:
```python
{"type": <norm_type>, **kwargs}
```
where `norm_name` corresponds to normalization type (see above), and `kwargs` are passed directly to the normalization layer as defined in PyTorch documentation.
**Example**:
```python
decoder_use_norm={"type": "layernorm", "eps": 1e-2}
```
decoder_pab_channels: A number of channels for PAB module in decoder.
Default is 64.
decoder_interpolation: Interpolation mode used in decoder of the model. Available options are
**"nearest"**, **"bilinear"**, **"bicubic"**, **"area"**, **"nearest-exact"**. Default is **"nearest"**.
in_channels: A number of input channels for the model, default is 3 (RGB images)
classes: A number of classes for output mask (or you can think as a number of channels of output mask)
activation: An activation function to apply after the final convolution layer.
Available options are **"sigmoid"**, **"softmax"**, **"logsoftmax"**, **"tanh"**, **"identity"**,
**callable** and **None**.
Default is **None**
aux_params: Dictionary with parameters of the auxiliary output (classification head). Auxiliary output is build
on top of encoder if **aux_params** is not **None** (default). Supported params:
- classes (int): A number of classes
- pooling (str): One of "max", "avg". Default is "avg"
- dropout (float): Dropout factor in [0, 1)
- activation (str): An activation function to apply "sigmoid"/"softmax"
(could be **None** to return logits)
kwargs: Arguments passed to the encoder class ``__init__()`` function. Applies only to ``timm`` models. Keys with ``None`` values are pruned before passing.
Returns:
``torch.nn.Module``: **MAnet**
.. _MAnet:
https://ieeexplore.ieee.org/abstract/document/9201310
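
    Example:
        A minimal usage sketch. The ``resnet34`` encoder, the 256x256 input size, and the
        ``aux_params`` values below are illustrative choices, not requirements of the model:

        ```python
        import torch

        model = MAnet(
            encoder_name="resnet34",
            encoder_weights=None,  # skip the ImageNet weight download for a quick test
            in_channels=3,
            classes=2,
            aux_params={"classes": 2, "pooling": "avg", "dropout": 0.2},
        )
        model.eval()
        images = torch.rand(4, 3, 256, 256)
        # With aux_params set, the model returns both masks and classification logits.
        masks, labels = model(images)  # masks: (4, 2, 256, 256), labels: (4, 2)
        ```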
"""

    @supports_config_loading
    def __init__(
        self,
        encoder_name: str = "resnet34",
        encoder_depth: int = 5,
        encoder_weights: Optional[str] = "imagenet",
        decoder_use_norm: Union[bool, str, Dict[str, Any]] = "batchnorm",
        decoder_channels: Sequence[int] = (256, 128, 64, 32, 16),
        decoder_pab_channels: int = 64,
        decoder_interpolation: str = "nearest",
        in_channels: int = 3,
        classes: int = 1,
        activation: Optional[Union[str, Callable]] = None,
        aux_params: Optional[dict] = None,
        **kwargs: dict[str, Any],
    ):
        super().__init__()

        # Backward compatibility: remap the deprecated ``decoder_use_batchnorm``
        # keyword onto ``decoder_use_norm``.
        decoder_use_batchnorm = kwargs.pop("decoder_use_batchnorm", None)
        if decoder_use_batchnorm is not None:
            warnings.warn(
                "The decoder_use_batchnorm argument is deprecated. Please use decoder_use_norm instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            decoder_use_norm = decoder_use_batchnorm

        self.encoder = get_encoder(
            encoder_name,
            in_channels=in_channels,
            depth=encoder_depth,
            weights=encoder_weights,
            **kwargs,
        )

        self.decoder = MAnetDecoder(
            encoder_channels=self.encoder.out_channels,
            decoder_channels=decoder_channels,
            n_blocks=encoder_depth,
            use_norm=decoder_use_norm,
            pab_channels=decoder_pab_channels,
            interpolation_mode=decoder_interpolation,
        )

        self.segmentation_head = SegmentationHead(
            in_channels=decoder_channels[-1],
            out_channels=classes,
            activation=activation,
            kernel_size=3,
        )

        # Optional classification head built on the deepest encoder feature map.
        if aux_params is not None:
            self.classification_head = ClassificationHead(
                in_channels=self.encoder.out_channels[-1], **aux_params
            )
        else:
            self.classification_head = None

        self.name = "manet-{}".format(encoder_name)
        self.initialize()
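

if __name__ == "__main__":
    # A minimal smoke-test sketch, not part of the upstream file. Because of the
    # relative import above, it must be run as a module, e.g. (assuming the usual
    # package layout): python -m segmentation_models_pytorch.decoders.manet.model
    import torch

    model = MAnet(encoder_name="resnet34", encoder_weights=None, classes=2)
    model.eval()
    with torch.inference_mode():
        masks = model(torch.rand(1, 3, 224, 224))
    print(masks.shape)  # expected: torch.Size([1, 2, 224, 224])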