1
- /* Copyright © 2017-2020 ABBYY Production LLC
1
+ /* Copyright © 2017-2024 ABBYY
2
2
3
3
Licensed under the Apache License, Version 2.0 (the "License");
4
4
you may not use this file except in compliance with the License.
@@ -20,22 +20,6 @@ limitations under the License.
20
20
21
21
namespace NeoML {
22
22
23
- CBinaryCrossEntropyLossLayer::CBinaryCrossEntropyLossLayer ( IMathEngine& mathEngine ) :
24
- CLossLayer ( mathEngine, " CCnnBinaryCrossEntropyLossLayer" ),
25
- positiveWeightMinusOneValue ( 0 )
26
- {
27
- }
28
-
29
- void CBinaryCrossEntropyLossLayer::SetPositiveWeight ( float value )
30
- {
31
- positiveWeightMinusOneValue = value - 1 ;
32
- }
33
-
34
- float CBinaryCrossEntropyLossLayer::GetPositiveWeight () const
35
- {
36
- return positiveWeightMinusOneValue + 1 ;
37
- }
38
-
39
23
void CBinaryCrossEntropyLossLayer::Reshape ()
40
24
{
41
25
CLossLayer::Reshape ();
@@ -44,8 +28,8 @@ void CBinaryCrossEntropyLossLayer::Reshape()
44
28
" BinaryCrossEntropy layer can only work with a binary classificaion problem" );
45
29
}
46
30
47
- void CBinaryCrossEntropyLossLayer::BatchCalculateLossAndGradient ( int batchSize, CConstFloatHandle data, int /* vectorSize */ ,
48
- CConstFloatHandle label, int /* labelSize */ , CFloatHandle lossValue, CFloatHandle lossGradient )
31
+ void CBinaryCrossEntropyLossLayer::BatchCalculateLossAndGradient ( int batchSize, CConstFloatHandle data, int /* vectorSize*/ ,
32
+ CConstFloatHandle label, int /* labelSize*/ , CFloatHandle lossValue, CFloatHandle lossGradient )
49
33
{
50
34
// Therefore the labels vector can only contain {-1, 1} values
51
35
CFloatHandleStackVar one ( MathEngine () );
@@ -56,22 +40,23 @@ void CBinaryCrossEntropyLossLayer::BatchCalculateLossAndGradient( int batchSize,
56
40
minusOne.SetValue ( -1 .f );
57
41
CFloatHandleStackVar zero ( MathEngine () );
58
42
zero.SetValue ( 0 .f );
59
- CFloatHandleStackVar positiveWeightMinusOne ( MathEngine () );
60
- positiveWeightMinusOne .SetValue ( positiveWeightMinusOneValue );
43
+ CFloatHandleStackVar positiveWeightMinusOneVar ( MathEngine () );
44
+ positiveWeightMinusOneVar .SetValue ( positiveWeightMinusOne );
61
45
46
+ CFloatHandleStackVar temp ( MathEngine (), batchSize * 3 );
62
47
// Convert the target values to [0, 1] range using the binaryLabel = 0.5 * ( label + 1 ) formula
63
- CFloatHandleStackVar binaryLabel ( MathEngine (), batchSize );
48
+ CFloatHandle binaryLabel = temp. GetHandle ( );
64
49
MathEngine ().VectorAddValue ( label, binaryLabel, batchSize, one );
65
50
MathEngine ().VectorMultiply ( binaryLabel, binaryLabel, batchSize, half );
66
51
67
52
// Notations:
68
- // x = logits, z = labels, q = pos_weight, l = 1 + (q - 1) * z
53
+ // x = logits, z = labels, q = pos_weight, lCoef = 1 + (q - 1) * z
69
54
70
55
// The original loss function formula:
71
- // loss = (1 - z) * x + l * log(1 + exp(-x))
56
+ // loss = (1 - z) * x + lCoef * log(1 + exp(-x))
72
57
73
58
// The formula to avoid overflow for large exponent power in exp(-x):
74
- // loss = (1 - z) * x + l * (log(1 + exp(-abs(x))) + max(-x, 0))
59
+ // loss = (1 - z) * x + lCoef * (log(1 + exp(-abs(x))) + max(-x, 0))
75
60
76
61
// (1-z)*x
77
62
CFloatHandleStackVar temp ( MathEngine (), batchSize);
@@ -104,11 +89,11 @@ void CBinaryCrossEntropyLossLayer::BatchCalculateLossAndGradient( int batchSize,
104
89
MathEngine ().VectorAdd ( lossValue, temp, lossValue, batchSize );
105
90
106
91
if ( !lossGradient.IsNull () ) {
107
- // loss' = (1-z) - l / ( 1+exp(x) ) = (1-z) - l * sigmoid(-x)
108
92
109
93
// (z-1)
110
94
CFloatHandleStackVar temp5 ( MathEngine (), batchSize );
111
95
MathEngine ().VectorAddValue ( binaryLabel, temp5, batchSize, minusOne );
96
+ // loss' = (1 - z) - lCoef / ( 1 + exp(x) ) = (1 - z) - lCoef * sigmoid(-x)
112
97
113
98
// -x
114
99
CFloatHandleStackVar temp6 ( MathEngine (), batchSize );
@@ -130,55 +115,60 @@ void CBinaryCrossEntropyLossLayer::BatchCalculateLossAndGradient( int batchSize,
130
115
}
131
116
132
117
// Overflow-safe sigmoid calculation
133
- void CBinaryCrossEntropyLossLayer::calculateStableSigmoid ( const CConstFloatHandle & firstHandle,
118
+ void CBinaryCrossEntropyLossLayer::calculateStableSigmoid ( const CFloatHandle & firstHandle,
134
119
const CFloatHandle& resultHandle, int vectorSize ) const
135
120
{
136
121
CFloatHandleStackVar one ( MathEngine () );
137
122
one.SetValue ( 1 .f );
138
123
CFloatHandleStackVar zero ( MathEngine () );
139
124
zero.SetValue ( 0 .f );
140
125
126
+ NeoPresume ( !firstHandle.IsNull () );
127
+ NeoPresume ( !resultHandle.IsNull () );
128
+ NeoPresume ( firstHandle != resultHandle );
129
+ // reduced memory usage for calculation
130
+ CFloatHandle numerator = resultHandle;
131
+ CFloatHandle denominator = firstHandle;
132
+
141
133
// The sigmoid formula:
142
- // Sigmoid(x) = 1 / (1 + e^-x )
134
+ // Sigmoid(x) = 1 / ( 1 + e^-x )
143
135
144
136
// The formula to avoid overflow for large exponent power in exp(-x):
145
- // Sigmoid(x) = e^(-max(-x, 0) ) / ( 1 + e^-|x| )
137
+ // Sigmoid(x) = e^( -max(-x, 0) ) / ( 1 + e^-|x| )
146
138
147
- // e^(-max(-x, 0) )
148
- CFloatHandleStackVar temp ( MathEngine (), vectorSize );
149
- MathEngine ().VectorNegMultiply ( firstHandle, temp, vectorSize, one );
150
- MathEngine ().VectorReLU ( temp, temp, vectorSize, zero );
151
- MathEngine ().VectorNegMultiply ( temp, temp, vectorSize, one );
152
- MathEngine ().VectorExp ( temp, temp, vectorSize );
139
+ // e^( -max(-x, 0) )
140
+ MathEngine ().VectorNegMultiply ( firstHandle, numerator, vectorSize, one );
141
+ MathEngine ().VectorReLU ( numerator, numerator, vectorSize, zero );
142
+ MathEngine ().VectorNegMultiply ( numerator, numerator, vectorSize, one );
143
+ MathEngine ().VectorExp ( numerator, numerator, vectorSize );
153
144
154
145
// ( 1 + e^-|x| )
155
- CFloatHandleStackVar temp2 ( MathEngine (), vectorSize );
156
- MathEngine ().VectorAbs ( firstHandle, temp2, vectorSize );
157
- MathEngine ().VectorNegMultiply ( temp2, temp2, vectorSize, one );
158
- MathEngine ().VectorExp ( temp2, temp2, vectorSize );
159
- MathEngine ().VectorAddValue ( temp2, temp2, vectorSize, one );
146
+ MathEngine ().VectorAbs ( firstHandle, denominator, vectorSize );
147
+ MathEngine ().VectorNegMultiply ( denominator, denominator, vectorSize, one );
148
+ MathEngine ().VectorExp ( denominator, denominator, vectorSize );
149
+ MathEngine ().VectorAddValue ( denominator, denominator, vectorSize, one );
160
150
161
151
// The sigmoid
162
- MathEngine ().VectorEltwiseDivide ( temp, temp2 , resultHandle, vectorSize );
152
+ MathEngine ().VectorEltwiseDivide ( numerator, denominator , resultHandle, vectorSize );
163
153
}
164
154
165
- static const int BinaryCrossEntropyLossLayerVersion = 2000 ;
155
+ constexpr int binaryCrossEntropyLossLayerVersion = 2000 ;
166
156
167
157
void CBinaryCrossEntropyLossLayer::Serialize ( CArchive& archive )
168
158
{
169
- archive.SerializeVersion ( BinaryCrossEntropyLossLayerVersion , CDnn::ArchiveMinSupportedVersion );
159
+ archive.SerializeVersion ( binaryCrossEntropyLossLayerVersion , CDnn::ArchiveMinSupportedVersion );
170
160
CLossLayer::Serialize ( archive );
171
-
172
- archive.Serialize ( positiveWeightMinusOneValue );
161
+
162
+ archive.Serialize ( positiveWeightMinusOne );
173
163
}
174
164
175
- CLayerWrapper<CBinaryCrossEntropyLossLayer> BinaryCrossEntropyLoss (
176
- float positiveWeight, float lossWeight )
165
+ CLayerWrapper<CBinaryCrossEntropyLossLayer> BinaryCrossEntropyLoss ( float positiveWeight, float lossWeight )
177
166
{
178
- return CLayerWrapper<CBinaryCrossEntropyLossLayer>( " BinaryCrossEntropyLoss" , [=]( CBinaryCrossEntropyLossLayer* result ) {
179
- result->SetPositiveWeight ( positiveWeight );
180
- result->SetLossWeight ( lossWeight );
181
- } );
167
+ return CLayerWrapper<CBinaryCrossEntropyLossLayer>( " BinaryCrossEntropyLoss" ,
168
+ [=]( CBinaryCrossEntropyLossLayer* result ) {
169
+ result->SetPositiveWeight ( positiveWeight );
170
+ result->SetLossWeight ( lossWeight );
171
+ } );
182
172
}
183
173
184
174
} // namespace NeoML
0 commit comments