Skip to content

Commit 90d43bc

Browse files
committed
[NeoMathEngine] CMemoryHandle less memory consumption
Signed-off-by: Kirill Golikov <[email protected]>
1 parent 72d48e6 commit 90d43bc

File tree

9 files changed

+94
-13
lines changed

9 files changed

+94
-13
lines changed

NeoMathEngine/include/NeoMathEngine/MemoryHandle.h

Lines changed: 44 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ limitations under the License.
1616
#pragma once
1717

1818
#include <NeoMathEngine/NeoMathEngineDefs.h>
19+
#include <NeoMathEngine/NeoMathEngineException.h>
1920
#include <cstddef>
2021
#include <type_traits>
2122

@@ -24,31 +25,66 @@ namespace NeoML {
2425
class IMathEngine;
2526
class CMemoryHandleInternal;
2627

28+
// Get pointer to IMathEngine by the given current entity
29+
NEOMATHENGINE_API IMathEngine* GetMathEngineByIndex( size_t currentEntity );
30+
// Get current entity from the given pointer to IMathEngine
31+
NEOMATHENGINE_API size_t GetIndexOfMathEngine( const IMathEngine* mathEngine );
32+
2733
// Wraps the pointer to memory allocated by a math engine
2834
class NEOMATHENGINE_API CMemoryHandle {
35+
private:
36+
#if FINE_PLATFORM( FINE_64_BIT )
37+
static constexpr int mathEngineCountWidth = 10; // compress to bitfield
38+
static constexpr int mathEngineCountShift = ( sizeof( size_t ) * CHAR_BIT ) - mathEngineCountWidth;
39+
static constexpr size_t mathEngineMaxOffset = size_t( 1 ) << mathEngineCountShift;
40+
#else // FINE_32_BIT
41+
// Full-width values, only so that the bit-field declarations compile correctly; no compression on 32-bit
42+
static constexpr int mathEngineCountWidth = sizeof( size_t ) * CHAR_BIT;
43+
static constexpr int mathEngineCountShift = sizeof( size_t ) * CHAR_BIT;
44+
#endif // FINE_32_BIT
45+
2946
public:
30-
constexpr CMemoryHandle() = default;
47+
// Upper bound on the number of math engine entities
48+
static constexpr int MaxMathEngineEntities = 1024;
49+
static constexpr size_t MathEngineEntityInvalid = size_t( -1 );
50+
51+
CMemoryHandle() : CMemoryHandle( nullptr, 0, MathEngineEntityInvalid ) {}
3152
// Be copied and moved by default
32-
53+
3354
bool operator!=( const CMemoryHandle& other ) const { return !operator==( other ); }
3455
bool operator==( const CMemoryHandle& other ) const
35-
{ return MathEngine == other.MathEngine && Object == other.Object && Offset == other.Offset; }
56+
{ return Object == other.Object && Offset == other.Offset && Entity == other.Entity; }
3657

3758
bool IsNull() const { return *this == CMemoryHandle{}; }
3859

39-
IMathEngine* GetMathEngine() const { return MathEngine; }
60+
IMathEngine* GetMathEngine() const { return GetMathEngineByIndex( Entity ); }
4061

4162
protected:
42-
IMathEngine* MathEngine = nullptr; // the math engine owner
63+
// The struct takes 16 bytes on x64 and arm-x64, and 12 bytes on x86 and arm-x32
4364
const void* Object = nullptr; // the memory allocated base pointer
44-
std::ptrdiff_t Offset = 0; // the offset in the memory allocated volume, in bytes
65+
// The offset in the memory allocated volume, in bytes
66+
size_t Offset : mathEngineCountShift; // (x64) the less significant bits of size_t stores offset in the base object, in bytes
67+
// The math engine owner
68+
size_t Entity : mathEngineCountWidth; // (x64) the most significant bits of size_t stores the number of IMathEngine entity
4569

4670
friend class CMemoryHandleInternal;
4771

4872
explicit CMemoryHandle( IMathEngine* mathEngine, const void* object, ptrdiff_t offset ) :
49-
MathEngine( mathEngine ), Object( object ), Offset( offset ) {}
73+
CMemoryHandle( object , offset, GetIndexOfMathEngine( mathEngine ) ) {}
5074

51-
CMemoryHandle Copy( ptrdiff_t shift ) const { return CMemoryHandle( MathEngine, Object, Offset + shift ); }
75+
CMemoryHandle Copy( ptrdiff_t shift ) const { return CMemoryHandle( Object, Offset + shift, Entity ); }
76+
77+
private:
78+
explicit CMemoryHandle( const void* object, ptrdiff_t offset, size_t entity ) :
79+
Object( object ), Offset( offset ), Entity( entity & ( MaxMathEngineEntities - 1 ) )
80+
{
81+
#if FINE_PLATFORM( FINE_64_BIT )
82+
static_assert( MaxMathEngineEntities == ( 1 << mathEngineCountWidth ), "Invalid max MathEngine entities" );
83+
// Checks that the most significant bits do not interfere with the result
84+
ASSERT_EXPR( 0 <= offset && size_t( offset ) < mathEngineMaxOffset );
85+
#endif // FINE_64_BIT
86+
ASSERT_EXPR( entity == MathEngineEntityInvalid || entity < ( MaxMathEngineEntities - 1/*Invalid*/ ) );
87+
}
5288
};
5389

5490
//---------------------------------------------------------------------------------------------------------------------

NeoMathEngine/include/NeoMathEngine/MemoryHandle.inl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,30 +23,30 @@ template<class T>
2323
inline void CTypedMemoryHandle<T>::SetValueAt( int index, T value ) const
2424
{
2525
CTypedMemoryHandle<T> result = *this + index;
26-
MathEngine->DataExchangeRaw( result, &value, sizeof( T ) );
26+
GetMathEngine()->DataExchangeRaw( result, &value, sizeof( T ) );
2727
}
2828

2929
template<class T>
3030
inline T CTypedMemoryHandle<T>::GetValueAt( int index ) const
3131
{
3232
char result[sizeof(T)];
3333
CTypedMemoryHandle<T> source = *this + index;
34-
MathEngine->DataExchangeRaw( result, source, sizeof( T ) );
34+
GetMathEngine()->DataExchangeRaw( result, source, sizeof( T ) );
3535
T* value = reinterpret_cast<T*>( &result );
3636
return *value;
3737
}
3838

3939
template<class T>
4040
inline void CTypedMemoryHandle<T>::SetValue( T value ) const
4141
{
42-
MathEngine->DataExchangeRaw( *this, &value, sizeof( T ) );
42+
GetMathEngine()->DataExchangeRaw( *this, &value, sizeof( T ) );
4343
}
4444

4545
template<class T>
4646
inline T CTypedMemoryHandle<T>::GetValue() const
4747
{
4848
char result[sizeof(T)];
49-
MathEngine->DataExchangeRaw( result, *this, sizeof( T ) );
49+
GetMathEngine()->DataExchangeRaw( result, *this, sizeof( T ) );
5050
T* value = reinterpret_cast<T*>( &result );
5151
return *value;
5252
}

NeoMathEngine/include/NeoMathEngine/NeoMathEngine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1234,7 +1234,7 @@ class NEOMATHENGINE_API IMathEngine : public IDnnEngine {
12341234
// This object should be destroyed using the standard delete operator after use.
12351235
virtual IPerformanceCounters* CreatePerformanceCounters( bool isTimeOnly = false ) const = 0;
12361236

1237-
// For Distributed only
1237+
// Methods group for the DnnDistributed execution only
12381238
virtual CMathEngineDistributedInfo GetDistributedInfo() { return CMathEngineDistributedInfo(); }
12391239
virtual void AllReduce( const CFloatHandle& handle, int size ) = 0;
12401240
virtual void Broadcast( const CFloatHandle& handle, int size, int root ) = 0;

NeoMathEngine/src/CPU/CpuMathEngine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ int NEOMATHENGINE_API FloatAlignment = CCPUInfo::DefineFloatAlignment();
5151
CCpuMathEngine::CCpuMathEngine( size_t _memoryLimit,
5252
std::shared_ptr<CMultiThreadDistributedCommunicator> communicator,
5353
const CMathEngineDistributedInfo& distributedInfo ) :
54+
CMemoryEngineMixin( 0 ),
5455
floatAlignment( FloatAlignment ),
5556
communicator( communicator ),
5657
distributedInfo( distributedInfo ),

NeoMathEngine/src/GPU/CUDA/CudaMathEngine.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ const int CudaMemoryAlignment = 4;
4040

4141
CCudaMathEngine::CCudaMathEngine( const CCusparse* _cusparse, const CCublas* _cublas,
4242
std::unique_ptr<CCudaDevice>& _device, int flags ) :
43+
CMemoryEngineMixin( 0 ),
4344
loader( CDllLoader::CUDA_DLL ),
4445
cusparse( _cusparse ),
4546
cublas( _cublas ),

NeoMathEngine/src/GPU/Metal/MetalMathEngine.mm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ bool LoadMetalEngineInfo( CMathEngineInfo& info )
6666
const int MetalMemoryAlignment = 16;
6767

6868
CMetalMathEngine::CMetalMathEngine( size_t memoryLimit ) :
69+
CMemoryEngineMixin( 0 ),
6970
queue( new CMetalCommandQueue() )
7071
{
7172
ASSERT_EXPR( queue->Create() );

NeoMathEngine/src/GPU/Vulkan/VulkanMathEngine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ bool LoadVulkanEngineInfo( const CVulkanDll& dll, std::vector< CMathEngineInfo,
6969
constexpr int VulkanMemoryAlignment = 16;
7070

7171
CVulkanMathEngine::CVulkanMathEngine( std::unique_ptr<const CVulkanDevice>& _device, size_t memoryLimit ) :
72+
CMemoryEngineMixin( 0 ),
7273
dllLoader( CDllLoader::VULKAN_DLL ),
7374
device( std::move( _device ) ),
7475
tmpImages( TVI_Count, nullptr )

NeoMathEngine/src/MemoryEngineMixin.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,25 @@ limitations under the License.
2424

2525
namespace NeoML {
2626

27+
size_t CMemoryEngineMixin::MathEngineEntitiesNumerator = 0;
28+
IMathEngine* CMemoryEngineMixin::MathEngineEntitiesArray[CMemoryHandle::MaxMathEngineEntities]{};
29+
30+
// Get pointer to IMathEngine by the given current entity
31+
IMathEngine* GetMathEngineByIndex( size_t currentEntity )
32+
{
33+
return CMemoryEngineMixin::MathEngineEntitiesArray[currentEntity];
34+
}
35+
36+
// Get current entity from the given pointer to IMathEngine
37+
size_t GetIndexOfMathEngine( const IMathEngine* mathEngine )
38+
{
39+
return ( mathEngine == nullptr )
40+
? CMemoryHandle::MathEngineEntityInvalid
41+
: static_cast<const CMemoryEngineMixin*>( mathEngine )->CurrentEntity;
42+
}
43+
44+
//---------------------------------------------------------------------------------------------------------------------
45+
2746
void CMemoryEngineMixin::InitializeMemory( IRawMemoryManager* _rawManager, size_t _memoryLimit, int _memoryAlignment,
2847
bool _reuse, bool _hostStack )
2948
{

NeoMathEngine/src/MemoryEngineMixin.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,28 @@ class CMemoryEngineMixin : public IMathEngine {
5959
std::unique_ptr<IStackAllocator, CStackAllocatorDeleter> HostStackAllocator; // stack allocator for regular memory
6060

6161
void CleanUpSpecial() override {}
62+
63+
// Everything below is needed to avoid an excess (8-byte) field in each CMemoryHandle
64+
65+
// Special constructor
66+
explicit CMemoryEngineMixin( int/*cannot be no call*/ ) :
67+
CurrentEntity( ++MathEngineEntitiesNumerator )
68+
{
69+
ASSERT_EXPR( CurrentEntity < CMemoryHandle::MaxMathEngineEntities );
70+
MathEngineEntitiesArray[CurrentEntity] = this;
71+
}
72+
73+
// Generator of indices for all IMathEngine entities,
74+
// Incremented every time a new MathEngine is created, to generate its CurrentEntity value.
75+
static size_t MathEngineEntitiesNumerator;
76+
// Array for pointers to all entities of IMathEngine
77+
// No cache ping-pong, because the pointers are created once and never change
78+
static IMathEngine* MathEngineEntitiesArray[CMemoryHandle::MaxMathEngineEntities];
79+
// Index of the current IMathEngine entity
80+
const size_t CurrentEntity;
81+
82+
friend IMathEngine* GetMathEngineByIndex( size_t currentEntity );
83+
friend size_t GetIndexOfMathEngine( const IMathEngine* mathEngine );
6284
};
6385

6486
} // namespace NeoML

0 commit comments

Comments
 (0)