Skip to content

Commit 9d972cd

Browse files
committed
[NeoMathEngine] CMemoryHandle less memory consumption
Signed-off-by: Kirill Golikov <[email protected]>
1 parent 81ac696 commit 9d972cd

File tree

9 files changed

+94
-15
lines changed

9 files changed

+94
-15
lines changed

NeoMathEngine/include/NeoMathEngine/MemoryHandle.h

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ limitations under the License.
1616
#pragma once
1717

1818
#include <NeoMathEngine/NeoMathEngineDefs.h>
19+
#include <NeoMathEngine/NeoMathEngineException.h>
1920
#include <cstddef>
2021
#include <type_traits>
2122

@@ -24,10 +25,31 @@ namespace NeoML {
2425
class IMathEngine;
2526
class CMemoryHandleInternal;
2627

28+
// Get pointer to IMathEngine by the given current entity
29+
NEOMATHENGINE_API IMathEngine* GetMathEngineByIndex( size_t currentEntity );
30+
// Get current entity from the given pointer to IMathEngine
31+
NEOMATHENGINE_API size_t GetIndexOfMathEngine( const IMathEngine* mathEngine );
32+
2733
// Wraps the pointer to memory allocated by a math engine
2834
class NEOMATHENGINE_API CMemoryHandle {
35+
private:
36+
#if FINE_PLATFORM( FINE_64_BIT )
37+
static constexpr int MathEngineCountWidth = 10; // compress to bitfield
38+
static constexpr int MathEngineCountShift = ( sizeof( size_t ) * 8 /*bits*/ ) - MathEngineCountWidth;
39+
static constexpr size_t MathEngineEntityInvalid = ( ( size_t( 1 ) << MathEngineCountWidth ) - 1 );
40+
static constexpr size_t MathEngineMaxOffset = size_t( 1 ) << MathEngineCountShift;
41+
#else // FINE_32_BIT
42+
// No compression on 32-bit: full-width values are used only so that the bitfields compile correctly
43+
static constexpr int MathEngineCountWidth = sizeof( size_t ) * 8;
44+
static constexpr int MathEngineCountShift = sizeof( size_t ) * 8;
45+
static constexpr size_t MathEngineEntityInvalid = size_t( -1 );
46+
#endif // FINE_32_BIT
47+
2948
public:
30-
CMemoryHandle() : mathEngine( 0 ), object( 0 ), offset( 0 ) {}
49+
// Upper bound on the number of math engine entities
50+
static constexpr int MaxMathEngineEntities = 1024;
51+
52+
CMemoryHandle() : object( nullptr ), offset( 0 ), entity( MathEngineEntityInvalid ) {}
3153
CMemoryHandle( CMemoryHandle&& other ) = default;
3254
CMemoryHandle( const CMemoryHandle& other ) = default;
3355

@@ -36,23 +58,37 @@ class NEOMATHENGINE_API CMemoryHandle {
3658

3759
bool operator!=( const CMemoryHandle& other ) const { return !operator==( other ); }
3860
bool operator==( const CMemoryHandle& other ) const
39-
{ return mathEngine == other.mathEngine && object == other.object && offset == other.offset; }
61+
{ return object == other.object && offset == other.offset && entity == other.entity; }
4062

41-
bool IsNull() const { return mathEngine == 0 && object == 0 && offset == 0; }
63+
bool IsNull() const { return object == nullptr && offset == 0 && entity == MathEngineEntityInvalid; }
4264

43-
IMathEngine* GetMathEngine() const { return mathEngine; }
65+
IMathEngine* GetMathEngine() const { return GetMathEngineByIndex( entity ); }
4466

4567
protected:
46-
IMathEngine* mathEngine; // the math engine
68+
// The struct takes 16 bytes on x64 and arm-x64, and 12 bytes on x86 and arm-x32
4769
const void* object; // the base object
48-
std::ptrdiff_t offset; // the offset in the base object, in bytes
70+
size_t offset : MathEngineCountShift; // (x64) the less significant bits of size_t stores offset in the base object, in bytes
71+
size_t entity : MathEngineCountWidth; // (x64) the most significant bits of size_t stores the number of IMathEngine entity
4972

5073
friend class CMemoryHandleInternal;
5174

52-
CMemoryHandle( IMathEngine* _mathEngine, const void* _object, ptrdiff_t _offset ) :
53-
mathEngine( _mathEngine ), object( _object ), offset( _offset )
75+
explicit CMemoryHandle( IMathEngine* _mathEngine, const void* _object, ptrdiff_t _offset ) :
76+
CMemoryHandle( _object , _offset, GetIndexOfMathEngine( _mathEngine ) )
5477
{}
55-
CMemoryHandle CopyMemoryHandle( ptrdiff_t shift ) const { return CMemoryHandle( mathEngine, object, offset + shift ); }
78+
79+
CMemoryHandle CopyMemoryHandle( ptrdiff_t shift ) const { return CMemoryHandle( object, offset + shift, entity ); }
80+
81+
private:
82+
explicit CMemoryHandle( const void* _object, ptrdiff_t _offset, size_t _entity ) :
83+
object( _object ), offset( _offset ), entity( _entity )
84+
{
85+
#if FINE_PLATFORM( FINE_64_BIT )
86+
static_assert( MaxMathEngineEntities == ( 1 << MathEngineCountWidth ), "Invalid MaxMathEngineEntities" );
87+
// Checks that the most significant bits do not interfere with the result
88+
ASSERT_EXPR( 0 <= _offset && size_t( _offset ) < MathEngineMaxOffset );
89+
#endif // FINE_64_BIT
90+
ASSERT_EXPR( _entity < MaxMathEngineEntities );
91+
}
5692
};
5793

5894
//------------------------------------------------------------------------------------------------------------

NeoMathEngine/include/NeoMathEngine/MemoryHandle.inl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* Copyright © 2017-2020 ABBYY Production LLC
1+
/* Copyright © 2017-2024 ABBYY
22
33
Licensed under the Apache License, Version 2.0 (the "License");
44
you may not use this file except in compliance with the License.
@@ -23,30 +23,30 @@ template<class T>
2323
inline void CTypedMemoryHandle<T>::SetValueAt( int index, T value ) const
2424
{
2525
CTypedMemoryHandle<T> result = *this + index;
26-
mathEngine->DataExchangeRaw( result, &value, sizeof( T ) );
26+
GetMathEngine()->DataExchangeRaw( result, &value, sizeof( T ) );
2727
}
2828

2929
template<class T>
3030
inline T CTypedMemoryHandle<T>::GetValueAt( int index ) const
3131
{
3232
char result[sizeof(T)];
3333
CTypedMemoryHandle<T> source = *this + index;
34-
mathEngine->DataExchangeRaw( result, source, sizeof( T ) );
34+
GetMathEngine()->DataExchangeRaw( result, source, sizeof( T ) );
3535
T* value = reinterpret_cast<T*>( &result );
3636
return *value;
3737
}
3838

3939
template<class T>
4040
inline void CTypedMemoryHandle<T>::SetValue( T value ) const
4141
{
42-
mathEngine->DataExchangeRaw( *this, &value, sizeof( T ) );
42+
GetMathEngine()->DataExchangeRaw( *this, &value, sizeof( T ) );
4343
}
4444

4545
template<class T>
4646
inline T CTypedMemoryHandle<T>::GetValue() const
4747
{
4848
char result[sizeof(T)];
49-
mathEngine->DataExchangeRaw( result, *this, sizeof( T ) );
49+
GetMathEngine()->DataExchangeRaw( result, *this, sizeof( T ) );
5050
T* value = reinterpret_cast<T*>( &result );
5151
return *value;
5252
}

NeoMathEngine/include/NeoMathEngine/NeoMathEngine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1233,7 +1233,7 @@ class NEOMATHENGINE_API IMathEngine : public IDnnEngine {
12331233
// This object should be destroyed using the standard delete operator after use.
12341234
virtual IPerformanceCounters* CreatePerformanceCounters( bool isTimeOnly = false ) const = 0;
12351235

1236-
// For Distributed only
1236+
// Methods group for the DnnDistributed execution only
12371237
virtual CMathEngineDistributedInfo GetDistributedInfo() { return CMathEngineDistributedInfo(); }
12381238
virtual void AllReduce( const CFloatHandle& handle, int size ) = 0;
12391239
virtual void Broadcast( const CFloatHandle& handle, int size, int root ) = 0;

NeoMathEngine/src/CPU/CpuMathEngine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ int NEOMATHENGINE_API FloatAlignment = CCPUInfo::DefineFloatAlignment();
5151
CCpuMathEngine::CCpuMathEngine( size_t _memoryLimit,
5252
std::shared_ptr<CMultiThreadDistributedCommunicator> communicator,
5353
const CMathEngineDistributedInfo& distributedInfo ) :
54+
IMemoryEngine( 0 ),
5455
floatAlignment( FloatAlignment ),
5556
communicator( communicator ),
5657
distributedInfo( distributedInfo ),

NeoMathEngine/src/GPU/CUDA/CudaMathEngine.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ const int CudaMemoryAlignment = 4;
4040

4141
CCudaMathEngine::CCudaMathEngine( const CCusparse* _cusparse, const CCublas* _cublas,
4242
std::unique_ptr<CCudaDevice>& _device, int flags ) :
43+
IMemoryEngine( 0 ),
4344
loader( CDllLoader::CUDA_DLL ),
4445
cusparse( _cusparse ),
4546
cublas( _cublas ),

NeoMathEngine/src/GPU/Metal/MetalMathEngine.mm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ bool LoadMetalEngineInfo( CMathEngineInfo& info )
6666
const int MetalMemoryAlignment = 16;
6767

6868
CMetalMathEngine::CMetalMathEngine( size_t memoryLimit ) :
69+
IMemoryEngine( 0 ),
6970
queue( new CMetalCommandQueue() )
7071
{
7172
ASSERT_EXPR( queue->Create() );

NeoMathEngine/src/GPU/Vulkan/VulkanMathEngine.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ bool LoadVulkanEngineInfo( const CVulkanDll& dll, std::vector< CMathEngineInfo,
6969
constexpr int VulkanMemoryAlignment = 16;
7070

7171
CVulkanMathEngine::CVulkanMathEngine( std::unique_ptr<const CVulkanDevice>& _device, size_t memoryLimit ) :
72+
IMemoryEngine( 0 ),
7273
dllLoader( CDllLoader::VULKAN_DLL ),
7374
device( std::move( _device ) ),
7475
tmpImages( TVI_Count, nullptr )

NeoMathEngine/src/MemoryEngine.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,23 @@ limitations under the License.
2424

2525
namespace NeoML {
2626

27+
size_t IMemoryEngine::MathEngineEntitiesNumerator = 0;
28+
IMathEngine* IMemoryEngine::MathEngineEntitiesArray[CMemoryHandle::MaxMathEngineEntities]{};
29+
30+
// Get pointer to IMathEngine by the given current entity
31+
IMathEngine* GetMathEngineByIndex( size_t currentEntity )
32+
{
33+
return IMemoryEngine::MathEngineEntitiesArray[currentEntity];
34+
}
35+
36+
// Get current entity from the given pointer to IMathEngine
37+
size_t GetIndexOfMathEngine( const IMathEngine* mathEngine )
38+
{
39+
return static_cast<const IMemoryEngine*>( mathEngine )->CurrentEntity;
40+
}
41+
42+
//---------------------------------------------------------------------------------------------------------------------
43+
2744
void IMemoryEngine::InitializeMemory( IRawMemoryManager* _rawManager, size_t _memoryLimit, int _memoryAlignment,
2845
bool _reuse, bool _hostStack )
2946
{

NeoMathEngine/src/MemoryEngine.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,28 @@ class IMemoryEngine : public IMathEngine {
5959
std::unique_ptr<IStackAllocator, CStackAllocatorDeleter> HostStackAllocator; // stack allocator for regular memory
6060

6161
void CleanUpSpecial() override {}
62+
63+
// Everything below is needed to avoid an extra (8-byte) field in each CMemoryHandle
64+
65+
// Special constructor
66+
explicit IMemoryEngine( int/*cannot be no call*/ ) :
67+
CurrentEntity( ++MathEngineEntitiesNumerator )
68+
{
69+
ASSERT_EXPR( CurrentEntity < CMemoryHandle::MaxMathEngineEntities );
70+
MathEngineEntitiesArray[CurrentEntity] = this;
71+
}
72+
73+
// Generator of indices for all IMathEngine entities,
74+
// Incremented every time a new MathEngine is created, to generate its CurrentEntity value.
75+
static size_t MathEngineEntitiesNumerator;
76+
// Array for pointers to all entities of IMathEngine
77+
// No cache ping-pong, because the pointers are created once and never change
78+
static IMathEngine* MathEngineEntitiesArray[CMemoryHandle::MaxMathEngineEntities];
79+
// Index of the current IMathEngine entity
80+
const size_t CurrentEntity;
81+
82+
friend IMathEngine* GetMathEngineByIndex( size_t currentEntity );
83+
friend size_t GetIndexOfMathEngine( const IMathEngine* mathEngine );
6284
};
6385

6486
} // namespace NeoML

0 commit comments

Comments
 (0)