@@ -118,7 +118,16 @@ typedef void* thread_ret_t;
118
118
#define GGML_ALIGNED_MALLOC (size ) _aligned_malloc(size, GGML_MEM_ALIGN)
119
119
#define GGML_ALIGNED_FREE (ptr ) _aligned_free(ptr)
120
120
#else
121
- #define GGML_ALIGNED_MALLOC (size ) aligned_alloc(GGML_MEM_ALIGN, size)
121
+ inline static void * ggml_aligned_malloc (size_t size ) {
122
+ void * aligned_memory = NULL ;
123
+ int result = posix_memalign (& aligned_memory , GGML_MEM_ALIGN , size );
124
+ if (result != 0 ) {
125
+ // posix_memalign reported an error (nonzero result): signal allocation failure to the caller with NULL
126
+ return NULL ;
127
+ }
128
+ return aligned_memory ;
129
+ }
130
+ #define GGML_ALIGNED_MALLOC (size ) ggml_aligned_malloc(size)
122
131
#define GGML_ALIGNED_FREE (ptr ) free(ptr)
123
132
#endif
124
133
@@ -531,31 +540,31 @@ inline static float vaddvq_f32(float32x4_t v) {
531
540
return vgetq_lane_f32 (v , 0 ) + vgetq_lane_f32 (v , 1 ) + vgetq_lane_f32 (v , 2 ) + vgetq_lane_f32 (v , 3 );
532
541
}
533
542
534
- inline float vminvq_f32 (float32x4_t v ) {
543
+ float vminvq_f32 (float32x4_t v ) {
535
544
return
536
545
MIN (MIN (vgetq_lane_f32 (v , 0 ), vgetq_lane_f32 (v , 1 )),
537
546
MIN (vgetq_lane_f32 (v , 2 ), vgetq_lane_f32 (v , 3 )));
538
547
}
539
548
540
- inline float vmaxvq_f32 (float32x4_t v ) {
549
+ float vmaxvq_f32 (float32x4_t v ) {
541
550
return
542
551
MAX (MAX (vgetq_lane_f32 (v , 0 ), vgetq_lane_f32 (v , 1 )),
543
552
MAX (vgetq_lane_f32 (v , 2 ), vgetq_lane_f32 (v , 3 )));
544
553
}
545
554
546
- inline int8x8_t vzip1_s8 (int8x8_t a , int8x8_t b ) {
555
+ int8x8_t vzip1_s8 (int8x8_t a , int8x8_t b ) {
547
556
return vget_low_s8 (vcombine_s8 (a , b ));
548
557
}
549
558
550
- inline int8x8_t vzip2_s8 (int8x8_t a , int8x8_t b ) {
559
+ int8x8_t vzip2_s8 (int8x8_t a , int8x8_t b ) {
551
560
return vget_high_s8 (vcombine_s8 (a , b ));
552
561
}
553
562
554
- inline uint8x8_t vzip1_u8 (uint8x8_t a , uint8x8_t b ) {
563
+ uint8x8_t vzip1_u8 (uint8x8_t a , uint8x8_t b ) {
555
564
return vget_low_u8 (vcombine_u8 (a , b ));
556
565
}
557
566
558
- inline uint8x8_t vzip2_u8 (uint8x8_t a , uint8x8_t b ) {
567
+ uint8x8_t vzip2_u8 (uint8x8_t a , uint8x8_t b ) {
559
568
return vget_high_u8 (vcombine_u8 (a , b ));
560
569
}
561
570
0 commit comments