Added wrapper functions and moved the `1.0f -` subtraction into them

GuyAv46 · GuyAv46 · commit 7b41eca7051d · 2022-01-19T18:22:59.000+02:00
diff --git a/hnswlib/space_ip.h b/hnswlib/space_ip.h
@@ -4,21 +4,27 @@
 namespace hnswlib {
 
     static float
-    InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
+    InnerProduct_impl(const void *pVect1, const void *pVect2, const void *qty_ptr) {
         size_t qty = *((size_t *) qty_ptr);
         float res = 0;
         for (unsigned i = 0; i < qty; i++) {
             res += ((float *) pVect1)[i] * ((float *) pVect2)[i];
         }
-        return (1.0f - res);
+        return res;
 
     }
 
+    static float
+    InnerProduct(const void *pVect1, const void *pVect2, const void *qty_ptr) {
+        return 1.0f - InnerProduct_impl(pVect1, pVect2, qty_ptr);
+    }
+
+#if defined(USE_AVX) || defined(USE_SSE)
 #if defined(USE_AVX)
 
 // Favor using AVX if available.
     static float
-    InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    InnerProductSIMD4Ext_impl(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN32 TmpRes[8];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -61,13 +67,13 @@ namespace hnswlib {
 
         _mm_store_ps(TmpRes, sum_prod);
         float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];;
-        return 1.0f - sum;
-}
+        return sum;
+    }
 
 #elif defined(USE_SSE)
 
     static float
-    InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    InnerProductSIMD4Ext_impl(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN32 TmpRes[8];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -119,16 +125,24 @@ namespace hnswlib {
         _mm_store_ps(TmpRes, sum_prod);
         float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
 
-        return 1.0f - sum;
+        return sum;
     }
 
 #endif
+    
+    static float
+    InnerProductSIMD4Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+        return 1.0f - InnerProductSIMD4Ext_impl(pVect1v, pVect2v, qty_ptr);
+    }
 
+#endif
 
+
+#if defined(USE_SSE) || defined(USE_AVX) || defined(USE_AVX512)
 #if defined(USE_AVX512)
 
     static float
-    InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    InnerProductSIMD16Ext_impl(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN64 TmpRes[16];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -154,13 +168,13 @@ namespace hnswlib {
         _mm512_store_ps(TmpRes, sum512);
         float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7] + TmpRes[8] + TmpRes[9] + TmpRes[10] + TmpRes[11] + TmpRes[12] + TmpRes[13] + TmpRes[14] + TmpRes[15];
 
-        return 1.0f - sum;
+        return sum;
     }
 
 #elif defined(USE_AVX)
 
     static float
-    InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    InnerProductSIMD16Ext_impl(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN32 TmpRes[8];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -192,13 +206,13 @@ namespace hnswlib {
         _mm256_store_ps(TmpRes, sum256);
         float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3] + TmpRes[4] + TmpRes[5] + TmpRes[6] + TmpRes[7];
 
-        return 1.0f - sum;
+        return sum;
     }
 
 #elif defined(USE_SSE)
 
-      static float
-      InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+    static float
+    InnerProductSIMD16Ext_impl(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         float PORTABLE_ALIGN32 TmpRes[8];
         float *pVect1 = (float *) pVect1v;
         float *pVect2 = (float *) pVect2v;
@@ -239,7 +253,14 @@ namespace hnswlib {
         _mm_store_ps(TmpRes, sum_prod);
         float sum = TmpRes[0] + TmpRes[1] + TmpRes[2] + TmpRes[3];
 
-        return 1.0f - sum;
+        return sum;
+    }
+
+#endif
+
+    static float
+    InnerProductSIMD16Ext(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
+        return 1.0f - InnerProductSIMD16Ext_impl(pVect1v, pVect2v, qty_ptr);
     }
 
 #endif
@@ -249,28 +270,28 @@ namespace hnswlib {
     InnerProductSIMD16ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         size_t qty = *((size_t *) qty_ptr);
         size_t qty16 = qty >> 4 << 4;
-        float res = InnerProductSIMD16Ext(pVect1v, pVect2v, &qty16);
+        float res = InnerProductSIMD16Ext_impl(pVect1v, pVect2v, &qty16);
         float *pVect1 = (float *) pVect1v + qty16;
         float *pVect2 = (float *) pVect2v + qty16;
 
         size_t qty_left = qty - qty16;
-        float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
-        return res + res_tail - 1.0f;
+        float res_tail = InnerProduct_impl(pVect1, pVect2, &qty_left);
+        return 1.0f - (res + res_tail);
     }
 
     static float
     InnerProductSIMD4ExtResiduals(const void *pVect1v, const void *pVect2v, const void *qty_ptr) {
         size_t qty = *((size_t *) qty_ptr);
         size_t qty4 = qty >> 2 << 2;
 
-        float res = InnerProductSIMD4Ext(pVect1v, pVect2v, &qty4);
+        float res = InnerProductSIMD4Ext_impl(pVect1v, pVect2v, &qty4);
         size_t qty_left = qty - qty4;
 
         float *pVect1 = (float *) pVect1v + qty4;
         float *pVect2 = (float *) pVect2v + qty4;
-        float res_tail = InnerProduct(pVect1, pVect2, &qty_left);
+        float res_tail = InnerProduct_impl(pVect1, pVect2, &qty_left);
 
-        return res + res_tail - 1.0f;
+        return 1.0f - (res + res_tail);
     }
 #endif
 
@@ -311,5 +332,4 @@ namespace hnswlib {
     ~InnerProductSpace() {}
     };
 
-
 }