Commit 1ac24ff

Disable flaky test points
It is not yet clear what causes Ollama to be unreliable in these situations, but we do see differences between machines. We will need to analyse this separately and report it upstream.
1 parent 392749f commit 1ac24ff
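The pattern used in this commit relies on MATLAB's assumption qualifications: a failing assumption marks a test as Filtered/Incomplete rather than Failed, so a known-flaky test stops breaking CI but still shows up in test reports. A minimal sketch of the idea (class and method names here are hypothetical, not from this repository):

```matlab
classdef tFlakyExample < matlab.unittest.TestCase
    methods (Test)
        function flakyPoint(testCase)
            % A failing assumption filters the test (Incomplete),
            % unlike verifyTrue/assertTrue, which would fail it.
            testCase.assumeTrue(false, ...
                "disabled due to Ollama/llama.cpp not honoring parameter reliably");

            % Nothing below this line runs while the assumption fails,
            % so the flaky checks are skipped without deleting them.
        end
    end
end
```

Because the test body is left intact, re-enabling it later only requires removing the `assumeTrue` call.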

File tree

1 file changed: 12 additions (+), 2 deletions (−)


tests/tollamaChat.m

Lines changed: 12 additions & 2 deletions
@@ -50,8 +50,13 @@ function extremeTopK(testCase)
         end
 
         function extremeTfsZ(testCase)
-            % setting tfs_z to z=0 leaves no random choice,
-            % so we expect to get a fixed response.
+            %% This should work, and it does on some computers. On others, Ollama
+            %% receives the parameter, but either Ollama or llama.cpp fails to
+            %% honor it correctly.
+            testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
+
+            % setting tfs_z to z=0 leaves no random choice, but degrades to
+            % greedy sampling, so we expect to get a fixed response.
             chat = ollamaChat("mistral",TailFreeSamplingZ=0);
             prompt = "Sampling with tfs_z=0 returns a definite answer.";
             response1 = generate(chat,prompt);
@@ -70,6 +75,11 @@ function stopSequences(testCase)
         end
 
         function seedFixesResult(testCase)
+            %% This should work, and it does on some computers. On others, Ollama
+            %% receives the parameter, but either Ollama or llama.cpp fails to
+            %% honor it correctly.
+            testCase.assumeTrue(false,"disabled due to Ollama/llama.cpp not honoring parameter reliably");
+
             chat = ollamaChat("mistral");
             response1 = generate(chat,"hi",Seed=1234);
             response2 = generate(chat,"hi",Seed=1234);
