Flush STDOUT when server starts listening. (ggml-org#651)

ira-peach · web-flow · commit e00e17b3f98d · 2024-01-31T14:40:45.000+08:00
This works around a Win32 issue when piping output from a PyInstaller context, such as when doing so in a perl script or to an output file. Print statements from a Python context don't properly get output unless flushed. This strategically flushes the print statements so no information is lost, though it may be better to flush all print statements in a Python context via a subroutine wrapper. See also: https://mail.python.org/pipermail/python-bugs-list/2004-August/024923.html https://stackoverflow.com/a/466849 https://stackoverflow.com/q/62693079
diff --git a/koboldcpp.py b/koboldcpp.py
@@ -2433,7 +2433,9 @@ def main(launch_args,start_server=True):
 
     modelname = os.path.abspath(args.model_param)
     print(args)
-    print(f"==========\nLoading model: {modelname} \n[Threads: {args.threads}, BlasThreads: {args.blasthreads}, SmartContext: {args.smartcontext}, ContextShift: {not (args.noshift)}]")
+    # Flush stdout for win32 issue with regards to piping in terminals,
+    # especially before handing over to C++ context.
+    print(f"==========\nLoading model: {modelname} \n[Threads: {args.threads}, BlasThreads: {args.blasthreads}, SmartContext: {args.smartcontext}, ContextShift: {not (args.noshift)}]", flush=True)
     loadok = load_model(modelname)
     print("Load Model OK: " + str(loadok))
 
@@ -2507,10 +2509,12 @@ def onready_subprocess():
     if start_server:
         if args.remotetunnel:
             setuptunnel()
-        print(f"======\nPlease connect to custom endpoint at {epurl}")
+        # Flush stdout for previous win32 issue so the client can see output.
+        print(f"======\nPlease connect to custom endpoint at {epurl}", flush=True)
         asyncio.run(RunServerMultiThreaded(args.host, args.port, embedded_kailite, embedded_kcpp_docs))
     else:
-        print(f"Server was not started, main function complete. Idling.")
+        # Flush stdout for previous win32 issue so the client can see output.
+        print(f"Server was not started, main function complete. Idling.", flush=True)
 
 def run_in_queue(launch_args, input_queue, output_queue):
     main(launch_args, start_server=False)