Skip to content

golang test large context

Benson Wong edited this page Jun 3, 2025 · 2 revisions

This is a simple Go program that sends a very large request to an OpenAI-compatible server. It is useful for testing prompt processing speed, memory usage, etc.

This was used to test issue 12433 in llama.cpp. It is shared here because it could be useful for testing other servers.

Usage

$ cat my-large-file.txt | head -n 8000 | go run ./main.go -model gemma-test -max-tokens 75 -prompt 'what is in <stdin>?' | jq .

Source

package main

import (
	"bytes"
	"encoding/json"
	"flag"
	"io"
	"log"
	"net/http"
	"os"
)

// main sends piped stdin (if any) plus an optional prompt as a single "user"
// message to a chat completions endpoint and copies the raw response body to
// stdout. It exits non-zero when no input was supplied, when the request
// fails, or when the server answers with a non-2xx status.
func main() {
	const defaultMaxTokens = 100

	var (
		maxTokens int
		prompt    string
		apiURL    string
		model     string
	)

	flag.IntVar(&maxTokens, "max-tokens", defaultMaxTokens, "Maximum tokens to generate")
	flag.StringVar(&apiURL, "apiUrl", "http://localhost:8080/v1/chat/completions", "url to completions endpoint")
	flag.StringVar(&model, "model", "gemma", "model name")
	flag.StringVar(&prompt, "prompt", "", "Additional text to append to input")

	flag.Parse()

	stdinContent, err := readPipedStdin()
	if err != nil {
		log.Fatalf("Error reading stdin: %v", err)
	}

	// Combine inputs; an empty result means neither source supplied anything.
	fullContent := buildContent(stdinContent, prompt)
	if fullContent == "" {
		log.Fatal("Error: No input provided (either from stdin or --prompt)")
	}

	// Create request payload
	type message struct {
		Role    string `json:"role"`
		Content string `json:"content"`
	}
	requestBody := struct {
		Model     string    `json:"model"`
		MaxTokens int       `json:"max_tokens"`
		Messages  []message `json:"messages"`
	}{
		Model:     model,
		MaxTokens: maxTokens,
		Messages:  []message{{Role: "user", Content: fullContent}},
	}

	jsonData, err := json.Marshal(requestBody)
	if err != nil {
		log.Fatalf("Error creating JSON: %v", err)
	}

	// Create HTTP request
	req, err := http.NewRequest(http.MethodPost, apiURL, bytes.NewReader(jsonData))
	if err != nil {
		log.Fatalf("Error creating request: %v", err)
	}
	req.Header.Set("Content-Type", "application/json")

	// Send request. NOTE: no client timeout is configured, so a slow server is
	// waited on indefinitely.
	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		log.Fatalf("Error sending request: %v", err)
	}

	// Output response. The body is closed explicitly rather than deferred
	// because log.Fatalf exits without running deferred calls.
	_, copyErr := io.Copy(os.Stdout, resp.Body)
	resp.Body.Close()
	if copyErr != nil {
		log.Fatalf("Error reading response: %v", copyErr)
	}

	// The body (often a JSON error object) has already been written to stdout;
	// report the failure on stderr and through the exit code as well.
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		log.Fatalf("Error: server returned %s", resp.Status)
	}
}

// readPipedStdin returns everything readable from stdin when stdin is not a
// character device (i.e. it is a pipe or a redirected file). When stdin is a
// character device such as a terminal it returns nil without reading, so the
// program does not block waiting for interactive input.
func readPipedStdin() ([]byte, error) {
	stat, err := os.Stdin.Stat()
	if err != nil {
		return nil, err
	}
	if stat.Mode()&os.ModeCharDevice != 0 {
		return nil, nil
	}
	return io.ReadAll(os.Stdin)
}

// buildContent assembles the message text. Non-empty stdin data is wrapped in
// <stdin>...</stdin> tags; a non-empty prompt is appended after it, separated
// by a blank line. It returns "" when both inputs are empty.
func buildContent(stdinContent []byte, prompt string) string {
	var content string
	if len(stdinContent) > 0 {
		content = "<stdin>" + string(stdinContent) + "</stdin>"
	}
	if prompt == "" {
		return content
	}
	if content != "" {
		content += "\n\n"
	}
	return content + prompt
}
Clone this wiki locally