Commit 2d7baaf

vim : streaming and more (#2495)
* Update Vim plugin
* Remove getbufoneline usage, add an insert-mode keybind example. getbufoneline() appears to be a recently added function and has been replaced with getbufline() for compatibility. An additional example explaining how to add a keybind that works in insert mode was added.
1 parent f3c3b4b commit 2d7baaf
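For reference, getbufoneline({buf}, {lnum}) returns a single line as a String and only exists in recent Vim builds, while getbufline({buf}, {lnum}) returns a List and is widely available, so the compatible form simply indexes the result. A minimal sketch of the equivalence (illustrative, not taken from the diff; the line number is arbitrary):

" Recent Vim only:
let l:line = getbufoneline(bufnr('%'), 10)
" Portable equivalent used by the plugin:
let l:line = getbufline(bufnr('%'), 10)[0]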

File tree

2 files changed: +132, -23 lines

examples/llama.vim

Lines changed: 132 additions & 0 deletions
@@ -0,0 +1,132 @@
" Requires an already running llama.cpp server
" To install either copy or symlink to ~/.vim/autoload/llama.vim
" Then start with either :call llama#doLlamaGen(),
" or add a keybind to your vimrc such as
" nnoremap Z :call llama#doLlamaGen()<CR>
" Similarly, you could add an insert mode keybind with
" inoremap <C-B> <Cmd>call llama#doLlamaGen()<CR>
"
" g:llama_api_url and g:llama_overrides can be configured in your .vimrc
" let g:llama_api_url = "192.168.1.10:8080"
" llama_overrides can also be set through buffer/window scopes. For instance
" autocmd filetype python let b:llama_overrides = {"temp": 0.2}
" Could be added to your .vimrc to automatically set a lower temperature when
" editing a python script
" Additionally, an override dict can be stored at the top of a file
" !*{"stop": ["User:"]}
" Could be added to the start of your chatlog.txt to set the stopping token
" These parameter dicts are merged together from lowest to highest priority:
" server default -> g:llama_overrides -> w:llama_overrides ->
" b:llama_overrides -> in file (!*) overrides
"
" Sublists (like logit_bias and stop) are overridden, not merged
" Example override:
" !*{"logit_bias": [[13, -5], [2, false]], "temperature": 1, "top_k": 5, "top_p": 0.5, "n_predict": 256, "repeat_last_n": 256, "repeat_penalty": 1.17647}
if !exists("g:llama_api_url")
  let g:llama_api_url = "127.0.0.1:8080"
endif
if !exists("g:llama_overrides")
  let g:llama_overrides = {}
endif
" Default request parameters and curl command template; index 2 (the JSON body)
" and, for the tokenize helpers, index 8 (the URL) are patched before each call
const s:querydata = {"n_predict": 256, "stop": [ "\n" ], "stream": v:true }
const s:curlcommand = ['curl','--data-raw', "{\"prompt\":\"### System:\"}", '--silent', '--no-buffer', '--request', 'POST', '--url', g:llama_api_url .. '/completion', '--header', "Content-Type: application/json"]
" Per-buffer line number that streamed text is currently being appended to
let s:linedict = {}

func s:callbackHandler(bufn, channel, msg)
  if len(a:msg) < 3
    return
  elseif a:msg[0] == "d"
    " Streamed responses arrive as server-sent events; drop the "data: " prefix
    let l:msg = a:msg[6:-1]
  else
    let l:msg = a:msg
  endif
  let l:decoded_msg = json_decode(l:msg)
  let l:newtext = split(l:decoded_msg['content'], "\n", 1)
  if len(l:newtext) > 0
    " Append the first fragment to the line currently being generated
    call setbufline(a:bufn, s:linedict[a:bufn], getbufline(a:bufn, s:linedict[a:bufn])[0] .. l:newtext[0])
  else
    echo "nothing genned"
  endif
  if len(l:newtext) > 1
    " Any further fragments become new lines below it
    let l:failed = appendbufline(a:bufn, s:linedict[a:bufn], l:newtext[1:-1])
    let s:linedict[a:bufn] = s:linedict[a:bufn] + len(l:newtext) - 1
  endif
  if has_key(l:decoded_msg, "stop") && l:decoded_msg.stop
    echo "Finished generation"
  endif
endfunction

func llama#doLlamaGen()
  " Invoking again while a job is running cancels it
  if exists("b:job")
    if job_status(b:job) == "run"
      call job_stop(b:job)
      return
    endif
  endif

  let l:cbuffer = bufnr("%")
  let s:linedict[l:cbuffer] = line('$')
  let l:buflines = getbufline(l:cbuffer, 1, 1000)
  " Merge overrides from lowest to highest priority: g: -> w: -> b: -> in-file !*
  let l:querydata = copy(s:querydata)
  call extend(l:querydata, g:llama_overrides)
  if exists("w:llama_overrides")
    call extend(l:querydata, w:llama_overrides)
  endif
  if exists("b:llama_overrides")
    call extend(l:querydata, b:llama_overrides)
  endif
  if l:buflines[0][0:1] == '!*'
    let l:userdata = json_decode(l:buflines[0][2:-1])
    call extend(l:querydata, l:userdata)
    " The override line itself is not sent as part of the prompt
    let l:buflines = l:buflines[1:-1]
  endif
  let l:querydata.prompt = join(l:buflines, "\n")
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let b:job = job_start(l:curlcommand, {"callback": function("s:callbackHandler", [l:cbuffer])})
endfunction

" Echoes the tokenization of the provided string, or of the text from the
" cursor to the end of the word
" Onus is placed on the user to include the preceding space
func llama#tokenizeWord(...)
  if (a:0 > 0)
    let l:input = a:1
  else
    exe "normal \"*ye"
    let l:input = @*
  endif
  let l:querydata = {"content": l:input}
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
  let s:token_job = job_start(l:curlcommand, {"callback": function("s:tokenizeWordCallback", [l:input])})
endfunction

func s:tokenizeWordCallback(plaintext, channel, msg)
  echo '"' .. a:plaintext .. '" - ' .. string(json_decode(a:msg).tokens)
endfunction


" Echoes the token count of the entire buffer (or of a provided string)
" Example usage :echo llama#tokenCount()
func llama#tokenCount(...)
  if (a:0 > 0)
    let l:buflines = a:1
  else
    let l:buflines = getline(1, 1000)
    " Skip a leading in-file override line so it is not counted
    if l:buflines[0][0:1] == '!*'
      let l:buflines = l:buflines[1:-1]
    endif
    let l:buflines = join(l:buflines, "\n")
  endif
  let l:querydata = {"content": l:buflines}
  let l:curlcommand = copy(s:curlcommand)
  let l:curlcommand[2] = json_encode(l:querydata)
  let l:curlcommand[8] = g:llama_api_url .. "/tokenize"
  let s:token_job = job_start(l:curlcommand, {"callback": "s:tokenCountCallback"})
endfunction

func s:tokenCountCallback(channel, msg)
  let resp = json_decode(a:msg)
  echo len(resp.tokens)
endfunction
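
An illustrative note on the streaming format the callback above handles: with "stream": v:true the llama.cpp server replies with server-sent events, one JSON chunk per line prefixed with "data: " (six characters, hence a:msg[6:-1]). Roughly, the chunks passed to s:callbackHandler look like the following (illustrative values; field names follow the llama.cpp /completion endpoint, and extra fields may vary by server version):

" data: {"content": " Hello", "stop": false}
" data: {"content": " world!", "stop": false}
" data: {"content": "", "stop": true}
" After the prefix is stripped, json_decode() yields a Dict; its "content" is
" appended at line s:linedict[bufnr], and a chunk with "stop" true ends the run.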

examples/llm.vim

Lines changed: 0 additions & 23 deletions
This file was deleted.
