
LLM Inference

ngn has an LLM toolbox mod used for inference. You must install ngn with the -f llm feature for tbx::llm to be available.
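The two functions imported below are all this page uses: load reads a GGUF model from disk and returns a Result, and stream returns a channel that yields generated tokens. As a minimal sketch of that flow (print is assumed here as a stand-in output builtin; it is not part of tbx::llm), inference outside of a server looks like this:

import { load, stream } from "tbx::llm"

match (load("./tinyllama.gguf")) {
    Ok(m) => {
        // stream() yields tokens over a channel as they are generated
        const ch = stream(m, "Write a haiku about rivers.")
        for (token in <-? ch) {
            print(token)   // assumed output builtin; use whatever sink you need
        }
    },
    Error(e) => print("failed to load model: ${e}")
}

The example below wires the same two calls into an HTTP handler that streams tokens back to the client as they are generated.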

import { load, stream } from "tbx::llm"

// Load the model once at startup; load returns a Result.
global MODEL = load("./tinyllama.gguf")

fn handler(req: Request): StreamingResponse {
    const prompt = req.body
    const chunks = channel<string>()

    // Generate in a background thread so the response can be returned immediately.
    thread(|| {
        match (MODEL) {
            Ok(m) => {
                const llm_ch = stream(m, prompt)
                for (token in <-? llm_ch) {
                    // Stop if the response channel has been closed (e.g. the client disconnected).
                    if (chunks.isClosed()) {
                        llm_ch.close()
                        break
                    }
                    chunks <- token
                }
            },
            Error(e) => chunks <- "Error: ${e}"
        }
        chunks.close()
    })

    return StreamingResponse { body: chunks }
}

export default { fetch: handler }
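A few details worth noting: the model is loaded once into the global MODEL rather than per request; generation runs in a background thread so the StreamingResponse can be returned right away; and the thread checks chunks.isClosed() on every token so it can close the model's stream and stop generating once the response channel is no longer being read (for example, because the client disconnected). The final chunks.close() marks the end of the streamed body, and the Error arm turns a failed model load into a single error chunk instead of crashing the handler.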