LLM Inference
ngn has an LLM toolbox mod used for inference. You must install ngn with the -f llm feature for tbx::llm to be available.
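The exact install invocation depends on how ngn is distributed; a hypothetical sketch, assuming an install subcommand that accepts the feature flag (adapt it to however you install ngn, only the -f llm flag is the required piece):

# Hypothetical install command; the essential part is enabling the llm feature
ngn install -f llm

The example below loads a GGUF model once at startup and streams generated tokens back to the client over a channel.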
import { load, stream } from "tbx::llm"
global MODEL = load("./tinyllama.gguf")
fn handler(req: Request): StreamingResponse {
  const prompt = req.body
  const chunks = channel<string>()

  // Generate on a background thread while the response streams from the channel
  thread(|| {
    match (MODEL) {
      Ok(m) => {
        const llm_ch = stream(m, prompt)
        for (token in <-? llm_ch) {
          // If the client disconnected, stop the model stream and bail out
          if (chunks.isClosed()) {
            llm_ch.close()
            break
          }
          chunks <- token
        }
      },
      Error(e) => chunks <- "Error: ${e}"
    }
    chunks.close()
  })

  return StreamingResponse { body: chunks }
}
export default { fetch: handler }
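To try the endpoint, use any HTTP client that does not buffer the response body; tokens then appear as the model produces them. A sketch with curl, assuming the app listens on localhost:8080 (the host, port, and path are assumptions, adjust them for your deployment):

# -N disables curl's output buffering so tokens print as they stream
# The URL is an assumption; point it at wherever the ngn app is served
curl -N --data "Tell me about llamas" http://localhost:8080/

Because the worker thread checks chunks.isClosed() before sending each token, a client that disconnects mid-stream also stops generation: the handler closes the model's channel and exits the loop.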