saga.tokenizers
Bpe.Builder
saga
saga.models
type builder
val create : unit -> builder
Create a new builder with default settings
val vocab_and_merges : builder -> vocab -> merges -> builder
Set vocabulary and merges
val cache_capacity : builder -> int -> builder
Set cache capacity (0 to disable)
val dropout : builder -> float -> builder
Set dropout probability (0.0 to 1.0)
val unk_token : builder -> string -> builder
Set unknown token
val continuing_subword_prefix : builder -> string -> builder
Set prefix for continuing subwords
val end_of_word_suffix : builder -> string -> builder
Set suffix for end-of-word tokens
val fuse_unk : builder -> bool -> builder
Set whether to fuse consecutive unknown tokens
val byte_fallback : builder -> bool -> builder
Set whether to use byte-level fallback for unknown characters
val ignore_merges : builder -> bool -> builder
Set whether to ignore merges and output a word directly when it is in the vocabulary
val build : builder -> t
Build the BPE model