saga.tokenizers
Wordpiece.Builder
saga
saga.models
type builder
val create : unit -> builder
Create a new builder with default settings
val files : builder -> string -> builder
Set vocabulary file path
val vocab : builder -> vocab -> builder
Set vocabulary directly
val unk_token : builder -> string -> builder
Set unknown token (default: "UNK")
val continuing_subword_prefix : builder -> string -> builder
Set prefix for continuing subwords (default: "##")
val max_input_chars_per_word : builder -> int -> builder
Set maximum input characters per word (default: 100)
val build : builder -> t
Build the WordPiece model