@@ -21,6 +21,7 @@ macro_rules! type_name {
2121 } ;
2222}
2323
24+ /// Guide object based on Index.
2425#[ pyclass( name = "Guide" , module = "outlines_core" ) ]
2526#[ derive( Clone , Debug , PartialEq , Encode , Decode ) ]
2627pub struct PyGuide {
@@ -30,6 +31,7 @@ pub struct PyGuide {
3031
3132#[ pymethods]
3233impl PyGuide {
34+ /// Creates a Guide object based on Index.
3335 #[ new]
3436 fn __new__ ( index : PyIndex ) -> Self {
3537 PyGuide {
@@ -38,10 +40,12 @@ impl PyGuide {
3840 }
3941 }
4042
43+ /// Retrieves current state id of the Guide.
4144 fn get_state ( & self ) -> StateId {
4245 self . state
4346 }
4447
48+ /// Gets the list of allowed tokens for the current state.
4549 fn get_tokens ( & self ) -> PyResult < Vec < TokenId > > {
4650 self . index
4751 . get_allowed_tokens ( self . state )
@@ -53,6 +57,7 @@ impl PyGuide {
5357 ) ) )
5458 }
5559
60+ /// Guide moves to the next state provided by the token id and returns a list of allowed tokens.
5661 fn advance ( & mut self , token_id : TokenId ) -> PyResult < Vec < TokenId > > {
5762 match self . index . get_next_state ( self . state , token_id) {
5863 Some ( new_state) => {
@@ -66,24 +71,28 @@ impl PyGuide {
6671 }
6772 }
6873
74+ /// Checks if the automaton is in a final state.
6975 fn is_finished ( & self ) -> bool {
7076 self . index . is_final_state ( self . state )
7177 }
7278
79+ /// Gets the debug string representation of the guide.
7380 fn __repr__ ( & self ) -> String {
7481 format ! (
7582 "Guide object with the state={:#?} and {:#?}" ,
7683 self . state, self . index
7784 )
7885 }
7986
87+ /// Gets the string representation of the guide.
8088 fn __str__ ( & self ) -> String {
8189 format ! (
8290 "Guide object with the state={} and {}" ,
8391 self . state, self . index. 0
8492 )
8593 }
8694
95+ /// Compares whether two guides are the same.
8796 fn __eq__ ( & self , other : & PyGuide ) -> bool {
8897 self == other
8998 }
@@ -109,12 +118,14 @@ impl PyGuide {
109118 }
110119}
111120
121+ /// Index object built from a regex and a vocabulary.
/// Exposed to Python as `outlines_core.Index`. Wraps the core `Index` in an `Arc`,
/// so cloning this wrapper shares the same underlying index rather than copying it.
112122#[ pyclass( name = "Index" , module = "outlines_core" ) ]
113123#[ derive( Clone , Debug , PartialEq , Encode , Decode ) ]
114124pub struct PyIndex ( Arc < Index > ) ;
115125
116126#[ pymethods]
117127impl PyIndex {
128+ /// Creates an index from a regex and vocabulary.
118129 #[ new]
119130 fn __new__ ( py : Python < ' _ > , regex : & str , vocabulary : & PyVocabulary ) -> PyResult < Self > {
120131 py. allow_threads ( || {
@@ -124,42 +135,52 @@ impl PyIndex {
124135 } )
125136 }
126137
138+ /// Returns allowed tokens in this state.
127139 fn get_allowed_tokens ( & self , state : StateId ) -> Option < Vec < TokenId > > {
128140 self . 0 . allowed_tokens ( & state)
129141 }
130142
143+ /// Updates the state.
131144 fn get_next_state ( & self , state : StateId , token_id : TokenId ) -> Option < StateId > {
132145 self . 0 . next_state ( & state, & token_id)
133146 }
134147
148+ /// Determines whether the current state is a final state.
135149 fn is_final_state ( & self , state : StateId ) -> bool {
136150 self . 0 . is_final_state ( & state)
137151 }
138152
153+ /// Get all final states.
139154 fn get_final_states ( & self ) -> HashSet < StateId > {
140155 self . 0 . final_states ( ) . clone ( )
141156 }
142157
158+ /// Returns the Index as a Python Dict object.
143159 fn get_transitions ( & self ) -> HashMap < StateId , HashMap < TokenId , StateId > > {
144160 self . 0 . transitions ( ) . clone ( )
145161 }
146162
163+ /// Returns the ID of the initial state of the index.
147164 fn get_initial_state ( & self ) -> StateId {
148165 self . 0 . initial_state ( )
149166 }
150167
168+ /// Gets the debug string representation of the index.
151169 fn __repr__ ( & self ) -> String {
152170 format ! ( "{:#?}" , self . 0 )
153171 }
154172
173+ /// Gets the string representation of the index.
155174 fn __str__ ( & self ) -> String {
156175 format ! ( "{}" , self . 0 )
157176 }
158177
178+ /// Compares whether two indexes are the same.
159179 fn __eq__ ( & self , other : & PyIndex ) -> bool {
160180 * self . 0 == * other. 0
161181 }
162182
183+ /// Makes a deep copy of the Index.
163184 fn __deepcopy__ ( & self , _py : Python < ' _ > , _memo : Py < PyDict > ) -> Self {
164185 PyIndex ( Arc :: new ( ( * self . 0 ) . clone ( ) ) )
165186 }
@@ -185,12 +206,14 @@ impl PyIndex {
185206 }
186207}
187208
209+ /// LLM vocabulary.
/// Exposed to Python as `outlines_core.Vocabulary`; a newtype wrapper that owns
/// a core `Vocabulary` value.
188210#[ pyclass( name = "Vocabulary" , module = "outlines_core" ) ]
189211#[ derive( Clone , Debug , Encode , Decode ) ]
190212pub struct PyVocabulary ( Vocabulary ) ;
191213
192214#[ pymethods]
193215impl PyVocabulary {
216+ /// Creates a vocabulary from eos token id and a map of tokens to token ids.
194217 #[ new]
195218 fn __new__ ( py : Python < ' _ > , eos_token_id : TokenId , map : Py < PyAny > ) -> PyResult < PyVocabulary > {
196219 if let Ok ( dict) = map. extract :: < HashMap < String , Vec < TokenId > > > ( py) {
@@ -213,6 +236,7 @@ impl PyVocabulary {
213236 }
214237 }
215238
239+ /// Creates the vocabulary of a pre-trained model.
216240 #[ staticmethod]
217241 #[ pyo3( signature = ( model, revision=None , token=None ) ) ]
218242 fn from_pretrained (
@@ -231,6 +255,7 @@ impl PyVocabulary {
231255 Ok ( PyVocabulary ( v) )
232256 }
233257
258+ /// Inserts new token with token_id or extends list of token_ids if token already present.
234259 fn insert ( & mut self , py : Python < ' _ > , token : Py < PyAny > , token_id : TokenId ) -> PyResult < ( ) > {
235260 if let Ok ( t) = token. extract :: < String > ( py) {
236261 return Ok ( self . 0 . try_insert ( t, token_id) ?) ;
@@ -244,6 +269,7 @@ impl PyVocabulary {
244269 ) ) )
245270 }
246271
272+ /// Removes a token from vocabulary.
247273 fn remove ( & mut self , py : Python < ' _ > , token : Py < PyAny > ) -> PyResult < ( ) > {
248274 if let Ok ( t) = token. extract :: < String > ( py) {
249275 self . 0 . remove ( t) ;
@@ -259,6 +285,7 @@ impl PyVocabulary {
259285 ) ) )
260286 }
261287
288+ /// Gets token ids of a given token.
262289 fn get ( & self , py : Python < ' _ > , token : Py < PyAny > ) -> PyResult < Option < Vec < TokenId > > > {
263290 if let Ok ( t) = token. extract :: < String > ( py) {
264291 return Ok ( self . 0 . token_ids ( t. into_bytes ( ) ) . cloned ( ) ) ;
@@ -272,26 +299,32 @@ impl PyVocabulary {
272299 ) ) )
273300 }
274301
302+ /// Gets the end of sentence token id.
275303 fn get_eos_token_id ( & self ) -> TokenId {
276304 self . 0 . eos_token_id ( )
277305 }
278306
307+ /// Gets the debug string representation of the vocabulary.
279308 fn __repr__ ( & self ) -> String {
280309 format ! ( "{:#?}" , self . 0 )
281310 }
282311
312+ /// Gets the string representation of the vocabulary.
283313 fn __str__ ( & self ) -> String {
284314 format ! ( "{}" , self . 0 )
285315 }
286316
317+ /// Compares whether two vocabularies are the same.
287318 fn __eq__ ( & self , other : & PyVocabulary ) -> bool {
288319 self . 0 == other. 0
289320 }
290321
322+ /// Returns length of Vocabulary's tokens, excluding EOS token.
291323 fn __len__ ( & self ) -> usize {
292324 self . 0 . tokens ( ) . len ( )
293325 }
294326
327+ /// Makes a deep copy of the Vocabulary.
295328 fn __deepcopy__ ( & self , _py : Python < ' _ > , _memo : Py < PyDict > ) -> Self {
296329 PyVocabulary ( self . 0 . clone ( ) )
297330 }
@@ -323,6 +356,7 @@ impl PyVocabulary {
323356 }
324357}
325358
359+ /// Creates regex string from JSON schema with optional whitespace pattern.
326360#[ pyfunction( name = "build_regex_from_schema" ) ]
327361#[ pyo3( signature = ( json_schema, whitespace_pattern=None ) ) ]
328362pub fn build_regex_from_schema_py (
@@ -363,6 +397,11 @@ fn register_child_module(parent_module: &Bound<'_, PyModule>) -> PyResult<()> {
363397 Ok ( ( ) )
364398}
365399
400+ /// This package provides core functionality for structured generation, providing a convenient way to:
401+ ///
402+ /// - build regular expressions from JSON schemas
403+ ///
404+ /// - construct an Index object by combining a Vocabulary and regular expression to efficiently map tokens from a given Vocabulary to state transitions in a finite-state automaton
366405#[ pymodule]
367406fn outlines_core ( m : & Bound < ' _ , PyModule > ) -> PyResult < ( ) > {
368407 let version = env ! ( "CARGO_PKG_VERSION" ) ;
0 commit comments