@@ -216,91 +216,3 @@ def recognize_using_websocket(self,
216216 http_proxy_host ,
217217 http_proxy_port ,
218218 self .verify )
219-
def add_corpus(self,
               customization_id,
               corpus_name,
               corpus_file,
               allow_overwrite=None,
               **kwargs):
    """
    Add a corpus.

    Adds a single corpus text file of new training data to a custom language model.
    Use multiple requests to submit multiple corpus text files. You must use
    credentials for the instance of the service that owns a model to add a corpus to
    it. Adding a corpus does not affect the custom language model until you train the
    model for the new data by using the **Train a custom language model** method.
    Submit a plain text file that contains sample sentences from the domain of
    interest to enable the service to extract words in context. The more sentences you
    add that represent the context in which speakers use words from the domain, the
    better the service's recognition accuracy. For guidelines about adding a corpus
    text file and for information about how the service parses a corpus file, see
    [Preparing a corpus text
    file](https://console.bluemix.net/docs/services/speech-to-text/language-resource.html#prepareCorpus).
    The call returns an HTTP 201 response code if the corpus is valid. The service
    then asynchronously processes the contents of the corpus and automatically
    extracts new words that it finds. This can take on the order of a minute or two to
    complete depending on the total number of words and the number of new words in the
    corpus, as well as the current load on the service. You cannot submit requests to
    add additional corpora or words to the custom model, or to train the model, until
    the service's analysis of the corpus for the current request completes. Use the
    **List a corpus** method to check the status of the analysis.
    The service auto-populates the model's words resource with any word that is not
    found in its base vocabulary; these are referred to as out-of-vocabulary (OOV)
    words. You can use the **List custom words** method to examine the words resource,
    and use the other words methods to eliminate typos and modify how words are
    pronounced as needed.
    To add a corpus file that has the same name as an existing corpus, set the
    `allow_overwrite` parameter to `true`; otherwise, the request fails. Overwriting
    an existing corpus causes the service to process the corpus text file and extract
    OOV words anew. Before doing so, it removes any OOV words associated with the
    existing corpus from the model's words resource unless they were also added by
    another corpus or they have been modified in some way with the **Add custom
    words** or **Add a custom word** method.
    The service limits the overall amount of data that you can add to a custom model
    to a maximum of 10 million total words from all corpora combined. Also, you can
    add no more than 30 thousand custom (OOV) words to a model; this includes words
    that the service extracts from corpora and words that you add directly.

    :param str customization_id: The customization ID (GUID) of the custom language
    model. You must make the request with service credentials created for the instance
    of the service that owns the custom model.
    :param str corpus_name: The name of the corpus for the custom language model. When
    adding a corpus, do not include spaces in the name; use a localized name that
    matches the language of the custom model; and do not use the name `user`, which is
    reserved by the service to denote custom words added or modified by the user.
    :param file corpus_file: A plain text file that contains the training data for the
    corpus. Encode the file in UTF-8 if it contains non-ASCII characters; the service
    assumes UTF-8 encoding if it encounters non-ASCII characters. With cURL, use the
    `--data-binary` option to upload the file for the request.
    :param bool allow_overwrite: If `true`, the specified corpus or audio resource
    overwrites an existing corpus or audio resource with the same name. If `false`,
    the request fails if a corpus or audio resource with the same name already exists.
    The parameter has no effect if a corpus or audio resource with the same name does
    not already exist.
    :param dict headers: A `dict` containing the request headers. Passing `None` is
    treated the same as omitting the argument.
    :return: A `DetailedResponse` containing the result, headers and HTTP status code.
    :rtype: DetailedResponse
    :raises ValueError: If `customization_id`, `corpus_name` or `corpus_file` is `None`.
    """
    if customization_id is None:
        raise ValueError('customization_id must be provided')
    if corpus_name is None:
        raise ValueError('corpus_name must be provided')
    if corpus_file is None:
        raise ValueError('corpus_file must be provided')

    # Copy caller-supplied headers into a fresh dict so the caller's mapping is
    # never mutated downstream. Guard on the value, not on key presence: an
    # explicit `headers=None` would otherwise make `dict.update(None)` raise
    # TypeError.
    headers = {}
    extra_headers = kwargs.get('headers')
    if extra_headers is not None:
        headers.update(extra_headers)

    # `allow_overwrite` is forwarded as a query parameter even when it is None.
    params = {'allow_overwrite': allow_overwrite}
    # The corpus file is handed to the request unchanged as the body.
    data = corpus_file
    url = '/v1/customizations/{0}/corpora/{1}'.format(
        *self._encode_path_vars(customization_id, corpus_name))
    response = self.request(
        method='POST',
        url=url,
        headers=headers,
        params=params,
        data=data,
        accept_json=True)
    return response
0 commit comments