Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright IBM Corp. 2016, 2024.
* (C) Copyright IBM Corp. 2016, 2025.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
Expand All @@ -12,7 +12,7 @@
*/

/*
* IBM OpenAPI SDK Code Generator Version: 3.97.0-0e90eab1-20241120-170029
* IBM OpenAPI SDK Code Generator Version: 3.105.0-3c13b041-20250605-193116
*/

package com.ibm.watson.speech_to_text.v1;
Expand Down Expand Up @@ -520,6 +520,9 @@ public ServiceCall<SpeechRecognitionResults> recognize(RecognizeOptions recogniz
"speech_detector_sensitivity",
String.valueOf(recognizeOptions.speechDetectorSensitivity()));
}
if (recognizeOptions.sadModule() != null) {
builder.query("sad_module", String.valueOf(recognizeOptions.sadModule()));
}
if (recognizeOptions.backgroundAudioSuppression() != null) {
builder.query(
"background_audio_suppression",
Expand Down Expand Up @@ -854,6 +857,9 @@ public ServiceCall<RecognitionJob> createJob(CreateJobOptions createJobOptions)
"speech_detector_sensitivity",
String.valueOf(createJobOptions.speechDetectorSensitivity()));
}
if (createJobOptions.sadModule() != null) {
builder.query("sad_module", String.valueOf(createJobOptions.sadModule()));
}
if (createJobOptions.backgroundAudioSuppression() != null) {
builder.query(
"background_audio_suppression",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,7 @@ public interface Events {
protected Double endOfPhraseSilenceTime;
protected Boolean splitTranscriptAtPhraseEnd;
protected Float speechDetectorSensitivity;
protected Long sadModule;
protected Float backgroundAudioSuppression;
protected Boolean lowLatency;
protected Float characterInsertionBias;
Expand Down Expand Up @@ -306,6 +307,7 @@ public static class Builder {
private Double endOfPhraseSilenceTime;
private Boolean splitTranscriptAtPhraseEnd;
private Float speechDetectorSensitivity;
private Long sadModule;
private Float backgroundAudioSuppression;
private Boolean lowLatency;
private Float characterInsertionBias;
Expand Down Expand Up @@ -346,6 +348,7 @@ private Builder(CreateJobOptions createJobOptions) {
this.endOfPhraseSilenceTime = createJobOptions.endOfPhraseSilenceTime;
this.splitTranscriptAtPhraseEnd = createJobOptions.splitTranscriptAtPhraseEnd;
this.speechDetectorSensitivity = createJobOptions.speechDetectorSensitivity;
this.sadModule = createJobOptions.sadModule;
this.backgroundAudioSuppression = createJobOptions.backgroundAudioSuppression;
this.lowLatency = createJobOptions.lowLatency;
this.characterInsertionBias = createJobOptions.characterInsertionBias;
Expand Down Expand Up @@ -717,6 +720,17 @@ public Builder speechDetectorSensitivity(Float speechDetectorSensitivity) {
return this;
}

/**
* Set the sadModule.
*
* <p>Stores the speech activity detection (SAD) module selector on this builder. When the built
* options are passed to `createJob`, a non-null value is sent as the `sad_module` query
* parameter.
*
* <p>The parameter is a primitive `long`, so calling this method always records a non-null value.
*
* @param sadModule the sadModule
* @return the CreateJobOptions builder
*/
public Builder sadModule(long sadModule) {
this.sadModule = sadModule;
return this;
}

/**
* Set the backgroundAudioSuppression.
*
Expand Down Expand Up @@ -797,6 +811,7 @@ protected CreateJobOptions(Builder builder) {
endOfPhraseSilenceTime = builder.endOfPhraseSilenceTime;
splitTranscriptAtPhraseEnd = builder.splitTranscriptAtPhraseEnd;
speechDetectorSensitivity = builder.speechDetectorSensitivity;
sadModule = builder.sadModule;
backgroundAudioSuppression = builder.backgroundAudioSuppression;
lowLatency = builder.lowLatency;
characterInsertionBias = builder.characterInsertionBias;
Expand Down Expand Up @@ -1353,6 +1368,23 @@ public Float speechDetectorSensitivity() {
return speechDetectorSensitivity;
}

/**
* Gets the sadModule.
*
* <p>Detects speech boundaries within the audio stream with better performance, improved noise
* suppression, faster responsiveness, and increased accuracy.
*
* <p>Specify `sad_module: 2` to increase accuracy and performance in detecting speech boundaries
* within the audio stream.
*
* <p>See [Speech Activity Detection
* (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
*
* @return the sadModule, or `null` if it was not set on the builder
*/
public Long sadModule() {
return sadModule;
}

/**
* Gets the backgroundAudioSuppression.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ public interface Model {
protected Double endOfPhraseSilenceTime;
protected Boolean splitTranscriptAtPhraseEnd;
protected Float speechDetectorSensitivity;
protected Long sadModule;
protected Float backgroundAudioSuppression;
protected Boolean lowLatency;
protected Float characterInsertionBias;
Expand Down Expand Up @@ -268,6 +269,7 @@ public static class Builder {
private Double endOfPhraseSilenceTime;
private Boolean splitTranscriptAtPhraseEnd;
private Float speechDetectorSensitivity;
private Long sadModule;
private Float backgroundAudioSuppression;
private Boolean lowLatency;
private Float characterInsertionBias;
Expand Down Expand Up @@ -303,6 +305,7 @@ private Builder(RecognizeOptions recognizeOptions) {
this.endOfPhraseSilenceTime = recognizeOptions.endOfPhraseSilenceTime;
this.splitTranscriptAtPhraseEnd = recognizeOptions.splitTranscriptAtPhraseEnd;
this.speechDetectorSensitivity = recognizeOptions.speechDetectorSensitivity;
this.sadModule = recognizeOptions.sadModule;
this.backgroundAudioSuppression = recognizeOptions.backgroundAudioSuppression;
this.lowLatency = recognizeOptions.lowLatency;
this.characterInsertionBias = recognizeOptions.characterInsertionBias;
Expand Down Expand Up @@ -619,6 +622,17 @@ public Builder speechDetectorSensitivity(Float speechDetectorSensitivity) {
return this;
}

/**
* Set the sadModule.
*
* <p>Stores the speech activity detection (SAD) module selector on this builder. When the built
* options are passed to `recognize`, a non-null value is sent as the `sad_module` query
* parameter.
*
* <p>The parameter is a primitive `long`, so calling this method always records a non-null value.
*
* @param sadModule the sadModule
* @return the RecognizeOptions builder
*/
public Builder sadModule(long sadModule) {
this.sadModule = sadModule;
return this;
}

/**
* Set the backgroundAudioSuppression.
*
Expand Down Expand Up @@ -694,6 +708,7 @@ protected RecognizeOptions(Builder builder) {
endOfPhraseSilenceTime = builder.endOfPhraseSilenceTime;
splitTranscriptAtPhraseEnd = builder.splitTranscriptAtPhraseEnd;
speechDetectorSensitivity = builder.speechDetectorSensitivity;
sadModule = builder.sadModule;
backgroundAudioSuppression = builder.backgroundAudioSuppression;
lowLatency = builder.lowLatency;
characterInsertionBias = builder.characterInsertionBias;
Expand Down Expand Up @@ -759,7 +774,8 @@ public String model() {
* when a speech activity is detected in the stream. This can be used both in standard and low
* latency mode. This feature enables client applications to know that some words/speech has been
* detected and the service is in the process of decoding. This can be used in lieu of interim
* results in standard mode. See [Using speech recognition
* results in standard mode. Use `sad_module: 2` to increase accuracy and performance in detecting
* speech boundaries within the audio stream. See [Using speech recognition
* parameters](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-service-features#features-parameters).
*
* @return the speechBeginEvent
Expand Down Expand Up @@ -1154,6 +1170,23 @@ public Float speechDetectorSensitivity() {
return speechDetectorSensitivity;
}

/**
* Gets the sadModule.
*
* <p>Detects speech boundaries within the audio stream with better performance, improved noise
* suppression, faster responsiveness, and increased accuracy.
*
* <p>Specify `sad_module: 2` to increase accuracy and performance in detecting speech boundaries
* within the audio stream.
*
* <p>See [Speech Activity Detection
* (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
*
* @return the sadModule, or `null` if it was not set on the builder
*/
public Long sadModule() {
return sadModule;
}

/**
* Gets the backgroundAudioSuppression.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ public interface Model {
protected Float backgroundAudioSuppression;
protected Boolean lowLatency;
protected Float characterInsertionBias;
protected Long sadModule;
private Boolean interimResults;
private Boolean processingMetrics;
private Float processingMetricsInterval;
Expand Down Expand Up @@ -236,6 +237,7 @@ public static class Builder {
private Float backgroundAudioSuppression;
private Boolean lowLatency;
private Float characterInsertionBias;
private Long sadModule;
private Boolean interimResults;
private Boolean processingMetrics;
private Float processingMetricsInterval;
Expand Down Expand Up @@ -268,6 +270,7 @@ private Builder(RecognizeWithWebsocketsOptions recognizeWithWebsocketsOptions) {
this.backgroundAudioSuppression = recognizeWithWebsocketsOptions.backgroundAudioSuppression;
this.lowLatency = recognizeWithWebsocketsOptions.lowLatency;
this.characterInsertionBias = recognizeWithWebsocketsOptions.characterInsertionBias;
this.sadModule = recognizeWithWebsocketsOptions.sadModule;
this.interimResults = recognizeWithWebsocketsOptions.interimResults;
this.processingMetrics = recognizeWithWebsocketsOptions.processingMetrics;
this.processingMetricsInterval = recognizeWithWebsocketsOptions.processingMetricsInterval;
Expand Down Expand Up @@ -606,6 +609,17 @@ public Builder characterInsertionBias(Float characterInsertionBias) {
return this;
}

/**
* Set the sadModule.
*
* <p>Stores the speech activity detection (SAD) module selector on this builder. The value is
* kept as given, so passing `null` leaves the option unset.
*
* @param sadModule the sadModule
* @return the RecognizeWithWebsocketsOptions builder
*/
public Builder sadModule(Long sadModule) {
this.sadModule = sadModule;
return this;
}

/**
* Set the interimResults.
*
Expand Down Expand Up @@ -687,6 +701,7 @@ protected RecognizeWithWebsocketsOptions(Builder builder) {
backgroundAudioSuppression = builder.backgroundAudioSuppression;
lowLatency = builder.lowLatency;
characterInsertionBias = builder.characterInsertionBias;
sadModule = builder.sadModule;
interimResults = builder.interimResults;
processingMetrics = builder.processingMetrics;
processingMetricsInterval = builder.processingMetricsInterval;
Expand Down Expand Up @@ -1176,6 +1191,23 @@ public Float characterInsertionBias() {
return characterInsertionBias;
}

/**
* Gets the sadModule.
*
* <p>Detects speech boundaries within the audio stream with better performance, improved noise
* suppression, faster responsiveness, and increased accuracy.
*
* <p>Specify `sad_module: 2` to increase accuracy and performance in detecting speech boundaries
* within the audio stream.
*
* <p>See [Speech Activity Detection
* (SAD)](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#sad).
*
* @return the sadModule, or `null` if it was not set on the builder
*/
public Long sadModule() {
return sadModule;
}

/**
* Gets the interimResults.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright IBM Corp. 2019, 2024.
* (C) Copyright IBM Corp. 2019, 2025.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -249,6 +249,7 @@ public void testRecognizeWOptions() throws Throwable {
.endOfPhraseSilenceTime(Double.valueOf("0.8"))
.splitTranscriptAtPhraseEnd(false)
.speechDetectorSensitivity(Float.valueOf("0.5"))
.sadModule(Long.valueOf("1"))
.backgroundAudioSuppression(Float.valueOf("0.0"))
.lowLatency(false)
.characterInsertionBias(Float.valueOf("0.0"))
Expand Down Expand Up @@ -296,6 +297,7 @@ public void testRecognizeWOptions() throws Throwable {
assertEquals(
Boolean.valueOf(query.get("split_transcript_at_phrase_end")), Boolean.valueOf(false));
assertEquals(Float.valueOf(query.get("speech_detector_sensitivity")), Float.valueOf("0.5"));
assertEquals(Long.valueOf(query.get("sad_module")), Long.valueOf("1"));
assertEquals(Float.valueOf(query.get("background_audio_suppression")), Float.valueOf("0.0"));
assertEquals(Boolean.valueOf(query.get("low_latency")), Boolean.valueOf(false));
assertEquals(Float.valueOf(query.get("character_insertion_bias")), Float.valueOf("0.0"));
Expand Down Expand Up @@ -470,6 +472,7 @@ public void testCreateJobWOptions() throws Throwable {
.endOfPhraseSilenceTime(Double.valueOf("0.8"))
.splitTranscriptAtPhraseEnd(false)
.speechDetectorSensitivity(Float.valueOf("0.5"))
.sadModule(Long.valueOf("1"))
.backgroundAudioSuppression(Float.valueOf("0.0"))
.lowLatency(false)
.characterInsertionBias(Float.valueOf("0.0"))
Expand Down Expand Up @@ -522,6 +525,7 @@ public void testCreateJobWOptions() throws Throwable {
assertEquals(
Boolean.valueOf(query.get("split_transcript_at_phrase_end")), Boolean.valueOf(false));
assertEquals(Float.valueOf(query.get("speech_detector_sensitivity")), Float.valueOf("0.5"));
assertEquals(Long.valueOf(query.get("sad_module")), Long.valueOf("1"));
assertEquals(Float.valueOf(query.get("background_audio_suppression")), Float.valueOf("0.0"));
assertEquals(Boolean.valueOf(query.get("low_latency")), Boolean.valueOf(false));
assertEquals(Float.valueOf(query.get("character_insertion_bias")), Float.valueOf("0.0"));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright IBM Corp. 2020, 2024.
* (C) Copyright IBM Corp. 2020, 2025.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -63,6 +63,7 @@ public void testCreateJobOptions() throws Throwable {
.endOfPhraseSilenceTime(Double.valueOf("0.8"))
.splitTranscriptAtPhraseEnd(false)
.speechDetectorSensitivity(Float.valueOf("0.5"))
.sadModule(Long.valueOf("1"))
.backgroundAudioSuppression(Float.valueOf("0.0"))
.lowLatency(false)
.characterInsertionBias(Float.valueOf("0.0"))
Expand Down Expand Up @@ -99,6 +100,7 @@ public void testCreateJobOptions() throws Throwable {
assertEquals(createJobOptionsModel.endOfPhraseSilenceTime(), Double.valueOf("0.8"));
assertEquals(createJobOptionsModel.splitTranscriptAtPhraseEnd(), Boolean.valueOf(false));
assertEquals(createJobOptionsModel.speechDetectorSensitivity(), Float.valueOf("0.5"));
assertEquals(createJobOptionsModel.sadModule(), Long.valueOf("1"));
assertEquals(createJobOptionsModel.backgroundAudioSuppression(), Float.valueOf("0.0"));
assertEquals(createJobOptionsModel.lowLatency(), Boolean.valueOf(false));
assertEquals(createJobOptionsModel.characterInsertionBias(), Float.valueOf("0.0"));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* (C) Copyright IBM Corp. 2020, 2024.
* (C) Copyright IBM Corp. 2020, 2025.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -58,6 +58,7 @@ public void testRecognizeOptions() throws Throwable {
.endOfPhraseSilenceTime(Double.valueOf("0.8"))
.splitTranscriptAtPhraseEnd(false)
.speechDetectorSensitivity(Float.valueOf("0.5"))
.sadModule(Long.valueOf("1"))
.backgroundAudioSuppression(Float.valueOf("0.0"))
.lowLatency(false)
.characterInsertionBias(Float.valueOf("0.0"))
Expand Down Expand Up @@ -89,6 +90,7 @@ public void testRecognizeOptions() throws Throwable {
assertEquals(recognizeOptionsModel.endOfPhraseSilenceTime(), Double.valueOf("0.8"));
assertEquals(recognizeOptionsModel.splitTranscriptAtPhraseEnd(), Boolean.valueOf(false));
assertEquals(recognizeOptionsModel.speechDetectorSensitivity(), Float.valueOf("0.5"));
assertEquals(recognizeOptionsModel.sadModule(), Long.valueOf("1"));
assertEquals(recognizeOptionsModel.backgroundAudioSuppression(), Float.valueOf("0.0"));
assertEquals(recognizeOptionsModel.lowLatency(), Boolean.valueOf(false));
assertEquals(recognizeOptionsModel.characterInsertionBias(), Float.valueOf("0.0"));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,24 +307,25 @@ public ServiceCall<Voice> getVoice(GetVoiceOptions getVoiceOptions) {
* format with the Opus codec (`audio/ogg;codecs=opus`). The service always returns single-channel
* audio. * `audio/alaw` - You must specify the `rate` of the audio. * `audio/basic` - The service
* returns audio with a sampling rate of 8000 Hz. * `audio/flac` - You can optionally specify the
* `rate` of the audio. The default sampling rate is 22,050 Hz. * `audio/l16` - You must specify
* the `rate` of the audio. You can optionally specify the `endianness` of the audio. The default
* endianness is `little-endian`. * `audio/mp3` - You can optionally specify the `rate` of the
* audio. The default sampling rate is 24,000 Hz for Natural voices and 22,050 Hz for for all
* other voices. * `audio/mpeg` - You can optionally specify the `rate` of the audio. The default
* sampling rate is 22,050 Hz. * `audio/mulaw` - You must specify the `rate` of the audio. *
* `audio/ogg` - The service returns the audio in the `vorbis` codec. You can optionally specify
* the `rate` of the audio. The default sampling rate is 22,050 Hz. * `audio/ogg;codecs=opus` -
* You can optionally specify the `rate` of the audio. Only the following values are valid
* sampling rates: `48000`, `24000`, `16000`, `12000`, or `8000`. If you specify a value other
* than one of these, the service returns an error. The default sampling rate is 48,000 Hz. *
* `audio/ogg;codecs=vorbis` - You can optionally specify the `rate` of the audio. The default
* sampling rate is 22,050 Hz. * `audio/wav` - You can optionally specify the `rate` of the audio.
* The default sampling rate is 22,050 Hz. * `audio/webm` - The service returns the audio in the
* `opus` codec. The service returns audio with a sampling rate of 48,000 Hz. *
* `audio/webm;codecs=opus` - The service returns audio with a sampling rate of 48,000 Hz. *
* `audio/webm;codecs=vorbis` - You can optionally specify the `rate` of the audio. The default
* sampling rate is 22,050 Hz.
* `rate` of the audio. The default sampling rate is 24,000 Hz for Natural voices and 22,050 Hz
* for all other voices. * `audio/l16` - You must specify the `rate` of the audio. You can
* optionally specify the `endianness` of the audio. The default endianness is `little-endian`. *
* `audio/mp3` - You can optionally specify the `rate` of the audio. The default sampling rate is
* 24,000 Hz for Natural voices and 22,050 Hz for all other voices. * `audio/mpeg` - You can
* optionally specify the `rate` of the audio. The default sampling rate is 24,000 Hz for Natural
* voices and 22,050 Hz for all other voices. * `audio/mulaw` - You must specify the `rate` of the
* audio. * `audio/ogg` - The service returns the audio in the `vorbis` codec. You can optionally
* specify the `rate` of the audio. The default sampling rate is 48,000 Hz. *
* `audio/ogg;codecs=opus` - You can optionally specify the `rate` of the audio. Only the
* following values are valid sampling rates: `48000`, `24000`, `16000`, `12000`, or `8000`. If
* you specify a value other than one of these, the service returns an error. The default sampling
* rate is 48,000 Hz. * `audio/ogg;codecs=vorbis` - You can optionally specify the `rate` of the
* audio. The default sampling rate is 48,000 Hz. * `audio/wav` - You can optionally specify the
* `rate` of the audio. The default sampling rate is 24,000 Hz for Natural voices and 22,050 Hz
* for all other voices. * `audio/webm` - The service returns the audio in the `opus` codec. The
* service returns audio with a sampling rate of 48,000 Hz. * `audio/webm;codecs=opus` - The
* service returns audio with a sampling rate of 48,000 Hz. * `audio/webm;codecs=vorbis` - You can
* optionally specify the `rate` of the audio. The default sampling rate is 48,000 Hz.
*
* <p>For more information about specifying an audio format, including additional details about
* some of the formats, see [Using audio
Expand Down
Loading