0

Move Web Speech API .mojom files to //media/mojo/mojom

This CL moves the Web Speech API .mojom files to //media/mojo/mojom so that they can be used by the speech recognition service. //media cannot depend on //third_party/blink/public/mojom due to circular dependencies.

Bug: 1495388
Change-Id: Ibd59ced528b323497eafc9f8230b609c2ef14445
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5528282
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Reviewed-by: Devlin Cronin <rdevlin.cronin@chromium.org>
Reviewed-by: Brendon Tiszka <tiszka@chromium.org>
Reviewed-by: Philip Rogers <pdr@chromium.org>
Reviewed-by: Xiaohan Wang <xhwang@chromium.org>
Reviewed-by: Peter Beverloo <peter@chromium.org>
Commit-Queue: Evan Liu <evliu@google.com>
Reviewed-by: Avi Drissman <avi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1302179}
This commit is contained in:
Evan Liu
2024-05-16 20:10:32 +00:00
committed by Chromium LUCI CQ
parent 0fc8cd0571
commit d725228c05
60 changed files with 407 additions and 367 deletions
BUILD.gn
android_webview/browser
chrome
content
extensions/shell/browser
media/mojo/mojom
third_party/blink

@ -1146,6 +1146,7 @@ if (use_blink && !is_cronet_build) {
"//device/vr/public/mojom:vr_service_js_data_deps",
"//media/capture/mojom:image_capture_js_data_deps",
"//media/midi:mojo_js_data_deps",
"//media/mojo/mojom:web_speech_recognition_js_data_deps",
"//mojo/public/interfaces/bindings/tests:test_data_deps",
"//mojo/public/js/ts/bindings/tests:test_interfaces_js_data_deps",
"//mojo/public/mojom/base:base_js_data_deps",

@ -307,6 +307,7 @@ source_set("browser") {
"//components/webdata/common",
"//content/public/browser",
"//media/mojo:buildflags",
"//media/mojo/mojom:web_speech_recognition",
"//mojo/public/cpp/base:protobuf_support",
"//services/cert_verifier/public/mojom",
"//services/device/public/cpp:device_feature_list",

@ -75,6 +75,7 @@ include_rules = [
"+media/base/android",
"+media/base/media_switches.h", # For media command line switches.
"+media/mojo/buildflags.h",
"+media/mojo/mojom",
"+components/policy/policy_constants.h",
"+components/embedder_support/android",

@ -17,8 +17,8 @@
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/browser/web_contents.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
using content::BrowserThread;
@ -40,11 +40,11 @@ void AwSpeechRecognitionManagerDelegate::OnAudioEnd(int session_id) {}
void AwSpeechRecognitionManagerDelegate::OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result) {}
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result) {}
void AwSpeechRecognitionManagerDelegate::OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) {}
const media::mojom::SpeechRecognitionError& error) {}
void AwSpeechRecognitionManagerDelegate::OnAudioLevelsChange(
int session_id,

@ -37,11 +37,11 @@ class AwSpeechRecognitionManagerDelegate
void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
override;
void OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) override;
const media::mojom::SpeechRecognitionError& error) override;
void OnAudioLevelsChange(int session_id,
float volume,
float noise_volume) override;

@ -2581,6 +2581,7 @@ static_library("browser") {
"//media/midi",
"//media/mojo:buildflags",
"//media/mojo/common",
"//media/mojo/mojom:web_speech_recognition",
"//media/mojo/services",
"//media/webrtc",
"//mojo/core/embedder",

@ -19,8 +19,8 @@
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/browser/web_contents.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
#if BUILDFLAG(ENABLE_EXTENSIONS)
#include "chrome/browser/extensions/extension_service.h"
@ -82,11 +82,11 @@ void ChromeSpeechRecognitionManagerDelegate::OnAudioEnd(int session_id) {
void ChromeSpeechRecognitionManagerDelegate::OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result) {}
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result) {}
void ChromeSpeechRecognitionManagerDelegate::OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) {}
const media::mojom::SpeechRecognitionError& error) {}
void ChromeSpeechRecognitionManagerDelegate::OnAudioLevelsChange(
int session_id, float volume, float noise_volume) {

@ -36,11 +36,11 @@ class ChromeSpeechRecognitionManagerDelegate
void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
override;
void OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) override;
const media::mojom::SpeechRecognitionError& error) override;
void OnAudioLevelsChange(int session_id,
float volume,
float noise_volume) override;

@ -20,8 +20,8 @@
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_preamble.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
// Invalid speech session.
static const int kInvalidSessionId = -1;
@ -68,11 +68,11 @@ class NetworkSpeechRecognizer::EventListener
void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
override;
void OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) override;
const media::mojom::SpeechRecognitionError& error) override;
void OnSoundStart(int session_id) override;
void OnSoundEnd(int session_id) override;
void OnAudioLevelsChange(int session_id,
@ -200,7 +200,7 @@ void NetworkSpeechRecognizer::EventListener::OnRecognitionEnd(int session_id) {
void NetworkSpeechRecognizer::EventListener::OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
std::u16string result_str;
size_t final_count = 0;
// The number of results with |is_provisional| false. If |final_count| ==
@ -211,7 +211,7 @@ void NetworkSpeechRecognizer::EventListener::OnRecognitionResults(
final_count++;
result_str += result->hypotheses[0]->utterance;
}
// blink::mojom::SpeechRecognitionResult doesn't have word offsets.
// media::mojom::WebSpeechRecognitionResult doesn't have word offsets.
content::GetUIThreadTaskRunner({})->PostTask(
FROM_HERE,
base::BindOnce(&SpeechRecognizerDelegate::OnSpeechResult, delegate_,
@ -223,9 +223,9 @@ void NetworkSpeechRecognizer::EventListener::OnRecognitionResults(
void NetworkSpeechRecognizer::EventListener::OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) {
const media::mojom::SpeechRecognitionError& error) {
StopOnIOThread();
if (error.code == blink::mojom::SpeechRecognitionErrorCode::kNetwork) {
if (error.code == media::mojom::SpeechRecognitionErrorCode::kNetwork) {
NotifyRecognitionStateChanged(SPEECH_RECOGNIZER_ERROR);
}
NotifyRecognitionStateChanged(SPEECH_RECOGNIZER_READY);

@ -120,6 +120,7 @@ if (!is_android) {
"//media/mojo/mojom:mojom_mojolpm",
"//media/mojo/mojom:remoting_mojolpm",
"//media/mojo/mojom:speech_recognition_mojolpm",
"//media/mojo/mojom:web_speech_recognition_mojolpm",
"//services/device/public/mojom:mojom_mojolpm",
"//services/image_annotation/public/mojom:mojom_mojolpm",
"//services/network/public/mojom:cookies_mojom_mojolpm",

@ -345,8 +345,8 @@ context_browser_exposed_interfaces = [
"Remote",
],
[
"//third_party/blink/public/mojom/speech/speech_recognizer.mojom",
"blink.mojom.SpeechRecognizer",
"//media/mojo/mojom/speech_recognizer.mojom",
"media.mojom.SpeechRecognizer",
"Remote",
],
[

@ -175,6 +175,7 @@ source_set("browser") {
"//media/mojo:buildflags",
"//media/mojo/clients",
"//media/mojo/mojom",
"//media/mojo/mojom:web_speech_recognition",
"//media/mojo/services",
"//media/webrtc",
"//mojo/core/embedder",
@ -3327,6 +3328,8 @@ source_set("browser") {
"speech/soda_speech_recognition_engine_impl.cc",
"speech/soda_speech_recognition_engine_impl.h",
]
deps += [ "//media/mojo/mojom:web_speech_recognition" ]
}
deps += [

@ -98,6 +98,7 @@
#include "media/mojo/mojom/media_metrics_provider.mojom.h"
#include "media/mojo/mojom/media_player.mojom.h"
#include "media/mojo/mojom/remoting.mojom.h"
#include "media/mojo/mojom/speech_recognizer.mojom.h"
#include "media/mojo/mojom/video_decode_perf_history.mojom.h"
#include "media/mojo/mojom/video_encoder_metrics_provider.mojom.h"
#include "media/mojo/mojom/webrtc_video_perf.mojom.h"
@ -175,7 +176,6 @@
#include "third_party/blink/public/mojom/sensor/web_sensor_provider.mojom.h"
#include "third_party/blink/public/mojom/sms/webotp_service.mojom.h"
#include "third_party/blink/public/mojom/speculation_rules/speculation_rules.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h"
#include "third_party/blink/public/mojom/storage_access/storage_access_handle.mojom.h"
#include "third_party/blink/public/mojom/usb/web_usb_service.mojom.h"
@ -856,7 +856,7 @@ void PopulateFrameBinders(RenderFrameHostImpl* host, mojo::BinderMap* map) {
map->Add<blink::mojom::SharedWorkerConnector>(
base::BindRepeating(&BindSharedWorkerConnector, base::Unretained(host)));
map->Add<blink::mojom::SpeechRecognizer>(
map->Add<media::mojom::SpeechRecognizer>(
base::BindRepeating(&SpeechRecognitionDispatcherHost::Create,
host->GetProcess()->GetID(), host->GetRoutingID()),
GetIOThreadTaskRunner({}));

@ -1,6 +1,7 @@
include_rules = [
"+components/speech",
"+components/soda",
"+media/mojo/mojom",
"+google_apis", # Exception to general rule, see content/DEPS for details.
]

@ -23,13 +23,13 @@
#include "content/public/browser/google_streaming_api.pb.h"
#include "google_apis/google_api_keys.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
#include "mojo/public/c/system/types.h"
#include "mojo/public/cpp/bindings/receiver_set.h"
#include "net/base/load_flags.h"
#include "net/traffic_annotation/network_traffic_annotation.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
namespace content {
namespace {
@ -406,7 +406,7 @@ NetworkSpeechRecognitionEngineImpl::ConnectBothStreams(const FSMEventArgs&) {
base::NumberToString(max_alternatives));
}
upstream_args.push_back("app=chromium");
for (const blink::mojom::SpeechRecognitionGrammar& grammar :
for (const media::mojom::SpeechRecognitionGrammar& grammar :
config_.grammars) {
std::string grammar_value(base::NumberToString(grammar.weight) + ":" +
grammar.url.spec());
@ -553,23 +553,23 @@ NetworkSpeechRecognitionEngineImpl::ProcessDownstreamResponse(
case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
break;
case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
return Abort(blink::mojom::SpeechRecognitionErrorCode::kNoSpeech);
return Abort(media::mojom::SpeechRecognitionErrorCode::kNoSpeech);
case proto::SpeechRecognitionEvent::STATUS_ABORTED:
return Abort(blink::mojom::SpeechRecognitionErrorCode::kAborted);
return Abort(media::mojom::SpeechRecognitionErrorCode::kAborted);
case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
return Abort(blink::mojom::SpeechRecognitionErrorCode::kAudioCapture);
return Abort(media::mojom::SpeechRecognitionErrorCode::kAudioCapture);
case proto::SpeechRecognitionEvent::STATUS_NETWORK:
return Abort(blink::mojom::SpeechRecognitionErrorCode::kNetwork);
return Abort(media::mojom::SpeechRecognitionErrorCode::kNetwork);
case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
return Abort(blink::mojom::SpeechRecognitionErrorCode::kNotAllowed);
return Abort(media::mojom::SpeechRecognitionErrorCode::kNotAllowed);
case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
return Abort(
blink::mojom::SpeechRecognitionErrorCode::kServiceNotAllowed);
media::mojom::SpeechRecognitionErrorCode::kServiceNotAllowed);
case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
return Abort(blink::mojom::SpeechRecognitionErrorCode::kBadGrammar);
return Abort(media::mojom::SpeechRecognitionErrorCode::kBadGrammar);
case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
return Abort(
blink::mojom::SpeechRecognitionErrorCode::kLanguageNotSupported);
media::mojom::SpeechRecognitionErrorCode::kLanguageNotSupported);
}
}
@ -578,11 +578,11 @@ NetworkSpeechRecognitionEngineImpl::ProcessDownstreamResponse(
delegate_->OnSpeechRecognitionEngineEndOfUtterance();
}
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
for (int i = 0; i < ws_event.result_size(); ++i) {
const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = !(ws_result.has_final() && ws_result.final());
if (!result->is_provisional) {
@ -592,8 +592,8 @@ NetworkSpeechRecognitionEngineImpl::ProcessDownstreamResponse(
for (int j = 0; j < ws_result.alternative_size(); ++j) {
const proto::SpeechRecognitionAlternative& ws_alternative =
ws_result.alternative(j);
blink::mojom::SpeechRecognitionHypothesisPtr hypothesis =
blink::mojom::SpeechRecognitionHypothesis::New();
media::mojom::SpeechRecognitionHypothesisPtr hypothesis =
media::mojom::SpeechRecognitionHypothesis::New();
if (ws_alternative.has_confidence()) {
hypothesis->confidence = ws_alternative.confidence();
} else if (ws_result.has_stability()) {
@ -622,7 +622,7 @@ NetworkSpeechRecognitionEngineImpl::RaiseNoMatchErrorIfGotNoResults(
// Provide an empty result to notify that recognition has ended with no
// errors and no further results.
delegate_->OnSpeechRecognitionEngineResults(
std::vector<blink::mojom::SpeechRecognitionResultPtr>());
std::vector<media::mojom::WebSpeechRecognitionResultPtr>());
}
return AbortSilently(event_args);
}
@ -666,23 +666,23 @@ NetworkSpeechRecognitionEngineImpl::CloseDownstream(const FSMEventArgs&) {
NetworkSpeechRecognitionEngineImpl::FSMState
NetworkSpeechRecognitionEngineImpl::AbortSilently(const FSMEventArgs&) {
return Abort(blink::mojom::SpeechRecognitionErrorCode::kNone);
return Abort(media::mojom::SpeechRecognitionErrorCode::kNone);
}
NetworkSpeechRecognitionEngineImpl::FSMState
NetworkSpeechRecognitionEngineImpl::AbortWithError(const FSMEventArgs&) {
return Abort(blink::mojom::SpeechRecognitionErrorCode::kNetwork);
return Abort(media::mojom::SpeechRecognitionErrorCode::kNetwork);
}
NetworkSpeechRecognitionEngineImpl::FSMState
NetworkSpeechRecognitionEngineImpl::Abort(
blink::mojom::SpeechRecognitionErrorCode error_code) {
media::mojom::SpeechRecognitionErrorCode error_code) {
DVLOG(1) << "Aborting with error " << error_code;
if (error_code != blink::mojom::SpeechRecognitionErrorCode::kNone) {
if (error_code != media::mojom::SpeechRecognitionErrorCode::kNone) {
delegate_->OnSpeechRecognitionEngineError(
blink::mojom::SpeechRecognitionError(
error_code, blink::mojom::SpeechAudioErrorDetails::kNone));
media::mojom::SpeechRecognitionError(
error_code, media::mojom::SpeechAudioErrorDetails::kNone));
}
downstream_loader_.reset();
upstream_loader_.reset();

@ -24,10 +24,10 @@
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_preamble.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_grammar.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
#include "services/network/public/cpp/simple_url_loader_stream_consumer.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_grammar.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
class AudioChunk;
@ -69,7 +69,7 @@ class CONTENT_EXPORT NetworkSpeechRecognitionEngineImpl
~Config();
std::string language;
std::vector<blink::mojom::SpeechRecognitionGrammar> grammars;
std::vector<media::mojom::SpeechRecognitionGrammar> grammars;
bool filter_profanities = false;
bool continuous = true;
bool interim_results = true;
@ -183,7 +183,7 @@ class CONTENT_EXPORT NetworkSpeechRecognitionEngineImpl
FSMState CloseDownstream(const FSMEventArgs& event_args);
FSMState AbortSilently(const FSMEventArgs& event_args);
FSMState AbortWithError(const FSMEventArgs& event_args);
FSMState Abort(blink::mojom::SpeechRecognitionErrorCode error);
FSMState Abort(media::mojom::SpeechRecognitionErrorCode error);
FSMState DoNothing(const FSMEventArgs& event_args);
FSMState NotFeasible(const FSMEventArgs& event_args);

@ -19,6 +19,8 @@
#include "components/speech/audio_buffer.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/public/browser/google_streaming_api.pb.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "net/base/net_errors.h"
#include "net/http/http_response_headers.h"
@ -28,8 +30,6 @@
#include "services/network/public/mojom/url_response_head.mojom.h"
#include "services/network/test/test_url_loader_factory.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
namespace content {
@ -46,12 +46,12 @@ class NetworkSpeechRecognitionEngineImplTest
public:
NetworkSpeechRecognitionEngineImplTest()
: last_number_of_upstream_chunks_seen_(0U),
error_(blink::mojom::SpeechRecognitionErrorCode::kNone),
error_(media::mojom::SpeechRecognitionErrorCode::kNone),
end_of_utterance_counter_(0) {}
// SpeechRecognitionRequestDelegate methods.
void OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
override {
results_.push(mojo::Clone(results));
}
@ -59,7 +59,7 @@ class NetworkSpeechRecognitionEngineImplTest
++end_of_utterance_counter_;
}
void OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) override {
const media::mojom::SpeechRecognitionError& error) override {
error_ = error.code;
}
@ -75,8 +75,8 @@ class NetworkSpeechRecognitionEngineImplTest
DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH
};
static bool ResultsAreEqual(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& a,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& b);
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& a,
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& b);
static std::string SerializeProtobufResponse(
const proto::SpeechRecognitionEvent& msg);
@ -89,9 +89,9 @@ class NetworkSpeechRecognitionEngineImplTest
void ProvideMockProtoResultDownstream(
const proto::SpeechRecognitionEvent& result);
void ProvideMockResultDownstream(
const blink::mojom::SpeechRecognitionResultPtr& result);
const media::mojom::WebSpeechRecognitionResultPtr& result);
void ExpectResultsReceived(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result);
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result);
void ExpectFramedChunk(const std::string& chunk, uint32_t type);
// Reads and returns all pending upload data from |upstream_data_pipe_|,
// initializing the pipe from |GetUpstreamRequest()|, if needed.
@ -108,9 +108,10 @@ class NetworkSpeechRecognitionEngineImplTest
std::unique_ptr<NetworkSpeechRecognitionEngineImpl> engine_under_test_;
size_t last_number_of_upstream_chunks_seen_;
std::string response_buffer_;
blink::mojom::SpeechRecognitionErrorCode error_;
media::mojom::SpeechRecognitionErrorCode error_;
int end_of_utterance_counter_;
base::queue<std::vector<blink::mojom::SpeechRecognitionResultPtr>> results_;
base::queue<std::vector<media::mojom::WebSpeechRecognitionResultPtr>>
results_;
};
TEST_F(NetworkSpeechRecognitionEngineImplTest, SingleDefinitiveResult) {
@ -132,14 +133,14 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SingleDefinitiveResult) {
// Simulate a protobuf message streamed from the server containing a single
// result with two hypotheses.
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = false;
result->hypotheses.push_back(
blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 1", 0.1F));
media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 1", 0.1F));
result->hypotheses.push_back(
blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 2", 0.2F));
media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 2", 0.2F));
ProvideMockResultDownstream(result);
ExpectResultsReceived(results);
@ -149,7 +150,7 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SingleDefinitiveResult) {
CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
EndMockRecognition();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(0U, results_.size());
}
@ -162,12 +163,12 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SeveralStreamingResults) {
InjectDummyAudioChunk();
ASSERT_NE("", ConsumeChunkedUploadData());
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = (i % 2 == 0); // Alternate result types.
float confidence = result->is_provisional ? 0.0F : (i * 0.1F);
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
u"hypothesis", confidence));
ProvideMockResultDownstream(result);
@ -181,11 +182,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SeveralStreamingResults) {
ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
// Simulate a final definitive result.
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = false;
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
u"The final result", 1.0F));
ProvideMockResultDownstream(result);
ExpectResultsReceived(results);
@ -195,7 +196,7 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SeveralStreamingResults) {
CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
EndMockRecognition();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(0U, results_.size());
}
@ -210,11 +211,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest,
ASSERT_NE("", ConsumeChunkedUploadData());
// Simulate the corresponding definitive result.
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->hypotheses.push_back(
blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis", 1.0F));
media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis", 1.0F));
ProvideMockResultDownstream(result);
ExpectResultsReceived(results);
ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
@ -227,13 +228,13 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest,
// Expect an empty result, aimed at notifying recognition ended with no
// actual results nor errors.
std::vector<blink::mojom::SpeechRecognitionResultPtr> empty_results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> empty_results;
ExpectResultsReceived(empty_results);
// Ensure everything is closed cleanly after the downstream is closed.
ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
EndMockRecognition();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(0U, results_.size());
}
@ -256,11 +257,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, ReRequestData) {
ASSERT_EQ(uploaded_data, ConsumeChunkedUploadData());
// Simulate the corresponding definitive result.
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->hypotheses.push_back(
blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis", 1.0F));
media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis", 1.0F));
ProvideMockResultDownstream(result);
ExpectResultsReceived(results);
ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
@ -282,13 +283,13 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, ReRequestData) {
// Expect an empty result, aimed at notifying recognition ended with no
// actual results nor errors.
std::vector<blink::mojom::SpeechRecognitionResultPtr> empty_results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> empty_results;
ExpectResultsReceived(empty_results);
// Ensure everything is closed cleanly after the downstream is closed.
ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
EndMockRecognition();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(0U, results_.size());
}
@ -306,11 +307,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, NoMatchError) {
ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
// Simulate only a provisional result.
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = true;
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
u"The final result", 0.0F));
ProvideMockResultDownstream(result);
ExpectResultsReceived(results);
@ -321,7 +322,7 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, NoMatchError) {
// Expect an empty result.
ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
EndMockRecognition();
std::vector<blink::mojom::SpeechRecognitionResultPtr> empty_result;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> empty_result;
ExpectResultsReceived(empty_result);
}
@ -336,11 +337,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, HTTPError) {
// Close the downstream with a HTTP 500 error.
CloseMockDownstream(DOWNSTREAM_ERROR_HTTP500);
// Expect a blink::mojom::SpeechRecognitionErrorCode::kNetwork error to be
// Expect a media::mojom::SpeechRecognitionErrorCode::kNetwork error to be
// raised.
ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
EndMockRecognition();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
ASSERT_EQ(0U, results_.size());
}
@ -355,11 +356,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, NetworkError) {
// Close the downstream fetcher simulating a network failure.
CloseMockDownstream(DOWNSTREAM_ERROR_NETWORK);
// Expect a blink::mojom::SpeechRecognitionErrorCode::kNetwork error to be
// Expect a media::mojom::SpeechRecognitionErrorCode::kNetwork error to be
// raised.
ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
EndMockRecognition();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
ASSERT_EQ(0U, results_.size());
}
@ -385,12 +386,12 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, Stability) {
ProvideMockProtoResultDownstream(proto_event);
// Set up expectations.
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = true;
result->hypotheses.push_back(
blink::mojom::SpeechRecognitionHypothesis::New(u"foo", 0.5));
media::mojom::SpeechRecognitionHypothesis::New(u"foo", 0.5));
// Check that the protobuf generated the expected result.
ExpectResultsReceived(results);
@ -404,9 +405,9 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, Stability) {
EndMockRecognition();
// Since there was no final result, we get an empty "no match" result.
std::vector<blink::mojom::SpeechRecognitionResultPtr> empty_result;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> empty_result;
ExpectResultsReceived(empty_result);
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(0U, results_.size());
}
@ -466,12 +467,12 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SendPreamble) {
// Simulate a protobuf message streamed from the server containing a single
// result with one hypotheses.
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = false;
result->hypotheses.push_back(
blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 1", 0.1F));
media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 1", 0.1F));
ProvideMockResultDownstream(result);
ExpectResultsReceived(results);
@ -481,7 +482,7 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SendPreamble) {
CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
EndMockRecognition();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
ASSERT_EQ(0U, results_.size());
}
@ -605,7 +606,7 @@ void NetworkSpeechRecognitionEngineImplTest::ProvideMockProtoResultDownstream(
}
void NetworkSpeechRecognitionEngineImplTest::ProvideMockResultDownstream(
const blink::mojom::SpeechRecognitionResultPtr& result) {
const media::mojom::WebSpeechRecognitionResultPtr& result) {
proto::SpeechRecognitionEvent proto_event;
proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS);
proto::SpeechRecognitionResult* proto_result = proto_event.add_result();
@ -613,7 +614,7 @@ void NetworkSpeechRecognitionEngineImplTest::ProvideMockResultDownstream(
for (size_t i = 0; i < result->hypotheses.size(); ++i) {
proto::SpeechRecognitionAlternative* proto_alternative =
proto_result->add_alternative();
const blink::mojom::SpeechRecognitionHypothesisPtr& hypothesis =
const media::mojom::SpeechRecognitionHypothesisPtr& hypothesis =
result->hypotheses[i];
proto_alternative->set_confidence(hypothesis->confidence);
proto_alternative->set_transcript(base::UTF16ToUTF8(hypothesis->utterance));
@ -659,15 +660,15 @@ void NetworkSpeechRecognitionEngineImplTest::CloseMockDownstream(
}
void NetworkSpeechRecognitionEngineImplTest::ExpectResultsReceived(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
ASSERT_GE(1U, results_.size());
ASSERT_TRUE(ResultsAreEqual(results, results_.front()));
results_.pop();
}
bool NetworkSpeechRecognitionEngineImplTest::ResultsAreEqual(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& a,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& b) {
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& a,
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& b) {
if (a.size() != b.size())
return false;
@ -679,9 +680,9 @@ bool NetworkSpeechRecognitionEngineImplTest::ResultsAreEqual(
return false;
}
for (size_t i = 0; i < (*it_a)->hypotheses.size(); ++i) {
const blink::mojom::SpeechRecognitionHypothesisPtr& hyp_a =
const media::mojom::SpeechRecognitionHypothesisPtr& hyp_a =
(*it_a)->hypotheses[i];
const blink::mojom::SpeechRecognitionHypothesisPtr& hyp_b =
const media::mojom::SpeechRecognitionHypothesisPtr& hyp_b =
(*it_b)->hypotheses[i];
if (hyp_a->utterance != hyp_b->utterance ||
hyp_a->confidence != hyp_b->confidence) {

@ -111,7 +111,7 @@ void SodaSpeechRecognitionEngineImpl::EndRecognition() {
void SodaSpeechRecognitionEngineImpl::TakeAudioChunk(const AudioChunk& data) {
DCHECK_CALLED_ON_VALID_SEQUENCE(main_sequence_checker_);
if (!is_start_recognition_) {
Abort(blink::mojom::SpeechRecognitionErrorCode::kNotAllowed);
Abort(media::mojom::SpeechRecognitionErrorCode::kNotAllowed);
return;
}
@ -136,13 +136,13 @@ void SodaSpeechRecognitionEngineImpl::OnSpeechRecognitionRecognitionEvent(
std::move(reply).Run(is_start_recognition_);
// Map recognition results.
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = !recognition_result.is_final;
blink::mojom::SpeechRecognitionHypothesisPtr hypothesis =
blink::mojom::SpeechRecognitionHypothesis::New();
media::mojom::SpeechRecognitionHypothesisPtr hypothesis =
media::mojom::SpeechRecognitionHypothesis::New();
// TODO(crbug.com/40286514): Hardcode now.
hypothesis->confidence = kSpeechRecognitionConfidence;
hypothesis->utterance = base::UTF8ToUTF16(recognition_result.transcription);
@ -156,14 +156,14 @@ void SodaSpeechRecognitionEngineImpl::OnSpeechRecognitionRecognitionEvent(
}
void SodaSpeechRecognitionEngineImpl::OnSpeechRecognitionError() {
Abort(blink::mojom::SpeechRecognitionErrorCode::kNoSpeech);
Abort(media::mojom::SpeechRecognitionErrorCode::kNoSpeech);
}
void SodaSpeechRecognitionEngineImpl::OnLanguageIdentificationEvent(
media::mojom::LanguageIdentificationEventPtr event) {}
void SodaSpeechRecognitionEngineImpl::OnSpeechRecognitionStopped() {
Abort(blink::mojom::SpeechRecognitionErrorCode::kAborted);
Abort(media::mojom::SpeechRecognitionErrorCode::kAborted);
}
void SodaSpeechRecognitionEngineImpl::
@ -193,7 +193,7 @@ void SodaSpeechRecognitionEngineImpl::OnRecognizerBound(
void SodaSpeechRecognitionEngineImpl::OnRecognizerDisconnected() {
DCHECK_CALLED_ON_VALID_SEQUENCE(main_sequence_checker_);
Abort(blink::mojom::SpeechRecognitionErrorCode::kAborted);
Abort(media::mojom::SpeechRecognitionErrorCode::kAborted);
}
void SodaSpeechRecognitionEngineImpl::SendAudioToSpeechRecognitionService(
@ -212,13 +212,13 @@ void SodaSpeechRecognitionEngineImpl::MarkDone() {
}
void SodaSpeechRecognitionEngineImpl::Abort(
blink::mojom::SpeechRecognitionErrorCode error_code) {
media::mojom::SpeechRecognitionErrorCode error_code) {
DVLOG(1) << "Aborting with error " << error_code;
if (error_code != blink::mojom::SpeechRecognitionErrorCode::kNone) {
if (error_code != media::mojom::SpeechRecognitionErrorCode::kNone) {
delegate_->OnSpeechRecognitionEngineError(
blink::mojom::SpeechRecognitionError(
error_code, blink::mojom::SpeechAudioErrorDetails::kNone));
media::mojom::SpeechRecognitionError(
error_code, media::mojom::SpeechAudioErrorDetails::kNone));
}
}

@ -82,7 +82,7 @@ class CONTENT_EXPORT SodaSpeechRecognitionEngineImpl
void MarkDone();
void Abort(blink::mojom::SpeechRecognitionErrorCode error);
void Abort(media::mojom::SpeechRecognitionErrorCode error);
media::mojom::AudioDataS16Ptr ConvertToAudioDataS16(const AudioChunk& data);

@ -39,11 +39,11 @@ class SodaSpeechRecognitionEngineImplTest
// SpeechRecognitionRequestDelegate methods.
void OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
override;
void OnSpeechRecognitionEngineEndOfUtterance() override;
void OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) override;
const media::mojom::SpeechRecognitionError& error) override;
// context.
std::unique_ptr<SodaSpeechRecognitionEngineImpl> CreateSpeechRecognition(
@ -55,16 +55,16 @@ class SodaSpeechRecognitionEngineImplTest
// operations.
void SendDummyAudioChunk();
void FillRecognitionExpectResults(
std::vector<blink::mojom::SpeechRecognitionResultPtr>& results,
std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results,
const char* transcription,
bool is_final);
void SendSpeechResult(const char* result, bool is_final);
void SendTranscriptionError();
void ExpectResultsReceived(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results);
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results);
bool ResultsAreEqual(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& a,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& b);
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& a,
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& b);
protected:
content::BrowserTaskEnvironment task_environment_;
@ -74,8 +74,9 @@ class SodaSpeechRecognitionEngineImplTest
fake_speech_recognition_mgr_delegate_;
std::unique_ptr<SodaSpeechRecognitionEngineImpl> client_under_test_;
base::queue<std::vector<blink::mojom::SpeechRecognitionResultPtr>> results_;
blink::mojom::SpeechRecognitionErrorCode error_;
base::queue<std::vector<media::mojom::WebSpeechRecognitionResultPtr>>
results_;
media::mojom::SpeechRecognitionErrorCode error_;
int end_of_utterance_counter_ = 0;
bool recognition_ready_ = false;
@ -83,7 +84,7 @@ class SodaSpeechRecognitionEngineImplTest
};
void SodaSpeechRecognitionEngineImplTest::SetUp() {
error_ = blink::mojom::SpeechRecognitionErrorCode::kNone;
error_ = media::mojom::SpeechRecognitionErrorCode::kNone;
end_of_utterance_counter_ = 0;
recognition_ready_ = false;
browser_context_ = std::make_unique<content::TestBrowserContext>();
@ -97,7 +98,7 @@ void SodaSpeechRecognitionEngineImplTest::SetUp() {
void SodaSpeechRecognitionEngineImplTest::TearDown() {}
void SodaSpeechRecognitionEngineImplTest::OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
results_.push(mojo::Clone(results));
}
@ -107,7 +108,7 @@ void SodaSpeechRecognitionEngineImplTest::
}
void SodaSpeechRecognitionEngineImplTest::OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) {
const media::mojom::SpeechRecognitionError& error) {
error_ = error.code;
}
@ -158,15 +159,15 @@ void SodaSpeechRecognitionEngineImplTest::SendDummyAudioChunk() {
}
void SodaSpeechRecognitionEngineImplTest::FillRecognitionExpectResults(
std::vector<blink::mojom::SpeechRecognitionResultPtr>& results,
std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results,
const char* transcription,
bool is_final) {
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = !is_final;
blink::mojom::SpeechRecognitionHypothesisPtr hypothesis =
blink::mojom::SpeechRecognitionHypothesis::New();
media::mojom::SpeechRecognitionHypothesisPtr hypothesis =
media::mojom::SpeechRecognitionHypothesis::New();
hypothesis->confidence = 1.0;
hypothesis->utterance = base::UTF8ToUTF16(transcription);
result->hypotheses.push_back(std::move(hypothesis));
@ -187,15 +188,15 @@ void SodaSpeechRecognitionEngineImplTest::SendTranscriptionError() {
}
void SodaSpeechRecognitionEngineImplTest::ExpectResultsReceived(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
ASSERT_GE(1U, results_.size());
ASSERT_TRUE(ResultsAreEqual(results, results_.front()));
results_.pop();
}
bool SodaSpeechRecognitionEngineImplTest::ResultsAreEqual(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& a,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& b) {
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& a,
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& b) {
if (a.size() != b.size()) {
return false;
}
@ -208,9 +209,9 @@ bool SodaSpeechRecognitionEngineImplTest::ResultsAreEqual(
return false;
}
for (size_t i = 0; i < (*it_a)->hypotheses.size(); ++i) {
const blink::mojom::SpeechRecognitionHypothesisPtr& hyp_a =
const media::mojom::SpeechRecognitionHypothesisPtr& hyp_a =
(*it_a)->hypotheses[i];
const blink::mojom::SpeechRecognitionHypothesisPtr& hyp_b =
const media::mojom::SpeechRecognitionHypothesisPtr& hyp_b =
(*it_b)->hypotheses[i];
if (hyp_a->utterance != hyp_b->utterance ||
hyp_a->confidence != hyp_b->confidence) {
@ -235,19 +236,19 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionResults) {
client_under_test_->StartRecognition();
SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> first_results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> first_results;
FillRecognitionExpectResults(first_results, kFirstSpeechResult, false);
SendSpeechResult(kFirstSpeechResult, /*is_final=*/false);
ExpectResultsReceived(first_results);
SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> second_results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> second_results;
FillRecognitionExpectResults(second_results, kSecondSpeechResult, false);
SendSpeechResult(kSecondSpeechResult, /*is_final=*/false);
ExpectResultsReceived(second_results);
SendTranscriptionError();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNoSpeech, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNoSpeech, error_);
}
TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionAudioChunksEnded) {
@ -263,7 +264,7 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionAudioChunksEnded) {
client_under_test_->StartRecognition();
SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> first_results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> first_results;
FillRecognitionExpectResults(first_results, kFirstSpeechResult, false);
SendSpeechResult(kFirstSpeechResult, /*is_final=*/false);
ExpectResultsReceived(first_results);
@ -272,7 +273,7 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionAudioChunksEnded) {
client_under_test_->AudioChunksEnded();
client_under_test_->EndRecognition();
loop.RunUntilIdle();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAborted, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kAborted, error_);
}
TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionEndOfUtterance) {
@ -289,12 +290,12 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionEndOfUtterance) {
client_under_test_->StartRecognition();
SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> first_results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> first_results;
FillRecognitionExpectResults(first_results, kFirstSpeechResult, false);
SendSpeechResult(kFirstSpeechResult, /*is_final=*/false);
ExpectResultsReceived(first_results);
std::vector<blink::mojom::SpeechRecognitionResultPtr> second_results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> second_results;
FillRecognitionExpectResults(second_results, kSecondSpeechResult, true);
SendSpeechResult(kSecondSpeechResult, /*is_final=*/true);
ExpectResultsReceived(second_results);
@ -316,7 +317,7 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionEnd) {
client_under_test_->StartRecognition();
SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> first_results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> first_results;
FillRecognitionExpectResults(first_results, kFirstSpeechResult, false);
SendSpeechResult(kFirstSpeechResult, /*is_final=*/false);
ExpectResultsReceived(first_results);
@ -324,7 +325,7 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionEnd) {
client_under_test_->EndRecognition();
SendDummyAudioChunk();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNotAllowed, error_);
ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNotAllowed, error_);
}
TEST_F(SodaSpeechRecognitionEngineImplTest, SetOnReadyCallbackAfterBind) {

@ -157,15 +157,15 @@ std::string MakeGoodResponse() {
proto::SpeechRecognitionEvent proto_event;
proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS);
proto::SpeechRecognitionResult* proto_result = proto_event.add_result();
blink::mojom::SpeechRecognitionResultPtr result =
blink::mojom::SpeechRecognitionResult::New();
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
media::mojom::WebSpeechRecognitionResultPtr result =
media::mojom::WebSpeechRecognitionResult::New();
result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
u"Pictures of the moon", 1.0F));
proto_result->set_final(!result->is_provisional);
for (size_t i = 0; i < result->hypotheses.size(); ++i) {
proto::SpeechRecognitionAlternative* proto_alternative =
proto_result->add_alternative();
const blink::mojom::SpeechRecognitionHypothesisPtr& hypothesis =
const media::mojom::SpeechRecognitionHypothesisPtr& hypothesis =
result->hypotheses[i];
proto_alternative->set_confidence(hypothesis->confidence);
proto_alternative->set_transcript(base::UTF16ToUTF8(hypothesis->utterance));

@ -42,7 +42,7 @@ SpeechRecognitionDispatcherHost::SpeechRecognitionDispatcherHost(
void SpeechRecognitionDispatcherHost::Create(
int render_process_id,
int render_frame_id,
mojo::PendingReceiver<blink::mojom::SpeechRecognizer> receiver) {
mojo::PendingReceiver<media::mojom::SpeechRecognizer> receiver) {
mojo::MakeSelfOwnedReceiver(std::make_unique<SpeechRecognitionDispatcherHost>(
render_process_id, render_frame_id),
std::move(receiver));
@ -55,10 +55,10 @@ SpeechRecognitionDispatcherHost::AsWeakPtr() {
return weak_factory_.GetWeakPtr();
}
// -------- blink::mojom::SpeechRecognizer interface implementation ------------
// -------- media::mojom::SpeechRecognizer interface implementation ------------
void SpeechRecognitionDispatcherHost::Start(
blink::mojom::StartSpeechRecognitionRequestParamsPtr params) {
media::mojom::StartSpeechRecognitionRequestParamsPtr params) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
GetUIThreadTaskRunner({})->PostTask(
@ -74,7 +74,7 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI(
speech_recognition_dispatcher_host,
int render_process_id,
int render_frame_id,
blink::mojom::StartSpeechRecognitionRequestParamsPtr params) {
media::mojom::StartSpeechRecognitionRequestParamsPtr params) {
DCHECK_CURRENTLY_ON(BrowserThread::UI);
int embedder_render_process_id = 0;
int embedder_render_frame_id = MSG_ROUTING_NONE;
@ -145,7 +145,7 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI(
}
void SpeechRecognitionDispatcherHost::StartSessionOnIO(
blink::mojom::StartSpeechRecognitionRequestParamsPtr params,
media::mojom::StartSpeechRecognitionRequestParamsPtr params,
int embedder_render_process_id,
int embedder_render_frame_id,
const url::Origin& origin,
@ -178,7 +178,7 @@ void SpeechRecognitionDispatcherHost::StartSessionOnIO(
config.interim_results = params->interim_results;
config.event_listener = session->AsWeakPtr();
for (blink::mojom::SpeechRecognitionGrammarPtr& grammar_ptr :
for (media::mojom::SpeechRecognitionGrammarPtr& grammar_ptr :
params->grammars) {
config.grammars.push_back(*grammar_ptr);
}

@ -11,10 +11,10 @@
#include "base/memory/weak_ptr.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "media/mojo/mojom/speech_recognizer.mojom.h"
#include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/pending_remote.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom.h"
namespace network {
class PendingSharedURLLoaderFactory;
@ -31,7 +31,7 @@ class SpeechRecognitionManager;
// SpeechRecognitionDispatcherHost is an implementation of the SpeechRecognizer
// interface that allows a RenderFrame to start a speech recognition session
// in the browser process, by communicating with SpeechRecognitionManager.
class SpeechRecognitionDispatcherHost : public blink::mojom::SpeechRecognizer {
class SpeechRecognitionDispatcherHost : public media::mojom::SpeechRecognizer {
public:
SpeechRecognitionDispatcherHost(int render_process_id, int render_frame_id);
@ -44,12 +44,12 @@ class SpeechRecognitionDispatcherHost : public blink::mojom::SpeechRecognizer {
static void Create(
int render_process_id,
int render_frame_id,
mojo::PendingReceiver<blink::mojom::SpeechRecognizer> receiver);
mojo::PendingReceiver<media::mojom::SpeechRecognizer> receiver);
base::WeakPtr<SpeechRecognitionDispatcherHost> AsWeakPtr();
// blink::mojom::SpeechRecognizer implementation
// media::mojom::SpeechRecognizer implementation
void Start(
blink::mojom::StartSpeechRecognitionRequestParamsPtr params) override;
media::mojom::StartSpeechRecognitionRequestParamsPtr params) override;
private:
static void StartRequestOnUI(
@ -57,9 +57,9 @@ class SpeechRecognitionDispatcherHost : public blink::mojom::SpeechRecognizer {
speech_recognition_dispatcher_host,
int render_process_id,
int render_frame_id,
blink::mojom::StartSpeechRecognitionRequestParamsPtr params);
media::mojom::StartSpeechRecognitionRequestParamsPtr params);
void StartSessionOnIO(
blink::mojom::StartSpeechRecognitionRequestParamsPtr params,
media::mojom::StartSpeechRecognitionRequestParamsPtr params,
int embedder_render_process_id,
int embedder_render_frame_id,
const url::Origin& origin,

@ -10,8 +10,8 @@
#include "components/speech/audio_buffer.h"
#include "content/common/content_export.h"
#include "media/base/audio_parameters.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
namespace blink {
namespace mojom {
@ -39,11 +39,11 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
public:
// Called whenever a result is retrieved.
virtual void OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>&
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>&
results) = 0;
virtual void OnSpeechRecognitionEngineEndOfUtterance() = 0;
virtual void OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) = 0;
const media::mojom::SpeechRecognitionError& error) = 0;
protected:
virtual ~Delegate() = default;

@ -35,8 +35,8 @@
#include "content/public/browser/web_contents_observer.h"
#include "content/public/common/content_client.h"
#include "media/audio/audio_device_description.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
#include "url/gurl.h"
#include "url/origin.h"
@ -342,9 +342,9 @@ void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id,
weak_factory_.GetWeakPtr(), session_id, EVENT_START));
} else {
OnRecognitionError(
session_id, blink::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kNotAllowed,
blink::mojom::SpeechAudioErrorDetails::kNone));
session_id, media::mojom::SpeechRecognitionError(
media::mojom::SpeechRecognitionErrorCode::kNotAllowed,
media::mojom::SpeechAudioErrorDetails::kNone));
base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask(
FROM_HERE,
base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent,
@ -522,7 +522,7 @@ void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) {
void SpeechRecognitionManagerImpl::OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
if (!SessionExists(session_id))
return;
@ -535,7 +535,7 @@ void SpeechRecognitionManagerImpl::OnRecognitionResults(
void SpeechRecognitionManagerImpl::OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) {
const media::mojom::SpeechRecognitionError& error) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
if (!SessionExists(session_id))
return;

@ -16,8 +16,8 @@
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/mediastream/media_stream.mojom-forward.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
namespace media {
class AudioSystem;
@ -82,11 +82,11 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
override;
void OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) override;
const media::mojom::SpeechRecognitionError& error) override;
void OnAudioLevelsChange(int session_id,
float volume,
float noise_volume) override;

@ -15,7 +15,7 @@
namespace content {
SpeechRecognitionSession::SpeechRecognitionSession(
mojo::PendingRemote<blink::mojom::SpeechRecognitionSessionClient> client)
mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient> client)
: client_(std::move(client)) {
client_.set_disconnect_handler(
base::BindOnce(&SpeechRecognitionSession::ConnectionErrorHandler,
@ -75,17 +75,17 @@ void SpeechRecognitionSession::OnRecognitionEnd(int session_id) {
void SpeechRecognitionSession::OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
client_->ResultRetrieved(mojo::Clone(results));
}
void SpeechRecognitionSession::OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) {
const media::mojom::SpeechRecognitionError& error) {
if (!client_.is_bound()) {
return;
}
client_->ErrorOccurred(blink::mojom::SpeechRecognitionError::New(error));
client_->ErrorOccurred(media::mojom::SpeechRecognitionError::New(error));
}
// The events below are currently not used by speech JS APIs implementation.

@ -10,29 +10,29 @@
#include "base/memory/weak_ptr.h"
#include "content/browser/speech/speech_recognition_manager_impl.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "media/mojo/mojom/speech_recognizer.mojom.h"
#include "mojo/public/cpp/bindings/pending_remote.h"
#include "mojo/public/cpp/bindings/remote.h"
#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom.h"
namespace content {
// SpeechRecognitionSession implements the
// blink::mojom::SpeechRecognitionSession interface for a particular session. It
// media::mojom::SpeechRecognitionSession interface for a particular session. It
// also acts as a proxy for events sent from SpeechRecognitionManager, and
// forwards the events to the renderer using a
// mojo::Remote<SpeechRecognitionSessionClient> (that is passed from the render
// process).
class SpeechRecognitionSession : public blink::mojom::SpeechRecognitionSession,
class SpeechRecognitionSession : public media::mojom::SpeechRecognitionSession,
public SpeechRecognitionEventListener {
public:
explicit SpeechRecognitionSession(
mojo::PendingRemote<blink::mojom::SpeechRecognitionSessionClient> client);
mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient> client);
~SpeechRecognitionSession() override;
base::WeakPtr<SpeechRecognitionSession> AsWeakPtr();
void SetSessionId(int session_id) { session_id_ = session_id; }
// blink::mojom::SpeechRecognitionSession implementation.
// media::mojom::SpeechRecognitionSession implementation.
void Abort() override;
void StopCapture() override;
@ -45,11 +45,11 @@ class SpeechRecognitionSession : public blink::mojom::SpeechRecognitionSession,
void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
override;
void OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) override;
const media::mojom::SpeechRecognitionError& error) override;
void OnAudioLevelsChange(int session_id,
float volume,
float noise_volume) override;
@ -58,7 +58,7 @@ class SpeechRecognitionSession : public blink::mojom::SpeechRecognitionSession,
void ConnectionErrorHandler();
int session_id_ = SpeechRecognitionManager::kSessionIDInvalid;
mojo::Remote<blink::mojom::SpeechRecognitionSessionClient> client_;
mojo::Remote<media::mojom::SpeechRecognitionSessionClient> client_;
bool stopped_ = false;
base::WeakPtrFactory<SpeechRecognitionSession> weak_factory_{this};

@ -310,7 +310,7 @@ void SpeechRecognizerImpl::OnCaptureError(
}
void SpeechRecognizerImpl::OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
FSMEventArgs event_args(EVENT_ENGINE_RESULT);
event_args.engine_results = mojo::Clone(results);
GetIOThreadTaskRunner({})->PostTask(
@ -324,7 +324,7 @@ void SpeechRecognizerImpl::OnSpeechRecognitionEngineEndOfUtterance() {
}
void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) {
const media::mojom::SpeechRecognitionError& error) {
FSMEventArgs event_args(EVENT_ENGINE_ERROR);
event_args.engine_error = error;
GetIOThreadTaskRunner({})->PostTask(
@ -679,9 +679,9 @@ SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) {
listener()->OnSoundStart(session_id());
return STATE_RECOGNIZING;
} else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) {
return Abort(blink::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kNoSpeech,
blink::mojom::SpeechAudioErrorDetails::kNone));
return Abort(media::mojom::SpeechRecognitionError(
media::mojom::SpeechRecognitionErrorCode::kNoSpeech,
media::mojom::SpeechAudioErrorDetails::kNone));
}
return STATE_WAITING_FOR_SPEECH;
}
@ -712,27 +712,27 @@ SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::AbortSilently(const FSMEventArgs& event_args) {
DCHECK_NE(event_args.event, EVENT_AUDIO_ERROR);
DCHECK_NE(event_args.event, EVENT_ENGINE_ERROR);
return Abort(blink::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kNone,
blink::mojom::SpeechAudioErrorDetails::kNone));
return Abort(media::mojom::SpeechRecognitionError(
media::mojom::SpeechRecognitionErrorCode::kNone,
media::mojom::SpeechAudioErrorDetails::kNone));
}
SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::AbortWithError(const FSMEventArgs& event_args) {
if (event_args.event == EVENT_AUDIO_ERROR) {
return Abort(blink::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kAudioCapture,
blink::mojom::SpeechAudioErrorDetails::kNone));
return Abort(media::mojom::SpeechRecognitionError(
media::mojom::SpeechRecognitionErrorCode::kAudioCapture,
media::mojom::SpeechAudioErrorDetails::kNone));
} else if (event_args.event == EVENT_ENGINE_ERROR) {
return Abort(event_args.engine_error);
}
return Abort(blink::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kAborted,
blink::mojom::SpeechAudioErrorDetails::kNone));
return Abort(media::mojom::SpeechRecognitionError(
media::mojom::SpeechRecognitionErrorCode::kAborted,
media::mojom::SpeechAudioErrorDetails::kNone));
}
SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
const blink::mojom::SpeechRecognitionError& error) {
const media::mojom::SpeechRecognitionError& error) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
if (IsCapturingAudio())
@ -757,8 +757,9 @@ SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)
listener()->OnAudioEnd(session_id());
if (error.code != blink::mojom::SpeechRecognitionErrorCode::kNone)
if (error.code != media::mojom::SpeechRecognitionErrorCode::kNone) {
listener()->OnRecognitionError(session_id(), error);
}
listener()->OnRecognitionEnd(session_id());
@ -787,13 +788,13 @@ SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::ProcessIntermediateResult(
SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) {
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results =
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results =
event_args.engine_results;
auto i = results.begin();
bool provisional_results_pending = false;
bool results_are_empty = true;
for (; i != results.end(); ++i) {
const blink::mojom::SpeechRecognitionResultPtr& result = *i;
const media::mojom::WebSpeechRecognitionResultPtr& result = *i;
if (result->is_provisional) {
DCHECK(provisional_results_);
provisional_results_pending = true;
@ -905,8 +906,8 @@ media::AudioCapturerSource* SpeechRecognizerImpl::GetAudioCapturerSource() {
SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
: event(event_value),
audio_data(nullptr),
engine_error(blink::mojom::SpeechRecognitionErrorCode::kNone,
blink::mojom::SpeechAudioErrorDetails::kNone) {}
engine_error(media::mojom::SpeechRecognitionErrorCode::kNone,
media::mojom::SpeechAudioErrorDetails::kNone) {}
SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other)
: event(other.event),

@ -15,8 +15,8 @@
#include "content/browser/speech/speech_recognizer.h"
#include "content/common/content_export.h"
#include "media/base/audio_capturer_source.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
namespace media {
class AudioBus;
@ -99,8 +99,8 @@ class CONTENT_EXPORT SpeechRecognizerImpl
FSMEvent event;
scoped_refptr<AudioChunk> audio_data;
std::vector<blink::mojom::SpeechRecognitionResultPtr> engine_results;
blink::mojom::SpeechRecognitionError engine_error;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> engine_results;
media::mojom::SpeechRecognitionError engine_error;
};
~SpeechRecognizerImpl() override;
@ -129,7 +129,7 @@ class CONTENT_EXPORT SpeechRecognizerImpl
FSMState ProcessFinalResult(const FSMEventArgs& event_args);
FSMState AbortSilently(const FSMEventArgs& event_args);
FSMState AbortWithError(const FSMEventArgs& event_args);
FSMState Abort(const blink::mojom::SpeechRecognitionError& error);
FSMState Abort(const media::mojom::SpeechRecognitionError& error);
FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
FSMState DoNothing(const FSMEventArgs& event_args) const;
FSMState NotFeasible(const FSMEventArgs& event_args);
@ -156,11 +156,11 @@ class CONTENT_EXPORT SpeechRecognizerImpl
// SpeechRecognitionEngineDelegate methods.
void OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
override;
void OnSpeechRecognitionEngineEndOfUtterance() override;
void OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) override;
const media::mojom::SpeechRecognitionError& error) override;
media::AudioSystem* GetAudioSystem();
void CreateAudioCapturerSource();

@ -18,7 +18,7 @@
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
using base::android::AppendJavaStringArrayToStringVector;
using base::android::AttachCurrentThread;
@ -171,12 +171,12 @@ void SpeechRecognizerImplAndroid::OnRecognitionResults(
std::vector<float> scores(options.size(), 0.0);
if (floats != NULL)
JavaFloatArrayToFloatVector(env, floats, &scores);
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back();
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(media::mojom::WebSpeechRecognitionResult::New());
media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
CHECK_EQ(options.size(), scores.size());
for (size_t i = 0; i < options.size(); ++i) {
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
options[i], static_cast<double>(scores[i])));
}
result->is_provisional = provisional;
@ -188,7 +188,7 @@ void SpeechRecognizerImplAndroid::OnRecognitionResults(
}
void SpeechRecognizerImplAndroid::OnRecognitionResultsOnIOThread(
std::vector<blink::mojom::SpeechRecognitionResultPtr> results) {
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results) {
DCHECK_CURRENTLY_ON(BrowserThread::IO);
listener()->OnRecognitionResults(session_id(), results);
}
@ -207,9 +207,9 @@ void SpeechRecognizerImplAndroid::OnRecognitionError(
DCHECK_CURRENTLY_ON(BrowserThread::IO);
listener()->OnRecognitionError(
session_id(),
blink::mojom::SpeechRecognitionError(
static_cast<blink::mojom::SpeechRecognitionErrorCode>(error),
blink::mojom::SpeechAudioErrorDetails::kNone));
media::mojom::SpeechRecognitionError(
static_cast<media::mojom::SpeechRecognitionErrorCode>(error),
media::mojom::SpeechAudioErrorDetails::kNone));
}
void SpeechRecognizerImplAndroid::OnRecognitionEnd(

@ -12,8 +12,8 @@
#include "base/android/scoped_java_ref.h"
#include "content/browser/speech/speech_recognizer.h"
#include "content/common/content_export.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
namespace content {
@ -65,7 +65,7 @@ class CONTENT_EXPORT SpeechRecognizerImplAndroid : public SpeechRecognizer {
bool continuous,
bool interim_results);
void OnRecognitionResultsOnIOThread(
std::vector<blink::mojom::SpeechRecognitionResultPtr> results);
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results);
~SpeechRecognizerImplAndroid() override;

@ -84,7 +84,7 @@ class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
audio_ended_(false),
sound_started_(false),
sound_ended_(false),
error_(blink::mojom::SpeechRecognitionErrorCode::kNone),
error_(media::mojom::SpeechRecognitionErrorCode::kNone),
volume_(-1.0f) {
// This test environment is not set up to support out-of-process services.
feature_list_.InitWithFeatures(
@ -196,14 +196,14 @@ class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
void OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
override {
result_received_ = true;
}
void OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) override {
const media::mojom::SpeechRecognitionError& error) override {
EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(recognition_ended_);
error_ = error.code;
@ -301,7 +301,7 @@ class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
bool audio_ended_;
bool sound_started_;
bool sound_ended_;
blink::mojom::SpeechRecognitionErrorCode error_;
media::mojom::SpeechRecognitionErrorCode error_;
std::vector<uint8_t> audio_packet_;
std::unique_ptr<media::AudioBus> audio_bus_;
float volume_;
@ -321,7 +321,7 @@ TEST_F(SpeechRecognizerImplTest, StartNoInputDevices) {
EXPECT_FALSE(result_received_);
OnCaptureError();
base::RunLoop().RunUntilIdle();
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAudioCapture, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kAudioCapture, error_);
CheckFinalEventsConsistency();
}
@ -339,7 +339,7 @@ TEST_F(SpeechRecognizerImplTest, StartFakeInputDevice) {
EXPECT_TRUE(recognition_started_);
EXPECT_TRUE(audio_started_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
recognizer_->AbortRecognition();
base::RunLoop().RunUntilIdle();
CheckFinalEventsConsistency();
@ -369,7 +369,7 @@ TEST_F(SpeechRecognizerImplTest, StopBeforeDeviceInfoReceived) {
EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
CheckFinalEventsConsistency();
}
@ -397,7 +397,7 @@ TEST_F(SpeechRecognizerImplTest, CancelBeforeDeviceInfoReceived) {
EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
CheckFinalEventsConsistency();
}
@ -412,7 +412,7 @@ TEST_F(SpeechRecognizerImplTest, StopNoData) {
EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
CheckFinalEventsConsistency();
}
@ -428,7 +428,7 @@ TEST_F(SpeechRecognizerImplTest, CancelNoData) {
EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAborted, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kAborted, error_);
CheckFinalEventsConsistency();
}
@ -503,7 +503,7 @@ TEST_F(SpeechRecognizerImplTest, StopWithData) {
EXPECT_TRUE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
// Create a response string.
proto::SpeechRecognitionEvent proto_event;
@ -528,7 +528,7 @@ TEST_F(SpeechRecognizerImplTest, StopWithData) {
EXPECT_TRUE(recognition_ended_);
EXPECT_TRUE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
CheckFinalEventsConsistency();
}
@ -548,7 +548,7 @@ TEST_F(SpeechRecognizerImplTest, CancelWithData) {
EXPECT_TRUE(recognition_started_);
EXPECT_TRUE(audio_started_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAborted, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kAborted, error_);
CheckFinalEventsConsistency();
}
@ -571,7 +571,7 @@ TEST_F(SpeechRecognizerImplTest, ConnectionError) {
EXPECT_TRUE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
// Issue the network callback to complete the process.
const network::TestURLLoaderFactory::PendingRequest* pending_request;
@ -583,7 +583,7 @@ TEST_F(SpeechRecognizerImplTest, ConnectionError) {
base::RunLoop().RunUntilIdle();
EXPECT_TRUE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
CheckFinalEventsConsistency();
}
@ -606,7 +606,7 @@ TEST_F(SpeechRecognizerImplTest, ServerError) {
EXPECT_TRUE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
const network::TestURLLoaderFactory::PendingRequest* pending_request;
ASSERT_TRUE(GetUpstreamRequest(&pending_request));
@ -621,7 +621,7 @@ TEST_F(SpeechRecognizerImplTest, ServerError) {
base::RunLoop().RunUntilIdle();
EXPECT_TRUE(recognition_ended_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
CheckFinalEventsConsistency();
}
@ -638,7 +638,7 @@ TEST_F(SpeechRecognizerImplTest, OnCaptureError_PropagatesError) {
EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAudioCapture, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kAudioCapture, error_);
CheckFinalEventsConsistency();
}
@ -663,7 +663,7 @@ TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) {
EXPECT_TRUE(recognition_started_);
EXPECT_TRUE(audio_started_);
EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNoSpeech, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNoSpeech, error_);
CheckFinalEventsConsistency();
}
@ -692,7 +692,7 @@ TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
}
base::RunLoop().RunUntilIdle();
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_TRUE(audio_started_);
EXPECT_FALSE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
@ -733,7 +733,7 @@ TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
EXPECT_NEAR(0.89926866f, volume_, 0.00001f);
EXPECT_FLOAT_EQ(0.75071919f, noise_volume_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_FALSE(audio_ended_);
EXPECT_FALSE(recognition_ended_);
recognizer_->AbortRecognition();

@ -162,6 +162,7 @@ android_library("content_full_java") {
"//media/capture/video/android:capture_java",
"//media/midi:midi_java",
"//media/mojo/mojom:mojom_java",
"//media/mojo/mojom:web_speech_recognition_java",
"//mojo/public/java:base_java",
"//mojo/public/java:bindings_java",
"//mojo/public/java:system_java",

@ -26,9 +26,9 @@ import org.chromium.base.BuildInfo;
import org.chromium.base.ContextUtils;
import org.chromium.base.Log;
import org.chromium.base.PackageUtils;
import org.chromium.blink.mojom.SpeechRecognitionErrorCode;
import org.chromium.content.R;
import org.chromium.content_public.browser.SpeechRecognition;
import org.chromium.media.mojom.SpeechRecognitionErrorCode;
import org.chromium.ui.widget.Toast;
import java.util.ArrayList;

@ -573,6 +573,7 @@ source_set("browser_sources") {
"//gpu/command_buffer/service:gles2",
"//media",
"//media/capture",
"//media/mojo/mojom:web_speech_recognition",
"//net",
"//services/device/public/cpp/geolocation",
"//services/metrics/public/cpp:metrics_cpp",

@ -6,13 +6,11 @@
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_EVENT_LISTENER_H_
#include "content/common/content_export.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
namespace blink {
namespace mojom {
namespace media::mojom {
class SpeechRecognitionError;
}
} // namespace blink
} // namespace media::mojom
namespace content {
@ -40,14 +38,15 @@ class CONTENT_EXPORT SpeechRecognitionEventListener {
// Invoked when a result is retrieved.
virtual void OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) = 0;
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>&
results) = 0;
// Invoked if there was an error while capturing or recognizing audio.
// The recognition has already been cancelled when this call is made and
// no more events will be raised.
virtual void OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) = 0;
const media::mojom::SpeechRecognitionError& error) = 0;
// Informs of a change in the captured audio level, useful if displaying
// a microphone volume indicator while recording.

@ -14,8 +14,8 @@
#include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/browser/speech_recognition_session_preamble.h"
#include "media/mojo/mojom/speech_recognition_grammar.mojom.h"
#include "services/network/public/cpp/shared_url_loader_factory.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_grammar.mojom.h"
#include "url/origin.h"
namespace content {
@ -32,7 +32,7 @@ struct CONTENT_EXPORT SpeechRecognitionSessionConfig {
// Accept language header. If |language| is empty, used to get a language
// instead.
std::string accept_language;
std::vector<blink::mojom::SpeechRecognitionGrammar> grammars;
std::vector<media::mojom::SpeechRecognitionGrammar> grammars;
url::Origin origin;
bool filter_profanities;
bool continuous;

@ -14,9 +14,9 @@
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager_delegate.h"
#include "content/public/test/test_utils.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
namespace {
const char kTestResult[] = "Pictures of the moon";
@ -194,15 +194,15 @@ void FakeSpeechRecognitionManager::SetFakeRecognitionResult(
listener_->OnSoundStart(session_id_);
has_sent_result_ = true;
}
blink::mojom::SpeechRecognitionResultPtr result =
blink::mojom::SpeechRecognitionResult::New();
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
media::mojom::WebSpeechRecognitionResultPtr result =
media::mojom::WebSpeechRecognitionResult::New();
result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
base::UTF8ToUTF16(fake_result_), 1.0));
// If `is_provisional` is true, then the result is an interim result that
// could be changed. Otherwise, it's a final result. Consequently,
// `is_provisional` is the negation of `is_final`.
result->is_provisional = !is_final_;
std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(std::move(result));
listener_->OnRecognitionResults(session_id_, results);
GetUIThreadTaskRunner({})->PostTask(
@ -247,9 +247,9 @@ void FakeSpeechRecognitionManager::SendFakeSpeechRecognitionError() {
VLOG(1) << "Sending fake recognition error.";
listener_->OnRecognitionError(
session_id_, *blink::mojom::SpeechRecognitionError::New(
blink::mojom::SpeechRecognitionErrorCode::kNetwork,
blink::mojom::SpeechAudioErrorDetails::kNone));
session_id_, *media::mojom::SpeechRecognitionError::New(
media::mojom::SpeechRecognitionErrorCode::kNetwork,
media::mojom::SpeechAudioErrorDetails::kNone));
GetUIThreadTaskRunner({})->PostTask(
FROM_HERE, base::BindOnce(&FakeSpeechRecognitionManager::OnFakeErrorSent,
base::Unretained(this)));

@ -77,11 +77,11 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManager,
void OnRecognitionEnd(int session_id) override {}
void OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
override {}
void OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) override {}
const media::mojom::SpeechRecognitionError& error) override {}
void OnAudioLevelsChange(int session_id,
float volume,
float noise_volume) override {}

@ -568,6 +568,7 @@ static_library("test_support") {
"//media",
"//media/capture",
"//media/capture/mojom:image_capture",
"//media/mojo/mojom:web_speech_recognition",
"//mojo/core/embedder",
"//mojo/public/cpp/test_support:test_utils",
"//net:quic_test_tools",
@ -2114,6 +2115,7 @@ test("content_browsertests") {
deps += [
"//components/soda:utils",
"//content/public/browser:proto",
"//media/mojo/mojom:web_speech_recognition",
"//ui/base/clipboard:clipboard_test_support",
]
@ -3352,7 +3354,10 @@ test("content_unittests") {
"../browser/speech/speech_recognizer_impl_unittest.cc",
"../browser/tracing/tracing_ui_unittest.cc",
]
deps += [ "//components/speech:speech" ]
deps += [
"//components/speech:speech",
"//media/mojo/mojom:web_speech_recognition",
]
if (!is_fuchsia) {
sources += [

@ -53,11 +53,11 @@ void ShellSpeechRecognitionManagerDelegate::OnRecognitionEnd(int session_id) {
void ShellSpeechRecognitionManagerDelegate::OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result) {}
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result) {}
void ShellSpeechRecognitionManagerDelegate::OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) {}
const media::mojom::SpeechRecognitionError& error) {}
void ShellSpeechRecognitionManagerDelegate::OnAudioLevelsChange(
int session_id,

@ -41,11 +41,11 @@ class ShellSpeechRecognitionManagerDelegate
void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults(
int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result)
const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
override;
void OnRecognitionError(
int session_id,
const blink::mojom::SpeechRecognitionError& error) override;
const media::mojom::SpeechRecognitionError& error) override;
void OnAudioLevelsChange(int session_id,
float volume,
float noise_volume) override;

@ -875,3 +875,20 @@ source_set("unit_tests") {
deps += [ ":speech_recognition" ]
}
}
# Mojo definitions for the Web Speech API speech recognition interfaces
# (errors, error codes, grammars, results and the recognizer/session
# interfaces). They live under //media/mojo/mojom rather than
# //third_party/blink/public/mojom because //media cannot depend on the Blink
# public mojom targets without creating a circular dependency.
mojom("web_speech_recognition") {
# Also emit Java bindings; Java targets depend on them through the generated
# ":web_speech_recognition_java" target.
generate_java = true
sources = [
"speech_recognition_error.mojom",
"speech_recognition_error_code.mojom",
"speech_recognition_grammar.mojom",
"speech_recognition_result.mojom",
"speech_recognizer.mojom",
]
# //mojo/public/mojom/base provides string16.mojom, imported by the
# recognition result definitions. //url/mojom:url_mojom_gurl provides
# url.mojom (presumably for grammar source URLs; confirm against
# speech_recognition_grammar.mojom).
deps = [
"//mojo/public/mojom/base",
"//url/mojom:url_mojom_gurl",
]
}

@ -1,10 +1,10 @@
// Copyright 2018 The Chromium Authors
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module blink.mojom;
module media.mojom;
import "third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom";
import "media/mojo/mojom/speech_recognition_error_code.mojom";
// Used to provide further details about an audio capture related error.
enum SpeechAudioErrorDetails {
@ -15,6 +15,6 @@ enum SpeechAudioErrorDetails {
// Used to send information to the renderer about an error in the browser's
// speech recognition engine.
struct SpeechRecognitionError {
blink.mojom.SpeechRecognitionErrorCode code;
blink.mojom.SpeechAudioErrorDetails details;
media.mojom.SpeechRecognitionErrorCode code;
media.mojom.SpeechAudioErrorDetails details;
};

@ -1,8 +1,8 @@
// Copyright 2018 The Chromium Authors
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module blink.mojom;
module media.mojom;
// Used by the browser's speech recognition engine to indicate the reason for a
// speech recognition error.

@ -1,8 +1,8 @@
// Copyright 2018 The Chromium Authors
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module blink.mojom;
module media.mojom;
import "url/mojom/url.mojom";

@ -1,8 +1,8 @@
// Copyright 2018 The Chromium Authors
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module blink.mojom;
module media.mojom;
import "mojo/public/mojom/base/string16.mojom";
@ -18,9 +18,9 @@ struct SpeechRecognitionHypothesis {
};
// Group of recognition hypotheses for a particular speech segment.
struct SpeechRecognitionResult {
struct WebSpeechRecognitionResult {
// An N-best list of hypotheses.
array<blink.mojom.SpeechRecognitionHypothesis> hypotheses;
array<media.mojom.SpeechRecognitionHypothesis> hypotheses;
// False if this is the final time the speech service will return this
// particular result. If true, then this represents an interim result that

@ -1,12 +1,12 @@
// Copyright 2018 The Chromium Authors
// Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
module blink.mojom;
module media.mojom;
import "third_party/blink/public/mojom/speech/speech_recognition_grammar.mojom";
import "third_party/blink/public/mojom/speech/speech_recognition_result.mojom";
import "third_party/blink/public/mojom/speech/speech_recognition_error.mojom";
import "media/mojo/mojom/speech_recognition_grammar.mojom";
import "media/mojo/mojom/speech_recognition_result.mojom";
import "media/mojo/mojom/speech_recognition_error.mojom";
// Created by the renderer and sent to the browser to start a speech recognition
// session.
@ -65,7 +65,7 @@ interface SpeechRecognitionSession {
// WebSpeechRecognitionHandle.
interface SpeechRecognitionSessionClient {
// Called to dispatch the "result" event.
ResultRetrieved(array<SpeechRecognitionResult> results);
ResultRetrieved(array<WebSpeechRecognitionResult> results);
// Called to dispatch the "nomatch" event if the error code passed is of type
// kNoMatch; otherwise dispatches an "error" event.

@ -223,11 +223,6 @@ mojom("mojom_platform") {
"smart_card/smart_card.mojom",
"sms/webotp_service.mojom",
"speculation_rules/speculation_rules.mojom",
"speech/speech_recognition_error.mojom",
"speech/speech_recognition_error_code.mojom",
"speech/speech_recognition_grammar.mojom",
"speech/speech_recognition_result.mojom",
"speech/speech_recognizer.mojom",
"speech/speech_synthesis.mojom",
"storage_access/storage_access_automation.mojom",
"subapps/sub_apps_service.mojom",

@ -36,4 +36,6 @@ blink_modules_sources("speech") {
"speech_synthesis_voice.cc",
"speech_synthesis_voice.h",
]
deps = [ "//media/mojo/mojom:web_speech_recognition_blink" ]
}

@ -1,4 +1,5 @@
include_rules = [
"+media",
"-third_party/blink/renderer/modules",
"+third_party/blink/renderer/modules/event_modules.h",
"+third_party/blink/renderer/modules/event_target_modules.h",

@ -28,10 +28,10 @@
#include <algorithm>
#include "build/build_config.h"
#include "media/mojo/mojom/speech_recognition_error.mojom-blink.h"
#include "media/mojo/mojom/speech_recognition_result.mojom-blink.h"
#include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/pending_remote.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom-blink.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom-blink.h"
#include "third_party/blink/renderer/core/frame/local_dom_window.h"
#include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/core/page/page.h"
@ -96,7 +96,7 @@ void SpeechRecognition::abort() {
}
void SpeechRecognition::ResultRetrieved(
WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results) {
WTF::Vector<media::mojom::blink::WebSpeechRecognitionResultPtr> results) {
auto* it = std::stable_partition(
results.begin(), results.end(),
[](const auto& result) { return !result->is_provisional; });
@ -139,8 +139,9 @@ void SpeechRecognition::ResultRetrieved(
}
void SpeechRecognition::ErrorOccurred(
mojom::blink::SpeechRecognitionErrorPtr error) {
if (error->code == mojom::blink::SpeechRecognitionErrorCode::kNoMatch) {
media::mojom::blink::SpeechRecognitionErrorPtr error) {
if (error->code ==
media::mojom::blink::SpeechRecognitionErrorCode::kNoMatch) {
DispatchEvent(*SpeechRecognitionEvent::CreateNoMatch(nullptr));
} else {
// TODO(primiano): message?
@ -202,9 +203,9 @@ void SpeechRecognition::PageVisibilityChanged() {
}
void SpeechRecognition::OnConnectionError() {
ErrorOccurred(mojom::blink::SpeechRecognitionError::New(
mojom::blink::SpeechRecognitionErrorCode::kNetwork,
mojom::blink::SpeechAudioErrorDetails::kNone));
ErrorOccurred(media::mojom::blink::SpeechRecognitionError::New(
media::mojom::blink::SpeechRecognitionErrorCode::kNetwork,
media::mojom::blink::SpeechAudioErrorDetails::kNone));
Ended();
}
@ -228,7 +229,7 @@ void SpeechRecognition::StartInternal(ExceptionState* exception_state) {
}
final_results_.clear();
mojo::PendingRemote<mojom::blink::SpeechRecognitionSessionClient>
mojo::PendingRemote<media::mojom::blink::SpeechRecognitionSessionClient>
session_client;
// See https://bit.ly/2S0zRAS for task types.
receiver_.Bind(

@ -26,7 +26,7 @@
#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_H_
#define THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_H_
#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom-blink.h"
#include "media/mojo/mojom/speech_recognizer.mojom-blink.h"
#include "third_party/blink/public/platform/web_private_ptr.h"
#include "third_party/blink/renderer/bindings/core/v8/active_script_wrappable.h"
#include "third_party/blink/renderer/core/execution_context/execution_context_lifecycle_observer.h"
@ -51,7 +51,7 @@ class MODULES_EXPORT SpeechRecognition final
: public EventTarget,
public ActiveScriptWrappable<SpeechRecognition>,
public ExecutionContextLifecycleObserver,
public mojom::blink::SpeechRecognitionSessionClient,
public media::mojom::blink::SpeechRecognitionSessionClient,
public PageVisibilityObserver {
DEFINE_WRAPPERTYPEINFO();
@ -83,10 +83,12 @@ class MODULES_EXPORT SpeechRecognition final
void stopFunction();
void abort();
// mojom::blink::SpeechRecognitionSessionClient
// media::mojom::blink::SpeechRecognitionSessionClient
void ResultRetrieved(
WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results) override;
void ErrorOccurred(mojom::blink::SpeechRecognitionErrorPtr error) override;
WTF::Vector<media::mojom::blink::WebSpeechRecognitionResultPtr> results)
override;
void ErrorOccurred(
media::mojom::blink::SpeechRecognitionErrorPtr error) override;
void Started() override;
void AudioStarted() override;
void SoundStarted() override;
@ -135,10 +137,10 @@ class MODULES_EXPORT SpeechRecognition final
bool started_;
bool stopping_;
HeapVector<Member<SpeechRecognitionResult>> final_results_;
HeapMojoReceiver<mojom::blink::SpeechRecognitionSessionClient,
HeapMojoReceiver<media::mojom::blink::SpeechRecognitionSessionClient,
SpeechRecognition>
receiver_;
HeapMojoRemote<mojom::blink::SpeechRecognitionSession> session_;
HeapMojoRemote<media::mojom::blink::SpeechRecognitionSession> session_;
};
} // namespace blink

@ -27,6 +27,7 @@
#include <memory>
#include "media/mojo/mojom/speech_recognizer.mojom-blink.h"
#include "third_party/blink/public/common/browser_interface_broker_proxy.h"
#include "third_party/blink/renderer/core/dom/document.h"
#include "third_party/blink/renderer/core/frame/local_dom_window.h"
@ -58,21 +59,22 @@ SpeechRecognitionController::~SpeechRecognitionController() {
}
void SpeechRecognitionController::Start(
mojo::PendingReceiver<mojom::blink::SpeechRecognitionSession>
mojo::PendingReceiver<media::mojom::blink::SpeechRecognitionSession>
session_receiver,
mojo::PendingRemote<mojom::blink::SpeechRecognitionSessionClient>
mojo::PendingRemote<media::mojom::blink::SpeechRecognitionSessionClient>
session_client,
const SpeechGrammarList& grammars,
const String& lang,
bool continuous,
bool interim_results,
uint32_t max_alternatives) {
mojom::blink::StartSpeechRecognitionRequestParamsPtr msg_params =
mojom::blink::StartSpeechRecognitionRequestParams::New();
media::mojom::blink::StartSpeechRecognitionRequestParamsPtr msg_params =
media::mojom::blink::StartSpeechRecognitionRequestParams::New();
for (unsigned i = 0; i < grammars.length(); i++) {
SpeechGrammar* grammar = grammars.item(i);
msg_params->grammars.push_back(mojom::blink::SpeechRecognitionGrammar::New(
grammar->src(), grammar->weight()));
msg_params->grammars.push_back(
media::mojom::blink::SpeechRecognitionGrammar::New(grammar->src(),
grammar->weight()));
}
msg_params->language = lang.IsNull() ? g_empty_string : lang;
msg_params->max_hypotheses = max_alternatives;
@ -89,7 +91,7 @@ void SpeechRecognitionController::Trace(Visitor* visitor) const {
visitor->Trace(speech_recognizer_);
}
mojom::blink::SpeechRecognizer*
media::mojom::blink::SpeechRecognizer*
SpeechRecognitionController::GetSpeechRecognizer() {
if (!speech_recognizer_.is_bound()) {
GetSupplementable()->GetBrowserInterfaceBroker().GetInterface(

@ -26,9 +26,9 @@
#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_CONTROLLER_H_
#define THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_CONTROLLER_H_
#include "media/mojo/mojom/speech_recognizer.mojom-blink.h"
#include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/pending_remote.h"
#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom-blink.h"
#include "third_party/blink/renderer/modules/modules_export.h"
#include "third_party/blink/renderer/platform/mojo/heap_mojo_remote.h"
#include "third_party/blink/renderer/platform/mojo/heap_mojo_wrapper_mode.h"
@ -49,24 +49,25 @@ class SpeechRecognitionController final
explicit SpeechRecognitionController(LocalDOMWindow&);
virtual ~SpeechRecognitionController();
void Start(mojo::PendingReceiver<mojom::blink::SpeechRecognitionSession>
session_receiver,
mojo::PendingRemote<mojom::blink::SpeechRecognitionSessionClient>
session_client,
const SpeechGrammarList& grammars,
const String& lang,
bool continuous,
bool interim_results,
uint32_t max_alternatives);
void Start(
mojo::PendingReceiver<media::mojom::blink::SpeechRecognitionSession>
session_receiver,
mojo::PendingRemote<media::mojom::blink::SpeechRecognitionSessionClient>
session_client,
const SpeechGrammarList& grammars,
const String& lang,
bool continuous,
bool interim_results,
uint32_t max_alternatives);
static SpeechRecognitionController* From(LocalDOMWindow&);
void Trace(Visitor* visitor) const override;
private:
mojom::blink::SpeechRecognizer* GetSpeechRecognizer();
media::mojom::blink::SpeechRecognizer* GetSpeechRecognizer();
HeapMojoRemote<mojom::blink::SpeechRecognizer> speech_recognizer_;
HeapMojoRemote<media::mojom::blink::SpeechRecognizer> speech_recognizer_;
};
} // namespace blink

@ -25,32 +25,33 @@
#include "third_party/blink/renderer/modules/speech/speech_recognition_error_event.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom-blink.h"
#include "media/mojo/mojom/speech_recognition_error_code.mojom-blink.h"
#include "third_party/blink/renderer/core/event_type_names.h"
namespace blink {
static String ErrorCodeToString(mojom::blink::SpeechRecognitionErrorCode code) {
static String ErrorCodeToString(
media::mojom::blink::SpeechRecognitionErrorCode code) {
switch (code) {
case mojom::blink::SpeechRecognitionErrorCode::kNone:
case media::mojom::blink::SpeechRecognitionErrorCode::kNone:
return "other";
case mojom::blink::SpeechRecognitionErrorCode::kNoSpeech:
case media::mojom::blink::SpeechRecognitionErrorCode::kNoSpeech:
return "no-speech";
case mojom::blink::SpeechRecognitionErrorCode::kAborted:
case media::mojom::blink::SpeechRecognitionErrorCode::kAborted:
return "aborted";
case mojom::blink::SpeechRecognitionErrorCode::kAudioCapture:
case media::mojom::blink::SpeechRecognitionErrorCode::kAudioCapture:
return "audio-capture";
case mojom::blink::SpeechRecognitionErrorCode::kNetwork:
case media::mojom::blink::SpeechRecognitionErrorCode::kNetwork:
return "network";
case mojom::blink::SpeechRecognitionErrorCode::kNotAllowed:
case media::mojom::blink::SpeechRecognitionErrorCode::kNotAllowed:
return "not-allowed";
case mojom::blink::SpeechRecognitionErrorCode::kServiceNotAllowed:
case media::mojom::blink::SpeechRecognitionErrorCode::kServiceNotAllowed:
return "service-not-allowed";
case mojom::blink::SpeechRecognitionErrorCode::kBadGrammar:
case media::mojom::blink::SpeechRecognitionErrorCode::kBadGrammar:
return "bad-grammar";
case mojom::blink::SpeechRecognitionErrorCode::kLanguageNotSupported:
case media::mojom::blink::SpeechRecognitionErrorCode::kLanguageNotSupported:
return "language-not-supported";
case mojom::blink::SpeechRecognitionErrorCode::kNoMatch:
case media::mojom::blink::SpeechRecognitionErrorCode::kNoMatch:
NOTREACHED_IN_MIGRATION();
break;
}
@ -60,7 +61,7 @@ static String ErrorCodeToString(mojom::blink::SpeechRecognitionErrorCode code) {
}
SpeechRecognitionErrorEvent* SpeechRecognitionErrorEvent::Create(
mojom::blink::SpeechRecognitionErrorCode code,
media::mojom::blink::SpeechRecognitionErrorCode code,
const String& message) {
return MakeGarbageCollected<SpeechRecognitionErrorEvent>(
ErrorCodeToString(code), message);

@ -26,7 +26,7 @@
#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_ERROR_EVENT_H_
#define THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_ERROR_EVENT_H_
#include "third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom-blink-forward.h"
#include "media/mojo/mojom/speech_recognition_error_code.mojom-blink-forward.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_speech_recognition_error_event_init.h"
#include "third_party/blink/renderer/modules/event_modules.h"
#include "third_party/blink/renderer/modules/modules_export.h"
@ -39,7 +39,7 @@ class MODULES_EXPORT SpeechRecognitionErrorEvent final : public Event {
public:
static SpeechRecognitionErrorEvent* Create(
mojom::blink::SpeechRecognitionErrorCode,
media::mojom::blink::SpeechRecognitionErrorCode,
const String&);
static SpeechRecognitionErrorEvent* Create(
const AtomicString&,

@ -1,6 +1,6 @@
import {SpeechAudioErrorDetails} from '/gen/third_party/blink/public/mojom/speech/speech_recognition_error.mojom.m.js';
import {SpeechRecognitionErrorCode} from '/gen/third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom.m.js';
import {SpeechRecognitionSessionReceiver, SpeechRecognizer, SpeechRecognizerReceiver} from '/gen/third_party/blink/public/mojom/speech/speech_recognizer.mojom.m.js';
import {SpeechAudioErrorDetails} from '/gen/media/mojo/mojom/speech_recognition_error.mojom.m.js';
import {SpeechRecognitionErrorCode} from '/gen/media/mojo/mojom/speech_recognition_error_code.mojom.m.js';
import {SpeechRecognitionSessionReceiver, SpeechRecognizer, SpeechRecognizerReceiver} from '/gen/media/mojo/mojom/speech_recognizer.mojom.m.js';
// MockSpeechRecognizer is a mock implementation of media.mojom.SpeechRecognizer
// and the browser speech recognition service. Mock results can be set using

@ -4,7 +4,7 @@
<script src="/resources/testharnessreport.js"></script>
<script type="module">
import {MockSpeechRecognizer} from '../resources/mock-speechrecognizer.js';
import {SpeechRecognitionErrorCode} from '/gen/third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom.m.js';
import {SpeechRecognitionErrorCode} from '/gen/media/mojo/mojom/speech_recognition_error_code.mojom.m.js';
const mock = new MockSpeechRecognizer();