
Move Web Speech API .mojom files to //media/mojo/mojom

This CL moves the Web Speech API .mojom files to //media/mojo/mojom so that they can be used by the speech recognition service. //media cannot depend on //third_party/blink/public/mojom, as that would create a circular dependency (see the dependency sketch before the diff below).

Bug: 1495388
Change-Id: Ibd59ced528b323497eafc9f8230b609c2ef14445
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5528282
Reviewed-by: Andrew Grieve <agrieve@chromium.org>
Reviewed-by: Devlin Cronin <rdevlin.cronin@chromium.org>
Reviewed-by: Brendon Tiszka <tiszka@chromium.org>
Reviewed-by: Philip Rogers <pdr@chromium.org>
Reviewed-by: Xiaohan Wang <xhwang@chromium.org>
Reviewed-by: Peter Beverloo <peter@chromium.org>
Commit-Queue: Evan Liu <evliu@google.com>
Reviewed-by: Avi Drissman <avi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1302179}
Author: Evan Liu
Date: 2024-05-16 20:10:32 +00:00
Committed by: Chromium LUCI CQ
Parent: 0fc8cd0571
Commit: d725228c05
60 changed files with 407 additions and 367 deletions
Changed paths:
BUILD.gn
android_webview/browser
chrome
content
extensions/shell/browser
media/mojo/mojom
third_party/blink
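
For context, a minimal GN sketch of the dependency direction this move enables. The service target name below is hypothetical (illustration only, not part of this CL); the //media/mojo/mojom:web_speech_recognition target is the one introduced here. The point is that a target under //media can now reach the Web Speech mojoms without pulling in //third_party/blink:

# Hypothetical target somewhere under //media (illustration only).
# Before this move, using the Web Speech mojoms meant depending on
# //third_party/blink/public/mojom, which would have formed the cycle
# //media -> //third_party/blink -> //media.
source_set("speech_recognition_service_example") {
  sources = [ "speech_recognition_service_example.cc" ]
  deps = [
    # New home of the Web Speech API .mojom files after this CL.
    "//media/mojo/mojom:web_speech_recognition",
  ]
}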

@@ -1146,6 +1146,7 @@ if (use_blink && !is_cronet_build) {
 "//device/vr/public/mojom:vr_service_js_data_deps",
 "//media/capture/mojom:image_capture_js_data_deps",
 "//media/midi:mojo_js_data_deps",
+"//media/mojo/mojom:web_speech_recognition_js_data_deps",
 "//mojo/public/interfaces/bindings/tests:test_data_deps",
 "//mojo/public/js/ts/bindings/tests:test_interfaces_js_data_deps",
 "//mojo/public/mojom/base:base_js_data_deps",

@@ -307,6 +307,7 @@ source_set("browser") {
 "//components/webdata/common",
 "//content/public/browser",
 "//media/mojo:buildflags",
+"//media/mojo/mojom:web_speech_recognition",
 "//mojo/public/cpp/base:protobuf_support",
 "//services/cert_verifier/public/mojom",
 "//services/device/public/cpp:device_feature_list",

@@ -75,6 +75,7 @@ include_rules = [
 "+media/base/android",
 "+media/base/media_switches.h", # For media command line switches.
 "+media/mojo/buildflags.h",
+"+media/mojo/mojom",
 "+components/policy/policy_constants.h",
 "+components/embedder_support/android",

@@ -17,8 +17,8 @@
 #include "content/public/browser/speech_recognition_manager.h"
 #include "content/public/browser/speech_recognition_session_context.h"
 #include "content/public/browser/web_contents.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
+#include "media/mojo/mojom/speech_recognition_error.mojom.h"
+#include "media/mojo/mojom/speech_recognition_result.mojom.h"
 using content::BrowserThread;
@@ -40,11 +40,11 @@ void AwSpeechRecognitionManagerDelegate::OnAudioEnd(int session_id) {}
 void AwSpeechRecognitionManagerDelegate::OnRecognitionResults(
 int session_id,
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result) {}
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result) {}
 void AwSpeechRecognitionManagerDelegate::OnRecognitionError(
 int session_id,
-const blink::mojom::SpeechRecognitionError& error) {}
+const media::mojom::SpeechRecognitionError& error) {}
 void AwSpeechRecognitionManagerDelegate::OnAudioLevelsChange(
 int session_id,

@@ -37,11 +37,11 @@ class AwSpeechRecognitionManagerDelegate
 void OnRecognitionEnd(int session_id) override;
 void OnRecognitionResults(
 int session_id,
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result)
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
 override;
 void OnRecognitionError(
 int session_id,
-const blink::mojom::SpeechRecognitionError& error) override;
+const media::mojom::SpeechRecognitionError& error) override;
 void OnAudioLevelsChange(int session_id,
 float volume,
 float noise_volume) override;

@@ -2581,6 +2581,7 @@ static_library("browser") {
 "//media/midi",
 "//media/mojo:buildflags",
 "//media/mojo/common",
+"//media/mojo/mojom:web_speech_recognition",
 "//media/mojo/services",
 "//media/webrtc",
 "//mojo/core/embedder",

@@ -19,8 +19,8 @@
 #include "content/public/browser/speech_recognition_manager.h"
 #include "content/public/browser/speech_recognition_session_context.h"
 #include "content/public/browser/web_contents.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
+#include "media/mojo/mojom/speech_recognition_error.mojom.h"
+#include "media/mojo/mojom/speech_recognition_result.mojom.h"
 #if BUILDFLAG(ENABLE_EXTENSIONS)
 #include "chrome/browser/extensions/extension_service.h"
@@ -82,11 +82,11 @@ void ChromeSpeechRecognitionManagerDelegate::OnAudioEnd(int session_id) {
 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionResults(
 int session_id,
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result) {}
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result) {}
 void ChromeSpeechRecognitionManagerDelegate::OnRecognitionError(
 int session_id,
-const blink::mojom::SpeechRecognitionError& error) {}
+const media::mojom::SpeechRecognitionError& error) {}
 void ChromeSpeechRecognitionManagerDelegate::OnAudioLevelsChange(
 int session_id, float volume, float noise_volume) {

@@ -36,11 +36,11 @@ class ChromeSpeechRecognitionManagerDelegate
 void OnRecognitionEnd(int session_id) override;
 void OnRecognitionResults(
 int session_id,
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result)
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
 override;
 void OnRecognitionError(
 int session_id,
-const blink::mojom::SpeechRecognitionError& error) override;
+const media::mojom::SpeechRecognitionError& error) override;
 void OnAudioLevelsChange(int session_id,
 float volume,
 float noise_volume) override;

@@ -20,8 +20,8 @@
 #include "content/public/browser/speech_recognition_manager.h"
 #include "content/public/browser/speech_recognition_session_config.h"
 #include "content/public/browser/speech_recognition_session_preamble.h"
+#include "media/mojo/mojom/speech_recognition_error.mojom.h"
 #include "services/network/public/cpp/shared_url_loader_factory.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
 // Invalid speech session.
 static const int kInvalidSessionId = -1;
@@ -68,11 +68,11 @@ class NetworkSpeechRecognizer::EventListener
 void OnRecognitionEnd(int session_id) override;
 void OnRecognitionResults(
 int session_id,
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results)
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
 override;
 void OnRecognitionError(
 int session_id,
-const blink::mojom::SpeechRecognitionError& error) override;
+const media::mojom::SpeechRecognitionError& error) override;
 void OnSoundStart(int session_id) override;
 void OnSoundEnd(int session_id) override;
 void OnAudioLevelsChange(int session_id,
@@ -200,7 +200,7 @@ void NetworkSpeechRecognizer::EventListener::OnRecognitionEnd(int session_id) {
 void NetworkSpeechRecognizer::EventListener::OnRecognitionResults(
 int session_id,
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
 std::u16string result_str;
 size_t final_count = 0;
 // The number of results with |is_provisional| false. If |final_count| ==
@@ -211,7 +211,7 @@ void NetworkSpeechRecognizer::EventListener::OnRecognitionResults(
 final_count++;
 result_str += result->hypotheses[0]->utterance;
 }
-// blink::mojom::SpeechRecognitionResult doesn't have word offsets.
+// media::mojom::WebSpeechRecognitionResult doesn't have word offsets.
 content::GetUIThreadTaskRunner({})->PostTask(
 FROM_HERE,
 base::BindOnce(&SpeechRecognizerDelegate::OnSpeechResult, delegate_,
@@ -223,9 +223,9 @@ void NetworkSpeechRecognizer::EventListener::OnRecognitionResults(
 void NetworkSpeechRecognizer::EventListener::OnRecognitionError(
 int session_id,
-const blink::mojom::SpeechRecognitionError& error) {
+const media::mojom::SpeechRecognitionError& error) {
 StopOnIOThread();
-if (error.code == blink::mojom::SpeechRecognitionErrorCode::kNetwork) {
+if (error.code == media::mojom::SpeechRecognitionErrorCode::kNetwork) {
 NotifyRecognitionStateChanged(SPEECH_RECOGNIZER_ERROR);
 }
 NotifyRecognitionStateChanged(SPEECH_RECOGNIZER_READY);

@@ -120,6 +120,7 @@ if (!is_android) {
 "//media/mojo/mojom:mojom_mojolpm",
 "//media/mojo/mojom:remoting_mojolpm",
 "//media/mojo/mojom:speech_recognition_mojolpm",
+"//media/mojo/mojom:web_speech_recognition_mojolpm",
 "//services/device/public/mojom:mojom_mojolpm",
 "//services/image_annotation/public/mojom:mojom_mojolpm",
 "//services/network/public/mojom:cookies_mojom_mojolpm",

@@ -345,8 +345,8 @@ context_browser_exposed_interfaces = [
 "Remote",
 ],
 [
-"//third_party/blink/public/mojom/speech/speech_recognizer.mojom",
-"blink.mojom.SpeechRecognizer",
+"//media/mojo/mojom/speech_recognizer.mojom",
+"media.mojom.SpeechRecognizer",
 "Remote",
 ],
 [

@@ -175,6 +175,7 @@ source_set("browser") {
 "//media/mojo:buildflags",
 "//media/mojo/clients",
 "//media/mojo/mojom",
+"//media/mojo/mojom:web_speech_recognition",
 "//media/mojo/services",
 "//media/webrtc",
 "//mojo/core/embedder",
@@ -3327,6 +3328,8 @@ source_set("browser") {
 "speech/soda_speech_recognition_engine_impl.cc",
 "speech/soda_speech_recognition_engine_impl.h",
 ]
+deps += [ "//media/mojo/mojom:web_speech_recognition" ]
 }
 deps += [

@@ -98,6 +98,7 @@
 #include "media/mojo/mojom/media_metrics_provider.mojom.h"
 #include "media/mojo/mojom/media_player.mojom.h"
 #include "media/mojo/mojom/remoting.mojom.h"
+#include "media/mojo/mojom/speech_recognizer.mojom.h"
 #include "media/mojo/mojom/video_decode_perf_history.mojom.h"
 #include "media/mojo/mojom/video_encoder_metrics_provider.mojom.h"
 #include "media/mojo/mojom/webrtc_video_perf.mojom.h"
@@ -175,7 +176,6 @@
 #include "third_party/blink/public/mojom/sensor/web_sensor_provider.mojom.h"
 #include "third_party/blink/public/mojom/sms/webotp_service.mojom.h"
 #include "third_party/blink/public/mojom/speculation_rules/speculation_rules.mojom.h"
-#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom.h"
 #include "third_party/blink/public/mojom/speech/speech_synthesis.mojom.h"
 #include "third_party/blink/public/mojom/storage_access/storage_access_handle.mojom.h"
 #include "third_party/blink/public/mojom/usb/web_usb_service.mojom.h"
@@ -856,7 +856,7 @@ void PopulateFrameBinders(RenderFrameHostImpl* host, mojo::BinderMap* map) {
 map->Add<blink::mojom::SharedWorkerConnector>(
 base::BindRepeating(&BindSharedWorkerConnector, base::Unretained(host)));
-map->Add<blink::mojom::SpeechRecognizer>(
+map->Add<media::mojom::SpeechRecognizer>(
 base::BindRepeating(&SpeechRecognitionDispatcherHost::Create,
 host->GetProcess()->GetID(), host->GetRoutingID()),
 GetIOThreadTaskRunner({}));

@@ -1,6 +1,7 @@
 include_rules = [
 "+components/speech",
 "+components/soda",
+"+media/mojo/mojom",
 "+google_apis", # Exception to general rule, see content/DEPS for details.
 ]

@@ -23,13 +23,13 @@
 #include "content/public/browser/google_streaming_api.pb.h"
 #include "google_apis/google_api_keys.h"
 #include "media/base/audio_timestamp_helper.h"
+#include "media/mojo/mojom/speech_recognition_error.mojom.h"
+#include "media/mojo/mojom/speech_recognition_result.mojom.h"
 #include "mojo/public/c/system/types.h"
 #include "mojo/public/cpp/bindings/receiver_set.h"
 #include "net/base/load_flags.h"
 #include "net/traffic_annotation/network_traffic_annotation.h"
 #include "services/network/public/cpp/shared_url_loader_factory.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
 namespace content {
 namespace {
@@ -406,7 +406,7 @@ NetworkSpeechRecognitionEngineImpl::ConnectBothStreams(const FSMEventArgs&) {
 base::NumberToString(max_alternatives));
 }
 upstream_args.push_back("app=chromium");
-for (const blink::mojom::SpeechRecognitionGrammar& grammar :
+for (const media::mojom::SpeechRecognitionGrammar& grammar :
 config_.grammars) {
 std::string grammar_value(base::NumberToString(grammar.weight) + ":" +
 grammar.url.spec());
@@ -553,23 +553,23 @@ NetworkSpeechRecognitionEngineImpl::ProcessDownstreamResponse(
 case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
 break;
 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
-return Abort(blink::mojom::SpeechRecognitionErrorCode::kNoSpeech);
+return Abort(media::mojom::SpeechRecognitionErrorCode::kNoSpeech);
 case proto::SpeechRecognitionEvent::STATUS_ABORTED:
-return Abort(blink::mojom::SpeechRecognitionErrorCode::kAborted);
+return Abort(media::mojom::SpeechRecognitionErrorCode::kAborted);
 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
-return Abort(blink::mojom::SpeechRecognitionErrorCode::kAudioCapture);
+return Abort(media::mojom::SpeechRecognitionErrorCode::kAudioCapture);
 case proto::SpeechRecognitionEvent::STATUS_NETWORK:
-return Abort(blink::mojom::SpeechRecognitionErrorCode::kNetwork);
+return Abort(media::mojom::SpeechRecognitionErrorCode::kNetwork);
 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
-return Abort(blink::mojom::SpeechRecognitionErrorCode::kNotAllowed);
+return Abort(media::mojom::SpeechRecognitionErrorCode::kNotAllowed);
 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
 return Abort(
-blink::mojom::SpeechRecognitionErrorCode::kServiceNotAllowed);
+media::mojom::SpeechRecognitionErrorCode::kServiceNotAllowed);
 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
-return Abort(blink::mojom::SpeechRecognitionErrorCode::kBadGrammar);
+return Abort(media::mojom::SpeechRecognitionErrorCode::kBadGrammar);
 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
 return Abort(
-blink::mojom::SpeechRecognitionErrorCode::kLanguageNotSupported);
+media::mojom::SpeechRecognitionErrorCode::kLanguageNotSupported);
 }
 }
@@ -578,11 +578,11 @@ NetworkSpeechRecognitionEngineImpl::ProcessDownstreamResponse(
 delegate_->OnSpeechRecognitionEngineEndOfUtterance();
 }
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
 for (int i = 0; i < ws_event.result_size(); ++i) {
 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->is_provisional = !(ws_result.has_final() && ws_result.final());
 if (!result->is_provisional) {
@@ -592,8 +592,8 @@ NetworkSpeechRecognitionEngineImpl::ProcessDownstreamResponse(
 for (int j = 0; j < ws_result.alternative_size(); ++j) {
 const proto::SpeechRecognitionAlternative& ws_alternative =
 ws_result.alternative(j);
-blink::mojom::SpeechRecognitionHypothesisPtr hypothesis =
-blink::mojom::SpeechRecognitionHypothesis::New();
+media::mojom::SpeechRecognitionHypothesisPtr hypothesis =
+media::mojom::SpeechRecognitionHypothesis::New();
 if (ws_alternative.has_confidence()) {
 hypothesis->confidence = ws_alternative.confidence();
 } else if (ws_result.has_stability()) {
@@ -622,7 +622,7 @@ NetworkSpeechRecognitionEngineImpl::RaiseNoMatchErrorIfGotNoResults(
 // Provide an empty result to notify that recognition is ended with no
 // errors, yet neither any further results.
 delegate_->OnSpeechRecognitionEngineResults(
-std::vector<blink::mojom::SpeechRecognitionResultPtr>());
+std::vector<media::mojom::WebSpeechRecognitionResultPtr>());
 }
 return AbortSilently(event_args);
 }
@@ -666,23 +666,23 @@ NetworkSpeechRecognitionEngineImpl::CloseDownstream(const FSMEventArgs&) {
 NetworkSpeechRecognitionEngineImpl::FSMState
 NetworkSpeechRecognitionEngineImpl::AbortSilently(const FSMEventArgs&) {
-return Abort(blink::mojom::SpeechRecognitionErrorCode::kNone);
+return Abort(media::mojom::SpeechRecognitionErrorCode::kNone);
 }
 NetworkSpeechRecognitionEngineImpl::FSMState
 NetworkSpeechRecognitionEngineImpl::AbortWithError(const FSMEventArgs&) {
-return Abort(blink::mojom::SpeechRecognitionErrorCode::kNetwork);
+return Abort(media::mojom::SpeechRecognitionErrorCode::kNetwork);
 }
 NetworkSpeechRecognitionEngineImpl::FSMState
 NetworkSpeechRecognitionEngineImpl::Abort(
-blink::mojom::SpeechRecognitionErrorCode error_code) {
+media::mojom::SpeechRecognitionErrorCode error_code) {
 DVLOG(1) << "Aborting with error " << error_code;
-if (error_code != blink::mojom::SpeechRecognitionErrorCode::kNone) {
+if (error_code != media::mojom::SpeechRecognitionErrorCode::kNone) {
 delegate_->OnSpeechRecognitionEngineError(
-blink::mojom::SpeechRecognitionError(
-error_code, blink::mojom::SpeechAudioErrorDetails::kNone));
+media::mojom::SpeechRecognitionError(
+error_code, media::mojom::SpeechAudioErrorDetails::kNone));
 }
 downstream_loader_.reset();
 upstream_loader_.reset();

@@ -24,10 +24,10 @@
 #include "content/browser/speech/speech_recognition_engine.h"
 #include "content/common/content_export.h"
 #include "content/public/browser/speech_recognition_session_preamble.h"
+#include "media/mojo/mojom/speech_recognition_error.mojom.h"
+#include "media/mojo/mojom/speech_recognition_grammar.mojom.h"
+#include "media/mojo/mojom/speech_recognition_result.mojom.h"
 #include "services/network/public/cpp/simple_url_loader_stream_consumer.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_grammar.mojom.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
 class AudioChunk;
@@ -69,7 +69,7 @@ class CONTENT_EXPORT NetworkSpeechRecognitionEngineImpl
 ~Config();
 std::string language;
-std::vector<blink::mojom::SpeechRecognitionGrammar> grammars;
+std::vector<media::mojom::SpeechRecognitionGrammar> grammars;
 bool filter_profanities = false;
 bool continuous = true;
 bool interim_results = true;
@@ -183,7 +183,7 @@ class CONTENT_EXPORT NetworkSpeechRecognitionEngineImpl
 FSMState CloseDownstream(const FSMEventArgs& event_args);
 FSMState AbortSilently(const FSMEventArgs& event_args);
 FSMState AbortWithError(const FSMEventArgs& event_args);
-FSMState Abort(blink::mojom::SpeechRecognitionErrorCode error);
+FSMState Abort(media::mojom::SpeechRecognitionErrorCode error);
 FSMState DoNothing(const FSMEventArgs& event_args);
 FSMState NotFeasible(const FSMEventArgs& event_args);

@@ -19,6 +19,8 @@
 #include "components/speech/audio_buffer.h"
 #include "content/browser/speech/speech_recognition_engine.h"
 #include "content/public/browser/google_streaming_api.pb.h"
+#include "media/mojo/mojom/speech_recognition_error.mojom.h"
+#include "media/mojo/mojom/speech_recognition_result.mojom.h"
 #include "mojo/public/cpp/bindings/remote.h"
 #include "net/base/net_errors.h"
 #include "net/http/http_response_headers.h"
@@ -28,8 +30,6 @@
 #include "services/network/public/mojom/url_response_head.mojom.h"
 #include "services/network/test/test_url_loader_factory.h"
 #include "testing/gtest/include/gtest/gtest.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
-#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
 namespace content {
@@ -46,12 +46,12 @@ class NetworkSpeechRecognitionEngineImplTest
 public:
 NetworkSpeechRecognitionEngineImplTest()
 : last_number_of_upstream_chunks_seen_(0U),
-error_(blink::mojom::SpeechRecognitionErrorCode::kNone),
+error_(media::mojom::SpeechRecognitionErrorCode::kNone),
 end_of_utterance_counter_(0) {}
 // SpeechRecognitionRequestDelegate methods.
 void OnSpeechRecognitionEngineResults(
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results)
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
 override {
 results_.push(mojo::Clone(results));
 }
@@ -59,7 +59,7 @@ class NetworkSpeechRecognitionEngineImplTest
 ++end_of_utterance_counter_;
 }
 void OnSpeechRecognitionEngineError(
-const blink::mojom::SpeechRecognitionError& error) override {
+const media::mojom::SpeechRecognitionError& error) override {
 error_ = error.code;
 }
@@ -75,8 +75,8 @@ class NetworkSpeechRecognitionEngineImplTest
 DOWNSTREAM_ERROR_WEBSERVICE_NO_MATCH
 };
 static bool ResultsAreEqual(
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& a,
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& b);
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& a,
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& b);
 static std::string SerializeProtobufResponse(
 const proto::SpeechRecognitionEvent& msg);
@@ -89,9 +89,9 @@ class NetworkSpeechRecognitionEngineImplTest
 void ProvideMockProtoResultDownstream(
 const proto::SpeechRecognitionEvent& result);
 void ProvideMockResultDownstream(
-const blink::mojom::SpeechRecognitionResultPtr& result);
+const media::mojom::WebSpeechRecognitionResultPtr& result);
 void ExpectResultsReceived(
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result);
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result);
 void ExpectFramedChunk(const std::string& chunk, uint32_t type);
 // Reads and returns all pending upload data from |upstream_data_pipe_|,
 // initializing the pipe from |GetUpstreamRequest()|, if needed.
@@ -108,9 +108,10 @@ class NetworkSpeechRecognitionEngineImplTest
 std::unique_ptr<NetworkSpeechRecognitionEngineImpl> engine_under_test_;
 size_t last_number_of_upstream_chunks_seen_;
 std::string response_buffer_;
-blink::mojom::SpeechRecognitionErrorCode error_;
+media::mojom::SpeechRecognitionErrorCode error_;
 int end_of_utterance_counter_;
-base::queue<std::vector<blink::mojom::SpeechRecognitionResultPtr>> results_;
+base::queue<std::vector<media::mojom::WebSpeechRecognitionResultPtr>>
+results_;
 };
 TEST_F(NetworkSpeechRecognitionEngineImplTest, SingleDefinitiveResult) {
@@ -132,14 +133,14 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SingleDefinitiveResult) {
 // Simulate a protobuf message streamed from the server containing a single
 // result with two hypotheses.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->is_provisional = false;
 result->hypotheses.push_back(
-blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 1", 0.1F));
+media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 1", 0.1F));
 result->hypotheses.push_back(
-blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 2", 0.2F));
+media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 2", 0.2F));
 ProvideMockResultDownstream(result);
 ExpectResultsReceived(results);
@@ -149,7 +150,7 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SingleDefinitiveResult) {
 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
 EndMockRecognition();
-ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
+ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
 ASSERT_EQ(0U, results_.size());
 }
@@ -162,12 +163,12 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SeveralStreamingResults) {
 InjectDummyAudioChunk();
 ASSERT_NE("", ConsumeChunkedUploadData());
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->is_provisional = (i % 2 == 0); // Alternate result types.
 float confidence = result->is_provisional ? 0.0F : (i * 0.1F);
-result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
+result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
 u"hypothesis", confidence));
 ProvideMockResultDownstream(result);
@@ -181,11 +182,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SeveralStreamingResults) {
 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
 // Simulate a final definitive result.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->is_provisional = false;
-result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
+result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
 u"The final result", 1.0F));
 ProvideMockResultDownstream(result);
 ExpectResultsReceived(results);
@@ -195,7 +196,7 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SeveralStreamingResults) {
 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
 EndMockRecognition();
-ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
+ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
 ASSERT_EQ(0U, results_.size());
 }
@@ -210,11 +211,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest,
 ASSERT_NE("", ConsumeChunkedUploadData());
 // Simulate the corresponding definitive result.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->hypotheses.push_back(
-blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis", 1.0F));
+media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis", 1.0F));
 ProvideMockResultDownstream(result);
 ExpectResultsReceived(results);
 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
@@ -227,13 +228,13 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest,
 // Expect an empty result, aimed at notifying recognition ended with no
 // actual results nor errors.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> empty_results;
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> empty_results;
 ExpectResultsReceived(empty_results);
 // Ensure everything is closed cleanly after the downstream is closed.
 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
 EndMockRecognition();
-ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
+ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
 ASSERT_EQ(0U, results_.size());
 }
@@ -256,11 +257,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, ReRequestData) {
 ASSERT_EQ(uploaded_data, ConsumeChunkedUploadData());
 // Simulate the corresponding definitive result.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->hypotheses.push_back(
-blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis", 1.0F));
+media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis", 1.0F));
 ProvideMockResultDownstream(result);
 ExpectResultsReceived(results);
 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
@@ -282,13 +283,13 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, ReRequestData) {
 // Expect an empty result, aimed at notifying recognition ended with no
 // actual results nor errors.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> empty_results;
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> empty_results;
 ExpectResultsReceived(empty_results);
 // Ensure everything is closed cleanly after the downstream is closed.
 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
 EndMockRecognition();
-ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
+ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
 ASSERT_EQ(0U, results_.size());
 }
@@ -306,11 +307,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, NoMatchError) {
 ASSERT_TRUE(engine_under_test_->IsRecognitionPending());
 // Simulate only a provisional result.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->is_provisional = true;
-result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New(
+result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
 u"The final result", 0.0F));
 ProvideMockResultDownstream(result);
 ExpectResultsReceived(results);
@@ -321,7 +322,7 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, NoMatchError) {
 // Expect an empty result.
 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
 EndMockRecognition();
-std::vector<blink::mojom::SpeechRecognitionResultPtr> empty_result;
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> empty_result;
 ExpectResultsReceived(empty_result);
 }
@@ -336,11 +337,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, HTTPError) {
 // Close the downstream with a HTTP 500 error.
 CloseMockDownstream(DOWNSTREAM_ERROR_HTTP500);
-// Expect a blink::mojom::SpeechRecognitionErrorCode::kNetwork error to be
+// Expect a media::mojom::SpeechRecognitionErrorCode::kNetwork error to be
 // raised.
 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
 EndMockRecognition();
-ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
+ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
 ASSERT_EQ(0U, results_.size());
 }
@@ -355,11 +356,11 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, NetworkError) {
 // Close the downstream fetcher simulating a network failure.
 CloseMockDownstream(DOWNSTREAM_ERROR_NETWORK);
-// Expect a blink::mojom::SpeechRecognitionErrorCode::kNetwork error to be
+// Expect a media::mojom::SpeechRecognitionErrorCode::kNetwork error to be
 // raised.
 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
 EndMockRecognition();
-ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
+ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
 ASSERT_EQ(0U, results_.size());
 }
@@ -385,12 +386,12 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, Stability) {
 ProvideMockProtoResultDownstream(proto_event);
 // Set up expectations.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->is_provisional = true;
 result->hypotheses.push_back(
-blink::mojom::SpeechRecognitionHypothesis::New(u"foo", 0.5));
+media::mojom::SpeechRecognitionHypothesis::New(u"foo", 0.5));
 // Check that the protobuf generated the expected result.
 ExpectResultsReceived(results);
@@ -404,9 +405,9 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, Stability) {
 EndMockRecognition();
 // Since there was no final result, we get an empty "no match" result.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> empty_result;
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> empty_result;
 ExpectResultsReceived(empty_result);
-ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
+ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
 ASSERT_EQ(0U, results_.size());
 }
@@ -466,12 +467,12 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SendPreamble) {
 // Simulate a protobuf message streamed from the server containing a single
 // result with one hypotheses.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->is_provisional = false;
 result->hypotheses.push_back(
-blink::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 1", 0.1F));
+media::mojom::SpeechRecognitionHypothesis::New(u"hypothesis 1", 0.1F));
 ProvideMockResultDownstream(result);
 ExpectResultsReceived(results);
@@ -481,7 +482,7 @@ TEST_F(NetworkSpeechRecognitionEngineImplTest, SendPreamble) {
 CloseMockDownstream(DOWNSTREAM_ERROR_NONE);
 ASSERT_FALSE(engine_under_test_->IsRecognitionPending());
 EndMockRecognition();
-ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_);
+ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
 ASSERT_EQ(0U, results_.size());
 }
@@ -605,7 +606,7 @@ void NetworkSpeechRecognitionEngineImplTest::ProvideMockProtoResultDownstream(
 }
 void NetworkSpeechRecognitionEngineImplTest::ProvideMockResultDownstream(
-const blink::mojom::SpeechRecognitionResultPtr& result) {
+const media::mojom::WebSpeechRecognitionResultPtr& result) {
 proto::SpeechRecognitionEvent proto_event;
 proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS);
 proto::SpeechRecognitionResult* proto_result = proto_event.add_result();
@@ -613,7 +614,7 @@ void NetworkSpeechRecognitionEngineImplTest::ProvideMockResultDownstream(
 for (size_t i = 0; i < result->hypotheses.size(); ++i) {
 proto::SpeechRecognitionAlternative* proto_alternative =
 proto_result->add_alternative();
-const blink::mojom::SpeechRecognitionHypothesisPtr& hypothesis =
+const media::mojom::SpeechRecognitionHypothesisPtr& hypothesis =
 result->hypotheses[i];
 proto_alternative->set_confidence(hypothesis->confidence);
 proto_alternative->set_transcript(base::UTF16ToUTF8(hypothesis->utterance));
@@ -659,15 +660,15 @@ void NetworkSpeechRecognitionEngineImplTest::CloseMockDownstream(
 }
 void NetworkSpeechRecognitionEngineImplTest::ExpectResultsReceived(
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) {
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
 ASSERT_GE(1U, results_.size());
 ASSERT_TRUE(ResultsAreEqual(results, results_.front()));
 results_.pop();
 }
 bool NetworkSpeechRecognitionEngineImplTest::ResultsAreEqual(
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& a,
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& b) {
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& a,
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& b) {
 if (a.size() != b.size())
 return false;
@@ -679,9 +680,9 @@ bool NetworkSpeechRecognitionEngineImplTest::ResultsAreEqual(
 return false;
 }
 for (size_t i = 0; i < (*it_a)->hypotheses.size(); ++i) {
-const blink::mojom::SpeechRecognitionHypothesisPtr& hyp_a =
+const media::mojom::SpeechRecognitionHypothesisPtr& hyp_a =
 (*it_a)->hypotheses[i];
-const blink::mojom::SpeechRecognitionHypothesisPtr& hyp_b =
+const media::mojom::SpeechRecognitionHypothesisPtr& hyp_b =
 (*it_b)->hypotheses[i];
 if (hyp_a->utterance != hyp_b->utterance ||
 hyp_a->confidence != hyp_b->confidence) {

@@ -111,7 +111,7 @@ void SodaSpeechRecognitionEngineImpl::EndRecognition() {
 void SodaSpeechRecognitionEngineImpl::TakeAudioChunk(const AudioChunk& data) {
 DCHECK_CALLED_ON_VALID_SEQUENCE(main_sequence_checker_);
 if (!is_start_recognition_) {
-Abort(blink::mojom::SpeechRecognitionErrorCode::kNotAllowed);
+Abort(media::mojom::SpeechRecognitionErrorCode::kNotAllowed);
 return;
 }
@@ -136,13 +136,13 @@ void SodaSpeechRecognitionEngineImpl::OnSpeechRecognitionRecognitionEvent(
 std::move(reply).Run(is_start_recognition_);
 // Map recognition results.
-std::vector<blink::mojom::SpeechRecognitionResultPtr> results;
-results.push_back(blink::mojom::SpeechRecognitionResult::New());
-blink::mojom::SpeechRecognitionResultPtr& result = results.back();
+std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
+results.push_back(media::mojom::WebSpeechRecognitionResult::New());
+media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
 result->is_provisional = !recognition_result.is_final;
-blink::mojom::SpeechRecognitionHypothesisPtr hypothesis =
-blink::mojom::SpeechRecognitionHypothesis::New();
+media::mojom::SpeechRecognitionHypothesisPtr hypothesis =
+media::mojom::SpeechRecognitionHypothesis::New();
 // TODO(crbug.com/40286514): Hardcode now.
 hypothesis->confidence = kSpeechRecognitionConfidence;
 hypothesis->utterance = base::UTF8ToUTF16(recognition_result.transcription);
@@ -156,14 +156,14 @@ void SodaSpeechRecognitionEngineImpl::OnSpeechRecognitionRecognitionEvent(
 }
 void SodaSpeechRecognitionEngineImpl::OnSpeechRecognitionError() {
-Abort(blink::mojom::SpeechRecognitionErrorCode::kNoSpeech);
+Abort(media::mojom::SpeechRecognitionErrorCode::kNoSpeech);
 }
 void SodaSpeechRecognitionEngineImpl::OnLanguageIdentificationEvent(
 media::mojom::LanguageIdentificationEventPtr event) {}
 void SodaSpeechRecognitionEngineImpl::OnSpeechRecognitionStopped() {
-Abort(blink::mojom::SpeechRecognitionErrorCode::kAborted);
+Abort(media::mojom::SpeechRecognitionErrorCode::kAborted);
 }
 void SodaSpeechRecognitionEngineImpl::
@@ -193,7 +193,7 @@ void SodaSpeechRecognitionEngineImpl::OnRecognizerBound(
 void SodaSpeechRecognitionEngineImpl::OnRecognizerDisconnected() {
 DCHECK_CALLED_ON_VALID_SEQUENCE(main_sequence_checker_);
-Abort(blink::mojom::SpeechRecognitionErrorCode::kAborted);
+Abort(media::mojom::SpeechRecognitionErrorCode::kAborted);
 }
 void SodaSpeechRecognitionEngineImpl::SendAudioToSpeechRecognitionService(
@@ -212,13 +212,13 @@ void SodaSpeechRecognitionEngineImpl::MarkDone() {
 }
 void SodaSpeechRecognitionEngineImpl::Abort(
-blink::mojom::SpeechRecognitionErrorCode error_code) {
+media::mojom::SpeechRecognitionErrorCode error_code) {
 DVLOG(1) << "Aborting with error " << error_code;
-if (error_code != blink::mojom::SpeechRecognitionErrorCode::kNone) {
+if (error_code != media::mojom::SpeechRecognitionErrorCode::kNone) {
 delegate_->OnSpeechRecognitionEngineError(
-blink::mojom::SpeechRecognitionError(
-error_code, blink::mojom::SpeechAudioErrorDetails::kNone));
+media::mojom::SpeechRecognitionError(
+error_code, media::mojom::SpeechAudioErrorDetails::kNone));
 }
 }

@@ -82,7 +82,7 @@ class CONTENT_EXPORT SodaSpeechRecognitionEngineImpl
 void MarkDone();
-void Abort(blink::mojom::SpeechRecognitionErrorCode error);
+void Abort(media::mojom::SpeechRecognitionErrorCode error);
 media::mojom::AudioDataS16Ptr ConvertToAudioDataS16(const AudioChunk& data);

@@ -39,11 +39,11 @@ class SodaSpeechRecognitionEngineImplTest
 // SpeechRecognitionRequestDelegate methods.
 void OnSpeechRecognitionEngineResults(
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results)
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
 override;
 void OnSpeechRecognitionEngineEndOfUtterance() override;
 void OnSpeechRecognitionEngineError(
-const blink::mojom::SpeechRecognitionError& error) override;
+const media::mojom::SpeechRecognitionError& error) override;
 // context.
 std::unique_ptr<SodaSpeechRecognitionEngineImpl> CreateSpeechRecognition(
@@ -55,16 +55,16 @@ class SodaSpeechRecognitionEngineImplTest
 // operations.
 void SendDummyAudioChunk();
 void FillRecognitionExpectResults(
-std::vector<blink::mojom::SpeechRecognitionResultPtr>& results,
+std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results,
 const char* transcription,
 bool is_final);
 void SendSpeechResult(const char* result, bool is_final);
 void SendTranscriptionError();
 void ExpectResultsReceived(
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results);
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results);
 bool ResultsAreEqual(
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& a,
-const std::vector<blink::mojom::SpeechRecognitionResultPtr>& b);
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& a,
+const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& b);
 protected:
 content::BrowserTaskEnvironment task_environment_;
@@ -74,8 +74,9 @@ class SodaSpeechRecognitionEngineImplTest
fake_speech_recognition_mgr_delegate_; fake_speech_recognition_mgr_delegate_;
std::unique_ptr<SodaSpeechRecognitionEngineImpl> client_under_test_; std::unique_ptr<SodaSpeechRecognitionEngineImpl> client_under_test_;
base::queue<std::vector<blink::mojom::SpeechRecognitionResultPtr>> results_; base::queue<std::vector<media::mojom::WebSpeechRecognitionResultPtr>> results_;
blink::mojom::SpeechRecognitionErrorCode error_; media::mojom::SpeechRecognitionErrorCode error_;
int end_of_utterance_counter_ = 0; int end_of_utterance_counter_ = 0;
bool recognition_ready_ = false; bool recognition_ready_ = false;
@@ -83,7 +84,7 @@ class SodaSpeechRecognitionEngineImplTest
}; };
void SodaSpeechRecognitionEngineImplTest::SetUp() { void SodaSpeechRecognitionEngineImplTest::SetUp() {
error_ = blink::mojom::SpeechRecognitionErrorCode::kNone; error_ = media::mojom::SpeechRecognitionErrorCode::kNone;
end_of_utterance_counter_ = 0; end_of_utterance_counter_ = 0;
recognition_ready_ = false; recognition_ready_ = false;
browser_context_ = std::make_unique<content::TestBrowserContext>(); browser_context_ = std::make_unique<content::TestBrowserContext>();
@@ -97,7 +98,7 @@ void SodaSpeechRecognitionEngineImplTest::SetUp() {
void SodaSpeechRecognitionEngineImplTest::TearDown() {} void SodaSpeechRecognitionEngineImplTest::TearDown() {}
void SodaSpeechRecognitionEngineImplTest::OnSpeechRecognitionEngineResults( void SodaSpeechRecognitionEngineImplTest::OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) { const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
results_.push(mojo::Clone(results)); results_.push(mojo::Clone(results));
} }
@@ -107,7 +108,7 @@ void SodaSpeechRecognitionEngineImplTest::
} }
void SodaSpeechRecognitionEngineImplTest::OnSpeechRecognitionEngineError( void SodaSpeechRecognitionEngineImplTest::OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) { const media::mojom::SpeechRecognitionError& error) {
error_ = error.code; error_ = error.code;
} }
@@ -158,15 +159,15 @@ void SodaSpeechRecognitionEngineImplTest::SendDummyAudioChunk() {
} }
void SodaSpeechRecognitionEngineImplTest::FillRecognitionExpectResults( void SodaSpeechRecognitionEngineImplTest::FillRecognitionExpectResults(
std::vector<blink::mojom::SpeechRecognitionResultPtr>& results, std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results,
const char* transcription, const char* transcription,
bool is_final) { bool is_final) {
results.push_back(blink::mojom::SpeechRecognitionResult::New()); results.push_back(media::mojom::WebSpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back(); media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
result->is_provisional = !is_final; result->is_provisional = !is_final;
blink::mojom::SpeechRecognitionHypothesisPtr hypothesis = media::mojom::SpeechRecognitionHypothesisPtr hypothesis =
blink::mojom::SpeechRecognitionHypothesis::New(); media::mojom::SpeechRecognitionHypothesis::New();
hypothesis->confidence = 1.0; hypothesis->confidence = 1.0;
hypothesis->utterance = base::UTF8ToUTF16(transcription); hypothesis->utterance = base::UTF8ToUTF16(transcription);
result->hypotheses.push_back(std::move(hypothesis)); result->hypotheses.push_back(std::move(hypothesis));
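
For readers new to the renamed result type, a hedged standalone sketch of building one final result with a single hypothesis, using only the fields exercised by FillRecognitionExpectResults() above; `BuildFinalResult` is an illustrative helper name.

// --- Illustrative sketch (not part of the CL) ----------------------------
// Builds one final (non-provisional) WebSpeechRecognitionResult carrying a
// single hypothesis with full confidence.
#include <string>
#include <utility>
#include <vector>

#include "media/mojo/mojom/speech_recognition_result.mojom.h"

std::vector<media::mojom::WebSpeechRecognitionResultPtr> BuildFinalResult(
    const std::u16string& transcription) {
  std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
  results.push_back(media::mojom::WebSpeechRecognitionResult::New());
  media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
  result->is_provisional = false;  // Final result, not an interim one.

  auto hypothesis = media::mojom::SpeechRecognitionHypothesis::New();
  hypothesis->confidence = 1.0;
  hypothesis->utterance = transcription;
  result->hypotheses.push_back(std::move(hypothesis));
  return results;
}
// --------------------------------------------------------------------------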
@@ -187,15 +188,15 @@ void SodaSpeechRecognitionEngineImplTest::SendTranscriptionError() {
} }
void SodaSpeechRecognitionEngineImplTest::ExpectResultsReceived( void SodaSpeechRecognitionEngineImplTest::ExpectResultsReceived(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) { const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
ASSERT_GE(1U, results_.size()); ASSERT_GE(1U, results_.size());
ASSERT_TRUE(ResultsAreEqual(results, results_.front())); ASSERT_TRUE(ResultsAreEqual(results, results_.front()));
results_.pop(); results_.pop();
} }
bool SodaSpeechRecognitionEngineImplTest::ResultsAreEqual( bool SodaSpeechRecognitionEngineImplTest::ResultsAreEqual(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& a, const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& a,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& b) { const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& b) {
if (a.size() != b.size()) { if (a.size() != b.size()) {
return false; return false;
} }
@@ -208,9 +209,9 @@ bool SodaSpeechRecognitionEngineImplTest::ResultsAreEqual(
return false; return false;
} }
for (size_t i = 0; i < (*it_a)->hypotheses.size(); ++i) { for (size_t i = 0; i < (*it_a)->hypotheses.size(); ++i) {
const blink::mojom::SpeechRecognitionHypothesisPtr& hyp_a = const media::mojom::SpeechRecognitionHypothesisPtr& hyp_a =
(*it_a)->hypotheses[i]; (*it_a)->hypotheses[i];
const blink::mojom::SpeechRecognitionHypothesisPtr& hyp_b = const media::mojom::SpeechRecognitionHypothesisPtr& hyp_b =
(*it_b)->hypotheses[i]; (*it_b)->hypotheses[i];
if (hyp_a->utterance != hyp_b->utterance || if (hyp_a->utterance != hyp_b->utterance ||
hyp_a->confidence != hyp_b->confidence) { hyp_a->confidence != hyp_b->confidence) {
@@ -235,19 +236,19 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionResults) {
client_under_test_->StartRecognition(); client_under_test_->StartRecognition();
SendDummyAudioChunk(); SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> first_results; std::vector<media::mojom::WebSpeechRecognitionResultPtr> first_results;
FillRecognitionExpectResults(first_results, kFirstSpeechResult, false); FillRecognitionExpectResults(first_results, kFirstSpeechResult, false);
SendSpeechResult(kFirstSpeechResult, /*is_final=*/false); SendSpeechResult(kFirstSpeechResult, /*is_final=*/false);
ExpectResultsReceived(first_results); ExpectResultsReceived(first_results);
SendDummyAudioChunk(); SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> second_results; std::vector<media::mojom::WebSpeechRecognitionResultPtr> second_results;
FillRecognitionExpectResults(second_results, kSecondSpeechResult, false); FillRecognitionExpectResults(second_results, kSecondSpeechResult, false);
SendSpeechResult(kSecondSpeechResult, /*is_final=*/false); SendSpeechResult(kSecondSpeechResult, /*is_final=*/false);
ExpectResultsReceived(second_results); ExpectResultsReceived(second_results);
SendTranscriptionError(); SendTranscriptionError();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNoSpeech, error_); ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNoSpeech, error_);
} }
TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionAudioChunksEnded) { TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionAudioChunksEnded) {
@@ -263,7 +264,7 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionAudioChunksEnded) {
client_under_test_->StartRecognition(); client_under_test_->StartRecognition();
SendDummyAudioChunk(); SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> first_results; std::vector<media::mojom::WebSpeechRecognitionResultPtr> first_results;
FillRecognitionExpectResults(first_results, kFirstSpeechResult, false); FillRecognitionExpectResults(first_results, kFirstSpeechResult, false);
SendSpeechResult(kFirstSpeechResult, /*is_final=*/false); SendSpeechResult(kFirstSpeechResult, /*is_final=*/false);
ExpectResultsReceived(first_results); ExpectResultsReceived(first_results);
@@ -272,7 +273,7 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionAudioChunksEnded) {
client_under_test_->AudioChunksEnded(); client_under_test_->AudioChunksEnded();
client_under_test_->EndRecognition(); client_under_test_->EndRecognition();
loop.RunUntilIdle(); loop.RunUntilIdle();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAborted, error_); ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kAborted, error_);
} }
TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionEndOfUtterance) { TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionEndOfUtterance) {
@@ -289,12 +290,12 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionEndOfUtterance) {
client_under_test_->StartRecognition(); client_under_test_->StartRecognition();
SendDummyAudioChunk(); SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> first_results; std::vector<media::mojom::WebSpeechRecognitionResultPtr> first_results;
FillRecognitionExpectResults(first_results, kFirstSpeechResult, false); FillRecognitionExpectResults(first_results, kFirstSpeechResult, false);
SendSpeechResult(kFirstSpeechResult, /*is_final=*/false); SendSpeechResult(kFirstSpeechResult, /*is_final=*/false);
ExpectResultsReceived(first_results); ExpectResultsReceived(first_results);
std::vector<blink::mojom::SpeechRecognitionResultPtr> second_results; std::vector<media::mojom::WebSpeechRecognitionResultPtr> second_results;
FillRecognitionExpectResults(second_results, kSecondSpeechResult, true); FillRecognitionExpectResults(second_results, kSecondSpeechResult, true);
SendSpeechResult(kSecondSpeechResult, /*is_final=*/true); SendSpeechResult(kSecondSpeechResult, /*is_final=*/true);
ExpectResultsReceived(second_results); ExpectResultsReceived(second_results);
@@ -316,7 +317,7 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionEnd) {
client_under_test_->StartRecognition(); client_under_test_->StartRecognition();
SendDummyAudioChunk(); SendDummyAudioChunk();
std::vector<blink::mojom::SpeechRecognitionResultPtr> first_results; std::vector<media::mojom::WebSpeechRecognitionResultPtr> first_results;
FillRecognitionExpectResults(first_results, kFirstSpeechResult, false); FillRecognitionExpectResults(first_results, kFirstSpeechResult, false);
SendSpeechResult(kFirstSpeechResult, /*is_final=*/false); SendSpeechResult(kFirstSpeechResult, /*is_final=*/false);
ExpectResultsReceived(first_results); ExpectResultsReceived(first_results);
@@ -324,7 +325,7 @@ TEST_F(SodaSpeechRecognitionEngineImplTest, SpeechRecognitionEnd) {
client_under_test_->EndRecognition(); client_under_test_->EndRecognition();
SendDummyAudioChunk(); SendDummyAudioChunk();
ASSERT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNotAllowed, error_); ASSERT_EQ(media::mojom::SpeechRecognitionErrorCode::kNotAllowed, error_);
} }
TEST_F(SodaSpeechRecognitionEngineImplTest, SetOnReadyCallbackAfterBind) { TEST_F(SodaSpeechRecognitionEngineImplTest, SetOnReadyCallbackAfterBind) {

@@ -157,15 +157,15 @@ std::string MakeGoodResponse() {
proto::SpeechRecognitionEvent proto_event; proto::SpeechRecognitionEvent proto_event;
proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS); proto_event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS);
proto::SpeechRecognitionResult* proto_result = proto_event.add_result(); proto::SpeechRecognitionResult* proto_result = proto_event.add_result();
blink::mojom::SpeechRecognitionResultPtr result = media::mojom::WebSpeechRecognitionResultPtr result =
blink::mojom::SpeechRecognitionResult::New(); media::mojom::WebSpeechRecognitionResult::New();
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New( result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
u"Pictures of the moon", 1.0F)); u"Pictures of the moon", 1.0F));
proto_result->set_final(!result->is_provisional); proto_result->set_final(!result->is_provisional);
for (size_t i = 0; i < result->hypotheses.size(); ++i) { for (size_t i = 0; i < result->hypotheses.size(); ++i) {
proto::SpeechRecognitionAlternative* proto_alternative = proto::SpeechRecognitionAlternative* proto_alternative =
proto_result->add_alternative(); proto_result->add_alternative();
const blink::mojom::SpeechRecognitionHypothesisPtr& hypothesis = const media::mojom::SpeechRecognitionHypothesisPtr& hypothesis =
result->hypotheses[i]; result->hypotheses[i];
proto_alternative->set_confidence(hypothesis->confidence); proto_alternative->set_confidence(hypothesis->confidence);
proto_alternative->set_transcript(base::UTF16ToUTF8(hypothesis->utterance)); proto_alternative->set_transcript(base::UTF16ToUTF8(hypothesis->utterance));

@@ -42,7 +42,7 @@ SpeechRecognitionDispatcherHost::SpeechRecognitionDispatcherHost(
void SpeechRecognitionDispatcherHost::Create( void SpeechRecognitionDispatcherHost::Create(
int render_process_id, int render_process_id,
int render_frame_id, int render_frame_id,
mojo::PendingReceiver<blink::mojom::SpeechRecognizer> receiver) { mojo::PendingReceiver<media::mojom::SpeechRecognizer> receiver) {
mojo::MakeSelfOwnedReceiver(std::make_unique<SpeechRecognitionDispatcherHost>( mojo::MakeSelfOwnedReceiver(std::make_unique<SpeechRecognitionDispatcherHost>(
render_process_id, render_frame_id), render_process_id, render_frame_id),
std::move(receiver)); std::move(receiver));
@@ -55,10 +55,10 @@ SpeechRecognitionDispatcherHost::AsWeakPtr() {
return weak_factory_.GetWeakPtr(); return weak_factory_.GetWeakPtr();
} }
// -------- blink::mojom::SpeechRecognizer interface implementation ------------ // -------- media::mojom::SpeechRecognizer interface implementation ------------
void SpeechRecognitionDispatcherHost::Start( void SpeechRecognitionDispatcherHost::Start(
blink::mojom::StartSpeechRecognitionRequestParamsPtr params) { media::mojom::StartSpeechRecognitionRequestParamsPtr params) {
DCHECK_CURRENTLY_ON(BrowserThread::IO); DCHECK_CURRENTLY_ON(BrowserThread::IO);
GetUIThreadTaskRunner({})->PostTask( GetUIThreadTaskRunner({})->PostTask(
@@ -74,7 +74,7 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI(
speech_recognition_dispatcher_host, speech_recognition_dispatcher_host,
int render_process_id, int render_process_id,
int render_frame_id, int render_frame_id,
blink::mojom::StartSpeechRecognitionRequestParamsPtr params) { media::mojom::StartSpeechRecognitionRequestParamsPtr params) {
DCHECK_CURRENTLY_ON(BrowserThread::UI); DCHECK_CURRENTLY_ON(BrowserThread::UI);
int embedder_render_process_id = 0; int embedder_render_process_id = 0;
int embedder_render_frame_id = MSG_ROUTING_NONE; int embedder_render_frame_id = MSG_ROUTING_NONE;
@@ -145,7 +145,7 @@ void SpeechRecognitionDispatcherHost::StartRequestOnUI(
} }
void SpeechRecognitionDispatcherHost::StartSessionOnIO( void SpeechRecognitionDispatcherHost::StartSessionOnIO(
blink::mojom::StartSpeechRecognitionRequestParamsPtr params, media::mojom::StartSpeechRecognitionRequestParamsPtr params,
int embedder_render_process_id, int embedder_render_process_id,
int embedder_render_frame_id, int embedder_render_frame_id,
const url::Origin& origin, const url::Origin& origin,
@@ -178,7 +178,7 @@ void SpeechRecognitionDispatcherHost::StartSessionOnIO(
config.interim_results = params->interim_results; config.interim_results = params->interim_results;
config.event_listener = session->AsWeakPtr(); config.event_listener = session->AsWeakPtr();
for (blink::mojom::SpeechRecognitionGrammarPtr& grammar_ptr : for (media::mojom::SpeechRecognitionGrammarPtr& grammar_ptr :
params->grammars) { params->grammars) {
config.grammars.push_back(*grammar_ptr); config.grammars.push_back(*grammar_ptr);
} }
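
As a hedged usage sketch of the relocated SpeechRecognizer interface, roughly how a caller could bind a remote to the dispatcher host. The render process/frame IDs are placeholders, the host header path is assumed, and threading requirements (Create() is reached from the IO thread in the code above) are glossed over.

// --- Illustrative sketch (not part of the CL) ----------------------------
// Binds a media::mojom::SpeechRecognizer remote to the browser-side host.
#include "content/browser/speech/speech_recognition_dispatcher_host.h"  // Path assumed.
#include "media/mojo/mojom/speech_recognizer.mojom.h"
#include "mojo/public/cpp/bindings/remote.h"

mojo::Remote<media::mojom::SpeechRecognizer> BindRecognizer(
    int render_process_id, int render_frame_id) {
  mojo::Remote<media::mojom::SpeechRecognizer> recognizer;
  content::SpeechRecognitionDispatcherHost::Create(
      render_process_id, render_frame_id,
      recognizer.BindNewPipeAndPassReceiver());
  // The remote can now issue Start() with
  // media::mojom::StartSpeechRecognitionRequestParamsPtr.
  return recognizer;
}
// --------------------------------------------------------------------------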

@@ -11,10 +11,10 @@
#include "base/memory/weak_ptr.h" #include "base/memory/weak_ptr.h"
#include "content/public/browser/browser_thread.h" #include "content/public/browser/browser_thread.h"
#include "content/public/browser/speech_recognition_event_listener.h" #include "content/public/browser/speech_recognition_event_listener.h"
#include "media/mojo/mojom/speech_recognizer.mojom.h"
#include "mojo/public/cpp/bindings/pending_receiver.h" #include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/pending_remote.h" #include "mojo/public/cpp/bindings/pending_remote.h"
#include "mojo/public/cpp/bindings/remote.h" #include "mojo/public/cpp/bindings/remote.h"
#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom.h"
namespace network { namespace network {
class PendingSharedURLLoaderFactory; class PendingSharedURLLoaderFactory;
@@ -31,7 +31,7 @@ class SpeechRecognitionManager;
// SpeechRecognitionDispatcherHost is an implementation of the SpeechRecognizer // SpeechRecognitionDispatcherHost is an implementation of the SpeechRecognizer
// interface that allows a RenderFrame to start a speech recognition session // interface that allows a RenderFrame to start a speech recognition session
// in the browser process, by communicating with SpeechRecognitionManager. // in the browser process, by communicating with SpeechRecognitionManager.
class SpeechRecognitionDispatcherHost : public blink::mojom::SpeechRecognizer { class SpeechRecognitionDispatcherHost : public media::mojom::SpeechRecognizer {
public: public:
SpeechRecognitionDispatcherHost(int render_process_id, int render_frame_id); SpeechRecognitionDispatcherHost(int render_process_id, int render_frame_id);
@@ -44,12 +44,12 @@ class SpeechRecognitionDispatcherHost : public blink::mojom::SpeechRecognizer {
static void Create( static void Create(
int render_process_id, int render_process_id,
int render_frame_id, int render_frame_id,
mojo::PendingReceiver<blink::mojom::SpeechRecognizer> receiver); mojo::PendingReceiver<media::mojom::SpeechRecognizer> receiver);
base::WeakPtr<SpeechRecognitionDispatcherHost> AsWeakPtr(); base::WeakPtr<SpeechRecognitionDispatcherHost> AsWeakPtr();
// blink::mojom::SpeechRecognizer implementation // media::mojom::SpeechRecognizer implementation
void Start( void Start(
blink::mojom::StartSpeechRecognitionRequestParamsPtr params) override; media::mojom::StartSpeechRecognitionRequestParamsPtr params) override;
private: private:
static void StartRequestOnUI( static void StartRequestOnUI(
@@ -57,9 +57,9 @@ class SpeechRecognitionDispatcherHost : public blink::mojom::SpeechRecognizer {
speech_recognition_dispatcher_host, speech_recognition_dispatcher_host,
int render_process_id, int render_process_id,
int render_frame_id, int render_frame_id,
blink::mojom::StartSpeechRecognitionRequestParamsPtr params); media::mojom::StartSpeechRecognitionRequestParamsPtr params);
void StartSessionOnIO( void StartSessionOnIO(
blink::mojom::StartSpeechRecognitionRequestParamsPtr params, media::mojom::StartSpeechRecognitionRequestParamsPtr params,
int embedder_render_process_id, int embedder_render_process_id,
int embedder_render_frame_id, int embedder_render_frame_id,
const url::Origin& origin, const url::Origin& origin,

@@ -10,8 +10,8 @@
#include "components/speech/audio_buffer.h" #include "components/speech/audio_buffer.h"
#include "content/common/content_export.h" #include "content/common/content_export.h"
#include "media/base/audio_parameters.h" #include "media/base/audio_parameters.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h" #include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h" #include "media/mojo/mojom/speech_recognition_result.mojom.h"
namespace blink { namespace blink {
namespace mojom { namespace mojom {
@@ -39,11 +39,11 @@ class CONTENT_EXPORT SpeechRecognitionEngine {
public: public:
// Called whenever a result is retrieved. // Called whenever a result is retrieved.
virtual void OnSpeechRecognitionEngineResults( virtual void OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& const std::vector<media::mojom::WebSpeechRecognitionResultPtr>&
results) = 0; results) = 0;
virtual void OnSpeechRecognitionEngineEndOfUtterance() = 0; virtual void OnSpeechRecognitionEngineEndOfUtterance() = 0;
virtual void OnSpeechRecognitionEngineError( virtual void OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) = 0; const media::mojom::SpeechRecognitionError& error) = 0;
protected: protected:
virtual ~Delegate() = default; virtual ~Delegate() = default;
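
A hedged sketch of a trivial delegate written against the new signatures. It assumes the three members shown in this hunk are the only pure-virtual ones and that the engine header lives where the class name suggests; `LoggingEngineDelegate` is an illustrative name.

// --- Illustrative sketch (not part of the CL) ----------------------------
// A delegate that only logs, implementing the Delegate interface above with
// the media::mojom types introduced by this CL.
#include <vector>

#include "base/logging.h"
#include "content/browser/speech/speech_recognition_engine.h"  // Path assumed.
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"

class LoggingEngineDelegate
    : public content::SpeechRecognitionEngine::Delegate {
 public:
  void OnSpeechRecognitionEngineResults(
      const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
      override {
    DVLOG(1) << "Received " << results.size() << " result(s)";
  }
  void OnSpeechRecognitionEngineEndOfUtterance() override {}
  void OnSpeechRecognitionEngineError(
      const media::mojom::SpeechRecognitionError& error) override {
    DVLOG(1) << "Engine error " << error.code;
  }
};
// --------------------------------------------------------------------------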

@@ -35,8 +35,8 @@
#include "content/public/browser/web_contents_observer.h" #include "content/public/browser/web_contents_observer.h"
#include "content/public/common/content_client.h" #include "content/public/common/content_client.h"
#include "media/audio/audio_device_description.h" #include "media/audio/audio_device_description.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h" #include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h" #include "media/mojo/mojom/speech_recognition_result.mojom.h"
#include "url/gurl.h" #include "url/gurl.h"
#include "url/origin.h" #include "url/origin.h"
@@ -342,9 +342,9 @@ void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id,
weak_factory_.GetWeakPtr(), session_id, EVENT_START)); weak_factory_.GetWeakPtr(), session_id, EVENT_START));
} else { } else {
OnRecognitionError( OnRecognitionError(
session_id, blink::mojom::SpeechRecognitionError( session_id, media::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kNotAllowed, media::mojom::SpeechRecognitionErrorCode::kNotAllowed,
blink::mojom::SpeechAudioErrorDetails::kNone)); media::mojom::SpeechAudioErrorDetails::kNone));
base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask( base::SingleThreadTaskRunner::GetCurrentDefault()->PostTask(
FROM_HERE, FROM_HERE,
base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent, base::BindOnce(&SpeechRecognitionManagerImpl::DispatchEvent,
@@ -522,7 +522,7 @@ void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) {
void SpeechRecognitionManagerImpl::OnRecognitionResults( void SpeechRecognitionManagerImpl::OnRecognitionResults(
int session_id, int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) { const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
DCHECK_CURRENTLY_ON(BrowserThread::IO); DCHECK_CURRENTLY_ON(BrowserThread::IO);
if (!SessionExists(session_id)) if (!SessionExists(session_id))
return; return;
@@ -535,7 +535,7 @@ void SpeechRecognitionManagerImpl::OnRecognitionResults(
void SpeechRecognitionManagerImpl::OnRecognitionError( void SpeechRecognitionManagerImpl::OnRecognitionError(
int session_id, int session_id,
const blink::mojom::SpeechRecognitionError& error) { const media::mojom::SpeechRecognitionError& error) {
DCHECK_CURRENTLY_ON(BrowserThread::IO); DCHECK_CURRENTLY_ON(BrowserThread::IO);
if (!SessionExists(session_id)) if (!SessionExists(session_id))
return; return;

@@ -16,8 +16,8 @@
#include "content/public/browser/speech_recognition_manager.h" #include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_config.h" #include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h" #include "content/public/browser/speech_recognition_session_context.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/mediastream/media_stream.mojom-forward.h" #include "third_party/blink/public/mojom/mediastream/media_stream.mojom-forward.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
namespace media { namespace media {
class AudioSystem; class AudioSystem;
@@ -82,11 +82,11 @@ class CONTENT_EXPORT SpeechRecognitionManagerImpl
void OnRecognitionEnd(int session_id) override; void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults( void OnRecognitionResults(
int session_id, int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result) const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
override; override;
void OnRecognitionError( void OnRecognitionError(
int session_id, int session_id,
const blink::mojom::SpeechRecognitionError& error) override; const media::mojom::SpeechRecognitionError& error) override;
void OnAudioLevelsChange(int session_id, void OnAudioLevelsChange(int session_id,
float volume, float volume,
float noise_volume) override; float noise_volume) override;

@@ -15,7 +15,7 @@
namespace content { namespace content {
SpeechRecognitionSession::SpeechRecognitionSession( SpeechRecognitionSession::SpeechRecognitionSession(
mojo::PendingRemote<blink::mojom::SpeechRecognitionSessionClient> client) mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient> client)
: client_(std::move(client)) { : client_(std::move(client)) {
client_.set_disconnect_handler( client_.set_disconnect_handler(
base::BindOnce(&SpeechRecognitionSession::ConnectionErrorHandler, base::BindOnce(&SpeechRecognitionSession::ConnectionErrorHandler,
@@ -75,17 +75,17 @@ void SpeechRecognitionSession::OnRecognitionEnd(int session_id) {
void SpeechRecognitionSession::OnRecognitionResults( void SpeechRecognitionSession::OnRecognitionResults(
int session_id, int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) { const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
client_->ResultRetrieved(mojo::Clone(results)); client_->ResultRetrieved(mojo::Clone(results));
} }
void SpeechRecognitionSession::OnRecognitionError( void SpeechRecognitionSession::OnRecognitionError(
int session_id, int session_id,
const blink::mojom::SpeechRecognitionError& error) { const media::mojom::SpeechRecognitionError& error) {
if (!client_.is_bound()) { if (!client_.is_bound()) {
return; return;
} }
client_->ErrorOccurred(blink::mojom::SpeechRecognitionError::New(error)); client_->ErrorOccurred(media::mojom::SpeechRecognitionError::New(error));
} }
// The events below are currently not used by speech JS APIs implementation. // The events below are currently not used by speech JS APIs implementation.

@@ -10,29 +10,29 @@
#include "base/memory/weak_ptr.h" #include "base/memory/weak_ptr.h"
#include "content/browser/speech/speech_recognition_manager_impl.h" #include "content/browser/speech/speech_recognition_manager_impl.h"
#include "content/public/browser/speech_recognition_event_listener.h" #include "content/public/browser/speech_recognition_event_listener.h"
#include "media/mojo/mojom/speech_recognizer.mojom.h"
#include "mojo/public/cpp/bindings/pending_remote.h" #include "mojo/public/cpp/bindings/pending_remote.h"
#include "mojo/public/cpp/bindings/remote.h" #include "mojo/public/cpp/bindings/remote.h"
#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom.h"
namespace content { namespace content {
// SpeechRecognitionSession implements the // SpeechRecognitionSession implements the
// blink::mojom::SpeechRecognitionSession interface for a particular session. It // media::mojom::SpeechRecognitionSession interface for a particular session. It
// also acts as a proxy for events sent from SpeechRecognitionManager, and // also acts as a proxy for events sent from SpeechRecognitionManager, and
// forwards the events to the renderer using a // forwards the events to the renderer using a
// mojo::Remote<SpeechRecognitionSessionClient> (that is passed from the render // mojo::Remote<SpeechRecognitionSessionClient> (that is passed from the render
// process). // process).
class SpeechRecognitionSession : public blink::mojom::SpeechRecognitionSession, class SpeechRecognitionSession : public media::mojom::SpeechRecognitionSession,
public SpeechRecognitionEventListener { public SpeechRecognitionEventListener {
public: public:
explicit SpeechRecognitionSession( explicit SpeechRecognitionSession(
mojo::PendingRemote<blink::mojom::SpeechRecognitionSessionClient> client); mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient> client);
~SpeechRecognitionSession() override; ~SpeechRecognitionSession() override;
base::WeakPtr<SpeechRecognitionSession> AsWeakPtr(); base::WeakPtr<SpeechRecognitionSession> AsWeakPtr();
void SetSessionId(int session_id) { session_id_ = session_id; } void SetSessionId(int session_id) { session_id_ = session_id; }
// blink::mojom::SpeechRecognitionSession implementation. // media::mojom::SpeechRecognitionSession implementation.
void Abort() override; void Abort() override;
void StopCapture() override; void StopCapture() override;
@@ -45,11 +45,11 @@ class SpeechRecognitionSession : public blink::mojom::SpeechRecognitionSession,
void OnRecognitionEnd(int session_id) override; void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults( void OnRecognitionResults(
int session_id, int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
override; override;
void OnRecognitionError( void OnRecognitionError(
int session_id, int session_id,
const blink::mojom::SpeechRecognitionError& error) override; const media::mojom::SpeechRecognitionError& error) override;
void OnAudioLevelsChange(int session_id, void OnAudioLevelsChange(int session_id,
float volume, float volume,
float noise_volume) override; float noise_volume) override;
@@ -58,7 +58,7 @@ class SpeechRecognitionSession : public blink::mojom::SpeechRecognitionSession,
void ConnectionErrorHandler(); void ConnectionErrorHandler();
int session_id_ = SpeechRecognitionManager::kSessionIDInvalid; int session_id_ = SpeechRecognitionManager::kSessionIDInvalid;
mojo::Remote<blink::mojom::SpeechRecognitionSessionClient> client_; mojo::Remote<media::mojom::SpeechRecognitionSessionClient> client_;
bool stopped_ = false; bool stopped_ = false;
base::WeakPtrFactory<SpeechRecognitionSession> weak_factory_{this}; base::WeakPtrFactory<SpeechRecognitionSession> weak_factory_{this};
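
Rounding out the session plumbing, a hedged sketch of constructing a session around a client pipe with the relocated mojom types. The session id is a placeholder, the header path is assumed, and the construction site (normally inside SpeechRecognitionDispatcherHost) is simplified.

// --- Illustrative sketch (not part of the CL) ----------------------------
// Creates a SpeechRecognitionSession that forwards recognition events to a
// media::mojom::SpeechRecognitionSessionClient endpoint.
#include <memory>
#include <utility>

#include "content/browser/speech/speech_recognition_session.h"  // Path assumed.
#include "media/mojo/mojom/speech_recognizer.mojom.h"
#include "mojo/public/cpp/bindings/pending_remote.h"

std::unique_ptr<content::SpeechRecognitionSession> MakeSession(
    mojo::PendingRemote<media::mojom::SpeechRecognitionSessionClient> client,
    int session_id) {
  auto session =
      std::make_unique<content::SpeechRecognitionSession>(std::move(client));
  // Normally this is the id handed out by the SpeechRecognitionManager.
  session->SetSessionId(session_id);
  return session;
}
// --------------------------------------------------------------------------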

@@ -310,7 +310,7 @@ void SpeechRecognizerImpl::OnCaptureError(
} }
void SpeechRecognizerImpl::OnSpeechRecognitionEngineResults( void SpeechRecognizerImpl::OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) { const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) {
FSMEventArgs event_args(EVENT_ENGINE_RESULT); FSMEventArgs event_args(EVENT_ENGINE_RESULT);
event_args.engine_results = mojo::Clone(results); event_args.engine_results = mojo::Clone(results);
GetIOThreadTaskRunner({})->PostTask( GetIOThreadTaskRunner({})->PostTask(
@@ -324,7 +324,7 @@ void SpeechRecognizerImpl::OnSpeechRecognitionEngineEndOfUtterance() {
} }
void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) { const media::mojom::SpeechRecognitionError& error) {
FSMEventArgs event_args(EVENT_ENGINE_ERROR); FSMEventArgs event_args(EVENT_ENGINE_ERROR);
event_args.engine_error = error; event_args.engine_error = error;
GetIOThreadTaskRunner({})->PostTask( GetIOThreadTaskRunner({})->PostTask(
@@ -679,9 +679,9 @@ SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) {
listener()->OnSoundStart(session_id()); listener()->OnSoundStart(session_id());
return STATE_RECOGNIZING; return STATE_RECOGNIZING;
} else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) { } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) {
return Abort(blink::mojom::SpeechRecognitionError( return Abort(media::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kNoSpeech, media::mojom::SpeechRecognitionErrorCode::kNoSpeech,
blink::mojom::SpeechAudioErrorDetails::kNone)); media::mojom::SpeechAudioErrorDetails::kNone));
} }
return STATE_WAITING_FOR_SPEECH; return STATE_WAITING_FOR_SPEECH;
} }
@@ -712,27 +712,27 @@ SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::AbortSilently(const FSMEventArgs& event_args) { SpeechRecognizerImpl::AbortSilently(const FSMEventArgs& event_args) {
DCHECK_NE(event_args.event, EVENT_AUDIO_ERROR); DCHECK_NE(event_args.event, EVENT_AUDIO_ERROR);
DCHECK_NE(event_args.event, EVENT_ENGINE_ERROR); DCHECK_NE(event_args.event, EVENT_ENGINE_ERROR);
return Abort(blink::mojom::SpeechRecognitionError( return Abort(media::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kNone, media::mojom::SpeechRecognitionErrorCode::kNone,
blink::mojom::SpeechAudioErrorDetails::kNone)); media::mojom::SpeechAudioErrorDetails::kNone));
} }
SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::AbortWithError(const FSMEventArgs& event_args) { SpeechRecognizerImpl::AbortWithError(const FSMEventArgs& event_args) {
if (event_args.event == EVENT_AUDIO_ERROR) { if (event_args.event == EVENT_AUDIO_ERROR) {
return Abort(blink::mojom::SpeechRecognitionError( return Abort(media::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kAudioCapture, media::mojom::SpeechRecognitionErrorCode::kAudioCapture,
blink::mojom::SpeechAudioErrorDetails::kNone)); media::mojom::SpeechAudioErrorDetails::kNone));
} else if (event_args.event == EVENT_ENGINE_ERROR) { } else if (event_args.event == EVENT_ENGINE_ERROR) {
return Abort(event_args.engine_error); return Abort(event_args.engine_error);
} }
return Abort(blink::mojom::SpeechRecognitionError( return Abort(media::mojom::SpeechRecognitionError(
blink::mojom::SpeechRecognitionErrorCode::kAborted, media::mojom::SpeechRecognitionErrorCode::kAborted,
blink::mojom::SpeechAudioErrorDetails::kNone)); media::mojom::SpeechAudioErrorDetails::kNone));
} }
SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort( SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
const blink::mojom::SpeechRecognitionError& error) { const media::mojom::SpeechRecognitionError& error) {
DCHECK_CURRENTLY_ON(BrowserThread::IO); DCHECK_CURRENTLY_ON(BrowserThread::IO);
if (IsCapturingAudio()) if (IsCapturingAudio())
@@ -757,8 +757,9 @@ SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)
listener()->OnAudioEnd(session_id()); listener()->OnAudioEnd(session_id());
if (error.code != blink::mojom::SpeechRecognitionErrorCode::kNone) if (error.code != media::mojom::SpeechRecognitionErrorCode::kNone) {
listener()->OnRecognitionError(session_id(), error); listener()->OnRecognitionError(session_id(), error);
}
listener()->OnRecognitionEnd(session_id()); listener()->OnRecognitionEnd(session_id());
@@ -787,13 +788,13 @@ SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::ProcessIntermediateResult(
SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) { SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) {
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results = const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results =
event_args.engine_results; event_args.engine_results;
auto i = results.begin(); auto i = results.begin();
bool provisional_results_pending = false; bool provisional_results_pending = false;
bool results_are_empty = true; bool results_are_empty = true;
for (; i != results.end(); ++i) { for (; i != results.end(); ++i) {
const blink::mojom::SpeechRecognitionResultPtr& result = *i; const media::mojom::WebSpeechRecognitionResultPtr& result = *i;
if (result->is_provisional) { if (result->is_provisional) {
DCHECK(provisional_results_); DCHECK(provisional_results_);
provisional_results_pending = true; provisional_results_pending = true;
@@ -905,8 +906,8 @@ media::AudioCapturerSource* SpeechRecognizerImpl::GetAudioCapturerSource() {
SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
: event(event_value), : event(event_value),
audio_data(nullptr), audio_data(nullptr),
engine_error(blink::mojom::SpeechRecognitionErrorCode::kNone, engine_error(media::mojom::SpeechRecognitionErrorCode::kNone,
blink::mojom::SpeechAudioErrorDetails::kNone) {} media::mojom::SpeechAudioErrorDetails::kNone) {}
SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other)
: event(other.event), : event(other.event),

@@ -15,8 +15,8 @@
#include "content/browser/speech/speech_recognizer.h" #include "content/browser/speech/speech_recognizer.h"
#include "content/common/content_export.h" #include "content/common/content_export.h"
#include "media/base/audio_capturer_source.h" #include "media/base/audio_capturer_source.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h" #include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h" #include "media/mojo/mojom/speech_recognition_result.mojom.h"
namespace media { namespace media {
class AudioBus; class AudioBus;
@@ -99,8 +99,8 @@ class CONTENT_EXPORT SpeechRecognizerImpl
FSMEvent event; FSMEvent event;
scoped_refptr<AudioChunk> audio_data; scoped_refptr<AudioChunk> audio_data;
std::vector<blink::mojom::SpeechRecognitionResultPtr> engine_results; std::vector<media::mojom::WebSpeechRecognitionResultPtr> engine_results;
blink::mojom::SpeechRecognitionError engine_error; media::mojom::SpeechRecognitionError engine_error;
}; };
~SpeechRecognizerImpl() override; ~SpeechRecognizerImpl() override;
@@ -129,7 +129,7 @@ class CONTENT_EXPORT SpeechRecognizerImpl
FSMState ProcessFinalResult(const FSMEventArgs& event_args); FSMState ProcessFinalResult(const FSMEventArgs& event_args);
FSMState AbortSilently(const FSMEventArgs& event_args); FSMState AbortSilently(const FSMEventArgs& event_args);
FSMState AbortWithError(const FSMEventArgs& event_args); FSMState AbortWithError(const FSMEventArgs& event_args);
FSMState Abort(const blink::mojom::SpeechRecognitionError& error); FSMState Abort(const media::mojom::SpeechRecognitionError& error);
FSMState DetectEndOfSpeech(const FSMEventArgs& event_args); FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
FSMState DoNothing(const FSMEventArgs& event_args) const; FSMState DoNothing(const FSMEventArgs& event_args) const;
FSMState NotFeasible(const FSMEventArgs& event_args); FSMState NotFeasible(const FSMEventArgs& event_args);
@@ -156,11 +156,11 @@ class CONTENT_EXPORT SpeechRecognizerImpl
// SpeechRecognitionEngineDelegate methods. // SpeechRecognitionEngineDelegate methods.
void OnSpeechRecognitionEngineResults( void OnSpeechRecognitionEngineResults(
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
override; override;
void OnSpeechRecognitionEngineEndOfUtterance() override; void OnSpeechRecognitionEngineEndOfUtterance() override;
void OnSpeechRecognitionEngineError( void OnSpeechRecognitionEngineError(
const blink::mojom::SpeechRecognitionError& error) override; const media::mojom::SpeechRecognitionError& error) override;
media::AudioSystem* GetAudioSystem(); media::AudioSystem* GetAudioSystem();
void CreateAudioCapturerSource(); void CreateAudioCapturerSource();

@@ -18,7 +18,7 @@
#include "content/public/browser/speech_recognition_event_listener.h" #include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager.h" #include "content/public/browser/speech_recognition_manager.h"
#include "content/public/browser/speech_recognition_session_config.h" #include "content/public/browser/speech_recognition_session_config.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h" #include "media/mojo/mojom/speech_recognition_result.mojom.h"
using base::android::AppendJavaStringArrayToStringVector; using base::android::AppendJavaStringArrayToStringVector;
using base::android::AttachCurrentThread; using base::android::AttachCurrentThread;
@@ -171,12 +171,12 @@ void SpeechRecognizerImplAndroid::OnRecognitionResults(
std::vector<float> scores(options.size(), 0.0); std::vector<float> scores(options.size(), 0.0);
if (floats != NULL) if (floats != NULL)
JavaFloatArrayToFloatVector(env, floats, &scores); JavaFloatArrayToFloatVector(env, floats, &scores);
std::vector<blink::mojom::SpeechRecognitionResultPtr> results; std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(blink::mojom::SpeechRecognitionResult::New()); results.push_back(media::mojom::WebSpeechRecognitionResult::New());
blink::mojom::SpeechRecognitionResultPtr& result = results.back(); media::mojom::WebSpeechRecognitionResultPtr& result = results.back();
CHECK_EQ(options.size(), scores.size()); CHECK_EQ(options.size(), scores.size());
for (size_t i = 0; i < options.size(); ++i) { for (size_t i = 0; i < options.size(); ++i) {
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New( result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
options[i], static_cast<double>(scores[i]))); options[i], static_cast<double>(scores[i])));
} }
result->is_provisional = provisional; result->is_provisional = provisional;
@@ -188,7 +188,7 @@ void SpeechRecognizerImplAndroid::OnRecognitionResults(
} }
void SpeechRecognizerImplAndroid::OnRecognitionResultsOnIOThread( void SpeechRecognizerImplAndroid::OnRecognitionResultsOnIOThread(
std::vector<blink::mojom::SpeechRecognitionResultPtr> results) { std::vector<media::mojom::WebSpeechRecognitionResultPtr> results) {
DCHECK_CURRENTLY_ON(BrowserThread::IO); DCHECK_CURRENTLY_ON(BrowserThread::IO);
listener()->OnRecognitionResults(session_id(), results); listener()->OnRecognitionResults(session_id(), results);
} }
@@ -207,9 +207,9 @@ void SpeechRecognizerImplAndroid::OnRecognitionError(
DCHECK_CURRENTLY_ON(BrowserThread::IO); DCHECK_CURRENTLY_ON(BrowserThread::IO);
listener()->OnRecognitionError( listener()->OnRecognitionError(
session_id(), session_id(),
blink::mojom::SpeechRecognitionError( media::mojom::SpeechRecognitionError(
static_cast<blink::mojom::SpeechRecognitionErrorCode>(error), static_cast<media::mojom::SpeechRecognitionErrorCode>(error),
blink::mojom::SpeechAudioErrorDetails::kNone)); media::mojom::SpeechAudioErrorDetails::kNone));
} }
void SpeechRecognizerImplAndroid::OnRecognitionEnd( void SpeechRecognizerImplAndroid::OnRecognitionEnd(

@@ -12,8 +12,8 @@
#include "base/android/scoped_java_ref.h" #include "base/android/scoped_java_ref.h"
#include "content/browser/speech/speech_recognizer.h" #include "content/browser/speech/speech_recognizer.h"
#include "content/common/content_export.h" #include "content/common/content_export.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h" #include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h" #include "media/mojo/mojom/speech_recognition_result.mojom.h"
namespace content { namespace content {
@@ -65,7 +65,7 @@ class CONTENT_EXPORT SpeechRecognizerImplAndroid : public SpeechRecognizer {
bool continuous, bool continuous,
bool interim_results); bool interim_results);
void OnRecognitionResultsOnIOThread( void OnRecognitionResultsOnIOThread(
std::vector<blink::mojom::SpeechRecognitionResultPtr> results); std::vector<media::mojom::WebSpeechRecognitionResultPtr> results);
~SpeechRecognizerImplAndroid() override; ~SpeechRecognizerImplAndroid() override;

@@ -84,7 +84,7 @@ class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
audio_ended_(false), audio_ended_(false),
sound_started_(false), sound_started_(false),
sound_ended_(false), sound_ended_(false),
error_(blink::mojom::SpeechRecognitionErrorCode::kNone), error_(media::mojom::SpeechRecognitionErrorCode::kNone),
volume_(-1.0f) { volume_(-1.0f) {
// This test environment is not set up to support out-of-process services. // This test environment is not set up to support out-of-process services.
feature_list_.InitWithFeatures( feature_list_.InitWithFeatures(
@@ -196,14 +196,14 @@ class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
void OnRecognitionResults( void OnRecognitionResults(
int session_id, int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results)
override { override {
result_received_ = true; result_received_ = true;
} }
void OnRecognitionError( void OnRecognitionError(
int session_id, int session_id,
const blink::mojom::SpeechRecognitionError& error) override { const media::mojom::SpeechRecognitionError& error) override {
EXPECT_TRUE(recognition_started_); EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(recognition_ended_); EXPECT_FALSE(recognition_ended_);
error_ = error.code; error_ = error.code;
@@ -301,7 +301,7 @@ class SpeechRecognizerImplTest : public SpeechRecognitionEventListener,
bool audio_ended_; bool audio_ended_;
bool sound_started_; bool sound_started_;
bool sound_ended_; bool sound_ended_;
blink::mojom::SpeechRecognitionErrorCode error_; media::mojom::SpeechRecognitionErrorCode error_;
std::vector<uint8_t> audio_packet_; std::vector<uint8_t> audio_packet_;
std::unique_ptr<media::AudioBus> audio_bus_; std::unique_ptr<media::AudioBus> audio_bus_;
float volume_; float volume_;
@@ -321,7 +321,7 @@ TEST_F(SpeechRecognizerImplTest, StartNoInputDevices) {
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
OnCaptureError(); OnCaptureError();
base::RunLoop().RunUntilIdle(); base::RunLoop().RunUntilIdle();
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAudioCapture, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kAudioCapture, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -339,7 +339,7 @@ TEST_F(SpeechRecognizerImplTest, StartFakeInputDevice) {
EXPECT_TRUE(recognition_started_); EXPECT_TRUE(recognition_started_);
EXPECT_TRUE(audio_started_); EXPECT_TRUE(audio_started_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
recognizer_->AbortRecognition(); recognizer_->AbortRecognition();
base::RunLoop().RunUntilIdle(); base::RunLoop().RunUntilIdle();
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
@@ -369,7 +369,7 @@ TEST_F(SpeechRecognizerImplTest, StopBeforeDeviceInfoReceived) {
EXPECT_TRUE(recognition_started_); EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_); EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -397,7 +397,7 @@ TEST_F(SpeechRecognizerImplTest, CancelBeforeDeviceInfoReceived) {
EXPECT_TRUE(recognition_started_); EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_); EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -412,7 +412,7 @@ TEST_F(SpeechRecognizerImplTest, StopNoData) {
EXPECT_TRUE(recognition_started_); EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_); EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -428,7 +428,7 @@ TEST_F(SpeechRecognizerImplTest, CancelNoData) {
EXPECT_TRUE(recognition_started_); EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_); EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAborted, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kAborted, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -503,7 +503,7 @@ TEST_F(SpeechRecognizerImplTest, StopWithData) {
EXPECT_TRUE(audio_ended_); EXPECT_TRUE(audio_ended_);
EXPECT_FALSE(recognition_ended_); EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
// Create a response string. // Create a response string.
proto::SpeechRecognitionEvent proto_event; proto::SpeechRecognitionEvent proto_event;
@@ -528,7 +528,7 @@ TEST_F(SpeechRecognizerImplTest, StopWithData) {
EXPECT_TRUE(recognition_ended_); EXPECT_TRUE(recognition_ended_);
EXPECT_TRUE(result_received_); EXPECT_TRUE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -548,7 +548,7 @@ TEST_F(SpeechRecognizerImplTest, CancelWithData) {
EXPECT_TRUE(recognition_started_); EXPECT_TRUE(recognition_started_);
EXPECT_TRUE(audio_started_); EXPECT_TRUE(audio_started_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAborted, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kAborted, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -571,7 +571,7 @@ TEST_F(SpeechRecognizerImplTest, ConnectionError) {
EXPECT_TRUE(audio_ended_); EXPECT_TRUE(audio_ended_);
EXPECT_FALSE(recognition_ended_); EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
// Issue the network callback to complete the process. // Issue the network callback to complete the process.
const network::TestURLLoaderFactory::PendingRequest* pending_request; const network::TestURLLoaderFactory::PendingRequest* pending_request;
@@ -583,7 +583,7 @@ TEST_F(SpeechRecognizerImplTest, ConnectionError) {
base::RunLoop().RunUntilIdle(); base::RunLoop().RunUntilIdle();
EXPECT_TRUE(recognition_ended_); EXPECT_TRUE(recognition_ended_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNetwork, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -606,7 +606,7 @@ TEST_F(SpeechRecognizerImplTest, ServerError) {
EXPECT_TRUE(audio_ended_); EXPECT_TRUE(audio_ended_);
EXPECT_FALSE(recognition_ended_); EXPECT_FALSE(recognition_ended_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
const network::TestURLLoaderFactory::PendingRequest* pending_request; const network::TestURLLoaderFactory::PendingRequest* pending_request;
ASSERT_TRUE(GetUpstreamRequest(&pending_request)); ASSERT_TRUE(GetUpstreamRequest(&pending_request));
@@ -621,7 +621,7 @@ TEST_F(SpeechRecognizerImplTest, ServerError) {
base::RunLoop().RunUntilIdle(); base::RunLoop().RunUntilIdle();
EXPECT_TRUE(recognition_ended_); EXPECT_TRUE(recognition_ended_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNetwork, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNetwork, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -638,7 +638,7 @@ TEST_F(SpeechRecognizerImplTest, OnCaptureError_PropagatesError) {
EXPECT_TRUE(recognition_started_); EXPECT_TRUE(recognition_started_);
EXPECT_FALSE(audio_started_); EXPECT_FALSE(audio_started_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kAudioCapture, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kAudioCapture, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -663,7 +663,7 @@ TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackIssued) {
EXPECT_TRUE(recognition_started_); EXPECT_TRUE(recognition_started_);
EXPECT_TRUE(audio_started_); EXPECT_TRUE(audio_started_);
EXPECT_FALSE(result_received_); EXPECT_FALSE(result_received_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNoSpeech, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNoSpeech, error_);
CheckFinalEventsConsistency(); CheckFinalEventsConsistency();
} }
@@ -692,7 +692,7 @@ TEST_F(SpeechRecognizerImplTest, NoSpeechCallbackNotIssued) {
} }
base::RunLoop().RunUntilIdle(); base::RunLoop().RunUntilIdle();
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_TRUE(audio_started_); EXPECT_TRUE(audio_started_);
EXPECT_FALSE(audio_ended_); EXPECT_FALSE(audio_ended_);
EXPECT_FALSE(recognition_ended_); EXPECT_FALSE(recognition_ended_);
@@ -733,7 +733,7 @@ TEST_F(SpeechRecognizerImplTest, SetInputVolumeCallback) {
EXPECT_NEAR(0.89926866f, volume_, 0.00001f); EXPECT_NEAR(0.89926866f, volume_, 0.00001f);
EXPECT_FLOAT_EQ(0.75071919f, noise_volume_); EXPECT_FLOAT_EQ(0.75071919f, noise_volume_);
EXPECT_EQ(blink::mojom::SpeechRecognitionErrorCode::kNone, error_); EXPECT_EQ(media::mojom::SpeechRecognitionErrorCode::kNone, error_);
EXPECT_FALSE(audio_ended_); EXPECT_FALSE(audio_ended_);
EXPECT_FALSE(recognition_ended_); EXPECT_FALSE(recognition_ended_);
recognizer_->AbortRecognition(); recognizer_->AbortRecognition();

@@ -162,6 +162,7 @@ android_library("content_full_java") {
"//media/capture/video/android:capture_java", "//media/capture/video/android:capture_java",
"//media/midi:midi_java", "//media/midi:midi_java",
"//media/mojo/mojom:mojom_java", "//media/mojo/mojom:mojom_java",
"//media/mojo/mojom:web_speech_recognition_java",
"//mojo/public/java:base_java", "//mojo/public/java:base_java",
"//mojo/public/java:bindings_java", "//mojo/public/java:bindings_java",
"//mojo/public/java:system_java", "//mojo/public/java:system_java",

@@ -26,9 +26,9 @@ import org.chromium.base.BuildInfo;
import org.chromium.base.ContextUtils; import org.chromium.base.ContextUtils;
import org.chromium.base.Log; import org.chromium.base.Log;
import org.chromium.base.PackageUtils; import org.chromium.base.PackageUtils;
import org.chromium.blink.mojom.SpeechRecognitionErrorCode;
import org.chromium.content.R; import org.chromium.content.R;
import org.chromium.content_public.browser.SpeechRecognition; import org.chromium.content_public.browser.SpeechRecognition;
import org.chromium.media.mojom.SpeechRecognitionErrorCode;
import org.chromium.ui.widget.Toast; import org.chromium.ui.widget.Toast;
import java.util.ArrayList; import java.util.ArrayList;

@@ -573,6 +573,7 @@ source_set("browser_sources") {
"//gpu/command_buffer/service:gles2", "//gpu/command_buffer/service:gles2",
"//media", "//media",
"//media/capture", "//media/capture",
"//media/mojo/mojom:web_speech_recognition",
"//net", "//net",
"//services/device/public/cpp/geolocation", "//services/device/public/cpp/geolocation",
"//services/metrics/public/cpp:metrics_cpp", "//services/metrics/public/cpp:metrics_cpp",

@@ -6,13 +6,11 @@
#define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_EVENT_LISTENER_H_ #define CONTENT_PUBLIC_BROWSER_SPEECH_RECOGNITION_EVENT_LISTENER_H_
#include "content/common/content_export.h" #include "content/common/content_export.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h" #include "media/mojo/mojom/speech_recognition_result.mojom.h"
namespace blink { namespace media::mojom {
namespace mojom {
class SpeechRecognitionError; class SpeechRecognitionError;
} } // namespace media::mojom
} // namespace blink
namespace content { namespace content {
@@ -40,14 +38,15 @@ class CONTENT_EXPORT SpeechRecognitionEventListener {
// Invoked when a result is retrieved. // Invoked when a result is retrieved.
virtual void OnRecognitionResults( virtual void OnRecognitionResults(
int session_id, int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& results) = 0; const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& results) = 0;
// Invoked if there was an error while capturing or recognizing audio. // Invoked if there was an error while capturing or recognizing audio.
// The recognition has already been cancelled when this call is made and // The recognition has already been cancelled when this call is made and
// no more events will be raised. // no more events will be raised.
virtual void OnRecognitionError( virtual void OnRecognitionError(
int session_id, int session_id,
const blink::mojom::SpeechRecognitionError& error) = 0; const media::mojom::SpeechRecognitionError& error) = 0;
// Informs of a change in the captured audio level, useful if displaying // Informs of a change in the captured audio level, useful if displaying
// a microphone volume indicator while recording. // a microphone volume indicator while recording.

@@ -14,8 +14,8 @@
#include "content/common/content_export.h" #include "content/common/content_export.h"
#include "content/public/browser/speech_recognition_session_context.h" #include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/browser/speech_recognition_session_preamble.h" #include "content/public/browser/speech_recognition_session_preamble.h"
#include "media/mojo/mojom/speech_recognition_grammar.mojom.h"
#include "services/network/public/cpp/shared_url_loader_factory.h" #include "services/network/public/cpp/shared_url_loader_factory.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_grammar.mojom.h"
#include "url/origin.h" #include "url/origin.h"
namespace content { namespace content {
@@ -32,7 +32,7 @@ struct CONTENT_EXPORT SpeechRecognitionSessionConfig {
// Accept language header. If |language| is empty, used to get a language // Accept language header. If |language| is empty, used to get a language
// instead. // instead.
std::string accept_language; std::string accept_language;
std::vector<blink::mojom::SpeechRecognitionGrammar> grammars; std::vector<media::mojom::SpeechRecognitionGrammar> grammars;
url::Origin origin; url::Origin origin;
bool filter_profanities; bool filter_profanities;
bool continuous; bool continuous;

@@ -14,9 +14,9 @@
#include "content/public/browser/speech_recognition_event_listener.h" #include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager_delegate.h" #include "content/public/browser/speech_recognition_manager_delegate.h"
#include "content/public/test/test_utils.h" #include "content/public/test/test_utils.h"
#include "media/mojo/mojom/speech_recognition_error.mojom.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"
#include "testing/gtest/include/gtest/gtest.h" #include "testing/gtest/include/gtest/gtest.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom.h"
namespace { namespace {
const char kTestResult[] = "Pictures of the moon"; const char kTestResult[] = "Pictures of the moon";
@@ -194,15 +194,15 @@ void FakeSpeechRecognitionManager::SetFakeRecognitionResult(
listener_->OnSoundStart(session_id_); listener_->OnSoundStart(session_id_);
has_sent_result_ = true; has_sent_result_ = true;
} }
blink::mojom::SpeechRecognitionResultPtr result = media::mojom::WebSpeechRecognitionResultPtr result =
blink::mojom::SpeechRecognitionResult::New(); media::mojom::WebSpeechRecognitionResult::New();
result->hypotheses.push_back(blink::mojom::SpeechRecognitionHypothesis::New( result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
base::UTF8ToUTF16(fake_result_), 1.0)); base::UTF8ToUTF16(fake_result_), 1.0));
// If `is_provisional` is true, then the result is an interim result that // If `is_provisional` is true, then the result is an interim result that
// could be changed. Otherwise, it's a final result. Consequently, // could be changed. Otherwise, it's a final result. Consequently,
// `is_provisional` is the converse of `is_final`. // `is_provisional` is the converse of `is_final`.
result->is_provisional = !is_final_; result->is_provisional = !is_final_;
std::vector<blink::mojom::SpeechRecognitionResultPtr> results; std::vector<media::mojom::WebSpeechRecognitionResultPtr> results;
results.push_back(std::move(result)); results.push_back(std::move(result));
listener_->OnRecognitionResults(session_id_, results); listener_->OnRecognitionResults(session_id_, results);
GetUIThreadTaskRunner({})->PostTask( GetUIThreadTaskRunner({})->PostTask(
@@ -247,9 +247,9 @@ void FakeSpeechRecognitionManager::SendFakeSpeechRecognitionError() {
VLOG(1) << "Sending fake recognition error."; VLOG(1) << "Sending fake recognition error.";
listener_->OnRecognitionError( listener_->OnRecognitionError(
session_id_, *blink::mojom::SpeechRecognitionError::New( session_id_, *media::mojom::SpeechRecognitionError::New(
blink::mojom::SpeechRecognitionErrorCode::kNetwork, media::mojom::SpeechRecognitionErrorCode::kNetwork,
blink::mojom::SpeechAudioErrorDetails::kNone)); media::mojom::SpeechAudioErrorDetails::kNone));
GetUIThreadTaskRunner({})->PostTask( GetUIThreadTaskRunner({})->PostTask(
FROM_HERE, base::BindOnce(&FakeSpeechRecognitionManager::OnFakeErrorSent, FROM_HERE, base::BindOnce(&FakeSpeechRecognitionManager::OnFakeErrorSent,
base::Unretained(this))); base::Unretained(this)));

@@ -77,11 +77,11 @@ class FakeSpeechRecognitionManager : public SpeechRecognitionManager,
void OnRecognitionEnd(int session_id) override {} void OnRecognitionEnd(int session_id) override {}
void OnRecognitionResults( void OnRecognitionResults(
int session_id, int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result) const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
override {} override {}
void OnRecognitionError( void OnRecognitionError(
int session_id, int session_id,
const blink::mojom::SpeechRecognitionError& error) override {} const media::mojom::SpeechRecognitionError& error) override {}
void OnAudioLevelsChange(int session_id, void OnAudioLevelsChange(int session_id,
float volume, float volume,
float noise_volume) override {} float noise_volume) override {}

@@ -568,6 +568,7 @@ static_library("test_support") {
"//media", "//media",
"//media/capture", "//media/capture",
"//media/capture/mojom:image_capture", "//media/capture/mojom:image_capture",
"//media/mojo/mojom:web_speech_recognition",
"//mojo/core/embedder", "//mojo/core/embedder",
"//mojo/public/cpp/test_support:test_utils", "//mojo/public/cpp/test_support:test_utils",
"//net:quic_test_tools", "//net:quic_test_tools",
@@ -2114,6 +2115,7 @@ test("content_browsertests") {
deps += [ deps += [
"//components/soda:utils", "//components/soda:utils",
"//content/public/browser:proto", "//content/public/browser:proto",
"//media/mojo/mojom:web_speech_recognition",
"//ui/base/clipboard:clipboard_test_support", "//ui/base/clipboard:clipboard_test_support",
] ]
@@ -3352,7 +3354,10 @@ test("content_unittests") {
"../browser/speech/speech_recognizer_impl_unittest.cc", "../browser/speech/speech_recognizer_impl_unittest.cc",
"../browser/tracing/tracing_ui_unittest.cc", "../browser/tracing/tracing_ui_unittest.cc",
] ]
deps += [ "//components/speech:speech" ] deps += [
"//components/speech:speech",
"//media/mojo/mojom:web_speech_recognition",
]
if (!is_fuchsia) { if (!is_fuchsia) {
sources += [ sources += [

@@ -53,11 +53,11 @@ void ShellSpeechRecognitionManagerDelegate::OnRecognitionEnd(int session_id) {
void ShellSpeechRecognitionManagerDelegate::OnRecognitionResults( void ShellSpeechRecognitionManagerDelegate::OnRecognitionResults(
int session_id, int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result) {} const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result) {}
void ShellSpeechRecognitionManagerDelegate::OnRecognitionError( void ShellSpeechRecognitionManagerDelegate::OnRecognitionError(
int session_id, int session_id,
const blink::mojom::SpeechRecognitionError& error) {} const media::mojom::SpeechRecognitionError& error) {}
void ShellSpeechRecognitionManagerDelegate::OnAudioLevelsChange( void ShellSpeechRecognitionManagerDelegate::OnAudioLevelsChange(
int session_id, int session_id,

@@ -41,11 +41,11 @@ class ShellSpeechRecognitionManagerDelegate
void OnRecognitionEnd(int session_id) override; void OnRecognitionEnd(int session_id) override;
void OnRecognitionResults( void OnRecognitionResults(
int session_id, int session_id,
const std::vector<blink::mojom::SpeechRecognitionResultPtr>& result) const std::vector<media::mojom::WebSpeechRecognitionResultPtr>& result)
override; override;
void OnRecognitionError( void OnRecognitionError(
int session_id, int session_id,
const blink::mojom::SpeechRecognitionError& error) override; const media::mojom::SpeechRecognitionError& error) override;
void OnAudioLevelsChange(int session_id, void OnAudioLevelsChange(int session_id,
float volume, float volume,
float noise_volume) override; float noise_volume) override;

@@ -875,3 +875,20 @@ source_set("unit_tests") {
deps += [ ":speech_recognition" ] deps += [ ":speech_recognition" ]
} }
} }
mojom("web_speech_recognition") {
generate_java = true
sources = [
"speech_recognition_error.mojom",
"speech_recognition_error_code.mojom",
"speech_recognition_grammar.mojom",
"speech_recognition_result.mojom",
"speech_recognizer.mojom",
]
deps = [
"//mojo/public/mojom/base",
"//url/mojom:url_mojom_gurl",
]
}
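This new target generates bindings for every consumer touched later in the CL: plain C++ for //content and the speech recognition service, the Blink variant for the renderer, Java for Android (web_speech_recognition_java), and JS data deps for web tests. A minimal sketch of browser-side usage under those assumptions; the helper below is hypothetical and only illustrates that the enum now resolves in media::mojom rather than blink::mojom (renderer code includes the ".mojom-blink.h" header and uses media::mojom::blink instead):

#include "media/mojo/mojom/speech_recognition_error_code.mojom.h"

// Hypothetical helper, not part of the CL: treat every code other than
// kNone and kNoMatch as fatal to the session.
bool IsFatalSpeechRecognitionError(
    media::mojom::SpeechRecognitionErrorCode code) {
  return code != media::mojom::SpeechRecognitionErrorCode::kNone &&
         code != media::mojom::SpeechRecognitionErrorCode::kNoMatch;
}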

@@ -1,10 +1,10 @@
// Copyright 2018 The Chromium Authors // Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
module blink.mojom; module media.mojom;
import "third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom"; import "media/mojo/mojom/speech_recognition_error_code.mojom";
// Used to provide further details about an audio capture related error. // Used to provide further details about an audio capture related error.
enum SpeechAudioErrorDetails { enum SpeechAudioErrorDetails {
@@ -15,6 +15,6 @@ enum SpeechAudioErrorDetails {
// Used to send information to the renderer about an error in the browser's // Used to send information to the renderer about an error in the browser's
// speech recognition engine. // speech recognition engine.
struct SpeechRecognitionError { struct SpeechRecognitionError {
blink.mojom.SpeechRecognitionErrorCode code; media.mojom.SpeechRecognitionErrorCode code;
blink.mojom.SpeechAudioErrorDetails details; media.mojom.SpeechAudioErrorDetails details;
}; };
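With the module renamed to media.mojom, browser-side C++ constructs this struct through the relocated generated types, exactly as the FakeSpeechRecognitionManager hunk above does. A minimal self-contained sketch:

#include "media/mojo/mojom/speech_recognition_error.mojom.h"

// Mirrors FakeSpeechRecognitionManager::SendFakeSpeechRecognitionError():
// a network error with no audio-capture details attached.
media::mojom::SpeechRecognitionErrorPtr MakeNetworkError() {
  return media::mojom::SpeechRecognitionError::New(
      media::mojom::SpeechRecognitionErrorCode::kNetwork,
      media::mojom::SpeechAudioErrorDetails::kNone);
}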

@@ -1,8 +1,8 @@
// Copyright 2018 The Chromium Authors // Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
module blink.mojom; module media.mojom;
// Used by the browser's speech recognition engine to indicate the reason for a // Used by the browser's speech recognition engine to indicate the reason for a
// speech recognition error. // speech recognition error.

@@ -1,8 +1,8 @@
// Copyright 2018 The Chromium Authors // Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
module blink.mojom; module media.mojom;
import "url/mojom/url.mojom"; import "url/mojom/url.mojom";

@@ -1,8 +1,8 @@
// Copyright 2018 The Chromium Authors // Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
module blink.mojom; module media.mojom;
import "mojo/public/mojom/base/string16.mojom"; import "mojo/public/mojom/base/string16.mojom";
@@ -18,9 +18,9 @@ struct SpeechRecognitionHypothesis {
}; };
// Group of recognition hypotheses for a particular speech segment. // Group of recognition hypotheses for a particular speech segment.
struct SpeechRecognitionResult { struct WebSpeechRecognitionResult {
// An N-best list of hypotheses. // An N-best list of hypotheses.
array<blink.mojom.SpeechRecognitionHypothesis> hypotheses; array<media.mojom.SpeechRecognitionHypothesis> hypotheses;
// False if this is the final time the speech service will return this // False if this is the final time the speech service will return this
// particular result. If true, then this represents an interim result that // particular result. If true, then this represents an interim result that
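The struct itself is renamed to WebSpeechRecognitionResult, presumably to avoid colliding with an existing media.mojom type of the same name, while SpeechRecognitionHypothesis keeps its name and shape. A sketch of building one final (non-provisional) result, mirroring FakeSpeechRecognitionManager::SetFakeRecognitionResult earlier in the CL:

#include "base/strings/utf_string_conversions.h"
#include "media/mojo/mojom/speech_recognition_result.mojom.h"

media::mojom::WebSpeechRecognitionResultPtr MakeFinalResult() {
  auto result = media::mojom::WebSpeechRecognitionResult::New();
  result->hypotheses.push_back(media::mojom::SpeechRecognitionHypothesis::New(
      base::UTF8ToUTF16("Pictures of the moon"), /*confidence=*/1.0));
  result->is_provisional = false;  // A final result, not an interim one.
  return result;
}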

@@ -1,12 +1,12 @@
// Copyright 2018 The Chromium Authors // Copyright 2024 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
module blink.mojom; module media.mojom;
import "third_party/blink/public/mojom/speech/speech_recognition_grammar.mojom"; import "media/mojo/mojom/speech_recognition_grammar.mojom";
import "third_party/blink/public/mojom/speech/speech_recognition_result.mojom"; import "media/mojo/mojom/speech_recognition_result.mojom";
import "third_party/blink/public/mojom/speech/speech_recognition_error.mojom"; import "media/mojo/mojom/speech_recognition_error.mojom";
// Created by the renderer and sent to the browser to start a speech recognition // Created by the renderer and sent to the browser to start a speech recognition
// session. // session.
@@ -65,7 +65,7 @@ interface SpeechRecognitionSession {
// WebSpeechRecognitionHandle. // WebSpeechRecognitionHandle.
interface SpeechRecognitionSessionClient { interface SpeechRecognitionSessionClient {
// Called to dispatch the "result" event. // Called to dispatch the "result" event.
ResultRetrieved(array<SpeechRecognitionResult> results); ResultRetrieved(array<WebSpeechRecognitionResult> results);
// Called to dispatch the "nomatch" event if the error code passed is of types // Called to dispatch the "nomatch" event if the error code passed is of types
// kNoMatch, otherwise dispatchers an "error" event. // kNoMatch, otherwise dispatchers an "error" event.

@@ -223,11 +223,6 @@ mojom("mojom_platform") {
"smart_card/smart_card.mojom", "smart_card/smart_card.mojom",
"sms/webotp_service.mojom", "sms/webotp_service.mojom",
"speculation_rules/speculation_rules.mojom", "speculation_rules/speculation_rules.mojom",
"speech/speech_recognition_error.mojom",
"speech/speech_recognition_error_code.mojom",
"speech/speech_recognition_grammar.mojom",
"speech/speech_recognition_result.mojom",
"speech/speech_recognizer.mojom",
"speech/speech_synthesis.mojom", "speech/speech_synthesis.mojom",
"storage_access/storage_access_automation.mojom", "storage_access/storage_access_automation.mojom",
"subapps/sub_apps_service.mojom", "subapps/sub_apps_service.mojom",

@@ -36,4 +36,6 @@ blink_modules_sources("speech") {
"speech_synthesis_voice.cc", "speech_synthesis_voice.cc",
"speech_synthesis_voice.h", "speech_synthesis_voice.h",
] ]
deps = [ "//media/mojo/mojom:web_speech_recognition_blink" ]
} }

@@ -1,4 +1,5 @@
include_rules = [ include_rules = [
"+media",
"-third_party/blink/renderer/modules", "-third_party/blink/renderer/modules",
"+third_party/blink/renderer/modules/event_modules.h", "+third_party/blink/renderer/modules/event_modules.h",
"+third_party/blink/renderer/modules/event_target_modules.h", "+third_party/blink/renderer/modules/event_target_modules.h",

@@ -28,10 +28,10 @@
#include <algorithm> #include <algorithm>
#include "build/build_config.h" #include "build/build_config.h"
#include "media/mojo/mojom/speech_recognition_error.mojom-blink.h"
#include "media/mojo/mojom/speech_recognition_result.mojom-blink.h"
#include "mojo/public/cpp/bindings/pending_receiver.h" #include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/pending_remote.h" #include "mojo/public/cpp/bindings/pending_remote.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error.mojom-blink.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_result.mojom-blink.h"
#include "third_party/blink/renderer/core/frame/local_dom_window.h" #include "third_party/blink/renderer/core/frame/local_dom_window.h"
#include "third_party/blink/renderer/core/frame/local_frame.h" #include "third_party/blink/renderer/core/frame/local_frame.h"
#include "third_party/blink/renderer/core/page/page.h" #include "third_party/blink/renderer/core/page/page.h"
@@ -96,7 +96,7 @@ void SpeechRecognition::abort() {
} }
void SpeechRecognition::ResultRetrieved( void SpeechRecognition::ResultRetrieved(
WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results) { WTF::Vector<media::mojom::blink::WebSpeechRecognitionResultPtr> results) {
auto* it = std::stable_partition( auto* it = std::stable_partition(
results.begin(), results.end(), results.begin(), results.end(),
[](const auto& result) { return !result->is_provisional; }); [](const auto& result) { return !result->is_provisional; });
@@ -139,8 +139,9 @@ void SpeechRecognition::ResultRetrieved(
} }
void SpeechRecognition::ErrorOccurred( void SpeechRecognition::ErrorOccurred(
mojom::blink::SpeechRecognitionErrorPtr error) { media::mojom::blink::SpeechRecognitionErrorPtr error) {
if (error->code == mojom::blink::SpeechRecognitionErrorCode::kNoMatch) { if (error->code ==
media::mojom::blink::SpeechRecognitionErrorCode::kNoMatch) {
DispatchEvent(*SpeechRecognitionEvent::CreateNoMatch(nullptr)); DispatchEvent(*SpeechRecognitionEvent::CreateNoMatch(nullptr));
} else { } else {
// TODO(primiano): message? // TODO(primiano): message?
@@ -202,9 +203,9 @@ void SpeechRecognition::PageVisibilityChanged() {
} }
void SpeechRecognition::OnConnectionError() { void SpeechRecognition::OnConnectionError() {
ErrorOccurred(mojom::blink::SpeechRecognitionError::New( ErrorOccurred(media::mojom::blink::SpeechRecognitionError::New(
mojom::blink::SpeechRecognitionErrorCode::kNetwork, media::mojom::blink::SpeechRecognitionErrorCode::kNetwork,
mojom::blink::SpeechAudioErrorDetails::kNone)); media::mojom::blink::SpeechAudioErrorDetails::kNone));
Ended(); Ended();
} }
@@ -228,7 +229,7 @@ void SpeechRecognition::StartInternal(ExceptionState* exception_state) {
} }
final_results_.clear(); final_results_.clear();
mojo::PendingRemote<mojom::blink::SpeechRecognitionSessionClient> mojo::PendingRemote<media::mojom::blink::SpeechRecognitionSessionClient>
session_client; session_client;
// See https://bit.ly/2S0zRAS for task types. // See https://bit.ly/2S0zRAS for task types.
receiver_.Bind( receiver_.Bind(

@@ -26,7 +26,7 @@
#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_H_ #ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_H_
#define THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_H_ #define THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_H_
#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom-blink.h" #include "media/mojo/mojom/speech_recognizer.mojom-blink.h"
#include "third_party/blink/public/platform/web_private_ptr.h" #include "third_party/blink/public/platform/web_private_ptr.h"
#include "third_party/blink/renderer/bindings/core/v8/active_script_wrappable.h" #include "third_party/blink/renderer/bindings/core/v8/active_script_wrappable.h"
#include "third_party/blink/renderer/core/execution_context/execution_context_lifecycle_observer.h" #include "third_party/blink/renderer/core/execution_context/execution_context_lifecycle_observer.h"
@@ -51,7 +51,7 @@ class MODULES_EXPORT SpeechRecognition final
: public EventTarget, : public EventTarget,
public ActiveScriptWrappable<SpeechRecognition>, public ActiveScriptWrappable<SpeechRecognition>,
public ExecutionContextLifecycleObserver, public ExecutionContextLifecycleObserver,
public mojom::blink::SpeechRecognitionSessionClient, public media::mojom::blink::SpeechRecognitionSessionClient,
public PageVisibilityObserver { public PageVisibilityObserver {
DEFINE_WRAPPERTYPEINFO(); DEFINE_WRAPPERTYPEINFO();
@@ -83,10 +83,12 @@ class MODULES_EXPORT SpeechRecognition final
void stopFunction(); void stopFunction();
void abort(); void abort();
// mojom::blink::SpeechRecognitionSessionClient // media::mojom::blink::SpeechRecognitionSessionClient
void ResultRetrieved( void ResultRetrieved(
WTF::Vector<mojom::blink::SpeechRecognitionResultPtr> results) override; WTF::Vector<media::mojom::blink::WebSpeechRecognitionResultPtr> results)
void ErrorOccurred(mojom::blink::SpeechRecognitionErrorPtr error) override; override;
void ErrorOccurred(
media::mojom::blink::SpeechRecognitionErrorPtr error) override;
void Started() override; void Started() override;
void AudioStarted() override; void AudioStarted() override;
void SoundStarted() override; void SoundStarted() override;
@@ -135,10 +137,10 @@ class MODULES_EXPORT SpeechRecognition final
bool started_; bool started_;
bool stopping_; bool stopping_;
HeapVector<Member<SpeechRecognitionResult>> final_results_; HeapVector<Member<SpeechRecognitionResult>> final_results_;
HeapMojoReceiver<mojom::blink::SpeechRecognitionSessionClient, HeapMojoReceiver<media::mojom::blink::SpeechRecognitionSessionClient,
SpeechRecognition> SpeechRecognition>
receiver_; receiver_;
HeapMojoRemote<mojom::blink::SpeechRecognitionSession> session_; HeapMojoRemote<media::mojom::blink::SpeechRecognitionSession> session_;
}; };
} // namespace blink } // namespace blink

@@ -27,6 +27,7 @@
#include <memory> #include <memory>
#include "media/mojo/mojom/speech_recognizer.mojom-blink.h"
#include "third_party/blink/public/common/browser_interface_broker_proxy.h" #include "third_party/blink/public/common/browser_interface_broker_proxy.h"
#include "third_party/blink/renderer/core/dom/document.h" #include "third_party/blink/renderer/core/dom/document.h"
#include "third_party/blink/renderer/core/frame/local_dom_window.h" #include "third_party/blink/renderer/core/frame/local_dom_window.h"
@@ -58,21 +59,22 @@ SpeechRecognitionController::~SpeechRecognitionController() {
} }
void SpeechRecognitionController::Start( void SpeechRecognitionController::Start(
mojo::PendingReceiver<mojom::blink::SpeechRecognitionSession> mojo::PendingReceiver<media::mojom::blink::SpeechRecognitionSession>
session_receiver, session_receiver,
mojo::PendingRemote<mojom::blink::SpeechRecognitionSessionClient> mojo::PendingRemote<media::mojom::blink::SpeechRecognitionSessionClient>
session_client, session_client,
const SpeechGrammarList& grammars, const SpeechGrammarList& grammars,
const String& lang, const String& lang,
bool continuous, bool continuous,
bool interim_results, bool interim_results,
uint32_t max_alternatives) { uint32_t max_alternatives) {
mojom::blink::StartSpeechRecognitionRequestParamsPtr msg_params = media::mojom::blink::StartSpeechRecognitionRequestParamsPtr msg_params =
mojom::blink::StartSpeechRecognitionRequestParams::New(); media::mojom::blink::StartSpeechRecognitionRequestParams::New();
for (unsigned i = 0; i < grammars.length(); i++) { for (unsigned i = 0; i < grammars.length(); i++) {
SpeechGrammar* grammar = grammars.item(i); SpeechGrammar* grammar = grammars.item(i);
msg_params->grammars.push_back(mojom::blink::SpeechRecognitionGrammar::New( msg_params->grammars.push_back(
grammar->src(), grammar->weight())); media::mojom::blink::SpeechRecognitionGrammar::New(grammar->src(),
grammar->weight()));
} }
msg_params->language = lang.IsNull() ? g_empty_string : lang; msg_params->language = lang.IsNull() ? g_empty_string : lang;
msg_params->max_hypotheses = max_alternatives; msg_params->max_hypotheses = max_alternatives;
@@ -89,7 +91,7 @@ void SpeechRecognitionController::Trace(Visitor* visitor) const {
visitor->Trace(speech_recognizer_); visitor->Trace(speech_recognizer_);
} }
mojom::blink::SpeechRecognizer* media::mojom::blink::SpeechRecognizer*
SpeechRecognitionController::GetSpeechRecognizer() { SpeechRecognitionController::GetSpeechRecognizer() {
if (!speech_recognizer_.is_bound()) { if (!speech_recognizer_.is_bound()) {
GetSupplementable()->GetBrowserInterfaceBroker().GetInterface( GetSupplementable()->GetBrowserInterfaceBroker().GetInterface(

@@ -26,9 +26,9 @@
#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_CONTROLLER_H_ #ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_CONTROLLER_H_
#define THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_CONTROLLER_H_ #define THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_CONTROLLER_H_
#include "media/mojo/mojom/speech_recognizer.mojom-blink.h"
#include "mojo/public/cpp/bindings/pending_receiver.h" #include "mojo/public/cpp/bindings/pending_receiver.h"
#include "mojo/public/cpp/bindings/pending_remote.h" #include "mojo/public/cpp/bindings/pending_remote.h"
#include "third_party/blink/public/mojom/speech/speech_recognizer.mojom-blink.h"
#include "third_party/blink/renderer/modules/modules_export.h" #include "third_party/blink/renderer/modules/modules_export.h"
#include "third_party/blink/renderer/platform/mojo/heap_mojo_remote.h" #include "third_party/blink/renderer/platform/mojo/heap_mojo_remote.h"
#include "third_party/blink/renderer/platform/mojo/heap_mojo_wrapper_mode.h" #include "third_party/blink/renderer/platform/mojo/heap_mojo_wrapper_mode.h"
@@ -49,24 +49,25 @@ class SpeechRecognitionController final
explicit SpeechRecognitionController(LocalDOMWindow&); explicit SpeechRecognitionController(LocalDOMWindow&);
virtual ~SpeechRecognitionController(); virtual ~SpeechRecognitionController();
void Start(mojo::PendingReceiver<mojom::blink::SpeechRecognitionSession> void Start(
session_receiver, mojo::PendingReceiver<media::mojom::blink::SpeechRecognitionSession>
mojo::PendingRemote<mojom::blink::SpeechRecognitionSessionClient> session_receiver,
session_client, mojo::PendingRemote<media::mojom::blink::SpeechRecognitionSessionClient>
const SpeechGrammarList& grammars, session_client,
const String& lang, const SpeechGrammarList& grammars,
bool continuous, const String& lang,
bool interim_results, bool continuous,
uint32_t max_alternatives); bool interim_results,
uint32_t max_alternatives);
static SpeechRecognitionController* From(LocalDOMWindow&); static SpeechRecognitionController* From(LocalDOMWindow&);
void Trace(Visitor* visitor) const override; void Trace(Visitor* visitor) const override;
private: private:
mojom::blink::SpeechRecognizer* GetSpeechRecognizer(); media::mojom::blink::SpeechRecognizer* GetSpeechRecognizer();
HeapMojoRemote<mojom::blink::SpeechRecognizer> speech_recognizer_; HeapMojoRemote<media::mojom::blink::SpeechRecognizer> speech_recognizer_;
}; };
} // namespace blink } // namespace blink

@@ -25,32 +25,33 @@
#include "third_party/blink/renderer/modules/speech/speech_recognition_error_event.h" #include "third_party/blink/renderer/modules/speech/speech_recognition_error_event.h"
#include "third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom-blink.h" #include "media/mojo/mojom/speech_recognition_error_code.mojom-blink.h"
#include "third_party/blink/renderer/core/event_type_names.h" #include "third_party/blink/renderer/core/event_type_names.h"
namespace blink { namespace blink {
static String ErrorCodeToString(mojom::blink::SpeechRecognitionErrorCode code) { static String ErrorCodeToString(
media::mojom::blink::SpeechRecognitionErrorCode code) {
switch (code) { switch (code) {
case mojom::blink::SpeechRecognitionErrorCode::kNone: case media::mojom::blink::SpeechRecognitionErrorCode::kNone:
return "other"; return "other";
case mojom::blink::SpeechRecognitionErrorCode::kNoSpeech: case media::mojom::blink::SpeechRecognitionErrorCode::kNoSpeech:
return "no-speech"; return "no-speech";
case mojom::blink::SpeechRecognitionErrorCode::kAborted: case media::mojom::blink::SpeechRecognitionErrorCode::kAborted:
return "aborted"; return "aborted";
case mojom::blink::SpeechRecognitionErrorCode::kAudioCapture: case media::mojom::blink::SpeechRecognitionErrorCode::kAudioCapture:
return "audio-capture"; return "audio-capture";
case mojom::blink::SpeechRecognitionErrorCode::kNetwork: case media::mojom::blink::SpeechRecognitionErrorCode::kNetwork:
return "network"; return "network";
case mojom::blink::SpeechRecognitionErrorCode::kNotAllowed: case media::mojom::blink::SpeechRecognitionErrorCode::kNotAllowed:
return "not-allowed"; return "not-allowed";
case mojom::blink::SpeechRecognitionErrorCode::kServiceNotAllowed: case media::mojom::blink::SpeechRecognitionErrorCode::kServiceNotAllowed:
return "service-not-allowed"; return "service-not-allowed";
case mojom::blink::SpeechRecognitionErrorCode::kBadGrammar: case media::mojom::blink::SpeechRecognitionErrorCode::kBadGrammar:
return "bad-grammar"; return "bad-grammar";
case mojom::blink::SpeechRecognitionErrorCode::kLanguageNotSupported: case media::mojom::blink::SpeechRecognitionErrorCode::kLanguageNotSupported:
return "language-not-supported"; return "language-not-supported";
case mojom::blink::SpeechRecognitionErrorCode::kNoMatch: case media::mojom::blink::SpeechRecognitionErrorCode::kNoMatch:
NOTREACHED_IN_MIGRATION(); NOTREACHED_IN_MIGRATION();
break; break;
} }
@@ -60,7 +61,7 @@ static String ErrorCodeToString(mojom::blink::SpeechRecognitionErrorCode code) {
} }
SpeechRecognitionErrorEvent* SpeechRecognitionErrorEvent::Create( SpeechRecognitionErrorEvent* SpeechRecognitionErrorEvent::Create(
mojom::blink::SpeechRecognitionErrorCode code, media::mojom::blink::SpeechRecognitionErrorCode code,
const String& message) { const String& message) {
return MakeGarbageCollected<SpeechRecognitionErrorEvent>( return MakeGarbageCollected<SpeechRecognitionErrorEvent>(
ErrorCodeToString(code), message); ErrorCodeToString(code), message);

@@ -26,7 +26,7 @@
#ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_ERROR_EVENT_H_ #ifndef THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_ERROR_EVENT_H_
#define THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_ERROR_EVENT_H_ #define THIRD_PARTY_BLINK_RENDERER_MODULES_SPEECH_SPEECH_RECOGNITION_ERROR_EVENT_H_
#include "third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom-blink-forward.h" #include "media/mojo/mojom/speech_recognition_error_code.mojom-blink-forward.h"
#include "third_party/blink/renderer/bindings/modules/v8/v8_speech_recognition_error_event_init.h" #include "third_party/blink/renderer/bindings/modules/v8/v8_speech_recognition_error_event_init.h"
#include "third_party/blink/renderer/modules/event_modules.h" #include "third_party/blink/renderer/modules/event_modules.h"
#include "third_party/blink/renderer/modules/modules_export.h" #include "third_party/blink/renderer/modules/modules_export.h"
@@ -39,7 +39,7 @@ class MODULES_EXPORT SpeechRecognitionErrorEvent final : public Event {
public: public:
static SpeechRecognitionErrorEvent* Create( static SpeechRecognitionErrorEvent* Create(
mojom::blink::SpeechRecognitionErrorCode, media::mojom::blink::SpeechRecognitionErrorCode,
const String&); const String&);
static SpeechRecognitionErrorEvent* Create( static SpeechRecognitionErrorEvent* Create(
const AtomicString&, const AtomicString&,

@@ -1,6 +1,6 @@
import {SpeechAudioErrorDetails} from '/gen/third_party/blink/public/mojom/speech/speech_recognition_error.mojom.m.js'; import {SpeechAudioErrorDetails} from '/gen/media/mojo/mojom/speech_recognition_error.mojom.m.js';
import {SpeechRecognitionErrorCode} from '/gen/third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom.m.js'; import {SpeechRecognitionErrorCode} from '/gen/media/mojo/mojom/speech_recognition_error_code.mojom.m.js';
import {SpeechRecognitionSessionReceiver, SpeechRecognizer, SpeechRecognizerReceiver} from '/gen/third_party/blink/public/mojom/speech/speech_recognizer.mojom.m.js'; import {SpeechRecognitionSessionReceiver, SpeechRecognizer, SpeechRecognizerReceiver} from '/gen/media/mojo/mojom/speech_recognizer.mojom.m.js';
// MockSpeechRecognizer is a mock implementation of blink.mojom.SpeechRecognizer // MockSpeechRecognizer is a mock implementation of blink.mojom.SpeechRecognizer
// and the browser speech recognition service. Mock results can be set using // and the browser speech recognition service. Mock results can be set using

@@ -4,7 +4,7 @@
<script src="/resources/testharnessreport.js"></script> <script src="/resources/testharnessreport.js"></script>
<script type="module"> <script type="module">
import {MockSpeechRecognizer} from '../resources/mock-speechrecognizer.js'; import {MockSpeechRecognizer} from '../resources/mock-speechrecognizer.js';
import {SpeechRecognitionErrorCode} from '/gen/third_party/blink/public/mojom/speech/speech_recognition_error_code.mojom.m.js'; import {SpeechRecognitionErrorCode} from '/gen/media/mojo/mojom/speech_recognition_error_code.mojom.m.js';
const mock = new MockSpeechRecognizer(); const mock = new MockSpeechRecognizer();