Roll Opus to 55513e81d8f606bd75d0ff773d2144e5f2a732f5
This brings in bug fixes included in 1.5, 1.5.1, 1.5.2 and up to the commit at
55513e81d8f606bd75d0ff773d2144e5f2a732f5.
See 8cf872a1..55513e81
v1.5 release notes: https://opus-codec.org/release/stable/2024/03/04/libopus-1_5.html
v1.5.1 release notes: https://opus-codec.org/release/stable/2024/03/04/libopus-1_5_1.html
v1.5.2 release notes: https://opus-codec.org/release/stable/2024/04/12/libopus-1_5_2.html
This change does not yet enable the ML-based enhancements described in the
release notes above.
Bug: 412392753
Change-Id: I5188d385100a21ef6eaa31a77657724a1bd3f7ed
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6476985
Reviewed-by: Gustaf Ullberg <gustaf@chromium.org>
Commit-Queue: Felicia Lim <flim@google.com>
Reviewed-by: James Zern <jzern@google.com>
Cr-Commit-Position: refs/heads/main@{#1450605}
This commit is contained in:

committed by
Chromium LUCI CQ

parent
5dca79a70e
commit
a6241cac3b
third_party/opus
BUILD.gnREADME.chromium
src
.gitignoreCMakeLists.txtCOPYINGMakefile.amMakefile.mipsMakefile.unixREADMEautogen.batautogen.sh
celt
_kiss_fft_guts.harch.h
arm
bands.cbands.hcelt.ccelt.hcelt_decoder.ccelt_encoder.ccelt_lpc.ccelt_lpc.hcpu_support.hdump_modes
entdec.centdec.hentenc.centenc.hfixed_debug.hfixed_generic.hkiss_fft.ckiss_fft.hlaplace.claplace.hmathops.cmathops.hmdct.cmdct.hmeson.buildmips
modes.cmodes.hopus_custom_demo.cos_support.hpitch.cpitch.hquant_bands.cquant_bands.hrate.cstack_alloc.hstatic_modes_fixed.htests
test_unit_cwrs32.ctest_unit_dft.ctest_unit_laplace.ctest_unit_mathops.ctest_unit_mdct.ctest_unit_rotation.c
vq.cvq.hx86
cmake
configure.acdnn
LPCNet.ymlREADMEREADME.mdadaconvtest.c
arm
burg.cburg.hcommon.hdatasets.txtdownload_model.batdownload_model.shdred_coding.cdred_coding.hdred_config.hdred_decoder.cdred_decoder.hdred_encoder.cdred_encoder.hdred_rdovae.hdred_rdovae_dec.cdred_rdovae_dec.hdred_rdovae_enc.cdred_rdovae_enc.hdump_data.cdump_lpcnet_tables.cfargan.cfargan.hfargan_demo.cfreq.cfreq.hfwgan.cfwgan.hkiss99.ckiss99.hlossgen.clossgen.hlossgen_demo.clpcnet.clpcnet.hlpcnet_enc.clpcnet_plc.clpcnet_private.hlpcnet_tables.cmeson.buildnndsp.cnndsp.hnnet.cnnet.hnnet_arch.hnnet_default.cosce.cosce.hosce_config.hosce_features.cosce_features.hosce_structs.hparse_lpcnet_weights.cpitchdnn.cpitchdnn.htansig_table.htest_vec.ctorch
dnntools
fargan
README.mdadv_train_fargan.pydataset.pydump_fargan_weights.pyfargan.pyfilters.pyrc.pystft_loss.pytest_fargan.pytrain_fargan.py
fwgan
lossgen
lpcnet
README.mdadd_dataset_config.py
data
engine
make_default_setup.pymake_test_config.pymodels
print_lpcnet_complexity.pyscripts
collect_multi_run_results.pyloop_run.shmake_animation.pymodify_dataset_target.pymulti_run.shrun_inference_test.shupdate_checkpoints.pyupdate_output_folder.shupdate_setups.py
test_lpcnet.pytrain_lpcnet.pyutils
neural-pitch
README.mddata_augmentation.pydownload_demand.shevaluation.pyexperiments.pyexport_neuralpitch_weights.pymodels.pyneural_pitch_update.pyptdb_process.shrun_crepe.pytraining.pyutils.py
osce
README.mdadv_train_model.pyadv_train_vocoder.pycreate_testvectors.pytest_model.pytest_vocoder.pytrain_model.pytrain_vocoder.py
data
engine
export_model_weights.pylosses
make_default_setup.pymodels
__init__.pyfd_discriminator.pylace.pylavoce.pylavoce_400.pylpcnet_feature_net.pynns_base.pyno_lace.pyscale_embedding.pyshape_up_48.pysilk_feature_net.pysilk_feature_net_pl.py
requirements.txtresources
scripts
silk_16_to_48.pystndrd
evaluation
commonvoice_clip_selection.pycreate_input_data.shenv.rcevaluate.pylace_loss_metric.pymake_boxplots.pymake_boxplots_moctest.pymake_tables.pymake_tables_moctest.pymoc.pymoc2.pyprocess_dataset.shrun_nomad.pyrun_osce_test.py
presentation
utils
plc
rdovae
README.mdexport_rdovae_weights.pyfec_encoder.pyimport_rdovae_weights.py
packets
rdovae
requirements.txttrain_rdovae.pytestsuite
weight-exchange
training_tf2
dataloader.pydecode_rdovae.pydiffembed.pydump_lpcnet.pydump_plc.pydump_rdovae.pyencode_rdovae.pyfec_encoder.pyfec_packets.cfec_packets.hfec_packets.pykeraslayerdump.pylossfuncs.pylpcnet.pylpcnet_plc.pymdense.pypade.pyparameters.pyplc_loader.pyrdovae.pyrdovae_exchange.pyrdovae_import.pytest_lpcnet.pytest_plc.pytf_funcs.pytrain_lpcnet.pytrain_plc.pytrain_rdovae.pyulaw.pyuniform_noise.py
vec.hvec_avx.hvec_neon.hwrite_lpcnet_weights.cx86
doc
include
meson.buildmeson
meson_options.txtreleases.sha2scripts
silk
API.hNSQ.cNSQ_del_dec.cPLC.cPLC.h
arm
control.hdebug.cdec_API.cdecode_core.cdecode_frame.cenc_API.cfixed
float
SigProc_FLP.hautocorrelation_FLP.cburg_modified_FLP.ccorrMatrix_FLP.cencode_frame_FLP.cfind_LPC_FLP.cfind_LTP_FLP.cfind_pitch_lags_FLP.cfind_pred_coefs_FLP.cinner_product_FLP.cmain_FLP.hnoise_shape_analysis_FLP.cpitch_analysis_core_FLP.cwarped_autocorrelation_FLP.c
init_decoder.cinit_encoder.cmain.hmeson.buildx86
mips
structs.htests
x86
src
analysis.cextensions.cmapping_matrix.cmapping_matrix.hmeson.buildmlp.cmlp.hmlp_data.copus.copus_decoder.copus_demo.copus_encoder.copus_multistream_decoder.copus_multistream_encoder.copus_private.hopus_projection_decoder.copus_projection_encoder.crepacketizer.c
tar_list.txttests
meson.buildopus_build_test.shrandom_config.shtest_opus_api.ctest_opus_common.htest_opus_custom.ctest_opus_decode.ctest_opus_dred.ctest_opus_encode.ctest_opus_extensions.ctest_opus_padding.ctest_opus_projection.c
win32
1
third_party/opus/BUILD.gn
vendored
1
third_party/opus/BUILD.gn
vendored
@ -350,6 +350,7 @@ static_library("opus") {
|
||||
"src/silk/typedef.h",
|
||||
"src/src/analysis.c",
|
||||
"src/src/analysis.h",
|
||||
"src/src/extensions.c",
|
||||
"src/src/mapping_matrix.c",
|
||||
"src/src/mapping_matrix.h",
|
||||
"src/src/mlp.c",
|
||||
|
19
third_party/opus/README.chromium
vendored
19
third_party/opus/README.chromium
vendored
@ -1,7 +1,7 @@
|
||||
Name: opus
|
||||
URL: https://gitlab.xiph.org/xiph/opus
|
||||
Version: 8cf872a1
|
||||
Revision: 8cf872a186b96085b1bb3a547afd598354ebeb87
|
||||
Version: 55513e81
|
||||
Revision: 55513e81d8f606bd75d0ff773d2144e5f2a732f5
|
||||
License: Opus-Patent-BSD-3-Clause
|
||||
License File: src/COPYING
|
||||
Security Critical: yes
|
||||
@ -15,7 +15,20 @@ encoding/decoding.
|
||||
|
||||
Local changes:
|
||||
* copy .gitignore from https://git.xiph.org/?p=opus.git;a=tree
|
||||
* set 'x' flags: "chmod 750 win32/genversion.bat"
|
||||
* set 'x' flags for the following files, e.g. `chmod 750 autogen.bat`
|
||||
* autogen.bat
|
||||
* dnn/download_model.bat
|
||||
* dnn/torch/lpcnet/scripts/*.sh
|
||||
* dnn/torch/osce/stndrd/evaluation/create_input_data.sh
|
||||
* dnn/training_tf2/decode_rdovae.py
|
||||
* dnn/training_tf2/encode_rdovae.py
|
||||
* dnn/training_tf2/lpcnet.py
|
||||
* dnn/training_tf2/lpcnet_plc.py
|
||||
* dnn/training_tf2/plc_loader.py
|
||||
* dnn/training_tf2/rdovae.py
|
||||
* dnn/training_tf2/test_plc.py
|
||||
* dnn/training_tf2/train_plc.py
|
||||
* dnn/training_tf2/train_rdovae.py
|
||||
* remove assertion messages in release builds (see crbug/1053572)
|
||||
* add workaround to ignore some int-overflows when fuzzing (see crbug/1146174, crbug/1491812)
|
||||
|
||||
|
1
third_party/opus/src/.gitignore
vendored
1
third_party/opus/src/.gitignore
vendored
@ -49,6 +49,7 @@ tests/*test
|
||||
tests/test_opus_api
|
||||
tests/test_opus_decode
|
||||
tests/test_opus_encode
|
||||
tests/test_opus_extensions
|
||||
tests/test_opus_padding
|
||||
tests/test_opus_projection
|
||||
celt/arm/armopts.s
|
||||
|
165
third_party/opus/src/CMakeLists.txt
vendored
165
third_party/opus/src/CMakeLists.txt
vendored
@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
|
||||
include(OpusPackageVersion)
|
||||
@ -71,6 +71,10 @@ set(OPUS_CHECK_ASM_HELP_STR "enable bit-exactness checks between optimized and c
|
||||
option(OPUS_CHECK_ASM ${OPUS_CHECK_ASM_HELP_STR} OFF)
|
||||
add_feature_info(OPUS_CHECK_ASM OPUS_CHECK_ASM ${OPUS_CHECK_ASM_HELP_STR})
|
||||
|
||||
set(OPUS_DNN_FLOAT_DEBUG_HELP_STR "Run DNN computations as float for debugging purposes.")
|
||||
option(OPUS_DNN_FLOAT_DEBUG ${OPUS_DNN_FLOAT_DEBUG_HELP_STR} OFF)
|
||||
add_feature_info(OPUS_DNN_FLOAT_DEBUG OPUS_DNN_FLOAT_DEBUG ${OPUS_DNN_FLOAT_DEBUG_HELP_STR})
|
||||
|
||||
set(OPUS_INSTALL_PKG_CONFIG_MODULE_HELP_STR "install pkg-config module.")
|
||||
option(OPUS_INSTALL_PKG_CONFIG_MODULE ${OPUS_INSTALL_PKG_CONFIG_MODULE_HELP_STR} ON)
|
||||
add_feature_info(OPUS_INSTALL_PKG_CONFIG_MODULE OPUS_INSTALL_PKG_CONFIG_MODULE ${OPUS_INSTALL_PKG_CONFIG_MODULE_HELP_STR})
|
||||
@ -79,12 +83,26 @@ set(OPUS_INSTALL_CMAKE_CONFIG_MODULE_HELP_STR "install CMake package config modu
|
||||
option(OPUS_INSTALL_CMAKE_CONFIG_MODULE ${OPUS_INSTALL_CMAKE_CONFIG_MODULE_HELP_STR} ON)
|
||||
add_feature_info(OPUS_INSTALL_CMAKE_CONFIG_MODULE OPUS_INSTALL_CMAKE_CONFIG_MODULE ${OPUS_INSTALL_CMAKE_CONFIG_MODULE_HELP_STR})
|
||||
|
||||
set(OPUS_DRED_HELP_STR "enable DRED.")
|
||||
option(OPUS_DRED ${OPUS_DRED_HELP_STR} OFF)
|
||||
add_feature_info(OPUS_DRED OPUS_DRED ${OPUS_DRED_HELP_STR})
|
||||
|
||||
set(OPUS_OSCE_HELP_STR "enable OSCE.")
|
||||
option(OPUS_OSCE ${OPUS_OSCE_HELP_STR} OFF)
|
||||
add_feature_info(OPUS_OSCE OPUS_OSCE ${OPUS_OSCE_HELP_STR})
|
||||
|
||||
if(APPLE)
|
||||
set(OPUS_BUILD_FRAMEWORK_HELP_STR "build Framework bundle for Apple systems.")
|
||||
option(OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR} OFF)
|
||||
add_feature_info(OPUS_BUILD_FRAMEWORK OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR})
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
set(OPUS_STATIC_RUNTIME_HELP_STR "build with static runtime library.")
|
||||
option(OPUS_STATIC_RUNTIME ${OPUS_STATIC_RUNTIME_HELP_STR} OFF)
|
||||
add_feature_info(OPUS_STATIC_RUNTIME OPUS_STATIC_RUNTIME ${OPUS_STATIC_RUNTIME_HELP_STR})
|
||||
endif()
|
||||
|
||||
set(OPUS_FIXED_POINT_DEBUG_HELP_STR "debug fixed-point implementation.")
|
||||
cmake_dependent_option(OPUS_FIXED_POINT_DEBUG
|
||||
${OPUS_FIXED_POINT_DEBUG_HELP_STR}
|
||||
@ -173,13 +191,13 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
|
||||
OFF)
|
||||
add_feature_info(OPUS_X86_MAY_HAVE_SSE4_1 OPUS_X86_MAY_HAVE_SSE4_1 ${OPUS_X86_MAY_HAVE_SSE4_1_HELP_STR})
|
||||
|
||||
set(OPUS_X86_MAY_HAVE_AVX_HELP_STR "does runtime check for AVX support.")
|
||||
cmake_dependent_option(OPUS_X86_MAY_HAVE_AVX
|
||||
${OPUS_X86_MAY_HAVE_AVX_HELP_STR}
|
||||
set(OPUS_X86_MAY_HAVE_AVX2_HELP_STR "does runtime check for AVX FMA AVX2 support.")
|
||||
cmake_dependent_option(OPUS_X86_MAY_HAVE_AVX2
|
||||
${OPUS_X86_MAY_HAVE_AVX2_HELP_STR}
|
||||
ON
|
||||
"AVX_SUPPORTED; NOT OPUS_DISABLE_INTRINSICS"
|
||||
"AVX2_SUPPORTED; NOT OPUS_DISABLE_INTRINSICS"
|
||||
OFF)
|
||||
add_feature_info(OPUS_X86_MAY_HAVE_AVX OPUS_X86_MAY_HAVE_AVX ${OPUS_X86_MAY_HAVE_AVX_HELP_STR})
|
||||
add_feature_info(OPUS_X86_MAY_HAVE_AVX2 OPUS_X86_MAY_HAVE_AVX2 ${OPUS_X86_MAY_HAVE_AVX2_HELP_STR})
|
||||
|
||||
# PRESUME depends on MAY HAVE, but PRESUME will override runtime detection
|
||||
set(OPUS_X86_PRESUME_SSE_HELP_STR "assume target CPU has SSE1 support (override runtime check).")
|
||||
@ -220,13 +238,13 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
|
||||
OFF)
|
||||
add_feature_info(OPUS_X86_PRESUME_SSE4_1 OPUS_X86_PRESUME_SSE4_1 ${OPUS_X86_PRESUME_SSE4_1_HELP_STR})
|
||||
|
||||
set(OPUS_X86_PRESUME_AVX_HELP_STR "assume target CPU has AVX support (override runtime check).")
|
||||
cmake_dependent_option(OPUS_X86_PRESUME_AVX
|
||||
${OPUS_X86_PRESUME_AVX_HELP_STR}
|
||||
set(OPUS_X86_PRESUME_AVX2_HELP_STR "assume target CPU has AVX FMA AVX2 support (override runtime check).")
|
||||
cmake_dependent_option(OPUS_X86_PRESUME_AVX2
|
||||
${OPUS_X86_PRESUME_AVX2_HELP_STR}
|
||||
OFF
|
||||
"OPUS_X86_MAY_HAVE_AVX; NOT OPUS_DISABLE_INTRINSICS"
|
||||
"OPUS_X86_MAY_HAVE_AVX2; NOT OPUS_DISABLE_INTRINSICS"
|
||||
OFF)
|
||||
add_feature_info(OPUS_X86_PRESUME_AVX OPUS_X86_PRESUME_AVX ${OPUS_X86_PRESUME_AVX_HELP_STR})
|
||||
add_feature_info(OPUS_X86_PRESUME_AVX2 OPUS_X86_PRESUME_AVX2 ${OPUS_X86_PRESUME_AVX2_HELP_STR})
|
||||
endif()
|
||||
|
||||
feature_summary(WHAT ALL)
|
||||
@ -253,6 +271,14 @@ if(OPUS_CUSTOM_MODES)
|
||||
list(APPEND Opus_PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/opus_custom.h)
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
if(OPUS_STATIC_RUNTIME)
|
||||
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
|
||||
else()
|
||||
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_library(opus ${opus_headers} ${opus_sources} ${opus_sources_float} ${Opus_PUBLIC_HEADER})
|
||||
add_library(Opus::opus ALIAS opus)
|
||||
|
||||
@ -274,6 +300,7 @@ target_include_directories(
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/opus>
|
||||
PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dnn
|
||||
celt
|
||||
silk)
|
||||
|
||||
@ -309,6 +336,10 @@ if(OPUS_CHECK_ASM)
|
||||
target_compile_definitions(opus PRIVATE OPUS_CHECK_ASM)
|
||||
endif()
|
||||
|
||||
if(NOT OPUS_DNN_FLOAT_DEBUG)
|
||||
target_compile_definitions(opus PRIVATE DISABLE_DEBUG_FLOAT)
|
||||
endif()
|
||||
|
||||
if(OPUS_VAR_ARRAYS)
|
||||
target_compile_definitions(opus PRIVATE VAR_ARRAYS)
|
||||
elseif(OPUS_USE_ALLOCA)
|
||||
@ -365,11 +396,33 @@ if(NOT OPUS_ENABLE_FLOAT_API)
|
||||
target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)
|
||||
endif()
|
||||
|
||||
if (OPUS_DEEP_PLC OR OPUS_DRED OR OPUS_OSCE)
|
||||
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
|
||||
set(OPUS_DNN TRUE)
|
||||
else()
|
||||
set(OPUS_DNN FALSE)
|
||||
endif()
|
||||
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
|
||||
target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)
|
||||
endif()
|
||||
|
||||
if (OPUS_DRED)
|
||||
add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})
|
||||
target_compile_definitions(opus PRIVATE ENABLE_DRED)
|
||||
endif()
|
||||
|
||||
if (OPUS_OSCE)
|
||||
add_sources_group(opus lpcnet ${osce_headers} ${osce_sources})
|
||||
target_compile_definitions(opus PRIVATE ENABLE_OSCE)
|
||||
endif()
|
||||
|
||||
if(NOT OPUS_DISABLE_INTRINSICS)
|
||||
if(((OPUS_X86_MAY_HAVE_SSE AND NOT OPUS_X86_PRESUME_SSE) OR
|
||||
(OPUS_X86_MAY_HAVE_SSE2 AND NOT OPUS_X86_PRESUME_SSE2) OR
|
||||
(OPUS_X86_MAY_HAVE_SSE4_1 AND NOT OPUS_X86_PRESUME_SSE4_1) OR
|
||||
(OPUS_X86_MAY_HAVE_AVX AND NOT OPUS_X86_PRESUME_AVX)) AND
|
||||
(OPUS_X86_MAY_HAVE_AVX2 AND NOT OPUS_X86_PRESUME_AVX2)) AND
|
||||
RUNTIME_CPU_CAPABILITY_DETECTION)
|
||||
target_compile_definitions(opus PRIVATE OPUS_HAVE_RTCD)
|
||||
if(NOT MSVC)
|
||||
@ -383,6 +436,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
||||
endif()
|
||||
add_sources_group(opus celt ${celt_sources_x86_rtcd})
|
||||
add_sources_group(opus silk ${silk_sources_x86_rtcd})
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(SSE1_SUPPORTED)
|
||||
@ -404,9 +460,12 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
||||
if(SSE2_SUPPORTED)
|
||||
if(OPUS_X86_MAY_HAVE_SSE2)
|
||||
add_sources_group(opus celt ${celt_sources_sse2})
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_sse2})
|
||||
endif()
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
|
||||
if(NOT MSVC)
|
||||
set_source_files_properties(${celt_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
|
||||
set_source_files_properties(${celt_sources_sse2} ${dnn_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
|
||||
endif()
|
||||
endif()
|
||||
if(OPUS_X86_PRESUME_SSE2)
|
||||
@ -421,9 +480,12 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
||||
if(OPUS_X86_MAY_HAVE_SSE4_1)
|
||||
add_sources_group(opus celt ${celt_sources_sse4_1})
|
||||
add_sources_group(opus silk ${silk_sources_sse4_1})
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_sse4_1})
|
||||
endif()
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
|
||||
if(NOT MSVC)
|
||||
set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
|
||||
set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} ${dnn_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
|
||||
endif()
|
||||
|
||||
if(OPUS_FIXED_POINT)
|
||||
@ -441,22 +503,41 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(AVX_SUPPORTED)
|
||||
# mostly placeholder in case of avx intrinsics is added
|
||||
if(OPUS_X86_MAY_HAVE_AVX)
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX)
|
||||
if(AVX2_SUPPORTED)
|
||||
if(OPUS_X86_MAY_HAVE_AVX2)
|
||||
add_sources_group(opus celt ${celt_sources_avx2})
|
||||
add_sources_group(opus silk ${silk_sources_avx2})
|
||||
if (NOT OPUS_FIXED_POINT)
|
||||
add_sources_group(opus silk ${silk_sources_float_avx2})
|
||||
endif()
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_avx2})
|
||||
endif()
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2)
|
||||
if(MSVC)
|
||||
set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2")
|
||||
else()
|
||||
set(AVX2_FLAGS "${AVX2_FLAGS} -mavx2 -mfma -mavx")
|
||||
endif()
|
||||
set_source_files_properties(${celt_sources_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
|
||||
set_source_files_properties(${silk_sources_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
|
||||
if (NOT OPUS_FIXED_POINT)
|
||||
set_source_files_properties(${silk_sources_float_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
|
||||
endif()
|
||||
set_source_files_properties(${dnn_sources_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
|
||||
endif()
|
||||
if(OPUS_X86_PRESUME_AVX)
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX)
|
||||
if(OPUS_X86_PRESUME_AVX2)
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX2)
|
||||
target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE4_1)
|
||||
if(NOT MSVC)
|
||||
target_compile_options(opus PRIVATE -mavx)
|
||||
target_compile_options(opus PRIVATE -mavx2 -mfma -mavx)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(MSVC)
|
||||
if(AVX_SUPPORTED AND OPUS_X86_PRESUME_AVX) # on 64 bit and 32 bits
|
||||
add_definitions(/arch:AVX)
|
||||
if(AVX2_SUPPORTED AND OPUS_X86_PRESUME_AVX2) # on 64 bit and 32 bits
|
||||
add_definitions(/arch:AVX2)
|
||||
elseif(OPUS_CPU_X86) # if AVX not supported then set SSE flag
|
||||
if((SSE4_1_SUPPORTED AND OPUS_X86_PRESUME_SSE4_1)
|
||||
OR (SSE2_SUPPORTED AND OPUS_X86_PRESUME_SSE2))
|
||||
@ -486,6 +567,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
|
||||
|
||||
add_sources_group(opus celt ${celt_sources_arm_neon_intr})
|
||||
add_sources_group(opus silk ${silk_sources_arm_neon_intr})
|
||||
if (OPUS_DNN)
|
||||
add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
|
||||
endif()
|
||||
|
||||
# silk arm neon depends on main_Fix.h
|
||||
target_include_directories(opus PRIVATE silk/fixed)
|
||||
@ -582,6 +666,7 @@ if(OPUS_BUILD_PROGRAMS)
|
||||
target_include_directories(opus_demo PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_include_directories(opus_demo PRIVATE silk) # debug.h
|
||||
target_include_directories(opus_demo PRIVATE celt) # arch.h
|
||||
target_include_directories(opus_demo PRIVATE dnn)
|
||||
target_link_libraries(opus_demo PRIVATE opus ${OPUS_REQUIRED_LIBRARIES})
|
||||
target_compile_definitions(opus_demo PRIVATE OPUS_BUILD)
|
||||
|
||||
@ -589,10 +674,6 @@ if(OPUS_BUILD_PROGRAMS)
|
||||
add_executable(opus_compare ${opus_compare_sources})
|
||||
target_include_directories(opus_compare PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_link_libraries(opus_compare PRIVATE opus ${OPUS_REQUIRED_LIBRARIES})
|
||||
if(MSVC)
|
||||
# move cosmetic warning to level 4 for opus_compare
|
||||
target_compile_options(opus_compare PRIVATE /w44244)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS)
|
||||
@ -636,11 +717,39 @@ if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS)
|
||||
|
||||
add_executable(test_opus_encode ${test_opus_encode_sources})
|
||||
target_include_directories(test_opus_encode
|
||||
PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt)
|
||||
PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt dnn)
|
||||
target_link_libraries(test_opus_encode PRIVATE opus)
|
||||
target_compile_definitions(test_opus_encode PRIVATE OPUS_BUILD)
|
||||
add_test(NAME test_opus_encode COMMAND ${CMAKE_COMMAND}
|
||||
-DTEST_EXECUTABLE=$<TARGET_FILE:test_opus_encode>
|
||||
-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
|
||||
-P "${PROJECT_SOURCE_DIR}/cmake/RunTest.cmake")
|
||||
|
||||
add_executable(test_opus_extensions ${test_opus_extensions_sources})
|
||||
target_include_directories(test_opus_extensions
|
||||
PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt dnn)
|
||||
target_link_libraries(test_opus_extensions PRIVATE opus)
|
||||
target_compile_definitions(test_opus_extensions PRIVATE OPUS_BUILD)
|
||||
add_test(NAME test_opus_extensions COMMAND ${CMAKE_COMMAND}
|
||||
-DTEST_EXECUTABLE=$<TARGET_FILE:test_opus_extensions>
|
||||
-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
|
||||
-P "${PROJECT_SOURCE_DIR}/cmake/RunTest.cmake")
|
||||
if(OPUS_DRED)
|
||||
add_executable(test_opus_dred ${test_opus_dred_sources})
|
||||
target_include_directories(test_opus_dred
|
||||
PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_link_libraries(test_opus_dred PRIVATE opus)
|
||||
target_compile_definitions(test_opus_dred PRIVATE OPUS_BUILD)
|
||||
add_test(NAME test_opus_dred COMMAND ${CMAKE_COMMAND}
|
||||
-DTEST_EXECUTABLE=$<TARGET_FILE:test_opus_dred>
|
||||
-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
|
||||
-P "${PROJECT_SOURCE_DIR}/cmake/RunTest.cmake")
|
||||
endif()
|
||||
if(OPUS_CUSTOM_MODES)
|
||||
add_executable(test_opus_custom ${test_opus_custom_sources})
|
||||
target_include_directories(test_opus_custom
|
||||
PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt dnn)
|
||||
target_link_libraries(test_opus_custom PRIVATE opus)
|
||||
target_compile_definitions(test_opus_custom PRIVATE OPUS_BUILD)
|
||||
endif()
|
||||
endif()
|
||||
|
4
third_party/opus/src/COPYING
vendored
4
third_party/opus/src/COPYING
vendored
@ -1,7 +1,7 @@
|
||||
Copyright 2001-2011 Xiph.Org, Skype Limited, Octasic,
|
||||
Copyright 2001-2023 Xiph.Org, Skype Limited, Octasic,
|
||||
Jean-Marc Valin, Timothy B. Terriberry,
|
||||
CSIRO, Gregory Maxwell, Mark Borgerding,
|
||||
Erik de Castro Lopo
|
||||
Erik de Castro Lopo, Mozilla, Amazon
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
|
165
third_party/opus/src/Makefile.am
vendored
165
third_party/opus/src/Makefile.am
vendored
@ -10,12 +10,25 @@ lib_LTLIBRARIES = libopus.la
|
||||
DIST_SUBDIRS = doc
|
||||
|
||||
AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk \
|
||||
-I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS)
|
||||
-I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS) \
|
||||
-I$(top_srcdir)/dnn
|
||||
|
||||
include celt_sources.mk
|
||||
include lpcnet_sources.mk
|
||||
include silk_sources.mk
|
||||
include opus_sources.mk
|
||||
|
||||
LPCNET_SOURCES =
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DEEP_PLC_SOURCES)
|
||||
endif
|
||||
if ENABLE_DRED
|
||||
LPCNET_SOURCES += $(DRED_SOURCES)
|
||||
endif
|
||||
if ENABLE_OSCE
|
||||
LPCNET_SOURCES += $(OSCE_SOURCES)
|
||||
endif
|
||||
|
||||
if FIXED_POINT
|
||||
SILK_SOURCES += $(SILK_SOURCES_FIXED)
|
||||
if HAVE_SSE4_1
|
||||
@ -29,6 +42,9 @@ SILK_SOURCES += $(SILK_SOURCES_FLOAT)
|
||||
if HAVE_SSE4_1
|
||||
SILK_SOURCES += $(SILK_SOURCES_SSE4_1)
|
||||
endif
|
||||
if HAVE_AVX2
|
||||
SILK_SOURCES += $(SILK_SOURCES_FLOAT_AVX2)
|
||||
endif
|
||||
endif
|
||||
|
||||
if DISABLE_FLOAT_API
|
||||
@ -40,15 +56,31 @@ if CPU_X86
|
||||
if HAVE_RTCD
|
||||
CELT_SOURCES += $(CELT_SOURCES_X86_RTCD)
|
||||
SILK_SOURCES += $(SILK_SOURCES_X86_RTCD)
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_X86_RTCD)
|
||||
endif
|
||||
endif
|
||||
if HAVE_SSE
|
||||
CELT_SOURCES += $(CELT_SOURCES_SSE)
|
||||
endif
|
||||
if HAVE_SSE2
|
||||
CELT_SOURCES += $(CELT_SOURCES_SSE2)
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_SSE2)
|
||||
endif
|
||||
endif
|
||||
if HAVE_SSE4_1
|
||||
CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_SSE4_1)
|
||||
endif
|
||||
endif
|
||||
if HAVE_AVX2
|
||||
SILK_SOURCES += $(SILK_SOURCES_AVX2)
|
||||
CELT_SOURCES += $(CELT_SOURCES_AVX2)
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_AVX2)
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
@ -56,6 +88,18 @@ if CPU_ARM
|
||||
if HAVE_RTCD
|
||||
CELT_SOURCES += $(CELT_SOURCES_ARM_RTCD)
|
||||
SILK_SOURCES += $(SILK_SOURCES_ARM_RTCD)
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_ARM_RTCD)
|
||||
endif
|
||||
endif
|
||||
|
||||
if ENABLE_DEEP_PLC
|
||||
if HAVE_ARM_DOTPROD
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_DOTPROD)
|
||||
endif
|
||||
if HAVE_ARM_NEON_INTR
|
||||
LPCNET_SOURCES += $(DNN_SOURCES_NEON)
|
||||
endif
|
||||
endif
|
||||
|
||||
if HAVE_ARM_NEON_INTR
|
||||
@ -80,10 +124,25 @@ CLEANFILES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S) \
|
||||
$(CELT_AM_SOURCES_ARM_ASM:.s.in=-gnu.S)
|
||||
|
||||
include celt_headers.mk
|
||||
include lpcnet_headers.mk
|
||||
include silk_headers.mk
|
||||
include opus_headers.mk
|
||||
|
||||
libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(OPUS_SOURCES)
|
||||
LPCNET_HEAD =
|
||||
if ENABLE_DEEP_PLC
|
||||
LPCNET_HEAD += $(DEEP_PLC_HEAD)
|
||||
endif
|
||||
if ENABLE_DRED
|
||||
LPCNET_HEAD += $(DRED_HEAD)
|
||||
endif
|
||||
if ENABLE_OSCE
|
||||
LPCNET_HEAD += $(OSCE_HEAD)
|
||||
endif
|
||||
if ENABLE_LOSSGEN
|
||||
LPCNET_HEAD += $(LOSSGEN_HEAD)
|
||||
endif
|
||||
|
||||
libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(LPCNET_SOURCES) $(OPUS_SOURCES)
|
||||
libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
|
||||
libopus_la_LIBADD = $(NE10_LIBS) $(LIBM)
|
||||
if OPUS_ARM_EXTERNAL_ASM
|
||||
@ -92,7 +151,7 @@ endif
|
||||
|
||||
pkginclude_HEADERS = include/opus.h include/opus_multistream.h include/opus_types.h include/opus_defines.h include/opus_projection.h
|
||||
|
||||
noinst_HEADERS = $(OPUS_HEAD) $(SILK_HEAD) $(CELT_HEAD)
|
||||
noinst_HEADERS = $(OPUS_HEAD) $(SILK_HEAD) $(CELT_HEAD) $(LPCNET_HEAD)
|
||||
|
||||
if EXTRA_PROGRAMS
|
||||
noinst_PROGRAMS = celt/tests/test_unit_cwrs32 \
|
||||
@ -109,7 +168,9 @@ noinst_PROGRAMS = celt/tests/test_unit_cwrs32 \
|
||||
silk/tests/test_unit_LPC_inv_pred_gain \
|
||||
tests/test_opus_api \
|
||||
tests/test_opus_decode \
|
||||
tests/test_opus_dred \
|
||||
tests/test_opus_encode \
|
||||
tests/test_opus_extensions \
|
||||
tests/test_opus_padding \
|
||||
tests/test_opus_projection \
|
||||
trivial_example
|
||||
@ -126,10 +187,14 @@ TESTS = celt/tests/test_unit_cwrs32 \
|
||||
tests/test_opus_api \
|
||||
tests/test_opus_decode \
|
||||
tests/test_opus_encode \
|
||||
tests/test_opus_extensions \
|
||||
tests/test_opus_padding \
|
||||
tests/test_opus_projection
|
||||
|
||||
opus_demo_SOURCES = src/opus_demo.c
|
||||
if ENABLE_LOSSGEN
|
||||
opus_demo_SOURCES += $(LOSSGEN_SOURCES)
|
||||
endif
|
||||
|
||||
opus_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
|
||||
|
||||
@ -155,18 +220,33 @@ tests_test_opus_decode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
|
||||
tests_test_opus_padding_SOURCES = tests/test_opus_padding.c tests/test_opus_common.h
|
||||
tests_test_opus_padding_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
|
||||
|
||||
tests_test_opus_dred_SOURCES = tests/test_opus_dred.c tests/test_opus_common.h
|
||||
tests_test_opus_dred_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
|
||||
|
||||
if CUSTOM_MODES
|
||||
tests_test_opus_custom_SOURCES = tests/test_opus_custom.c tests/test_opus_common.h
|
||||
tests_test_opus_custom_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
|
||||
endif
|
||||
|
||||
CELT_OBJ = $(CELT_SOURCES:.c=.lo)
|
||||
SILK_OBJ = $(SILK_SOURCES:.c=.lo)
|
||||
LPCNET_OBJ = $(LPCNET_SOURCES:.c=.lo)
|
||||
OPUS_OBJ = $(OPUS_SOURCES:.c=.lo)
|
||||
|
||||
tests_test_opus_extensions_SOURCES = tests/test_opus_extensions.c tests/test_opus_common.h
|
||||
tests_test_opus_extensions_LDADD = $(OPUS_OBJ) $(SILK_OBJ) $(LPCNET_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
if OPUS_ARM_EXTERNAL_ASM
|
||||
tests_test_opus_extensions_LDADD += libarmasm.la
|
||||
endif
|
||||
|
||||
tests_test_opus_projection_SOURCES = tests/test_opus_projection.c tests/test_opus_common.h
|
||||
tests_test_opus_projection_LDADD = $(OPUS_OBJ) $(SILK_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
tests_test_opus_projection_LDADD = $(OPUS_OBJ) $(SILK_OBJ) $(LPCNET_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
if OPUS_ARM_EXTERNAL_ASM
|
||||
tests_test_opus_projection_LDADD += libarmasm.la
|
||||
endif
|
||||
|
||||
silk_tests_test_unit_LPC_inv_pred_gain_SOURCES = silk/tests/test_unit_LPC_inv_pred_gain.c
|
||||
silk_tests_test_unit_LPC_inv_pred_gain_LDADD = $(SILK_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
silk_tests_test_unit_LPC_inv_pred_gain_LDADD = $(SILK_OBJ) $(LPCNET_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
if OPUS_ARM_EXTERNAL_ASM
|
||||
silk_tests_test_unit_LPC_inv_pred_gain_LDADD += libarmasm.la
|
||||
endif
|
||||
@ -175,7 +255,7 @@ celt_tests_test_unit_cwrs32_SOURCES = celt/tests/test_unit_cwrs32.c
|
||||
celt_tests_test_unit_cwrs32_LDADD = $(LIBM)
|
||||
|
||||
celt_tests_test_unit_dft_SOURCES = celt/tests/test_unit_dft.c
|
||||
celt_tests_test_unit_dft_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
celt_tests_test_unit_dft_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
if OPUS_ARM_EXTERNAL_ASM
|
||||
celt_tests_test_unit_dft_LDADD += libarmasm.la
|
||||
endif
|
||||
@ -187,19 +267,19 @@ celt_tests_test_unit_laplace_SOURCES = celt/tests/test_unit_laplace.c
|
||||
celt_tests_test_unit_laplace_LDADD = $(LIBM)
|
||||
|
||||
celt_tests_test_unit_mathops_SOURCES = celt/tests/test_unit_mathops.c
|
||||
celt_tests_test_unit_mathops_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
celt_tests_test_unit_mathops_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
if OPUS_ARM_EXTERNAL_ASM
|
||||
celt_tests_test_unit_mathops_LDADD += libarmasm.la
|
||||
endif
|
||||
|
||||
celt_tests_test_unit_mdct_SOURCES = celt/tests/test_unit_mdct.c
|
||||
celt_tests_test_unit_mdct_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
celt_tests_test_unit_mdct_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
if OPUS_ARM_EXTERNAL_ASM
|
||||
celt_tests_test_unit_mdct_LDADD += libarmasm.la
|
||||
endif
|
||||
|
||||
celt_tests_test_unit_rotation_SOURCES = celt/tests/test_unit_rotation.c
|
||||
celt_tests_test_unit_rotation_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
celt_tests_test_unit_rotation_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
|
||||
if OPUS_ARM_EXTERNAL_ASM
|
||||
celt_tests_test_unit_rotation_LDADD += libarmasm.la
|
||||
endif
|
||||
@ -214,9 +294,38 @@ if EXTRA_PROGRAMS
|
||||
noinst_PROGRAMS += opus_custom_demo
|
||||
opus_custom_demo_SOURCES = celt/opus_custom_demo.c
|
||||
opus_custom_demo_LDADD = libopus.la $(LIBM)
|
||||
|
||||
TESTS += tests/test_opus_custom
|
||||
noinst_PROGRAMS += tests/test_opus_custom
|
||||
endif
|
||||
endif
|
||||
|
||||
if EXTRA_PROGRAMS
|
||||
if ENABLE_DEEP_PLC
|
||||
noinst_PROGRAMS += fargan_demo dump_data dump_weights_blob
|
||||
fargan_demo_SOURCES = dnn/fargan_demo.c
|
||||
fargan_demo_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
|
||||
|
||||
dump_data_SOURCES = dnn/dump_data.c
|
||||
dump_data_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
|
||||
|
||||
dump_weights_blob_SOURCES = dnn/write_lpcnet_weights.c
|
||||
dump_weights_blob_LDADD = $(LIBM)
|
||||
dump_weights_blob_CFLAGS = $(AM_CFLAGS) -DDUMP_BINARY_WEIGHTS
|
||||
endif
|
||||
if ENABLE_DRED
|
||||
TESTS += tests/test_opus_dred
|
||||
endif
|
||||
|
||||
if ENABLE_LOSSGEN
|
||||
noinst_PROGRAMS += lossgen_demo
|
||||
lossgen_demo_SOURCES = dnn/lossgen_demo.c $(LOSSGEN_SOURCES)
|
||||
lossgen_demo_LDADD = $(LIBM)
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
|
||||
EXTRA_DIST = opus.pc.in \
|
||||
opus-uninstalled.pc.in \
|
||||
opus.m4 \
|
||||
@ -230,6 +339,7 @@ EXTRA_DIST = opus.pc.in \
|
||||
cmake/OpusFunctions.cmake \
|
||||
cmake/OpusPackageVersion.cmake \
|
||||
cmake/OpusSources.cmake \
|
||||
cmake/README.md \
|
||||
cmake/RunTest.cmake \
|
||||
cmake/config.h.cmake.in \
|
||||
cmake/vla.c \
|
||||
@ -237,11 +347,14 @@ EXTRA_DIST = opus.pc.in \
|
||||
cmake/cpu_info_by_c.c \
|
||||
meson/get-version.py \
|
||||
meson/read-sources-list.py \
|
||||
meson/README.md \
|
||||
meson.build \
|
||||
meson_options.txt \
|
||||
include/meson.build \
|
||||
celt/meson.build \
|
||||
celt/tests/meson.build \
|
||||
dnn/meson.build \
|
||||
dnn/README.md \
|
||||
silk/meson.build \
|
||||
silk/tests/meson.build \
|
||||
src/meson.build \
|
||||
@ -249,21 +362,7 @@ EXTRA_DIST = opus.pc.in \
|
||||
doc/meson.build \
|
||||
tests/run_vectors.sh \
|
||||
celt/arm/arm2gnu.pl \
|
||||
celt/arm/celt_pitch_xcorr_arm.s \
|
||||
win32/VS2015/opus.vcxproj \
|
||||
win32/VS2015/test_opus_encode.vcxproj.filters \
|
||||
win32/VS2015/test_opus_encode.vcxproj \
|
||||
win32/VS2015/opus_demo.vcxproj \
|
||||
win32/VS2015/test_opus_api.vcxproj.filters \
|
||||
win32/VS2015/test_opus_api.vcxproj \
|
||||
win32/VS2015/test_opus_decode.vcxproj.filters \
|
||||
win32/VS2015/opus_demo.vcxproj.filters \
|
||||
win32/VS2015/opus.vcxproj.filters \
|
||||
win32/VS2015/test_opus_decode.vcxproj \
|
||||
win32/VS2015/opus.sln \
|
||||
win32/VS2015/common.props \
|
||||
win32/genversion.bat \
|
||||
win32/config.h
|
||||
celt/arm/celt_pitch_xcorr_arm.s
|
||||
|
||||
pkgconfigdir = $(libdir)/pkgconfig
|
||||
pkgconfig_DATA = opus.pc
|
||||
@ -362,21 +461,37 @@ $(SSE_OBJ): CFLAGS += $(OPUS_X86_SSE_CFLAGS)
|
||||
endif
|
||||
|
||||
if HAVE_SSE2
|
||||
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo)
|
||||
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo) \
|
||||
$(DNN_SOURCES_SSE2:.c=.lo)
|
||||
$(SSE2_OBJ): CFLAGS += $(OPUS_X86_SSE2_CFLAGS)
|
||||
endif
|
||||
|
||||
if HAVE_SSE4_1
|
||||
SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
|
||||
$(DNN_SOURCES_SSE4_1:.c=.lo) \
|
||||
$(SILK_SOURCES_SSE4_1:.c=.lo) \
|
||||
$(SILK_SOURCES_FIXED_SSE4_1:.c=.lo)
|
||||
$(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
|
||||
endif
|
||||
|
||||
if HAVE_AVX2
|
||||
AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo) \
|
||||
$(SILK_SOURCES_AVX2:.c=.lo) \
|
||||
$(SILK_SOURCES_FLOAT_AVX2:.c=.lo) \
|
||||
$(DNN_SOURCES_AVX2:.c=.lo)
|
||||
$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
|
||||
endif
|
||||
|
||||
if HAVE_ARM_NEON_INTR
|
||||
ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
|
||||
$(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \
|
||||
$(DNN_SOURCES_NEON:.c=.lo) \
|
||||
$(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
|
||||
$(ARM_NEON_INTR_OBJ): CFLAGS += \
|
||||
$(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
|
||||
endif
|
||||
|
||||
if HAVE_ARM_DOTPROD
|
||||
ARM_DOTPROD_OBJ = $(DNN_SOURCES_DOTPROD:.c=.lo)
|
||||
$(ARM_DOTPROD_OBJ): CFLAGS += $(ARM_DOTPROD_INTR_CFLAGS)
|
||||
endif
|
||||
|
14
third_party/opus/src/Makefile.mips
vendored
14
third_party/opus/src/Makefile.mips
vendored
@ -102,13 +102,16 @@ TESTOPUSDECODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSDECODE_SRCS_C))
|
||||
TESTOPUSENCODE_SRCS_C = tests/test_opus_encode.c tests/opus_encode_regressions.c
|
||||
TESTOPUSENCODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSENCODE_SRCS_C))
|
||||
|
||||
TESTOPUSEXTENSIONS_SRCS_C = tests/test_opus_extensions.c
|
||||
TESTOPUSEXTENSIONS_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSEXTENSIONS_SRCS_C))
|
||||
|
||||
TESTOPUSPADDING_SRCS_C = tests/test_opus_padding.c
|
||||
TESTOPUSPADDING_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSPADDING_SRCS_C))
|
||||
|
||||
OPUSCOMPARE_SRCS_C = src/opus_compare.c
|
||||
OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C))
|
||||
|
||||
TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_padding
|
||||
TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_extensions test_opus_padding
|
||||
|
||||
# Rules
|
||||
all: lib opus_demo opus_compare $(TESTS)
|
||||
@ -133,6 +136,9 @@ test_opus_decode$(EXESUFFIX): $(TESTOPUSDECODE_OBJS) $(TARGET)
|
||||
test_opus_encode$(EXESUFFIX): $(TESTOPUSENCODE_OBJS) $(TARGET)
|
||||
$(LINK.o.cmdline)
|
||||
|
||||
test_opus_extensions$(EXESUFFIX): $(TESTOPUSEXTENSIONS_OBJS) $(TARGET)
|
||||
$(LINK.o.cmdline)
|
||||
|
||||
test_opus_padding$(EXESUFFIX): $(TESTOPUSPADDING_OBJS) $(TARGET)
|
||||
$(LINK.o.cmdline)
|
||||
|
||||
@ -154,8 +160,10 @@ force:
|
||||
clean:
|
||||
rm -f opus_demo$(EXESUFFIX) opus_compare$(EXESUFFIX) $(TARGET) \
|
||||
test_opus_api$(EXESUFFIX) test_opus_decode$(EXESUFFIX) \
|
||||
test_opus_encode$(EXESUFFIX) test_opus_padding$(EXESUFFIX) \
|
||||
test_opus_encode$(EXESUFFIX) test_opus_extensions$(EXESUFFIX) \
|
||||
test_opus_padding$(EXESUFFIX)
|
||||
$(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \
|
||||
$(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) $(TESTOPUSPADDING_OBJS)
|
||||
$(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) \
|
||||
$(TESTOPUSEXTENSIONS_OBJS) $(TESTOPUSPADDING_OBJS)
|
||||
|
||||
.PHONY: all lib clean force check
|
||||
|
14
third_party/opus/src/Makefile.unix
vendored
14
third_party/opus/src/Makefile.unix
vendored
@ -100,13 +100,16 @@ TESTOPUSDECODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSDECODE_SRCS_C))
|
||||
TESTOPUSENCODE_SRCS_C = tests/test_opus_encode.c tests/opus_encode_regressions.c
|
||||
TESTOPUSENCODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSENCODE_SRCS_C))
|
||||
|
||||
TESTOPUSEXTENSIONS_SRCS_C = tests/test_opus_extensions.c
|
||||
TESTOPUSEXTENSIONS_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSEXTENSIONS_SRCS_C))
|
||||
|
||||
TESTOPUSPADDING_SRCS_C = tests/test_opus_padding.c
|
||||
TESTOPUSPADDING_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSPADDING_SRCS_C))
|
||||
|
||||
OPUSCOMPARE_SRCS_C = src/opus_compare.c
|
||||
OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C))
|
||||
|
||||
TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_padding
|
||||
TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_extensions test_opus_padding
|
||||
|
||||
# Rules
|
||||
all: lib opus_demo opus_compare $(TESTS)
|
||||
@ -131,6 +134,9 @@ test_opus_decode$(EXESUFFIX): $(TESTOPUSDECODE_OBJS) $(TARGET)
|
||||
test_opus_encode$(EXESUFFIX): $(TESTOPUSENCODE_OBJS) $(TARGET)
|
||||
$(LINK.o.cmdline)
|
||||
|
||||
test_opus_extensions$(EXESUFFIX): $(TESTOPUSEXTENSIONS_OBJS) $(TARGET)
|
||||
$(LINK.o.cmdline)
|
||||
|
||||
test_opus_padding$(EXESUFFIX): $(TESTOPUSPADDING_OBJS) $(TARGET)
|
||||
$(LINK.o.cmdline)
|
||||
|
||||
@ -152,8 +158,10 @@ force:
|
||||
clean:
|
||||
rm -f opus_demo$(EXESUFFIX) opus_compare$(EXESUFFIX) $(TARGET) \
|
||||
test_opus_api$(EXESUFFIX) test_opus_decode$(EXESUFFIX) \
|
||||
test_opus_encode$(EXESUFFIX) test_opus_padding$(EXESUFFIX) \
|
||||
test_opus_encode$(EXESUFFIX) test_opus_extensions$(EXESUFFIX) \
|
||||
test_opus_padding$(EXESUFFIX)
|
||||
$(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \
|
||||
$(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) $(TESTOPUSPADDING_OBJS)
|
||||
$(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) \
|
||||
$(TESTOPUSEXTENSIONS_OBJS) $(TESTOPUSPADDING_OBJS)
|
||||
|
||||
.PHONY: all lib clean force check
|
||||
|
30
third_party/opus/src/README
vendored
30
third_party/opus/src/README
vendored
@ -22,7 +22,7 @@ This package implements a shared library for encoding and decoding raw Opus
|
||||
bitstreams. Raw Opus bitstreams should be used over RTP according to
|
||||
https://tools.ietf.org/html/rfc7587
|
||||
|
||||
The package also includes a number of test tools used for testing the
|
||||
The package also includes a number of test tools used for testing the
|
||||
correct operation of the library. The bitstreams read/written by these
|
||||
tools should not be used for Opus file distribution: They include
|
||||
additional debugging data and cannot support seeking.
|
||||
@ -39,6 +39,28 @@ Opus-tools can be found at:
|
||||
or on the main Opus website:
|
||||
https://opus-codec.org/
|
||||
|
||||
== Deep Learning and Opus ==
|
||||
|
||||
Lossy networks continue to be a challenge for real-time communications.
|
||||
While the original implementation of Opus provides an excellent packet loss
|
||||
concealment mechanism, the team has continued to advance the methodology used
|
||||
to improve audio quality in challenging network environments.
|
||||
|
||||
In Opus 1.5, we added a deep learning based redundancy encoder that enhances
|
||||
audio in lossy networks by embedding one second of recovery data in the padding
|
||||
data of each packet. The underlying algorithm behind encoding and decoding the
|
||||
recovery data is called the deep redundancy (DRED) algorithm. By leveraging
|
||||
the padding data within the packet, Opus 1.5 is fully backward compatible with
|
||||
prior revisions of Opus. Please see the README under the "dnn" subdirectory to
|
||||
understand DRED.
|
||||
|
||||
DRED was developed by a team that Amazon Web Services initially sponsored,
|
||||
who open-sourced the implementation and began the
|
||||
standardization process at the IETF:
|
||||
https://datatracker.ietf.org/doc/draft-ietf-mlcodec-opus-extension/
|
||||
The license behind Opus or the intellectual property position of Opus does
|
||||
not change with Opus 1.5.
|
||||
|
||||
== Compiling libopus ==
|
||||
|
||||
To build from a distribution tarball, you only need to do the following:
|
||||
@ -77,6 +99,8 @@ On Apple macOS, install Xcode and brew.sh, then in the Terminal enter:
|
||||
% ./configure
|
||||
% make
|
||||
|
||||
On x86, it's a good idea to use a -march= option that allows the use of AVX2.
|
||||
|
||||
3) Install the codec libraries (optional)
|
||||
|
||||
% sudo make install
|
||||
@ -133,6 +157,10 @@ To run compare the code to these test vectors:
|
||||
% tar -zxf opus_testvectors-rfc8251.tar.gz
|
||||
% ./tests/run_vectors.sh ./ opus_newvectors 48000
|
||||
|
||||
== Compiling libopus for Windows and alternative build systems ==
|
||||
|
||||
See cmake/README.md or meson/README.md.
|
||||
|
||||
== Portability notes ==
|
||||
|
||||
This implementation uses floating-point by default but can be compiled to
|
||||
|
13
third_party/opus/src/autogen.bat
vendored
Executable file
13
third_party/opus/src/autogen.bat
vendored
Executable file
@ -0,0 +1,13 @@
|
||||
@echo off
|
||||
REM Run this to set up the build system: configure, makefiles, etc.
|
||||
|
||||
setlocal enabledelayedexpansion
|
||||
|
||||
REM Parse the real autogen.sh script for version
|
||||
for /F "tokens=2 delims= " %%A in ('findstr "dnn/download_model.sh" autogen.sh') do (
|
||||
set "model=%%A"
|
||||
)
|
||||
|
||||
call dnn\download_model.bat %model%
|
||||
|
||||
echo Updating build configuration files, please wait....
|
2
third_party/opus/src/autogen.sh
vendored
2
third_party/opus/src/autogen.sh
vendored
@ -9,6 +9,8 @@ set -e
|
||||
srcdir=`dirname $0`
|
||||
test -n "$srcdir" && cd "$srcdir"
|
||||
|
||||
dnn/download_model.sh "160753e983198f29f1aae67c54caa0e30bd90f1ce916a52f15bdad2df8e35e58"
|
||||
|
||||
echo "Updating build configuration files, please wait...."
|
||||
|
||||
autoreconf -isf
|
||||
|
8
third_party/opus/src/celt/_kiss_fft_guts.h
vendored
8
third_party/opus/src/celt/_kiss_fft_guts.h
vendored
@ -54,8 +54,13 @@
|
||||
|
||||
#define SAMP_MIN -SAMP_MAX
|
||||
|
||||
|
||||
#ifdef ENABLE_QEXT
|
||||
# define S_MUL(a,b) MULT32_32_Q31(b, a)
|
||||
# define S_MUL2(a,b) MULT32_32_Q31(b, a)
|
||||
#else
|
||||
# define S_MUL(a,b) MULT16_32_Q15(b, a)
|
||||
# define S_MUL2(a,b) MULT16_32_Q16(b, a)
|
||||
#endif
|
||||
|
||||
# define C_MUL(m,a,b) \
|
||||
do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
|
||||
@ -104,6 +109,7 @@
|
||||
#else /* not FIXED_POINT*/
|
||||
|
||||
# define S_MUL(a,b) ( (a)*(b) )
|
||||
# define S_MUL2(a,b) ( (a)*(b) )
|
||||
#define C_MUL(m,a,b) \
|
||||
do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
|
||||
(m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
|
||||
|
121
third_party/opus/src/celt/arch.h
vendored
121
third_party/opus/src/celt/arch.h
vendored
@ -56,7 +56,7 @@
|
||||
|
||||
#define CELT_SIG_SCALE 32768.f
|
||||
|
||||
#define CELT_FATAL(str) celt_fatal(str, __FILE__, __LINE__);
|
||||
#define CELT_FATAL(str) celt_fatal(str, __FILE__, __LINE__)
|
||||
|
||||
#if defined(ENABLE_ASSERTIONS) || defined(ENABLE_HARDENING)
|
||||
#ifdef __GNUC__
|
||||
@ -74,9 +74,9 @@ void celt_fatal(const char *str, const char *file, int line)
|
||||
{
|
||||
#if !defined(CHROMIUM_NO_LOGGING)
|
||||
fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
|
||||
#endif
|
||||
#if defined(_MSC_VER)
|
||||
_set_abort_behavior( 0, _WRITE_ABORT_MSG);
|
||||
#endif
|
||||
#endif
|
||||
abort();
|
||||
}
|
||||
@ -107,6 +107,16 @@ void celt_fatal(const char *str, const char *file, int line)
|
||||
#define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */
|
||||
#define UADD32(a,b) ((a)+(b))
|
||||
#define USUB32(a,b) ((a)-(b))
|
||||
#define MAXG(a,b) MAX32(a, b)
|
||||
#define MING(a,b) MIN32(a, b)
|
||||
|
||||
/* Throughout the code, we use the following scaling for signals:
|
||||
FLOAT: used for float API, normalized to +/-1.
|
||||
INT16: used for 16-bit API, normalized to +/- 32768
|
||||
RES: internal Opus resolution, defined as +/-1. in float builds, or either 16-bit or 24-bit int for fixed-point builds
|
||||
SIG: internal CELT resolution: defined as +/- 32768. in float builds, or Q27 in fixed-point builds (int16 shifted by 12)
|
||||
*/
|
||||
|
||||
|
||||
/* Set this if opus_int64 is a native type of the CPU. */
|
||||
/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
|
||||
@ -117,6 +127,12 @@ void celt_fatal(const char *str, const char *file, int line)
|
||||
#define OPUS_FAST_INT64 0
|
||||
#endif
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
#define ARG_FIXED(arg) , arg
|
||||
#else
|
||||
#define ARG_FIXED(arg)
|
||||
#endif
|
||||
|
||||
#define PRINT_MIPS(file)
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
@ -128,27 +144,85 @@ typedef opus_int64 opus_val64;
|
||||
typedef opus_val32 celt_sig;
|
||||
typedef opus_val16 celt_norm;
|
||||
typedef opus_val32 celt_ener;
|
||||
typedef opus_val32 celt_glog;
|
||||
|
||||
#ifdef ENABLE_RES24
|
||||
typedef opus_val32 opus_res;
|
||||
#define RES_SHIFT 8
|
||||
#define SIG2RES(a) PSHR32(a, SIG_SHIFT-RES_SHIFT)
|
||||
#define RES2INT16(a) SAT16(PSHR32(a, RES_SHIFT))
|
||||
#define RES2INT24(a) (a)
|
||||
#define RES2FLOAT(a) ((1.f/32768.f/256.)*(a))
|
||||
#define INT16TORES(a) SHL32(EXTEND32(a), RES_SHIFT)
|
||||
#define INT24TORES(a) (a)
|
||||
#define ADD_RES(a, b) ADD32(a, b)
|
||||
#define FLOAT2RES(a) float2int(32768.f*256.f*(a))
|
||||
#define RES2SIG(a) SHL32((a), SIG_SHIFT-RES_SHIFT)
|
||||
#define MULT16_RES_Q15(a,b) MULT16_32_Q15(a,b)
|
||||
#define MAX_ENCODING_DEPTH 24
|
||||
#else
|
||||
typedef opus_val16 opus_res;
|
||||
#define RES_SHIFT 0
|
||||
#define SIG2RES(a) SIG2WORD16(a)
|
||||
#define RES2INT16(a) (a)
|
||||
#define RES2INT24(a) SHL32(EXTEND32(a), 8)
|
||||
#define RES2FLOAT(a) ((1.f/32768.f)*(a))
|
||||
#define INT16TORES(a) (a)
|
||||
#define INT24TORES(a) SAT16(PSHR32(a, 8))
|
||||
/* Adds two opus_res values (16-bit in this configuration) and saturates the
   32-bit sum back to the 16-bit range. The expansion deliberately has no
   trailing semicolon so the macro can be used as an ordinary expression;
   callers supply their own statement terminator. */
#define ADD_RES(a, b) SAT16(ADD32((a), (b)))
|
||||
#define FLOAT2RES(a) FLOAT2INT16(a)
|
||||
#define RES2SIG(a) SHL32(EXTEND32(a), SIG_SHIFT)
|
||||
#define MULT16_RES_Q15(a,b) MULT16_16_Q15(a,b)
|
||||
#define MAX_ENCODING_DEPTH 16
|
||||
#endif
|
||||
|
||||
#define RES2VAL16(a) RES2INT16(a)
|
||||
#define FLOAT2SIG(a) float2int(((opus_int32)32768<<SIG_SHIFT)*(a))
|
||||
#define INT16TOSIG(a) SHL32(EXTEND32(a), SIG_SHIFT)
|
||||
#define INT24TOSIG(a) SHL32(a, SIG_SHIFT-8)
|
||||
|
||||
#ifdef ENABLE_QEXT
|
||||
typedef opus_val32 celt_coef;
|
||||
#define COEF_ONE Q31ONE
|
||||
#define MULT_COEF_32(a, b) MULT32_32_Q31(a,b)
|
||||
#define MAC_COEF_32_ARM(c, a, b) ADD32((c), MULT32_32_Q32(a,b))
|
||||
#define MULT_COEF(a, b) MULT32_32_Q31(a,b)
|
||||
#define MULT_COEF_TAPS(a, b) SHL32(MULT16_16(a,b), 1)
|
||||
#define COEF2VAL16(x) EXTRACT16(SHR32(x, 16))
|
||||
#else
|
||||
typedef opus_val16 celt_coef;
|
||||
#define COEF_ONE Q15ONE
|
||||
#define MULT_COEF_32(a, b) MULT16_32_Q15(a,b)
|
||||
#define MAC_COEF_32_ARM(a, b, c) MAC16_32_Q16(a,b,c)
|
||||
#define MULT_COEF(a, b) MULT16_16_Q15(a,b)
|
||||
#define MULT_COEF_TAPS(a, b) MULT16_16_P15(a,b)
|
||||
#define COEF2VAL16(x) (x)
|
||||
#endif
|
||||
|
||||
#define celt_isnan(x) 0
|
||||
|
||||
#define Q15ONE 32767
|
||||
#define Q31ONE 2147483647
|
||||
|
||||
#define SIG_SHIFT 12
|
||||
/* Safe saturation value for 32-bit signals. Should be less than
|
||||
2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
|
||||
#define SIG_SAT (300000000)
|
||||
/* Safe saturation value for 32-bit signals. We need to make sure that we can
|
||||
add two sig values and that the first stages of the MDCT don't cause an overflow.
|
||||
The most constraining is the ARM_ASM comb filter where we shift left by one
|
||||
and then add two values. Because of that, we use 2^29-1. SIG_SAT must be large
|
||||
enough to fit a full-scale high-freq tone through the prefilter and comb filter,
|
||||
meaning 1.85*1.75*2^(15+SIG_SHIFT) = 434529895.
|
||||
so the limit should be about 2^31*sqrt(.5). */
|
||||
#define SIG_SAT (536870911)
|
||||
|
||||
#define NORM_SCALING 16384
|
||||
|
||||
#define DB_SHIFT 10
|
||||
#define DB_SHIFT 24
|
||||
|
||||
#define EPSILON 1
|
||||
#define VERY_SMALL 0
|
||||
#define VERY_LARGE16 ((opus_val16)32767)
|
||||
#define Q15_ONE ((opus_val16)32767)
|
||||
|
||||
#define SCALEIN(a) (a)
|
||||
#define SCALEOUT(a) (a)
|
||||
|
||||
#define ABS16(x) ((x) < 0 ? (-(x)) : (x))
|
||||
#define ABS32(x) ((x) < 0 ? (-(x)) : (x))
|
||||
@ -188,6 +262,10 @@ typedef float opus_val64;
|
||||
typedef float celt_sig;
|
||||
typedef float celt_norm;
|
||||
typedef float celt_ener;
|
||||
typedef float celt_glog;
|
||||
|
||||
typedef float opus_res;
|
||||
typedef float celt_coef;
|
||||
|
||||
#ifdef FLOAT_APPROX
|
||||
/* This code should reliably detect NaN/inf even when -ffast-math is used.
|
||||
@ -206,6 +284,9 @@ static OPUS_INLINE int celt_isnan(float x)
|
||||
#endif
|
||||
|
||||
#define Q15ONE 1.0f
|
||||
#define Q31ONE 1.0f
|
||||
#define COEF_ONE 1.0f
|
||||
#define COEF2VAL16(x) (x)
|
||||
|
||||
#define NORM_SCALING 1.f
|
||||
|
||||
@ -220,6 +301,7 @@ static OPUS_INLINE int celt_isnan(float x)
|
||||
|
||||
#define QCONST16(x,bits) (x)
|
||||
#define QCONST32(x,bits) (x)
|
||||
#define GCONST(x) (x)
|
||||
|
||||
#define NEG16(x) (-(x))
|
||||
#define NEG32(x) (-(x))
|
||||
@ -261,6 +343,7 @@ static OPUS_INLINE int celt_isnan(float x)
|
||||
|
||||
#define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
|
||||
#define MAC16_32_Q16(c,a,b) ((c)+(a)*(b))
|
||||
#define MAC_COEF_32_ARM(c,a,b) ((c)+(a)*(b))
|
||||
|
||||
#define MULT16_16_Q11_32(a,b) ((a)*(b))
|
||||
#define MULT16_16_Q11(a,b) ((a)*(b))
|
||||
@ -272,13 +355,29 @@ static OPUS_INLINE int celt_isnan(float x)
|
||||
#define MULT16_16_P14(a,b) ((a)*(b))
|
||||
#define MULT16_32_P16(a,b) ((a)*(b))
|
||||
|
||||
#define MULT_COEF_32(a, b) ((a)*(b))
|
||||
#define MULT_COEF(a, b) ((a)*(b))
|
||||
#define MULT_COEF_TAPS(a, b) ((a)*(b))
|
||||
|
||||
#define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b))
|
||||
#define DIV32(a,b) (((opus_val32)(a))/(opus_val32)(b))
|
||||
|
||||
#define SCALEIN(a) ((a)*CELT_SIG_SCALE)
|
||||
#define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE))
|
||||
#define SIG2RES(a) ((1/CELT_SIG_SCALE)*(a))
|
||||
#define RES2INT16(a) FLOAT2INT16(a)
|
||||
#define RES2INT24(a) float2int(32768.f*256.f*(a))
|
||||
#define RES2FLOAT(a) (a)
|
||||
#define INT16TORES(a) ((a)*(1/CELT_SIG_SCALE))
|
||||
#define INT24TORES(a) ((1.f/32768.f/256.)*(a))
|
||||
#define ADD_RES(a, b) ADD32(a, b)
|
||||
#define FLOAT2RES(a) (a)
|
||||
#define RES2SIG(a) (CELT_SIG_SCALE*(a))
|
||||
#define MULT16_RES_Q15(a,b) MULT16_16_Q15(a,b)
|
||||
|
||||
#define SIG2WORD16(x) (x)
|
||||
#define RES2VAL16(a) (a)
|
||||
#define FLOAT2SIG(a) ((a)*CELT_SIG_SCALE)
|
||||
#define INT16TOSIG(a) ((float)(a))
|
||||
#define INT24TOSIG(a) ((float)(a)*(1.f/256.f))
|
||||
#define MAX_ENCODING_DEPTH 24
|
||||
|
||||
#endif /* !FIXED_POINT */
|
||||
|
||||
|
31
third_party/opus/src/celt/arm/arm_celt_map.c
vendored
31
third_party/opus/src/celt/arm/arm_celt_map.c
vendored
@ -40,7 +40,8 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, c
|
||||
celt_inner_prod_c, /* ARMv4 */
|
||||
celt_inner_prod_c, /* EDSP */
|
||||
celt_inner_prod_c, /* Media */
|
||||
celt_inner_prod_neon /* NEON */
|
||||
celt_inner_prod_neon,/* NEON */
|
||||
celt_inner_prod_neon /* DOTPROD */
|
||||
};
|
||||
|
||||
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
|
||||
@ -48,7 +49,8 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const o
|
||||
dual_inner_prod_c, /* ARMv4 */
|
||||
dual_inner_prod_c, /* EDSP */
|
||||
dual_inner_prod_c, /* Media */
|
||||
dual_inner_prod_neon /* NEON */
|
||||
dual_inner_prod_neon,/* NEON */
|
||||
dual_inner_prod_neon /* DOTPROD */
|
||||
};
|
||||
# endif
|
||||
|
||||
@ -61,7 +63,8 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
|
||||
celt_pitch_xcorr_c, /* ARMv4 */
|
||||
MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */
|
||||
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
|
||||
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
|
||||
MAY_HAVE_NEON(celt_pitch_xcorr), /* NEON */
|
||||
MAY_HAVE_NEON(celt_pitch_xcorr) /* DOTPROD */
|
||||
};
|
||||
|
||||
# endif
|
||||
@ -72,7 +75,8 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
|
||||
celt_pitch_xcorr_c, /* ARMv4 */
|
||||
celt_pitch_xcorr_c, /* EDSP */
|
||||
celt_pitch_xcorr_c, /* Media */
|
||||
celt_pitch_xcorr_float_neon /* Neon */
|
||||
celt_pitch_xcorr_float_neon, /* Neon */
|
||||
celt_pitch_xcorr_float_neon /* DOTPROD */
|
||||
};
|
||||
# endif
|
||||
# endif /* FIXED_POINT */
|
||||
@ -90,6 +94,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
|
||||
xcorr_kernel_c, /* EDSP */
|
||||
xcorr_kernel_c, /* Media */
|
||||
xcorr_kernel_neon_fixed, /* Neon */
|
||||
xcorr_kernel_neon_fixed /* DOTPROD */
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -101,14 +106,16 @@ int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
|
||||
opus_fft_alloc_arch_c, /* ARMv4 */
|
||||
opus_fft_alloc_arch_c, /* EDSP */
|
||||
opus_fft_alloc_arch_c, /* Media */
|
||||
opus_fft_alloc_arm_neon /* Neon with NE10 library support */
|
||||
opus_fft_alloc_arm_neon, /* Neon with NE10 library support */
|
||||
opus_fft_alloc_arm_neon /* DOTPROD with NE10 library support */
|
||||
};
|
||||
|
||||
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
|
||||
opus_fft_free_arch_c, /* ARMv4 */
|
||||
opus_fft_free_arch_c, /* EDSP */
|
||||
opus_fft_free_arch_c, /* Media */
|
||||
opus_fft_free_arm_neon /* Neon with NE10 */
|
||||
opus_fft_free_arm_neon, /* Neon with NE10 */
|
||||
opus_fft_free_arm_neon /* DOTPROD with NE10 */
|
||||
};
|
||||
# endif /* CUSTOM_MODES */
|
||||
|
||||
@ -118,7 +125,8 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
|
||||
opus_fft_c, /* ARMv4 */
|
||||
opus_fft_c, /* EDSP */
|
||||
opus_fft_c, /* Media */
|
||||
opus_fft_neon /* Neon with NE10 */
|
||||
opus_fft_neon, /* Neon with NE10 */
|
||||
opus_fft_neon /* DOTPROD with NE10 */
|
||||
};
|
||||
|
||||
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
|
||||
@ -127,7 +135,8 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
|
||||
opus_ifft_c, /* ARMv4 */
|
||||
opus_ifft_c, /* EDSP */
|
||||
opus_ifft_c, /* Media */
|
||||
opus_ifft_neon /* Neon with NE10 */
|
||||
opus_ifft_neon, /* Neon with NE10 */
|
||||
opus_ifft_neon /* DOTPROD with NE10 */
|
||||
};
|
||||
|
||||
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
|
||||
@ -139,7 +148,8 @@ void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
|
||||
clt_mdct_forward_c, /* ARMv4 */
|
||||
clt_mdct_forward_c, /* EDSP */
|
||||
clt_mdct_forward_c, /* Media */
|
||||
clt_mdct_forward_neon /* Neon with NE10 */
|
||||
clt_mdct_forward_neon, /* Neon with NE10 */
|
||||
clt_mdct_forward_neon /* DOTPROD with NE10 */
|
||||
};
|
||||
|
||||
void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
|
||||
@ -151,7 +161,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
|
||||
clt_mdct_backward_c, /* ARMv4 */
|
||||
clt_mdct_backward_c, /* EDSP */
|
||||
clt_mdct_backward_c, /* Media */
|
||||
clt_mdct_backward_neon /* Neon with NE10 */
|
||||
clt_mdct_backward_neon, /* Neon with NE10 */
|
||||
clt_mdct_backward_neon /* DOTPROD with NE10 */
|
||||
};
|
||||
|
||||
# endif /* HAVE_ARM_NE10 */
|
||||
|
101
third_party/opus/src/celt/arm/armcpu.c
vendored
101
third_party/opus/src/celt/arm/armcpu.c
vendored
@ -43,6 +43,7 @@
|
||||
#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
|
||||
#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
|
||||
#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
|
||||
#define OPUS_CPU_ARM_DOTPROD_FLAG (1<<OPUS_ARCH_ARM_DOTPROD)
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
|
||||
@ -95,7 +96,7 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
|
||||
/* Linux based */
|
||||
#include <stdio.h>
|
||||
|
||||
opus_uint32 opus_cpu_capabilities(void)
|
||||
static opus_uint32 opus_cpu_capabilities(void)
|
||||
{
|
||||
opus_uint32 flags = 0;
|
||||
FILE *cpuinfo;
|
||||
@ -126,6 +127,14 @@ opus_uint32 opus_cpu_capabilities(void)
|
||||
p = strstr(buf, " neon");
|
||||
if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
|
||||
flags |= OPUS_CPU_ARM_NEON_FLAG;
|
||||
p = strstr(buf, " asimd");
|
||||
if(p != NULL && (p[6] == ' ' || p[6] == '\n'))
|
||||
flags |= OPUS_CPU_ARM_NEON_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_EDSP_FLAG;
|
||||
# endif
|
||||
# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
|
||||
p = strstr(buf, " asimddp");
|
||||
if(p != NULL && (p[8] == ' ' || p[8] == '\n'))
|
||||
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
|
||||
# endif
|
||||
}
|
||||
# endif
|
||||
@ -144,10 +153,92 @@ opus_uint32 opus_cpu_capabilities(void)
|
||||
# endif
|
||||
}
|
||||
|
||||
#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
|
||||
flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
|
||||
# if defined(OPUS_ARM_PRESUME_DOTPROD)
|
||||
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
fclose(cpuinfo);
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
/* Apple variant: builds the bitmask of OPUS_CPU_ARM_*_FLAG capability bits.
   Only the dot-product feature is probed at run time; the EDSP/MEDIA/NEON bits
   are set solely from the compile-time OPUS_ARM_PRESUME_* configuration.
   Returns the accumulated flags (0 if nothing was detected or presumed). */
static opus_uint32 opus_cpu_capabilities(void)
|
||||
{
|
||||
opus_uint32 flags = 0;
|
||||
|
||||
#if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
|
||||
size_t size = sizeof(uint32_t);
|
||||
uint32_t value = 0;
|
||||
/* The flag is set only when the sysctl query returns 0 and reports a
   non-zero value; any failure leaves value at 0 and the flag unset. */
if (!sysctlbyname("hw.optional.arm.FEAT_DotProd", &value, &size, NULL, 0) && value)
|
||||
{
|
||||
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Features the build presumes are present are reported unconditionally,
   independent of the run-time query above. */
#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
|
||||
flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
|
||||
# if defined(OPUS_ARM_PRESUME_DOTPROD)
|
||||
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
|
||||
# endif
|
||||
#endif
|
||||
return flags;
|
||||
}
|
||||
|
||||
#elif defined(__FreeBSD__)
|
||||
#include <sys/auxv.h>
|
||||
|
||||
/* FreeBSD variant: builds the bitmask of OPUS_CPU_ARM_*_FLAG capability bits
   from the AT_HWCAP auxiliary-vector word, then adds any features the build
   presumes via OPUS_ARM_PRESUME_*. Returns the accumulated flags. */
static opus_uint32 opus_cpu_capabilities(void)
|
||||
{
|
||||
long hwcap = 0;
|
||||
opus_uint32 flags = 0;
|
||||
|
||||
# if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|
||||
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
/* FreeBSD requires armv6+, which always supports media instructions */
|
||||
flags |= OPUS_CPU_ARM_MEDIA_FLAG;
|
||||
# endif
|
||||
|
||||
/* The return value is not checked. hwcap starts at 0, so presumably a failed
   call simply leaves every HWCAP_* test below false -- NOTE(review): confirm
   elf_aux_info() does not modify the buffer on error. */
elf_aux_info(AT_HWCAP, &hwcap, sizeof hwcap);
|
||||
|
||||
# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|
||||
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
# ifdef HWCAP_EDSP
|
||||
if (hwcap & HWCAP_EDSP)
|
||||
flags |= OPUS_CPU_ARM_EDSP_FLAG;
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
|
||||
/* HWCAP_NEON is preferred when the headers define it; HWCAP_ASIMD is only
   consulted otherwise, and then also implies the MEDIA and EDSP flags. */
# ifdef HWCAP_NEON
|
||||
if (hwcap & HWCAP_NEON)
|
||||
flags |= OPUS_CPU_ARM_NEON_FLAG;
|
||||
# elif defined(HWCAP_ASIMD)
|
||||
if (hwcap & HWCAP_ASIMD)
|
||||
flags |= OPUS_CPU_ARM_NEON_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_EDSP_FLAG;
|
||||
# endif
|
||||
# endif
|
||||
# if defined(OPUS_ARM_MAY_HAVE_DOTPROD) && defined(HWCAP_ASIMDDP)
|
||||
if (hwcap & HWCAP_ASIMDDP)
|
||||
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
|
||||
# endif
|
||||
# endif
|
||||
|
||||
/* Features the build presumes are present are reported unconditionally,
   independent of what AT_HWCAP said. */
#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
|
||||
flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
|
||||
# if defined(OPUS_ARM_PRESUME_DOTPROD)
|
||||
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
return (flags);
|
||||
}
|
||||
|
||||
#else
|
||||
/* The feature registers which can tell us what the processor supports are
|
||||
* accessible in privileged modes only, so we can't have a general user-space
|
||||
@ -180,7 +271,13 @@ static int opus_select_arch_impl(void)
|
||||
}
|
||||
arch++;
|
||||
|
||||
celt_assert(arch == OPUS_ARCH_ARM_NEON);
|
||||
if(!(flags & OPUS_CPU_ARM_DOTPROD_FLAG)) {
|
||||
celt_assert(arch == OPUS_ARCH_ARM_NEON);
|
||||
return arch;
|
||||
}
|
||||
arch++;
|
||||
|
||||
celt_assert(arch == OPUS_ARCH_ARM_DOTPROD);
|
||||
return arch;
|
||||
}
|
||||
|
||||
|
13
third_party/opus/src/celt/arm/armcpu.h
vendored
13
third_party/opus/src/celt/arm/armcpu.h
vendored
@ -46,6 +46,12 @@
|
||||
# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
|
||||
# define MAY_HAVE_DOTPROD(name) name ## _dotprod
|
||||
# else
|
||||
# define MAY_HAVE_DOTPROD(name) MAY_HAVE_NEON(name)
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_ARM_PRESUME_EDSP)
|
||||
# define PRESUME_EDSP(name) name ## _edsp
|
||||
# else
|
||||
@ -64,6 +70,12 @@
|
||||
# define PRESUME_NEON(name) PRESUME_MEDIA(name)
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_ARM_PRESUME_DOTPROD)
|
||||
# define PRESUME_DOTPROD(name) name ## _dotprod
|
||||
# else
|
||||
# define PRESUME_DOTPROD(name) PRESUME_NEON(name)
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_HAVE_RTCD)
|
||||
int opus_select_arch(void);
|
||||
|
||||
@ -71,6 +83,7 @@ int opus_select_arch(void);
|
||||
#define OPUS_ARCH_ARM_EDSP (1)
|
||||
#define OPUS_ARCH_ARM_MEDIA (2)
|
||||
#define OPUS_ARCH_ARM_NEON (3)
|
||||
#define OPUS_ARCH_ARM_DOTPROD (4)
|
||||
|
||||
# endif
|
||||
|
||||
|
83
third_party/opus/src/celt/arm/celt_neon_intr.c
vendored
83
third_party/opus/src/celt/arm/celt_neon_intr.c
vendored
@ -38,6 +38,8 @@
|
||||
#include "../pitch.h"
|
||||
|
||||
#if defined(FIXED_POINT)
|
||||
#include <string.h>
|
||||
|
||||
void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
|
||||
{
|
||||
int j;
|
||||
@ -47,7 +49,10 @@ void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_va
|
||||
int16x4_t y0 = vld1_s16(y);
|
||||
y += 4;
|
||||
|
||||
for (j = 0; j + 8 <= len; j += 8)
|
||||
/* This loop loads one y value more than we actually need.
|
||||
Therefore we have to stop as soon as there are 8 or fewer samples left
|
||||
(instead of 7), to avoid reading past the end of the array. */
|
||||
for (j = 0; j + 8 < len; j += 8)
|
||||
{
|
||||
/* Load x[0...7] */
|
||||
int16x8_t xx = vld1q_s16(x);
|
||||
@ -80,23 +85,79 @@ void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_va
|
||||
x += 8;
|
||||
y += 8;
|
||||
}
|
||||
|
||||
for (; j < len; j++)
|
||||
{
|
||||
int16x4_t x0 = vld1_dup_s16(x); /* load next x */
|
||||
if (j + 4 < len) {
|
||||
/* Load x[0...3] */
|
||||
int16x4_t x0 = vld1_s16(x);
|
||||
/* Load y[4...7] */
|
||||
int16x4_t y4 = vld1_s16(y);
|
||||
int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
|
||||
int16x4_t y1 = vext_s16(y0, y4, 1);
|
||||
int32x4_t a1 = vmlal_lane_s16(a0, y1, x0, 1);
|
||||
int16x4_t y2 = vext_s16(y0, y4, 2);
|
||||
int32x4_t a2 = vmlal_lane_s16(a1, y2, x0, 2);
|
||||
int16x4_t y3 = vext_s16(y0, y4, 3);
|
||||
int32x4_t a3 = vmlal_lane_s16(a2, y3, x0, 3);
|
||||
y0 = y4;
|
||||
a = a3;
|
||||
x += 4;
|
||||
y += 4;
|
||||
j += 4;
|
||||
}
|
||||
if (j + 2 < len) {
|
||||
/* Load x[0...1] */
|
||||
int16x4x2_t xx = vld2_dup_s16(x);
|
||||
int16x4_t x0 = xx.val[0];
|
||||
int16x4_t x1 = xx.val[1];
|
||||
/* Load y[4...5].
|
||||
We would like to use vld1_dup_s32(), but casting the pointer would
|
||||
break strict aliasing rules and potentially have alignment issues.
|
||||
Fortunately the compiler seems capable of translating this memcpy()
|
||||
and vdup_n_s32() into the equivalent vld1_dup_s32().*/
|
||||
int32_t yy;
|
||||
memcpy(&yy, y, sizeof(yy));
|
||||
int16x4_t y4 = vreinterpret_s16_s32(vdup_n_s32(yy));
|
||||
int32x4_t a0 = vmlal_s16(a, y0, x0);
|
||||
|
||||
int16x4_t y4 = vld1_dup_s16(y); /* load next y */
|
||||
y0 = vext_s16(y0, y4, 1);
|
||||
int16x4_t y1 = vext_s16(y0, y4, 1);
|
||||
/* Replace bottom copy of {y[5], y[4]} in y4 with {y[3], y[2]} from y0,
|
||||
using VSRI instead of VEXT, since it's a data-processing
|
||||
instruction. */
|
||||
y0 = vreinterpret_s16_s64(vsri_n_s64(vreinterpret_s64_s16(y4),
|
||||
vreinterpret_s64_s16(y0), 32));
|
||||
int32x4_t a1 = vmlal_s16(a0, y1, x1);
|
||||
a = a1;
|
||||
x += 2;
|
||||
y += 2;
|
||||
j += 2;
|
||||
}
|
||||
if (j + 1 < len) {
|
||||
/* Load next x. */
|
||||
int16x4_t x0 = vld1_dup_s16(x);
|
||||
int32x4_t a0 = vmlal_s16(a, y0, x0);
|
||||
/* Load last y. */
|
||||
int16x4_t y4 = vld1_dup_s16(y);
|
||||
y0 = vreinterpret_s16_s64(vsri_n_s64(vreinterpret_s64_s16(y4),
|
||||
vreinterpret_s64_s16(y0), 16));
|
||||
a = a0;
|
||||
x++;
|
||||
y++;
|
||||
}
|
||||
|
||||
vst1q_s32(sum, a);
|
||||
/* Load last x. */
|
||||
int16x4_t x0 = vld1_dup_s16(x);
|
||||
int32x4_t a0 = vmlal_s16(a, y0, x0);
|
||||
vst1q_s32(sum, a0);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#if defined(__ARM_FEATURE_FMA) && defined(__ARM_ARCH_ISA_A64)
|
||||
/* If we can, force the compiler to use an FMA instruction rather than break
|
||||
* vmlaq_f32() into fmul/fadd. */
|
||||
#ifdef vmlaq_lane_f32
|
||||
#undef vmlaq_lane_f32
|
||||
#endif
|
||||
#define vmlaq_lane_f32(a,b,c,lane) vfmaq_lane_f32(a,b,c,lane)
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
* Function: xcorr_kernel_neon_float
|
||||
* ---------------------------------
|
||||
|
12
third_party/opus/src/celt/arm/meson.build
vendored
Normal file
12
third_party/opus/src/celt/arm/meson.build
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
arm2gnu = [find_program('arm2gnu.pl')] + arm2gnu_args
|
||||
celt_sources_arm_asm = configure_file(input: 'celt_pitch_xcorr_arm.s',
|
||||
output: '@BASENAME@-gnu.S',
|
||||
command: arm2gnu + ['@INPUT@'],
|
||||
capture: true)
|
||||
celt_arm_armopts_s_in = configure_file(input: 'armopts.s.in',
|
||||
output: 'armopts.s',
|
||||
configuration: opus_conf)
|
||||
celt_arm_armopts_s = configure_file(input: [celt_arm_armopts_s_in],
|
||||
output: '@BASENAME@-gnu.S',
|
||||
command: arm2gnu + ['@INPUT@'],
|
||||
capture: true)
|
@ -130,6 +130,13 @@ void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus
|
||||
|
||||
/* ========================================================================== */
|
||||
|
||||
#ifdef __ARM_FEATURE_FMA
|
||||
/* If we can, force the compiler to use an FMA instruction rather than break
|
||||
vmlaq_f32() into fmul/fadd. */
|
||||
#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef OPUS_CHECK_ASM
|
||||
|
||||
/* This part of code simulates floating-point NEON operations. */
|
||||
|
143
third_party/opus/src/celt/bands.c
vendored
143
third_party/opus/src/celt/bands.c
vendored
@ -108,22 +108,29 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band
|
||||
maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM);
|
||||
if (maxval > 0)
|
||||
{
|
||||
int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1);
|
||||
int shift, shift2;
|
||||
shift = celt_ilog2(maxval) - 14;
|
||||
shift2 = (((m->logN[i]>>BITRES)+LM+1)>>1);
|
||||
j=eBands[i]<<LM;
|
||||
if (shift>0)
|
||||
{
|
||||
do {
|
||||
sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)),
|
||||
EXTRACT16(SHR32(X[j+c*N],shift)));
|
||||
sum = ADD32(sum, SHR32(MULT16_16(EXTRACT16(SHR32(X[j+c*N],shift)),
|
||||
EXTRACT16(SHR32(X[j+c*N],shift))), 2*shift2));
|
||||
} while (++j<eBands[i+1]<<LM);
|
||||
} else {
|
||||
do {
|
||||
sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)),
|
||||
EXTRACT16(SHL32(X[j+c*N],-shift)));
|
||||
sum = ADD32(sum, SHR32(MULT16_16(EXTRACT16(SHL32(X[j+c*N],-shift)),
|
||||
EXTRACT16(SHL32(X[j+c*N],-shift))), 2*shift2));
|
||||
} while (++j<eBands[i+1]<<LM);
|
||||
}
|
||||
shift+=shift2;
|
||||
while (sum < 1<<28) {
|
||||
sum <<=2;
|
||||
shift -= 1;
|
||||
}
|
||||
/* We're adding one here to ensure the normalized band isn't larger than unity norm */
|
||||
bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift);
|
||||
bandE[i+c*m->nbEBands] = EPSILON+VSHR32(celt_sqrt(sum),-shift);
|
||||
} else {
|
||||
bandE[i+c*m->nbEBands] = EPSILON;
|
||||
}
|
||||
@ -143,13 +150,19 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
|
||||
i=0; do {
|
||||
opus_val16 g;
|
||||
int j,shift;
|
||||
opus_val16 E;
|
||||
shift = celt_zlog2(bandE[i+c*m->nbEBands])-13;
|
||||
E = VSHR32(bandE[i+c*m->nbEBands], shift);
|
||||
g = EXTRACT16(celt_rcp(SHL32(E,3)));
|
||||
j=M*eBands[i]; do {
|
||||
X[j+c*N] = MULT16_16_Q15(VSHR32(freq[j+c*N],shift-1),g);
|
||||
} while (++j<M*eBands[i+1]);
|
||||
opus_val32 E;
|
||||
shift = celt_zlog2(bandE[i+c*m->nbEBands])-14;
|
||||
E = VSHR32(bandE[i+c*m->nbEBands], shift-2);
|
||||
g = EXTRACT16(celt_rcp(E));
|
||||
if (shift > 0) {
|
||||
j=M*eBands[i]; do {
|
||||
X[j+c*N] = PSHR32(MULT16_32_Q15(g, freq[j+c*N]),shift);
|
||||
} while (++j<M*eBands[i+1]);
|
||||
} else {
|
||||
j=M*eBands[i]; do {
|
||||
X[j+c*N] = SHL32(MULT16_32_Q15(g, freq[j+c*N]),-shift);
|
||||
} while (++j<M*eBands[i+1]);
|
||||
}
|
||||
} while (++i<end);
|
||||
} while (++c<C);
|
||||
}
|
||||
@ -194,7 +207,7 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
|
||||
|
||||
/* De-normalise the energy to produce the synthesis from the unit-energy bands */
|
||||
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
|
||||
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start,
|
||||
celt_sig * OPUS_RESTRICT freq, const celt_glog *bandLogE, int start,
|
||||
int end, int M, int downsample, int silence)
|
||||
{
|
||||
int i, N;
|
||||
@ -218,26 +231,26 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
|
||||
for (i=start;i<end;i++)
|
||||
{
|
||||
int j, band_end;
|
||||
opus_val16 g;
|
||||
opus_val16 lg;
|
||||
opus_val32 g;
|
||||
celt_glog lg;
|
||||
#ifdef FIXED_POINT
|
||||
int shift;
|
||||
#endif
|
||||
j=M*eBands[i];
|
||||
band_end = M*eBands[i+1];
|
||||
lg = SATURATE16(ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],6)));
|
||||
lg = ADD32(bandLogE[i], SHL32((opus_val32)eMeans[i],DB_SHIFT-4));
|
||||
#ifndef FIXED_POINT
|
||||
g = celt_exp2(MIN32(32.f, lg));
|
||||
g = celt_exp2_db(MIN32(32.f, lg));
|
||||
#else
|
||||
/* Handle the integer part of the log energy */
|
||||
shift = 16-(lg>>DB_SHIFT);
|
||||
shift = 15-(lg>>DB_SHIFT);
|
||||
if (shift>31)
|
||||
{
|
||||
shift=0;
|
||||
g=0;
|
||||
} else {
|
||||
/* Handle the fractional part. */
|
||||
g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1));
|
||||
g = celt_exp2_db_frac((lg&((1<<DB_SHIFT)-1)));
|
||||
}
|
||||
/* Handle extreme gains with negative shift. */
|
||||
if (shift<0)
|
||||
@ -247,17 +260,19 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
|
||||
This shouldn't trigger unless the bitstream is already corrupted. */
|
||||
if (shift <= -2)
|
||||
{
|
||||
g = 16384;
|
||||
g = 16384*32768;
|
||||
shift = -2;
|
||||
}
|
||||
do {
|
||||
*f++ = SHL32(MULT16_16(*x++, g), -shift);
|
||||
*f++ = SHL32(MULT16_32_Q15(*x, g), -shift);
|
||||
x++;
|
||||
} while (++j<band_end);
|
||||
} else
|
||||
#endif
|
||||
/* Be careful of the fixed-point "else" just above when changing this code */
|
||||
do {
|
||||
*f++ = SHR32(MULT16_16(*x++, g), shift);
|
||||
*f++ = SHR32(MULT16_32_Q15(*x, g), shift);
|
||||
x++;
|
||||
} while (++j<band_end);
|
||||
}
|
||||
celt_assert(start <= end);
|
||||
@ -266,8 +281,8 @@ void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
|
||||
|
||||
/* This prevents energy collapse for transients with multiple short MDCTs */
|
||||
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size,
|
||||
int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE,
|
||||
const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch)
|
||||
int start, int end, const celt_glog *logE, const celt_glog *prev1logE,
|
||||
const celt_glog *prev2logE, const int *pulses, opus_uint32 seed, int encode, int arch)
|
||||
{
|
||||
int c, i, j, k;
|
||||
for (i=start;i<end;i++)
|
||||
@ -303,25 +318,25 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
|
||||
c=0; do
|
||||
{
|
||||
celt_norm *X;
|
||||
opus_val16 prev1;
|
||||
opus_val16 prev2;
|
||||
celt_glog prev1;
|
||||
celt_glog prev2;
|
||||
opus_val32 Ediff;
|
||||
opus_val16 r;
|
||||
int renormalize=0;
|
||||
prev1 = prev1logE[c*m->nbEBands+i];
|
||||
prev2 = prev2logE[c*m->nbEBands+i];
|
||||
if (C==1)
|
||||
if (!encode && C==1)
|
||||
{
|
||||
prev1 = MAX16(prev1,prev1logE[m->nbEBands+i]);
|
||||
prev2 = MAX16(prev2,prev2logE[m->nbEBands+i]);
|
||||
prev1 = MAXG(prev1,prev1logE[m->nbEBands+i]);
|
||||
prev2 = MAXG(prev2,prev2logE[m->nbEBands+i]);
|
||||
}
|
||||
Ediff = EXTEND32(logE[c*m->nbEBands+i])-EXTEND32(MIN16(prev1,prev2));
|
||||
Ediff = logE[c*m->nbEBands+i]-MING(prev1,prev2);
|
||||
Ediff = MAX32(0, Ediff);
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
if (Ediff < 16384)
|
||||
if (Ediff < GCONST(16.f))
|
||||
{
|
||||
opus_val32 r32 = SHR32(celt_exp2(-EXTRACT16(Ediff)),1);
|
||||
opus_val32 r32 = SHR32(celt_exp2_db(-Ediff),1);
|
||||
r = 2*MIN16(16383,r32);
|
||||
} else {
|
||||
r = 0;
|
||||
@ -333,7 +348,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
|
||||
#else
|
||||
/* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
|
||||
short blocks don't have the same energy as long */
|
||||
r = 2.f*celt_exp2(-Ediff);
|
||||
r = 2.f*celt_exp2_db(-Ediff);
|
||||
if (LM==3)
|
||||
r *= 1.41421356f;
|
||||
r = MIN16(thresh, r);
|
||||
@ -356,7 +371,7 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
|
||||
}
|
||||
/* We just added some energy, so we need to renormalise */
|
||||
if (renormalize)
|
||||
renormalise_vector(X, N0<<LM, Q15ONE, arch);
|
||||
renormalise_vector(X, N0<<LM, Q31ONE, arch);
|
||||
} while (++c<C);
|
||||
}
|
||||
}
|
||||
@ -423,12 +438,11 @@ static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT
|
||||
}
|
||||
}
|
||||
|
||||
static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N, int arch)
|
||||
static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val32 mid, int N, int arch)
|
||||
{
|
||||
int j;
|
||||
opus_val32 xp=0, side=0;
|
||||
opus_val32 El, Er;
|
||||
opus_val16 mid2;
|
||||
#ifdef FIXED_POINT
|
||||
int kl, kr;
|
||||
#endif
|
||||
@ -437,11 +451,10 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT
|
||||
/* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
|
||||
dual_inner_prod(Y, X, Y, N, &xp, &side, arch);
|
||||
/* Compensating for the mid normalization */
|
||||
xp = MULT16_32_Q15(mid, xp);
|
||||
xp = MULT32_32_Q31(mid, xp);
|
||||
/* mid and side are in Q15, not Q14 like X and Y */
|
||||
mid2 = SHR16(mid, 1);
|
||||
El = MULT16_16(mid2, mid2) + side - 2*xp;
|
||||
Er = MULT16_16(mid2, mid2) + side + 2*xp;
|
||||
El = SHR32(MULT32_32_Q31(mid, mid),3) + side - 2*xp;
|
||||
Er = SHR32(MULT32_32_Q31(mid, mid),3) + side + 2*xp;
|
||||
if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
|
||||
{
|
||||
OPUS_COPY(Y, X, N);
|
||||
@ -468,7 +481,7 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT
|
||||
{
|
||||
celt_norm r, l;
|
||||
/* Apply mid scaling (side is already scaled) */
|
||||
l = MULT16_16_P15(mid, X[j]);
|
||||
l = MULT32_32_Q31(mid, X[j]);
|
||||
r = Y[j];
|
||||
X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1));
|
||||
Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1));
|
||||
@ -943,14 +956,14 @@ static unsigned quant_band_n1(struct band_ctx *ctx, celt_norm *X, celt_norm *Y,
|
||||
static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
|
||||
int N, int b, int B, celt_norm *lowband,
|
||||
int LM,
|
||||
opus_val16 gain, int fill)
|
||||
opus_val32 gain, int fill)
|
||||
{
|
||||
const unsigned char *cache;
|
||||
int q;
|
||||
int curr_bits;
|
||||
int imid=0, iside=0;
|
||||
int B0=B;
|
||||
opus_val16 mid=0, side=0;
|
||||
opus_val32 mid=0, side=0;
|
||||
unsigned cm=0;
|
||||
celt_norm *Y=NULL;
|
||||
int encode;
|
||||
@ -990,8 +1003,8 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
|
||||
itheta = sctx.itheta;
|
||||
qalloc = sctx.qalloc;
|
||||
#ifdef FIXED_POINT
|
||||
mid = imid;
|
||||
side = iside;
|
||||
mid = SHL32(EXTEND32(imid), 16);
|
||||
side = SHL32(EXTEND32(iside), 16);
|
||||
#else
|
||||
mid = (1.f/32768)*imid;
|
||||
side = (1.f/32768)*iside;
|
||||
@ -1018,20 +1031,20 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
|
||||
if (mbits >= sbits)
|
||||
{
|
||||
cm = quant_partition(ctx, X, N, mbits, B, lowband, LM,
|
||||
MULT16_16_P15(gain,mid), fill);
|
||||
MULT32_32_Q31(gain,mid), fill);
|
||||
rebalance = mbits - (rebalance-ctx->remaining_bits);
|
||||
if (rebalance > 3<<BITRES && itheta!=0)
|
||||
sbits += rebalance - (3<<BITRES);
|
||||
cm |= quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
|
||||
MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
|
||||
MULT32_32_Q31(gain,side), fill>>B)<<(B0>>1);
|
||||
} else {
|
||||
cm = quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
|
||||
MULT16_16_P15(gain,side), fill>>B)<<(B0>>1);
|
||||
MULT32_32_Q31(gain,side), fill>>B)<<(B0>>1);
|
||||
rebalance = sbits - (rebalance-ctx->remaining_bits);
|
||||
if (rebalance > 3<<BITRES && itheta!=16384)
|
||||
mbits += rebalance - (3<<BITRES);
|
||||
cm |= quant_partition(ctx, X, N, mbits, B, lowband, LM,
|
||||
MULT16_16_P15(gain,mid), fill);
|
||||
MULT32_32_Q31(gain,mid), fill);
|
||||
}
|
||||
} else {
|
||||
/* This is the basic no-split case */
|
||||
@ -1109,7 +1122,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X,
|
||||
static unsigned quant_band(struct band_ctx *ctx, celt_norm *X,
|
||||
int N, int b, int B, celt_norm *lowband,
|
||||
int LM, celt_norm *lowband_out,
|
||||
opus_val16 gain, celt_norm *lowband_scratch, int fill)
|
||||
opus_val32 gain, celt_norm *lowband_scratch, int fill)
|
||||
{
|
||||
int N0=N;
|
||||
int N_B=N;
|
||||
@ -1239,7 +1252,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
|
||||
{
|
||||
int imid=0, iside=0;
|
||||
int inv = 0;
|
||||
opus_val16 mid=0, side=0;
|
||||
opus_val32 mid=0, side=0;
|
||||
unsigned cm=0;
|
||||
int mbits, sbits, delta;
|
||||
int itheta;
|
||||
@ -1268,8 +1281,8 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
|
||||
itheta = sctx.itheta;
|
||||
qalloc = sctx.qalloc;
|
||||
#ifdef FIXED_POINT
|
||||
mid = imid;
|
||||
side = iside;
|
||||
mid = SHL32(EXTEND32(imid), 16);
|
||||
side = SHL32(EXTEND32(iside), 16);
|
||||
#else
|
||||
mid = (1.f/32768)*imid;
|
||||
side = (1.f/32768)*iside;
|
||||
@ -1308,7 +1321,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
|
||||
sign = 1-2*sign;
|
||||
/* We use orig_fill here because we want to fold the side, but if
|
||||
itheta==16384, we'll have cleared the low bits of fill. */
|
||||
cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
|
||||
cm = quant_band(ctx, x2, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
|
||||
lowband_scratch, orig_fill);
|
||||
/* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
|
||||
and there's no need to worry about mixing with the other channel. */
|
||||
@ -1317,10 +1330,10 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
|
||||
if (ctx->resynth)
|
||||
{
|
||||
celt_norm tmp;
|
||||
X[0] = MULT16_16_Q15(mid, X[0]);
|
||||
X[1] = MULT16_16_Q15(mid, X[1]);
|
||||
Y[0] = MULT16_16_Q15(side, Y[0]);
|
||||
Y[1] = MULT16_16_Q15(side, Y[1]);
|
||||
X[0] = MULT32_32_Q31(mid, X[0]);
|
||||
X[1] = MULT32_32_Q31(mid, X[1]);
|
||||
Y[0] = MULT32_32_Q31(side, Y[0]);
|
||||
Y[1] = MULT32_32_Q31(side, Y[1]);
|
||||
tmp = X[0];
|
||||
X[0] = SUB16(tmp,Y[0]);
|
||||
Y[0] = ADD16(tmp,Y[0]);
|
||||
@ -1341,7 +1354,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
|
||||
{
|
||||
/* In stereo mode, we do not apply a scaling to the mid because we need the normalized
|
||||
mid for folding later. */
|
||||
cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
|
||||
cm = quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
|
||||
lowband_scratch, fill);
|
||||
rebalance = mbits - (rebalance-ctx->remaining_bits);
|
||||
if (rebalance > 3<<BITRES && itheta!=0)
|
||||
@ -1359,7 +1372,7 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm
|
||||
mbits += rebalance - (3<<BITRES);
|
||||
/* In stereo mode, we do not apply a scaling to the mid because we need the normalized
|
||||
mid for folding later. */
|
||||
cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q15ONE,
|
||||
cm |= quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
|
||||
lowband_scratch, fill);
|
||||
}
|
||||
}
|
||||
@ -1450,7 +1463,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
|
||||
if (encode && resynth)
|
||||
lowband_scratch = _lowband_scratch;
|
||||
else
|
||||
lowband_scratch = X_+M*eBands[m->nbEBands-1];
|
||||
lowband_scratch = X_+M*eBands[m->effEBands-1];
|
||||
ALLOC(X_save, resynth_alloc, celt_norm);
|
||||
ALLOC(Y_save, resynth_alloc, celt_norm);
|
||||
ALLOC(X_save2, resynth_alloc, celt_norm);
|
||||
@ -1573,10 +1586,10 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
|
||||
{
|
||||
x_cm = quant_band(&ctx, X, N, b/2, B,
|
||||
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
|
||||
last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm);
|
||||
last?NULL:norm+M*eBands[i]-norm_offset, Q31ONE, lowband_scratch, x_cm);
|
||||
y_cm = quant_band(&ctx, Y, N, b/2, B,
|
||||
effective_lowband != -1 ? norm2+effective_lowband : NULL, LM,
|
||||
last?NULL:norm2+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, y_cm);
|
||||
last?NULL:norm2+M*eBands[i]-norm_offset, Q31ONE, lowband_scratch, y_cm);
|
||||
} else {
|
||||
if (Y!=NULL)
|
||||
{
|
||||
@ -1652,7 +1665,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
|
||||
} else {
|
||||
x_cm = quant_band(&ctx, X, N, b, B,
|
||||
effective_lowband != -1 ? norm+effective_lowband : NULL, LM,
|
||||
last?NULL:norm+M*eBands[i]-norm_offset, Q15ONE, lowband_scratch, x_cm|y_cm);
|
||||
last?NULL:norm+M*eBands[i]-norm_offset, Q31ONE, lowband_scratch, x_cm|y_cm);
|
||||
}
|
||||
y_cm = x_cm;
|
||||
}
|
||||
|
8
third_party/opus/src/celt/bands.h
vendored
8
third_party/opus/src/celt/bands.h
vendored
@ -62,7 +62,7 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel
|
||||
* @param bandE Log-domain energy for each band (the implementation adds eMeans back and exponentiates it to obtain the gain)
|
||||
*/
|
||||
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X,
|
||||
celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandE, int start,
|
||||
celt_sig * OPUS_RESTRICT freq, const celt_glog *bandE, int start,
|
||||
int end, int M, int downsample, int silence);
|
||||
|
||||
#define SPREAD_NONE (0)
|
||||
@ -112,9 +112,9 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
|
||||
|
||||
void anti_collapse(const CELTMode *m, celt_norm *X_,
|
||||
unsigned char *collapse_masks, int LM, int C, int size, int start,
|
||||
int end, const opus_val16 *logE, const opus_val16 *prev1logE,
|
||||
const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed,
|
||||
int arch);
|
||||
int end, const celt_glog *logE, const celt_glog *prev1logE,
|
||||
const celt_glog *prev2logE, const int *pulses, opus_uint32 seed,
|
||||
int encode, int arch);
|
||||
|
||||
opus_uint32 celt_lcg_rand(opus_uint32 seed);
|
||||
|
||||
|
79
third_party/opus/src/celt/celt.c
vendored
79
third_party/opus/src/celt/celt.c
vendored
@ -89,6 +89,7 @@ int resampling_factor(opus_int32 rate)
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
#if !defined(OVERRIDE_COMB_FILTER_CONST) || defined(NON_STATIC_COMB_FILTER_CONST_C)
|
||||
/* This version should be faster on ARM */
|
||||
#ifdef OPUS_ARM_ASM
|
||||
@ -96,7 +97,7 @@ int resampling_factor(opus_int32 rate)
|
||||
static
|
||||
#endif
|
||||
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
|
||||
opus_val16 g10, opus_val16 g11, opus_val16 g12)
|
||||
celt_coef g10, celt_coef g11, celt_coef g12)
|
||||
{
|
||||
opus_val32 x0, x1, x2, x3, x4;
|
||||
int i;
|
||||
@ -108,33 +109,33 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
|
||||
{
|
||||
opus_val32 t;
|
||||
x0=SHL32(x[i-T+2],1);
|
||||
t = MAC16_32_Q16(x[i], g10, x2);
|
||||
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
|
||||
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
|
||||
t = MAC_COEF_32_ARM(x[i], g10, x2);
|
||||
t = MAC_COEF_32_ARM(t, g11, ADD32(x1,x3));
|
||||
t = MAC_COEF_32_ARM(t, g12, ADD32(x0,x4));
|
||||
t = SATURATE(t, SIG_SAT);
|
||||
y[i] = t;
|
||||
x4=SHL32(x[i-T+3],1);
|
||||
t = MAC16_32_Q16(x[i+1], g10, x1);
|
||||
t = MAC16_32_Q16(t, g11, ADD32(x0,x2));
|
||||
t = MAC16_32_Q16(t, g12, ADD32(x4,x3));
|
||||
t = MAC_COEF_32_ARM(x[i+1], g10, x1);
|
||||
t = MAC_COEF_32_ARM(t, g11, ADD32(x0,x2));
|
||||
t = MAC_COEF_32_ARM(t, g12, ADD32(x4,x3));
|
||||
t = SATURATE(t, SIG_SAT);
|
||||
y[i+1] = t;
|
||||
x3=SHL32(x[i-T+4],1);
|
||||
t = MAC16_32_Q16(x[i+2], g10, x0);
|
||||
t = MAC16_32_Q16(t, g11, ADD32(x4,x1));
|
||||
t = MAC16_32_Q16(t, g12, ADD32(x3,x2));
|
||||
t = MAC_COEF_32_ARM(x[i+2], g10, x0);
|
||||
t = MAC_COEF_32_ARM(t, g11, ADD32(x4,x1));
|
||||
t = MAC_COEF_32_ARM(t, g12, ADD32(x3,x2));
|
||||
t = SATURATE(t, SIG_SAT);
|
||||
y[i+2] = t;
|
||||
x2=SHL32(x[i-T+5],1);
|
||||
t = MAC16_32_Q16(x[i+3], g10, x4);
|
||||
t = MAC16_32_Q16(t, g11, ADD32(x3,x0));
|
||||
t = MAC16_32_Q16(t, g12, ADD32(x2,x1));
|
||||
t = MAC_COEF_32_ARM(x[i+3], g10, x4);
|
||||
t = MAC_COEF_32_ARM(t, g11, ADD32(x3,x0));
|
||||
t = MAC_COEF_32_ARM(t, g12, ADD32(x2,x1));
|
||||
t = SATURATE(t, SIG_SAT);
|
||||
y[i+3] = t;
|
||||
x1=SHL32(x[i-T+6],1);
|
||||
t = MAC16_32_Q16(x[i+4], g10, x3);
|
||||
t = MAC16_32_Q16(t, g11, ADD32(x2,x4));
|
||||
t = MAC16_32_Q16(t, g12, ADD32(x1,x0));
|
||||
t = MAC_COEF_32_ARM(x[i+4], g10, x3);
|
||||
t = MAC_COEF_32_ARM(t, g11, ADD32(x2,x4));
|
||||
t = MAC_COEF_32_ARM(t, g12, ADD32(x1,x0));
|
||||
t = SATURATE(t, SIG_SAT);
|
||||
y[i+4] = t;
|
||||
}
|
||||
@ -143,9 +144,9 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
|
||||
{
|
||||
opus_val32 t;
|
||||
x0=SHL32(x[i-T+2],1);
|
||||
t = MAC16_32_Q16(x[i], g10, x2);
|
||||
t = MAC16_32_Q16(t, g11, ADD32(x1,x3));
|
||||
t = MAC16_32_Q16(t, g12, ADD32(x0,x4));
|
||||
t = MAC_COEF_32_ARM(x[i], g10, x2);
|
||||
t = MAC_COEF_32_ARM(t, g11, ADD32(x1,x3));
|
||||
t = MAC_COEF_32_ARM(t, g12, ADD32(x0,x4));
|
||||
t = SATURATE(t, SIG_SAT);
|
||||
y[i] = t;
|
||||
x4=x3;
|
||||
@ -160,7 +161,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
|
||||
static
|
||||
#endif
|
||||
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
|
||||
opus_val16 g10, opus_val16 g11, opus_val16 g12)
|
||||
celt_coef g10, celt_coef g11, celt_coef g12)
|
||||
{
|
||||
opus_val32 x0, x1, x2, x3, x4;
|
||||
int i;
|
||||
@ -172,9 +173,9 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
|
||||
{
|
||||
x0=x[i-T+2];
|
||||
y[i] = x[i]
|
||||
+ MULT16_32_Q15(g10,x2)
|
||||
+ MULT16_32_Q15(g11,ADD32(x1,x3))
|
||||
+ MULT16_32_Q15(g12,ADD32(x0,x4));
|
||||
+ MULT_COEF_32(g10,x2)
|
||||
+ MULT_COEF_32(g11,ADD32(x1,x3))
|
||||
+ MULT_COEF_32(g12,ADD32(x0,x4));
|
||||
y[i] = SATURATE(y[i], SIG_SAT);
|
||||
x4=x3;
|
||||
x3=x2;
|
||||
@ -189,11 +190,11 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
|
||||
#ifndef OVERRIDE_comb_filter
|
||||
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
|
||||
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
|
||||
const opus_val16 *window, int overlap, int arch)
|
||||
const celt_coef *window, int overlap, int arch)
|
||||
{
|
||||
int i;
|
||||
/* printf ("%d %d %f %f\n", T0, T1, g0, g1); */
|
||||
opus_val16 g00, g01, g02, g10, g11, g12;
|
||||
celt_coef g00, g01, g02, g10, g11, g12;
|
||||
opus_val32 x0, x1, x2, x3, x4;
|
||||
static const opus_val16 gains[3][3] = {
|
||||
{QCONST16(0.3066406250f, 15), QCONST16(0.2170410156f, 15), QCONST16(0.1296386719f, 15)},
|
||||
@ -211,12 +212,12 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
|
||||
to have them be at least 2 to avoid processing garbage data. */
|
||||
T0 = IMAX(T0, COMBFILTER_MINPERIOD);
|
||||
T1 = IMAX(T1, COMBFILTER_MINPERIOD);
|
||||
g00 = MULT16_16_P15(g0, gains[tapset0][0]);
|
||||
g01 = MULT16_16_P15(g0, gains[tapset0][1]);
|
||||
g02 = MULT16_16_P15(g0, gains[tapset0][2]);
|
||||
g10 = MULT16_16_P15(g1, gains[tapset1][0]);
|
||||
g11 = MULT16_16_P15(g1, gains[tapset1][1]);
|
||||
g12 = MULT16_16_P15(g1, gains[tapset1][2]);
|
||||
g00 = MULT_COEF_TAPS(g0, gains[tapset0][0]);
|
||||
g01 = MULT_COEF_TAPS(g0, gains[tapset0][1]);
|
||||
g02 = MULT_COEF_TAPS(g0, gains[tapset0][2]);
|
||||
g10 = MULT_COEF_TAPS(g1, gains[tapset1][0]);
|
||||
g11 = MULT_COEF_TAPS(g1, gains[tapset1][1]);
|
||||
g12 = MULT_COEF_TAPS(g1, gains[tapset1][2]);
|
||||
x1 = x[-T1+1];
|
||||
x2 = x[-T1 ];
|
||||
x3 = x[-T1-1];
|
||||
@ -226,16 +227,16 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
|
||||
overlap=0;
|
||||
for (i=0;i<overlap;i++)
|
||||
{
|
||||
opus_val16 f;
|
||||
celt_coef f;
|
||||
x0=x[i-T1+2];
|
||||
f = MULT16_16_Q15(window[i],window[i]);
|
||||
f = MULT_COEF(window[i],window[i]);
|
||||
y[i] = x[i]
|
||||
+ MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g00),x[i-T0])
|
||||
+ MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
|
||||
+ MULT16_32_Q15(MULT16_16_Q15((Q15ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
|
||||
+ MULT16_32_Q15(MULT16_16_Q15(f,g10),x2)
|
||||
+ MULT16_32_Q15(MULT16_16_Q15(f,g11),ADD32(x1,x3))
|
||||
+ MULT16_32_Q15(MULT16_16_Q15(f,g12),ADD32(x0,x4));
|
||||
+ MULT_COEF_32(MULT_COEF((COEF_ONE-f),g00),x[i-T0])
|
||||
+ MULT_COEF_32(MULT_COEF((COEF_ONE-f),g01),ADD32(x[i-T0+1],x[i-T0-1]))
|
||||
+ MULT_COEF_32(MULT_COEF((COEF_ONE-f),g02),ADD32(x[i-T0+2],x[i-T0-2]))
|
||||
+ MULT_COEF_32(MULT_COEF(f,g10),x2)
|
||||
+ MULT_COEF_32(MULT_COEF(f,g11),ADD32(x1,x3))
|
||||
+ MULT_COEF_32(MULT_COEF(f,g12),ADD32(x0,x4));
|
||||
y[i] = SATURATE(y[i], SIG_SAT);
|
||||
x4=x3;
|
||||
x3=x2;
|
||||
|
40
third_party/opus/src/celt/celt.h
vendored
40
third_party/opus/src/celt/celt.h
vendored
@ -41,6 +41,11 @@
|
||||
#include "entenc.h"
|
||||
#include "entdec.h"
|
||||
#include "arch.h"
|
||||
#include "kiss_fft.h"
|
||||
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
#include "lpcnet.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -79,6 +84,8 @@ typedef struct {
|
||||
|
||||
#define __celt_check_silkinfo_ptr(ptr) ((ptr) + ((ptr) - (const SILKInfo*)(ptr)))
|
||||
|
||||
#define __celt_check_glog_ptr(ptr) ((ptr) + ((ptr) - (celt_glog*)(ptr)))
|
||||
|
||||
/* Encoder/decoder Requests */
|
||||
|
||||
|
||||
@ -126,7 +133,7 @@ typedef struct {
|
||||
#define OPUS_SET_LFE(x) OPUS_SET_LFE_REQUEST, __opus_check_int(x)
|
||||
|
||||
#define OPUS_SET_ENERGY_MASK_REQUEST 10026
|
||||
#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __opus_check_val16_ptr(x)
|
||||
#define OPUS_SET_ENERGY_MASK(x) OPUS_SET_ENERGY_MASK_REQUEST, __celt_check_glog_ptr(x)
|
||||
|
||||
#define CELT_SET_SILK_INFO_REQUEST 10028
|
||||
#define CELT_SET_SILK_INFO(x) CELT_SET_SILK_INFO_REQUEST, __celt_check_silkinfo_ptr(x)
|
||||
@ -135,7 +142,7 @@ typedef struct {
|
||||
|
||||
int celt_encoder_get_size(int channels);
|
||||
|
||||
int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);
|
||||
int celt_encode_with_ec(OpusCustomEncoder * OPUS_RESTRICT st, const opus_res * pcm, int frame_size, unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);
|
||||
|
||||
int celt_encoder_init(CELTEncoder *st, opus_int32 sampling_rate, int channels,
|
||||
int arch);
|
||||
@ -149,8 +156,15 @@ int celt_decoder_get_size(int channels);
|
||||
|
||||
int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
|
||||
|
||||
int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
|
||||
int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
,LPCNetPLCState *lpcnet
|
||||
#endif
|
||||
);
|
||||
|
||||
int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
|
||||
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
|
||||
int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
|
||||
|
||||
#define celt_encoder_ctl opus_custom_encoder_ctl
|
||||
#define celt_decoder_ctl opus_custom_decoder_ctl
|
||||
@ -218,30 +232,20 @@ void validate_celt_decoder(CELTDecoder *st);
|
||||
|
||||
int resampling_factor(opus_int32 rate);
|
||||
|
||||
void celt_preemphasis(const opus_val16 * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
|
||||
void celt_preemphasis(const opus_res * OPUS_RESTRICT pcmp, celt_sig * OPUS_RESTRICT inp,
|
||||
int N, int CC, int upsample, const opus_val16 *coef, celt_sig *mem, int clip);
|
||||
|
||||
void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
|
||||
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
|
||||
const opus_val16 *window, int overlap, int arch);
|
||||
|
||||
#ifdef NON_STATIC_COMB_FILTER_CONST_C
|
||||
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
|
||||
opus_val16 g10, opus_val16 g11, opus_val16 g12);
|
||||
#endif
|
||||
|
||||
#ifndef OVERRIDE_COMB_FILTER_CONST
|
||||
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
|
||||
((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
|
||||
#endif
|
||||
const celt_coef *window, int overlap, int arch);
|
||||
|
||||
void init_caps(const CELTMode *m,int *cap,int LM,int C);
|
||||
|
||||
#ifdef RESYNTH
|
||||
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
|
||||
void deemphasis(celt_sig *in[], opus_res *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, int accum);
|
||||
void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
|
||||
opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
|
||||
int LM, int downsample, int silence);
|
||||
celt_glog *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
|
||||
int LM, int downsample, int silence, int arch);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
545
third_party/opus/src/celt/celt_decoder.c
vendored
545
third_party/opus/src/celt/celt_decoder.c
vendored
@ -51,6 +51,11 @@
|
||||
#include "celt_lpc.h"
|
||||
#include "vq.h"
|
||||
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
#include "lpcnet.h"
|
||||
#include "lpcnet_private.h"
|
||||
#endif
|
||||
|
||||
/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
|
||||
CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
|
||||
current value corresponds to a pitch of 66.67 Hz. */
|
||||
@ -59,9 +64,6 @@
|
||||
pitch of 480 Hz. */
|
||||
#define PLC_PITCH_LAG_MIN (100)
|
||||
|
||||
#if defined(SMALL_FOOTPRINT) && defined(FIXED_POINT)
|
||||
#define NORM_ALIASING_HACK
|
||||
#endif
|
||||
/**********************************************************************/
|
||||
/* */
|
||||
/* DECODER */
|
||||
@ -69,6 +71,9 @@
|
||||
/**********************************************************************/
|
||||
#define DECODE_BUFFER_SIZE 2048
|
||||
|
||||
#define PLC_UPDATE_FRAMES 4
|
||||
#define PLC_UPDATE_SAMPLES (PLC_UPDATE_FRAMES*FRAME_SIZE)
|
||||
|
||||
/** Decoder state
|
||||
@brief Decoder state
|
||||
*/
|
||||
@ -82,6 +87,7 @@ struct OpusCustomDecoder {
|
||||
int start, end;
|
||||
int signalling;
|
||||
int disable_inv;
|
||||
int complexity;
|
||||
int arch;
|
||||
|
||||
/* Everything beyond this point gets cleared on a reset */
|
||||
@ -98,15 +104,22 @@ struct OpusCustomDecoder {
|
||||
opus_val16 postfilter_gain_old;
|
||||
int postfilter_tapset;
|
||||
int postfilter_tapset_old;
|
||||
int prefilter_and_fold;
|
||||
|
||||
celt_sig preemph_memD[2];
|
||||
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
opus_int16 plc_pcm[PLC_UPDATE_SAMPLES];
|
||||
int plc_fill;
|
||||
float plc_preemphasis_mem;
|
||||
#endif
|
||||
|
||||
celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
|
||||
/* opus_val16 lpc[], Size = channels*LPC_ORDER */
|
||||
/* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
|
||||
/* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
|
||||
/* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
|
||||
/* opus_val16 backgroundLogE[], Size = 2*mode->nbEBands */
|
||||
/* opus_val16 lpc[], Size = channels*CELT_LPC_ORDER */
|
||||
/* celt_glog oldEBands[], Size = 2*mode->nbEBands */
|
||||
/* celt_glog oldLogE[], Size = 2*mode->nbEBands */
|
||||
/* celt_glog oldLogE2[], Size = 2*mode->nbEBands */
|
||||
/* celt_glog backgroundLogE[], Size = 2*mode->nbEBands */
|
||||
};
|
||||
|
||||
#if defined(ENABLE_HARDENING) || defined(ENABLE_ASSERTIONS)
|
||||
@ -157,8 +170,8 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int
|
||||
{
|
||||
int size = sizeof(struct CELTDecoder)
|
||||
+ (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
|
||||
+ channels*LPC_ORDER*sizeof(opus_val16)
|
||||
+ 4*2*mode->nbEBands*sizeof(opus_val16);
|
||||
+ channels*CELT_LPC_ORDER*sizeof(opus_val16)
|
||||
+ 4*2*mode->nbEBands*sizeof(celt_glog);
|
||||
return size;
|
||||
}
|
||||
|
||||
@ -233,7 +246,7 @@ void opus_custom_decoder_destroy(CELTDecoder *st)
|
||||
/* Special case for stereo with no downsampling and no accumulation. This is
|
||||
quite common and we can make it faster by processing both channels in the
|
||||
same loop, reducing overhead due to the dependency loop in the IIR filter. */
|
||||
static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, const opus_val16 coef0,
|
||||
static void deemphasis_stereo_simple(celt_sig *in[], opus_res *pcm, int N, const opus_val16 coef0,
|
||||
celt_sig *mem)
|
||||
{
|
||||
celt_sig * OPUS_RESTRICT x0;
|
||||
@ -248,12 +261,12 @@ static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, con
|
||||
{
|
||||
celt_sig tmp0, tmp1;
|
||||
/* Add VERY_SMALL to x[] first to reduce dependency chain. */
|
||||
tmp0 = x0[j] + VERY_SMALL + m0;
|
||||
tmp1 = x1[j] + VERY_SMALL + m1;
|
||||
tmp0 = SATURATE(x0[j] + VERY_SMALL + m0, SIG_SAT);
|
||||
tmp1 = SATURATE(x1[j] + VERY_SMALL + m1, SIG_SAT);
|
||||
m0 = MULT16_32_Q15(coef0, tmp0);
|
||||
m1 = MULT16_32_Q15(coef0, tmp1);
|
||||
pcm[2*j ] = SCALEOUT(SIG2WORD16(tmp0));
|
||||
pcm[2*j+1] = SCALEOUT(SIG2WORD16(tmp1));
|
||||
pcm[2*j ] = SIG2RES(tmp0);
|
||||
pcm[2*j+1] = SIG2RES(tmp1);
|
||||
}
|
||||
mem[0] = m0;
|
||||
mem[1] = m1;
|
||||
@ -263,7 +276,7 @@ static void deemphasis_stereo_simple(celt_sig *in[], opus_val16 *pcm, int N, con
|
||||
#ifndef RESYNTH
|
||||
static
|
||||
#endif
|
||||
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef,
|
||||
void deemphasis(celt_sig *in[], opus_res *pcm, int N, int C, int downsample, const opus_val16 *coef,
|
||||
celt_sig *mem, int accum)
|
||||
{
|
||||
int c;
|
||||
@ -279,10 +292,6 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
|
||||
deemphasis_stereo_simple(in, pcm, N, coef[0], mem);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#ifndef FIXED_POINT
|
||||
(void)accum;
|
||||
celt_assert(accum==0);
|
||||
#endif
|
||||
ALLOC(scratch, N, celt_sig);
|
||||
coef0 = coef[0];
|
||||
@ -290,7 +299,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
|
||||
c=0; do {
|
||||
int j;
|
||||
celt_sig * OPUS_RESTRICT x;
|
||||
opus_val16 * OPUS_RESTRICT y;
|
||||
opus_res * OPUS_RESTRICT y;
|
||||
celt_sig m = mem[c];
|
||||
x =in[c];
|
||||
y = pcm+c;
|
||||
@ -301,7 +310,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
|
||||
opus_val16 coef3 = coef[3];
|
||||
for (j=0;j<N;j++)
|
||||
{
|
||||
celt_sig tmp = x[j] + m + VERY_SMALL;
|
||||
celt_sig tmp = SATURATE(x[j] + m + VERY_SMALL, SIG_SAT);
|
||||
m = MULT16_32_Q15(coef0, tmp)
|
||||
- MULT16_32_Q15(coef1, x[j]);
|
||||
tmp = SHL32(MULT16_32_Q15(coef3, tmp), 2);
|
||||
@ -315,30 +324,28 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
|
||||
/* Shortcut for the standard (non-custom modes) case */
|
||||
for (j=0;j<N;j++)
|
||||
{
|
||||
celt_sig tmp = x[j] + VERY_SMALL + m;
|
||||
celt_sig tmp = SATURATE(x[j] + VERY_SMALL + m, SIG_SAT);
|
||||
m = MULT16_32_Q15(coef0, tmp);
|
||||
scratch[j] = tmp;
|
||||
}
|
||||
apply_downsampling=1;
|
||||
} else {
|
||||
/* Shortcut for the standard (non-custom modes) case */
|
||||
#ifdef FIXED_POINT
|
||||
if (accum)
|
||||
{
|
||||
for (j=0;j<N;j++)
|
||||
{
|
||||
celt_sig tmp = x[j] + m + VERY_SMALL;
|
||||
celt_sig tmp = SATURATE(x[j] + m + VERY_SMALL, SIG_SAT);
|
||||
m = MULT16_32_Q15(coef0, tmp);
|
||||
y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(tmp))));
|
||||
y[j*C] = ADD_RES(y[j*C], SIG2RES(tmp));
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
for (j=0;j<N;j++)
|
||||
{
|
||||
celt_sig tmp = x[j] + VERY_SMALL + m;
|
||||
celt_sig tmp = SATURATE(x[j] + VERY_SMALL + m, SIG_SAT);
|
||||
m = MULT16_32_Q15(coef0, tmp);
|
||||
y[j*C] = SCALEOUT(SIG2WORD16(tmp));
|
||||
y[j*C] = SIG2RES(tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -347,16 +354,14 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
|
||||
if (apply_downsampling)
|
||||
{
|
||||
/* Perform down-sampling */
|
||||
#ifdef FIXED_POINT
|
||||
if (accum)
|
||||
{
|
||||
for (j=0;j<Nd;j++)
|
||||
y[j*C] = SAT16(ADD32(y[j*C], SCALEOUT(SIG2WORD16(scratch[j*downsample]))));
|
||||
y[j*C] = ADD_RES(y[j*C], SIG2RES(scratch[j*downsample]));
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
for (j=0;j<Nd;j++)
|
||||
y[j*C] = SCALEOUT(SIG2WORD16(scratch[j*downsample]));
|
||||
y[j*C] = SIG2RES(scratch[j*downsample]);
|
||||
}
|
||||
}
|
||||
} while (++c<C);
|
||||
@ -367,7 +372,7 @@ void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, c
|
||||
static
|
||||
#endif
|
||||
void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
|
||||
opus_val16 *oldBandE, int start, int effEnd, int C, int CC,
|
||||
celt_glog *oldBandE, int start, int effEnd, int C, int CC,
|
||||
int isTransient, int LM, int downsample,
|
||||
int silence, int arch)
|
||||
{
|
||||
@ -499,7 +504,100 @@ static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
|
||||
return pitch_index;
|
||||
}
|
||||
|
||||
static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
static void prefilter_and_fold(CELTDecoder * OPUS_RESTRICT st, int N)
|
||||
{
|
||||
int c;
|
||||
int CC;
|
||||
int i;
|
||||
int overlap;
|
||||
celt_sig *decode_mem[2];
|
||||
const OpusCustomMode *mode;
|
||||
VARDECL(opus_val32, etmp);
|
||||
mode = st->mode;
|
||||
overlap = st->overlap;
|
||||
CC = st->channels;
|
||||
ALLOC(etmp, overlap, opus_val32);
|
||||
c=0; do {
|
||||
decode_mem[c] = st->_decode_mem + c*(DECODE_BUFFER_SIZE+overlap);
|
||||
} while (++c<CC);
|
||||
|
||||
c=0; do {
|
||||
/* Apply the pre-filter to the MDCT overlap for the next frame because
|
||||
the post-filter will be re-applied in the decoder after the MDCT
|
||||
overlap. */
|
||||
comb_filter(etmp, decode_mem[c]+DECODE_BUFFER_SIZE-N,
|
||||
st->postfilter_period_old, st->postfilter_period, overlap,
|
||||
-st->postfilter_gain_old, -st->postfilter_gain,
|
||||
st->postfilter_tapset_old, st->postfilter_tapset, NULL, 0, st->arch);
|
||||
|
||||
/* Simulate TDAC on the concealed audio so that it blends with the
|
||||
MDCT of the next frame. */
|
||||
for (i=0;i<overlap/2;i++)
|
||||
{
|
||||
decode_mem[c][DECODE_BUFFER_SIZE-N+i] =
|
||||
MULT16_32_Q15(COEF2VAL16(mode->window[i]), etmp[overlap-1-i])
|
||||
+ MULT16_32_Q15 (COEF2VAL16(mode->window[overlap-i-1]), etmp[i]);
|
||||
}
|
||||
} while (++c<CC);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
|
||||
#define SINC_ORDER 48
|
||||
/* h=cos(pi/2*abs(sin([-24:24]/48*pi*23./24)).^2);
|
||||
b=sinc([-24:24]/3*1.02).*h;
|
||||
b=b/sum(b); */
|
||||
static const float sinc_filter[SINC_ORDER+1] = {
|
||||
4.2931e-05f, -0.000190293f, -0.000816132f, -0.000637162f, 0.00141662f, 0.00354764f, 0.00184368f, -0.00428274f,
|
||||
-0.00856105f, -0.0034003f, 0.00930201f, 0.0159616f, 0.00489785f, -0.0169649f, -0.0259484f, -0.00596856f,
|
||||
0.0286551f, 0.0405872f, 0.00649994f, -0.0509284f, -0.0716655f, -0.00665212f, 0.134336f, 0.278927f,
|
||||
0.339995f, 0.278927f, 0.134336f, -0.00665212f, -0.0716655f, -0.0509284f, 0.00649994f, 0.0405872f,
|
||||
0.0286551f, -0.00596856f, -0.0259484f, -0.0169649f, 0.00489785f, 0.0159616f, 0.00930201f, -0.0034003f,
|
||||
-0.00856105f, -0.00428274f, 0.00184368f, 0.00354764f, 0.00141662f, -0.000637162f, -0.000816132f, -0.000190293f,
|
||||
4.2931e-05f
|
||||
};
|
||||
|
||||
void update_plc_state(LPCNetPLCState *lpcnet, celt_sig *decode_mem[2], float *plc_preemphasis_mem, int CC)
|
||||
{
|
||||
int i;
|
||||
int tmp_read_post, tmp_fec_skip;
|
||||
int offset;
|
||||
celt_sig buf48k[DECODE_BUFFER_SIZE];
|
||||
opus_int16 buf16k[PLC_UPDATE_SAMPLES];
|
||||
if (CC == 1) OPUS_COPY(buf48k, decode_mem[0], DECODE_BUFFER_SIZE);
|
||||
else {
|
||||
for (i=0;i<DECODE_BUFFER_SIZE;i++) {
|
||||
buf48k[i] = .5*(decode_mem[0][i] + decode_mem[1][i]);
|
||||
}
|
||||
}
|
||||
/* Down-sample the last 40 ms. */
|
||||
for (i=1;i<DECODE_BUFFER_SIZE;i++) buf48k[i] += PREEMPHASIS*buf48k[i-1];
|
||||
*plc_preemphasis_mem = buf48k[DECODE_BUFFER_SIZE-1];
|
||||
offset = DECODE_BUFFER_SIZE-SINC_ORDER-1 - 3*(PLC_UPDATE_SAMPLES-1);
|
||||
celt_assert(3*(PLC_UPDATE_SAMPLES-1) + SINC_ORDER + offset == DECODE_BUFFER_SIZE-1);
|
||||
for (i=0;i<PLC_UPDATE_SAMPLES;i++) {
|
||||
int j;
|
||||
float sum = 0;
|
||||
for (j=0;j<SINC_ORDER+1;j++) {
|
||||
sum += buf48k[3*i + j + offset]*sinc_filter[j];
|
||||
}
|
||||
buf16k[i] = float2int(MIN32(32767.f, MAX32(-32767.f, sum)));
|
||||
}
|
||||
tmp_read_post = lpcnet->fec_read_pos;
|
||||
tmp_fec_skip = lpcnet->fec_skip;
|
||||
for (i=0;i<PLC_UPDATE_FRAMES;i++) {
|
||||
lpcnet_plc_update(lpcnet, &buf16k[FRAME_SIZE*i]);
|
||||
}
|
||||
lpcnet->fec_read_pos = tmp_read_post;
|
||||
lpcnet->fec_skip = tmp_fec_skip;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
,LPCNetPLCState *lpcnet
|
||||
#endif
|
||||
)
|
||||
{
|
||||
int c;
|
||||
int i;
|
||||
@ -507,7 +605,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
celt_sig *decode_mem[2];
|
||||
celt_sig *out_syn[2];
|
||||
opus_val16 *lpc;
|
||||
opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
|
||||
celt_glog *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
|
||||
const OpusCustomMode *mode;
|
||||
int nbEBands;
|
||||
int overlap;
|
||||
@ -527,47 +625,45 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
|
||||
} while (++c<C);
|
||||
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
|
||||
oldBandE = lpc+C*LPC_ORDER;
|
||||
oldBandE = (celt_glog*)(lpc+C*CELT_LPC_ORDER);
|
||||
oldLogE = oldBandE + 2*nbEBands;
|
||||
oldLogE2 = oldLogE + 2*nbEBands;
|
||||
backgroundLogE = oldLogE2 + 2*nbEBands;
|
||||
|
||||
loss_duration = st->loss_duration;
|
||||
start = st->start;
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
if (lpcnet != NULL) noise_based = start != 0 || (lpcnet->fec_fill_pos == 0 && (st->skip_plc || loss_duration >= 80));
|
||||
else
|
||||
#endif
|
||||
noise_based = loss_duration >= 40 || start != 0 || st->skip_plc;
|
||||
if (noise_based)
|
||||
{
|
||||
/* Noise-based PLC/CNG */
|
||||
#ifdef NORM_ALIASING_HACK
|
||||
celt_norm *X;
|
||||
#else
|
||||
VARDECL(celt_norm, X);
|
||||
#endif
|
||||
opus_uint32 seed;
|
||||
int end;
|
||||
int effEnd;
|
||||
opus_val16 decay;
|
||||
celt_glog decay;
|
||||
end = st->end;
|
||||
effEnd = IMAX(start, IMIN(end, mode->effEBands));
|
||||
|
||||
#ifdef NORM_ALIASING_HACK
|
||||
/* This is an ugly hack that breaks aliasing rules and would be easily broken,
|
||||
but it saves almost 4kB of stack. */
|
||||
X = (celt_norm*)(out_syn[C-1]+overlap/2);
|
||||
#else
|
||||
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
|
||||
#endif
|
||||
c=0; do {
|
||||
OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
|
||||
DECODE_BUFFER_SIZE-N+(overlap>>1));
|
||||
DECODE_BUFFER_SIZE-N+overlap);
|
||||
} while (++c<C);
|
||||
|
||||
if (st->prefilter_and_fold) {
|
||||
prefilter_and_fold(st, N);
|
||||
}
|
||||
|
||||
/* Energy decay */
|
||||
decay = loss_duration==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
|
||||
decay = loss_duration==0 ? GCONST(1.5f) : GCONST(.5f);
|
||||
c=0; do
|
||||
{
|
||||
for (i=start;i<end;i++)
|
||||
oldBandE[c*nbEBands+i] = MAX16(backgroundLogE[c*nbEBands+i], oldBandE[c*nbEBands+i] - decay);
|
||||
oldBandE[c*nbEBands+i] = MAXG(backgroundLogE[c*nbEBands+i], oldBandE[c*nbEBands+i] - decay);
|
||||
} while (++c<C);
|
||||
seed = st->rng;
|
||||
for (c=0;c<C;c++)
|
||||
@ -584,25 +680,30 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
seed = celt_lcg_rand(seed);
|
||||
X[boffs+j] = (celt_norm)((opus_int32)seed>>20);
|
||||
}
|
||||
renormalise_vector(X+boffs, blen, Q15ONE, st->arch);
|
||||
renormalise_vector(X+boffs, blen, Q31ONE, st->arch);
|
||||
}
|
||||
}
|
||||
st->rng = seed;
|
||||
|
||||
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
|
||||
st->prefilter_and_fold = 0;
|
||||
/* Skip regular PLC until we get two consecutive packets. */
|
||||
st->skip_plc = 1;
|
||||
} else {
|
||||
int exc_length;
|
||||
/* Pitch-based PLC */
|
||||
const opus_val16 *window;
|
||||
const celt_coef *window;
|
||||
opus_val16 *exc;
|
||||
opus_val16 fade = Q15ONE;
|
||||
int pitch_index;
|
||||
VARDECL(opus_val32, etmp);
|
||||
VARDECL(opus_val16, _exc);
|
||||
VARDECL(opus_val16, fir_tmp);
|
||||
|
||||
if (loss_duration == 0)
|
||||
{
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
if (lpcnet != NULL && lpcnet->loaded) update_plc_state(lpcnet, decode_mem, &st->plc_preemphasis_mem, C);
|
||||
#endif
|
||||
st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
|
||||
} else {
|
||||
pitch_index = st->last_pitch_index;
|
||||
@ -613,10 +714,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
decaying signal, but we can't get more than MAX_PERIOD. */
|
||||
exc_length = IMIN(2*pitch_index, MAX_PERIOD);
|
||||
|
||||
ALLOC(etmp, overlap, opus_val32);
|
||||
ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
|
||||
ALLOC(_exc, MAX_PERIOD+CELT_LPC_ORDER, opus_val16);
|
||||
ALLOC(fir_tmp, exc_length, opus_val16);
|
||||
exc = _exc+LPC_ORDER;
|
||||
exc = _exc+CELT_LPC_ORDER;
|
||||
window = mode->window;
|
||||
c=0; do {
|
||||
opus_val16 decay;
|
||||
@ -628,16 +728,16 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
int j;
|
||||
|
||||
buf = decode_mem[c];
|
||||
for (i=0;i<MAX_PERIOD+LPC_ORDER;i++)
|
||||
exc[i-LPC_ORDER] = SROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-LPC_ORDER+i], SIG_SHIFT);
|
||||
for (i=0;i<MAX_PERIOD+CELT_LPC_ORDER;i++)
|
||||
exc[i-CELT_LPC_ORDER] = SROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-CELT_LPC_ORDER+i], SIG_SHIFT);
|
||||
|
||||
if (loss_duration == 0)
|
||||
{
|
||||
opus_val32 ac[LPC_ORDER+1];
|
||||
opus_val32 ac[CELT_LPC_ORDER+1];
|
||||
/* Compute LPC coefficients for the last MAX_PERIOD samples before
|
||||
the first loss so we can work in the excitation-filter domain. */
|
||||
_celt_autocorr(exc, ac, window, overlap,
|
||||
LPC_ORDER, MAX_PERIOD, st->arch);
|
||||
CELT_LPC_ORDER, MAX_PERIOD, st->arch);
|
||||
/* Add a noise floor of -40 dB. */
|
||||
#ifdef FIXED_POINT
|
||||
ac[0] += SHR32(ac[0],13);
|
||||
@ -645,7 +745,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
ac[0] *= 1.0001f;
|
||||
#endif
|
||||
/* Use lag windowing to stabilize the Levinson-Durbin recursion. */
|
||||
for (i=1;i<=LPC_ORDER;i++)
|
||||
for (i=1;i<=CELT_LPC_ORDER;i++)
|
||||
{
|
||||
/*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
|
||||
#ifdef FIXED_POINT
|
||||
@ -654,7 +754,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
|
||||
#endif
|
||||
}
|
||||
_celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
|
||||
_celt_lpc(lpc+c*CELT_LPC_ORDER, ac, CELT_LPC_ORDER);
|
||||
#ifdef FIXED_POINT
|
||||
/* For fixed-point, apply bandwidth expansion until we can guarantee that
|
||||
no overflow can happen in the IIR filter. This means:
|
||||
@ -662,13 +762,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
while (1) {
|
||||
opus_val16 tmp=Q15ONE;
|
||||
opus_val32 sum=QCONST16(1., SIG_SHIFT);
|
||||
for (i=0;i<LPC_ORDER;i++)
|
||||
sum += ABS16(lpc[c*LPC_ORDER+i]);
|
||||
for (i=0;i<CELT_LPC_ORDER;i++)
|
||||
sum += ABS16(lpc[c*CELT_LPC_ORDER+i]);
|
||||
if (sum < 65535) break;
|
||||
for (i=0;i<LPC_ORDER;i++)
|
||||
for (i=0;i<CELT_LPC_ORDER;i++)
|
||||
{
|
||||
tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
|
||||
lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
|
||||
lpc[c*CELT_LPC_ORDER+i] = MULT16_16_Q15(lpc[c*CELT_LPC_ORDER+i], tmp);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@ -678,8 +778,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
{
|
||||
/* Compute the excitation for exc_length samples before the loss. We need the copy
|
||||
because celt_fir() cannot filter in-place. */
|
||||
celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
|
||||
fir_tmp, exc_length, LPC_ORDER, st->arch);
|
||||
celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*CELT_LPC_ORDER,
|
||||
fir_tmp, exc_length, CELT_LPC_ORDER, st->arch);
|
||||
OPUS_COPY(exc+MAX_PERIOD-exc_length, fir_tmp, exc_length);
|
||||
}
|
||||
|
||||
@ -737,15 +837,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
S1 += SHR32(MULT16_16(tmp, tmp), 10);
|
||||
}
|
||||
{
|
||||
opus_val16 lpc_mem[LPC_ORDER];
|
||||
opus_val16 lpc_mem[CELT_LPC_ORDER];
|
||||
/* Copy the last decoded samples (prior to the overlap region) to
|
||||
synthesis filter memory so we can have a continuous signal. */
|
||||
for (i=0;i<LPC_ORDER;i++)
|
||||
for (i=0;i<CELT_LPC_ORDER;i++)
|
||||
lpc_mem[i] = SROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
|
||||
/* Apply the synthesis filter to convert the excitation back into
|
||||
the signal domain. */
|
||||
celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
|
||||
buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
|
||||
celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*CELT_LPC_ORDER,
|
||||
buf+DECODE_BUFFER_SIZE-N, extrapolation_len, CELT_LPC_ORDER,
|
||||
lpc_mem, st->arch);
|
||||
#ifdef FIXED_POINT
|
||||
for (i=0; i < extrapolation_len; i++)
|
||||
@ -780,7 +880,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
for (i=0;i<overlap;i++)
|
||||
{
|
||||
opus_val16 tmp_g = Q15ONE
|
||||
- MULT16_16_Q15(window[i], Q15ONE-ratio);
|
||||
- MULT16_16_Q15(COEF2VAL16(window[i]), Q15ONE-ratio);
|
||||
buf[DECODE_BUFFER_SIZE-N+i] =
|
||||
MULT16_32_Q15(tmp_g, buf[DECODE_BUFFER_SIZE-N+i]);
|
||||
}
|
||||
@ -792,23 +892,65 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
}
|
||||
}
|
||||
|
||||
/* Apply the pre-filter to the MDCT overlap for the next frame because
|
||||
the post-filter will be re-applied in the decoder after the MDCT
|
||||
overlap. */
|
||||
comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
|
||||
st->postfilter_period, st->postfilter_period, overlap,
|
||||
-st->postfilter_gain, -st->postfilter_gain,
|
||||
st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch);
|
||||
|
||||
/* Simulate TDAC on the concealed audio so that it blends with the
|
||||
MDCT of the next frame. */
|
||||
for (i=0;i<overlap/2;i++)
|
||||
{
|
||||
buf[DECODE_BUFFER_SIZE+i] =
|
||||
MULT16_32_Q15(window[i], etmp[overlap-1-i])
|
||||
+ MULT16_32_Q15(window[overlap-i-1], etmp[i]);
|
||||
}
|
||||
} while (++c<C);
|
||||
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
if (lpcnet != NULL && lpcnet->loaded && (st->complexity >= 5 || lpcnet->fec_fill_pos > 0)) {
|
||||
float overlap_mem;
|
||||
int samples_needed16k;
|
||||
celt_sig *buf;
|
||||
VARDECL(float, buf_copy);
|
||||
buf = decode_mem[0];
|
||||
ALLOC(buf_copy, C*overlap, float);
|
||||
c=0; do {
|
||||
OPUS_COPY(buf_copy+c*overlap, &decode_mem[c][DECODE_BUFFER_SIZE-N], overlap);
|
||||
} while (++c<C);
|
||||
|
||||
/* Need enough samples from the PLC to cover the frame size, resampling delay,
|
||||
and the overlap at the end. */
|
||||
samples_needed16k = (N+SINC_ORDER+overlap)/3;
|
||||
if (loss_duration == 0) {
|
||||
st->plc_fill = 0;
|
||||
}
|
||||
while (st->plc_fill < samples_needed16k) {
|
||||
lpcnet_plc_conceal(lpcnet, &st->plc_pcm[st->plc_fill]);
|
||||
st->plc_fill += FRAME_SIZE;
|
||||
}
|
||||
/* Resample to 48 kHz. */
|
||||
for (i=0;i<(N+overlap)/3;i++) {
|
||||
int j;
|
||||
float sum;
|
||||
for (sum=0, j=0;j<17;j++) sum += 3*st->plc_pcm[i+j]*sinc_filter[3*j];
|
||||
buf[DECODE_BUFFER_SIZE-N+3*i] = sum;
|
||||
for (sum=0, j=0;j<16;j++) sum += 3*st->plc_pcm[i+j+1]*sinc_filter[3*j+2];
|
||||
buf[DECODE_BUFFER_SIZE-N+3*i+1] = sum;
|
||||
for (sum=0, j=0;j<16;j++) sum += 3*st->plc_pcm[i+j+1]*sinc_filter[3*j+1];
|
||||
buf[DECODE_BUFFER_SIZE-N+3*i+2] = sum;
|
||||
}
|
||||
OPUS_MOVE(st->plc_pcm, &st->plc_pcm[N/3], st->plc_fill-N/3);
|
||||
st->plc_fill -= N/3;
|
||||
for (i=0;i<N;i++) {
|
||||
float tmp = buf[DECODE_BUFFER_SIZE-N+i];
|
||||
buf[DECODE_BUFFER_SIZE-N+i] -= PREEMPHASIS*st->plc_preemphasis_mem;
|
||||
st->plc_preemphasis_mem = tmp;
|
||||
}
|
||||
overlap_mem = st->plc_preemphasis_mem;
|
||||
for (i=0;i<overlap;i++) {
|
||||
float tmp = buf[DECODE_BUFFER_SIZE+i];
|
||||
buf[DECODE_BUFFER_SIZE+i] -= PREEMPHASIS*overlap_mem;
|
||||
overlap_mem = tmp;
|
||||
}
|
||||
/* For now, we just do mono PLC. */
|
||||
if (C==2) OPUS_COPY(decode_mem[1], decode_mem[0], DECODE_BUFFER_SIZE+overlap);
|
||||
c=0; do {
|
||||
/* Cross-fade with 48-kHz non-neural PLC for the first 2.5 ms to avoid a discontinuity. */
|
||||
if (loss_duration == 0) {
|
||||
for (i=0;i<overlap;i++) decode_mem[c][DECODE_BUFFER_SIZE-N+i] = (1-window[i])*buf_copy[c*overlap+i] + (window[i])*decode_mem[c][DECODE_BUFFER_SIZE-N+i];
|
||||
}
|
||||
} while (++c<C);
|
||||
}
|
||||
#endif
|
||||
st->prefilter_and_fold = 1;
|
||||
}
|
||||
|
||||
/* Saturate to something large to avoid wrap-around. */
|
||||
@ -817,18 +959,18 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
|
||||
RESTORE_STACK;
|
||||
}
|
||||
|
||||
int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
|
||||
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
|
||||
int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
|
||||
int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
,LPCNetPLCState *lpcnet
|
||||
#endif
|
||||
)
|
||||
{
|
||||
int c, i, N;
|
||||
int spread_decision;
|
||||
opus_int32 bits;
|
||||
ec_dec _dec;
|
||||
#ifdef NORM_ALIASING_HACK
|
||||
celt_norm *X;
|
||||
#else
|
||||
VARDECL(celt_norm, X);
|
||||
#endif
|
||||
VARDECL(int, fine_quant);
|
||||
VARDECL(int, pulses);
|
||||
VARDECL(int, cap);
|
||||
@ -839,7 +981,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
celt_sig *decode_mem[2];
|
||||
celt_sig *out_syn[2];
|
||||
opus_val16 *lpc;
|
||||
opus_val16 *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
|
||||
celt_glog *oldBandE, *oldLogE, *oldLogE2, *backgroundLogE;
|
||||
|
||||
int shortBlocks;
|
||||
int isTransient;
|
||||
@ -868,7 +1010,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
int nbEBands;
|
||||
int overlap;
|
||||
const opus_int16 *eBands;
|
||||
opus_val16 max_background_increase;
|
||||
celt_glog max_background_increase;
|
||||
ALLOC_STACK;
|
||||
|
||||
VALIDATE_CELT_DECODER(st);
|
||||
@ -881,7 +1023,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
frame_size *= st->downsample;
|
||||
|
||||
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
|
||||
oldBandE = lpc+CC*LPC_ORDER;
|
||||
oldBandE = (celt_glog*)(lpc+CC*CELT_LPC_ORDER);
|
||||
oldLogE = oldBandE + 2*nbEBands;
|
||||
oldLogE2 = oldLogE + 2*nbEBands;
|
||||
backgroundLogE = oldLogE2 + 2*nbEBands;
|
||||
@ -935,15 +1077,25 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
|
||||
if (data == NULL || len<=1)
|
||||
{
|
||||
celt_decode_lost(st, N, LM);
|
||||
celt_decode_lost(st, N, LM
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
, lpcnet
|
||||
#endif
|
||||
);
|
||||
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
|
||||
RESTORE_STACK;
|
||||
return frame_size/st->downsample;
|
||||
}
|
||||
#ifdef ENABLE_DEEP_PLC
|
||||
else {
|
||||
/* FIXME: This is a bit of a hack just to make sure opus_decode_native() knows we're no longer in PLC. */
|
||||
if (lpcnet) lpcnet->blend = 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Check if there are at least two packets received consecutively before
|
||||
* turning on the pitch-based PLC */
|
||||
st->skip_plc = st->loss_duration != 0;
|
||||
if (st->loss_duration == 0) st->skip_plc = 0;
|
||||
|
||||
if (dec == NULL)
|
||||
{
|
||||
@ -954,7 +1106,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
if (C==1)
|
||||
{
|
||||
for (i=0;i<nbEBands;i++)
|
||||
oldBandE[i]=MAX16(oldBandE[i],oldBandE[nbEBands+i]);
|
||||
oldBandE[i]=MAXG(oldBandE[i],oldBandE[nbEBands+i]);
|
||||
}
|
||||
|
||||
total_bits = len*8;
|
||||
@ -1006,6 +1158,37 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
|
||||
/* Decode the global flags (first symbols in the stream) */
|
||||
intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
|
||||
/* If recovering from packet loss, make sure we make the energy prediction safe to reduce the
|
||||
risk of getting loud artifacts. */
|
||||
if (!intra_ener && st->loss_duration != 0) {
|
||||
c=0; do
|
||||
{
|
||||
celt_glog safety = 0;
|
||||
int missing = IMIN(10, st->loss_duration>>LM);
|
||||
if (LM==0) safety = GCONST(1.5f);
|
||||
else if (LM==1) safety = GCONST(.5f);
|
||||
for (i=start;i<end;i++)
|
||||
{
|
||||
if (oldBandE[c*nbEBands+i] < MAXG(oldLogE[c*nbEBands+i], oldLogE2[c*nbEBands+i])) {
|
||||
/* If energy is going down already, continue the trend. */
|
||||
opus_val32 slope;
|
||||
opus_val32 E0, E1, E2;
|
||||
E0 = oldBandE[c*nbEBands+i];
|
||||
E1 = oldLogE[c*nbEBands+i];
|
||||
E2 = oldLogE2[c*nbEBands+i];
|
||||
slope = MAX32(E1 - E0, HALF32(E2 - E0));
|
||||
slope = MING(slope, GCONST(2.f));
|
||||
E0 -= MAX32(0, (1+missing)*slope);
|
||||
oldBandE[c*nbEBands+i] = MAX32(-GCONST(20.f), E0);
|
||||
} else {
|
||||
/* Otherwise take the min of the last frames. */
|
||||
oldBandE[c*nbEBands+i] = MING(MING(oldBandE[c*nbEBands+i], oldLogE[c*nbEBands+i]), oldLogE2[c*nbEBands+i]);
|
||||
}
|
||||
/* Shorter frames have more natural fluctuations -- play it safe. */
|
||||
oldBandE[c*nbEBands+i] -= safety;
|
||||
}
|
||||
} while (++c<2);
|
||||
}
|
||||
/* Get band energies */
|
||||
unquant_coarse_energy(mode, start, end, oldBandE,
|
||||
intra_ener, dec, C, LM);
|
||||
@ -1059,7 +1242,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
alloc_trim = tell+(6<<BITRES) <= total_bits ?
|
||||
ec_dec_icdf(dec, trim_icdf, 7) : 5;
|
||||
|
||||
bits = (((opus_int32)len*8)<<BITRES) - ec_tell_frac(dec) - 1;
|
||||
bits = (((opus_int32)len*8)<<BITRES) - (opus_int32)ec_tell_frac(dec) - 1;
|
||||
anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
|
||||
bits -= anti_collapse_rsv;
|
||||
|
||||
@ -1073,19 +1256,13 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
|
||||
|
||||
c=0; do {
|
||||
OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
|
||||
OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap);
|
||||
} while (++c<CC);
|
||||
|
||||
/* Decode fixed codebook */
|
||||
ALLOC(collapse_masks, C*nbEBands, unsigned char);
|
||||
|
||||
#ifdef NORM_ALIASING_HACK
|
||||
/* This is an ugly hack that breaks aliasing rules and would be easily broken,
|
||||
but it saves almost 4kB of stack. */
|
||||
X = (celt_norm*)(out_syn[CC-1]+overlap/2);
|
||||
#else
|
||||
ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
|
||||
#endif
|
||||
|
||||
quant_all_bands(0, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks,
|
||||
NULL, pulses, shortBlocks, spread_decision, dual_stereo, intensity, tf_res,
|
||||
@ -1102,14 +1279,16 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
|
||||
if (anti_collapse_on)
|
||||
anti_collapse(mode, X, collapse_masks, LM, C, N,
|
||||
start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch);
|
||||
start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, 0, st->arch);
|
||||
|
||||
if (silence)
|
||||
{
|
||||
for (i=0;i<C*nbEBands;i++)
|
||||
oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
|
||||
oldBandE[i] = -GCONST(28.f);
|
||||
}
|
||||
if (st->prefilter_and_fold) {
|
||||
prefilter_and_fold(st, N);
|
||||
}
|
||||
|
||||
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd,
|
||||
C, CC, isTransient, LM, st->downsample, silence, st->arch);
|
||||
|
||||
@ -1147,32 +1326,33 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
OPUS_COPY(oldLogE, oldBandE, 2*nbEBands);
|
||||
} else {
|
||||
for (i=0;i<2*nbEBands;i++)
|
||||
oldLogE[i] = MIN16(oldLogE[i], oldBandE[i]);
|
||||
oldLogE[i] = MING(oldLogE[i], oldBandE[i]);
|
||||
}
|
||||
/* In normal circumstances, we only allow the noise floor to increase by
|
||||
up to 2.4 dB/second, but when we're in DTX we give the weight of
|
||||
all missing packets to the update packet. */
|
||||
max_background_increase = IMIN(160, st->loss_duration+M)*QCONST16(0.001f,DB_SHIFT);
|
||||
max_background_increase = IMIN(160, st->loss_duration+M)*GCONST(0.001f);
|
||||
for (i=0;i<2*nbEBands;i++)
|
||||
backgroundLogE[i] = MIN16(backgroundLogE[i] + max_background_increase, oldBandE[i]);
|
||||
backgroundLogE[i] = MING(backgroundLogE[i] + max_background_increase, oldBandE[i]);
|
||||
/* In case start or end were to change */
|
||||
c=0; do
|
||||
{
|
||||
for (i=0;i<start;i++)
|
||||
{
|
||||
oldBandE[c*nbEBands+i]=0;
|
||||
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
|
||||
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-GCONST(28.f);
|
||||
}
|
||||
for (i=end;i<nbEBands;i++)
|
||||
{
|
||||
oldBandE[c*nbEBands+i]=0;
|
||||
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-QCONST16(28.f,DB_SHIFT);
|
||||
oldLogE[c*nbEBands+i]=oldLogE2[c*nbEBands+i]=-GCONST(28.f);
|
||||
}
|
||||
} while (++c<2);
|
||||
st->rng = dec->rng;
|
||||
|
||||
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
|
||||
st->loss_duration = 0;
|
||||
st->prefilter_and_fold = 0;
|
||||
RESTORE_STACK;
|
||||
if (ec_tell(dec) > 8*len)
|
||||
return OPUS_INTERNAL_ERROR;
|
||||
@ -1181,50 +1361,89 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
|
||||
return frame_size/st->downsample;
|
||||
}
|
||||
|
||||
/* Convenience entry point that forwards every argument unchanged to
   celt_decode_with_ec_dred() and returns its result. When ENABLE_DEEP_PLC is
   defined, that function takes one extra trailing argument, for which NULL
   is supplied here. */
int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
      int len, opus_res * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
{
#ifdef ENABLE_DEEP_PLC
   return celt_decode_with_ec_dred(st, data, len, pcm, frame_size, dec, accum, NULL);
#else
   return celt_decode_with_ec_dred(st, data, len, pcm, frame_size, dec, accum);
#endif
}
|
||||
|
||||
#ifdef CUSTOM_MODES
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
#if defined(FIXED_POINT) && !defined(ENABLE_RES24)
|
||||
int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
|
||||
{
|
||||
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
|
||||
}
|
||||
#else
|
||||
/* 16-bit output variant used when the decoder's native opus_res samples
   cannot be written to pcm directly: the frame is decoded into a temporary
   opus_res buffer of st->channels*frame_size entries, then each value is
   converted with RES2INT16().
   Returns OPUS_BAD_ARG if pcm is NULL; otherwise returns whatever
   celt_decode_with_ec() returned. pcm is only written when that value is
   positive, in which case the first st->channels*ret entries are filled. */
int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
{
   int idx;
   int nb_decoded;
   int channels;
   VARDECL(opus_res, decoded);
   ALLOC_STACK;

   if (pcm==NULL)
      return OPUS_BAD_ARG;

   channels = st->channels;
   ALLOC(decoded, channels*frame_size, opus_res);

   nb_decoded = celt_decode_with_ec(st, data, len, decoded, frame_size, NULL, 0);
   if (nb_decoded>0)
   {
      /* Convert the decoded values to 16-bit output one by one. */
      for (idx=0;idx<channels*nb_decoded;idx++)
         pcm[idx] = RES2INT16(decoded[idx]);
   }

   RESTORE_STACK;
   return nb_decoded;
}
|
||||
#endif
|
||||
|
||||
#if defined(FIXED_POINT) && defined(ENABLE_RES24)
|
||||
int opus_custom_decode24(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int32 * OPUS_RESTRICT pcm, int frame_size)
|
||||
{
|
||||
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
|
||||
}
|
||||
#else
|
||||
int opus_custom_decode24(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int32 * OPUS_RESTRICT pcm, int frame_size)
|
||||
{
|
||||
int j, ret, C, N;
|
||||
VARDECL(opus_res, out);
|
||||
ALLOC_STACK;
|
||||
|
||||
if (pcm==NULL)
|
||||
return OPUS_BAD_ARG;
|
||||
|
||||
C = st->channels;
|
||||
N = frame_size;
|
||||
|
||||
ALLOC(out, C*N, opus_res);
|
||||
ret = celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
|
||||
if (ret>0)
|
||||
for (j=0;j<C*ret;j++)
|
||||
pcm[j]=RES2INT24(out[j]);
|
||||
|
||||
RESTORE_STACK;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef DISABLE_FLOAT_API
|
||||
int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
|
||||
{
|
||||
int j, ret, C, N;
|
||||
VARDECL(opus_int16, out);
|
||||
ALLOC_STACK;
|
||||
|
||||
if (pcm==NULL)
|
||||
return OPUS_BAD_ARG;
|
||||
|
||||
C = st->channels;
|
||||
N = frame_size;
|
||||
|
||||
ALLOC(out, C*N, opus_int16);
|
||||
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
|
||||
if (ret>0)
|
||||
for (j=0;j<C*ret;j++)
|
||||
pcm[j]=out[j]*(1.f/32768.f);
|
||||
|
||||
RESTORE_STACK;
|
||||
return ret;
|
||||
}
|
||||
#endif /* DISABLE_FLOAT_API */
|
||||
|
||||
#else
|
||||
|
||||
# if !defined(FIXED_POINT)
|
||||
int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
|
||||
{
|
||||
return celt_decode_with_ec(st, data, len, pcm, frame_size, NULL, 0);
|
||||
}
|
||||
|
||||
int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, opus_int16 * OPUS_RESTRICT pcm, int frame_size)
|
||||
# else
|
||||
int opus_custom_decode_float(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data, int len, float * OPUS_RESTRICT pcm, int frame_size)
|
||||
{
|
||||
int j, ret, C, N;
|
||||
VARDECL(celt_sig, out);
|
||||
VARDECL(opus_res, out);
|
||||
ALLOC_STACK;
|
||||
|
||||
if (pcm==NULL)
|
||||
@ -1232,19 +1451,20 @@ int opus_custom_decode(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data
|
||||
|
||||
C = st->channels;
|
||||
N = frame_size;
|
||||
ALLOC(out, C*N, celt_sig);
|
||||
|
||||
ALLOC(out, C*N, opus_res);
|
||||
ret=celt_decode_with_ec(st, data, len, out, frame_size, NULL, 0);
|
||||
|
||||
if (ret>0)
|
||||
for (j=0;j<C*ret;j++)
|
||||
pcm[j] = FLOAT2INT16 (out[j]);
|
||||
pcm[j]=RES2FLOAT(out[j]);
|
||||
|
||||
RESTORE_STACK;
|
||||
return ret;
|
||||
}
|
||||
# endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* CUSTOM_MODES */
|
||||
|
||||
int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
|
||||
@ -1254,6 +1474,26 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
|
||||
va_start(ap, request);
|
||||
switch (request)
|
||||
{
|
||||
case OPUS_SET_COMPLEXITY_REQUEST:
|
||||
{
|
||||
opus_int32 value = va_arg(ap, opus_int32);
|
||||
if(value<0 || value>10)
|
||||
{
|
||||
goto bad_arg;
|
||||
}
|
||||
st->complexity = value;
|
||||
}
|
||||
break;
|
||||
case OPUS_GET_COMPLEXITY_REQUEST:
|
||||
{
|
||||
opus_int32 *value = va_arg(ap, opus_int32*);
|
||||
if (!value)
|
||||
{
|
||||
goto bad_arg;
|
||||
}
|
||||
*value = st->complexity;
|
||||
}
|
||||
break;
|
||||
case CELT_SET_START_BAND_REQUEST:
|
||||
{
|
||||
opus_int32 value = va_arg(ap, opus_int32);
|
||||
@ -1298,16 +1538,17 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
|
||||
case OPUS_RESET_STATE:
|
||||
{
|
||||
int i;
|
||||
opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
|
||||
opus_val16 *lpc;
|
||||
celt_glog *oldBandE, *oldLogE, *oldLogE2;
|
||||
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
|
||||
oldBandE = lpc+st->channels*LPC_ORDER;
|
||||
oldBandE = (celt_glog*)(lpc+st->channels*CELT_LPC_ORDER);
|
||||
oldLogE = oldBandE + 2*st->mode->nbEBands;
|
||||
oldLogE2 = oldLogE + 2*st->mode->nbEBands;
|
||||
OPUS_CLEAR((char*)&st->DECODER_RESET_START,
|
||||
opus_custom_decoder_get_size(st->mode, st->channels)-
|
||||
((char*)&st->DECODER_RESET_START - (char*)st));
|
||||
for (i=0;i<2*st->mode->nbEBands;i++)
|
||||
oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
|
||||
oldLogE[i]=oldLogE2[i]=-GCONST(28.f);
|
||||
st->skip_plc = 1;
|
||||
}
|
||||
break;
|
||||
|
647
third_party/opus/src/celt/celt_encoder.c
vendored
647
third_party/opus/src/celt/celt_encoder.c
vendored
File diff suppressed because it is too large
Load Diff
34
third_party/opus/src/celt/celt_lpc.c
vendored
34
third_party/opus/src/celt/celt_lpc.c
vendored
@ -44,7 +44,7 @@ int p
|
||||
opus_val32 r;
|
||||
opus_val32 error = ac[0];
|
||||
#ifdef FIXED_POINT
|
||||
opus_val32 lpc[LPC_ORDER];
|
||||
opus_val32 lpc[CELT_LPC_ORDER];
|
||||
#else
|
||||
float *lpc = _lpc;
|
||||
#endif
|
||||
@ -158,7 +158,17 @@ void celt_fir_c(
|
||||
sum[1] = SHL32(EXTEND32(x[i+1]), SIG_SHIFT);
|
||||
sum[2] = SHL32(EXTEND32(x[i+2]), SIG_SHIFT);
|
||||
sum[3] = SHL32(EXTEND32(x[i+3]), SIG_SHIFT);
|
||||
xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
|
||||
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
|
||||
{
|
||||
opus_val32 sum_c[4];
|
||||
memcpy(sum_c, sum, sizeof(sum_c));
|
||||
xcorr_kernel_c(rnum, x+i-ord, sum_c, ord);
|
||||
#endif
|
||||
xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
|
||||
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
|
||||
celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
|
||||
}
|
||||
#endif
|
||||
y[i ] = SROUND16(sum[0], SIG_SHIFT);
|
||||
y[i+1] = SROUND16(sum[1], SIG_SHIFT);
|
||||
y[i+2] = SROUND16(sum[2], SIG_SHIFT);
|
||||
@ -222,8 +232,17 @@ void celt_iir(const opus_val32 *_x,
|
||||
sum[1]=_x[i+1];
|
||||
sum[2]=_x[i+2];
|
||||
sum[3]=_x[i+3];
|
||||
xcorr_kernel(rden, y+i, sum, ord, arch);
|
||||
|
||||
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
|
||||
{
|
||||
opus_val32 sum_c[4];
|
||||
memcpy(sum_c, sum, sizeof(sum_c));
|
||||
xcorr_kernel_c(rden, y+i, sum_c, ord);
|
||||
#endif
|
||||
xcorr_kernel(rden, y+i, sum, ord, arch);
|
||||
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
|
||||
celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
|
||||
}
|
||||
#endif
|
||||
/* Patch up the result to compensate for the fact that this is an IIR */
|
||||
y[i+ord ] = -SROUND16(sum[0],SIG_SHIFT);
|
||||
_y[i ] = sum[0];
|
||||
@ -258,7 +277,7 @@ void celt_iir(const opus_val32 *_x,
|
||||
int _celt_autocorr(
|
||||
const opus_val16 *x, /* in: [0...n-1] samples x */
|
||||
opus_val32 *ac, /* out: [0...lag-1] ac values */
|
||||
const opus_val16 *window,
|
||||
const celt_coef *window,
|
||||
int overlap,
|
||||
int lag,
|
||||
int n,
|
||||
@ -283,8 +302,9 @@ int _celt_autocorr(
|
||||
xx[i] = x[i];
|
||||
for (i=0;i<overlap;i++)
|
||||
{
|
||||
xx[i] = MULT16_16_Q15(x[i],window[i]);
|
||||
xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]);
|
||||
opus_val16 w = COEF2VAL16(window[i]);
|
||||
xx[i] = MULT16_16_Q15(x[i],w);
|
||||
xx[n-i-1] = MULT16_16_Q15(x[n-i-1],w);
|
||||
}
|
||||
xptr = xx;
|
||||
}
|
||||
|
4
third_party/opus/src/celt/celt_lpc.h
vendored
4
third_party/opus/src/celt/celt_lpc.h
vendored
@ -35,7 +35,7 @@
|
||||
#include "x86/celt_lpc_sse.h"
|
||||
#endif
|
||||
|
||||
#define LPC_ORDER 24
|
||||
#define CELT_LPC_ORDER 24
|
||||
|
||||
void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);
|
||||
|
||||
@ -61,6 +61,6 @@ void celt_iir(const opus_val32 *x,
|
||||
int arch);
|
||||
|
||||
int _celt_autocorr(const opus_val16 *x, opus_val32 *ac,
|
||||
const opus_val16 *window, int overlap, int lag, int n, int arch);
|
||||
const celt_coef *window, int overlap, int lag, int n, int arch);
|
||||
|
||||
#endif /* PLC_H */
|
||||
|
7
third_party/opus/src/celt/cpu_support.h
vendored
7
third_party/opus/src/celt/cpu_support.h
vendored
@ -35,19 +35,20 @@
|
||||
(defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
|
||||
#include "arm/armcpu.h"
|
||||
|
||||
/* We currently support 4 ARM variants:
|
||||
/* We currently support 5 ARM variants:
|
||||
* arch[0] -> ARMv4
|
||||
* arch[1] -> ARMv5E
|
||||
* arch[2] -> ARMv6
|
||||
* arch[3] -> NEON
|
||||
* arch[4] -> NEON+DOTPROD
|
||||
*/
|
||||
#define OPUS_ARCHMASK 3
|
||||
#define OPUS_ARCHMASK 7
|
||||
|
||||
#elif defined(OPUS_HAVE_RTCD) && \
|
||||
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
|
||||
|
||||
#include "x86/x86cpu.h"
|
||||
/* We currently support 5 x86 variants:
|
||||
|
@ -49,6 +49,11 @@
|
||||
#define WORD32 FLOAT
|
||||
#endif
|
||||
|
||||
#define COEF16(x, a) ((opus_int16)SATURATE(((opus_int64)(x)+(1<<(a)>>1))>>(a), 32767))
|
||||
/* Local stub for the mode-dumping tool: always reports architecture index 0,
   so no run-time CPU detection is performed here. */
int opus_select_arch(void)
{
   return 0;
}
|
||||
|
||||
void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
|
||||
{
|
||||
int i, j, k;
|
||||
@ -94,9 +99,19 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
|
||||
|
||||
fprintf(file, "#ifndef DEF_WINDOW%d\n", mode->overlap);
|
||||
fprintf(file, "#define DEF_WINDOW%d\n", mode->overlap);
|
||||
fprintf (file, "static const opus_val16 window%d[%d] = {\n", mode->overlap, mode->overlap);
|
||||
fprintf (file, "static const celt_coef window%d[%d] = {\n", mode->overlap, mode->overlap);
|
||||
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
|
||||
fprintf(file, "#ifdef ENABLE_QEXT\n");
|
||||
for (j=0;j<mode->overlap;j++)
|
||||
fprintf (file, WORD32 ",%c", mode->window[j],(j+6)%5==0?'\n':' ');
|
||||
fprintf(file, "#else\n");
|
||||
for (j=0;j<mode->overlap;j++)
|
||||
fprintf (file, WORD16 ",%c", COEF16(mode->window[j], 16),(j+6)%5==0?'\n':' ');
|
||||
fprintf(file, "#endif\n");
|
||||
#else
|
||||
for (j=0;j<mode->overlap;j++)
|
||||
fprintf (file, WORD16 ",%c", mode->window[j],(j+6)%5==0?'\n':' ');
|
||||
#endif
|
||||
fprintf (file, "};\n");
|
||||
fprintf(file, "#endif\n");
|
||||
fprintf(file, "\n");
|
||||
@ -148,10 +163,21 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
|
||||
/* FFT twiddles */
|
||||
fprintf(file, "#ifndef FFT_TWIDDLES%d_%d\n", mode->Fs, mdctSize);
|
||||
fprintf(file, "#define FFT_TWIDDLES%d_%d\n", mode->Fs, mdctSize);
|
||||
|
||||
fprintf (file, "static const kiss_twiddle_cpx fft_twiddles%d_%d[%d] = {\n",
|
||||
mode->Fs, mdctSize, mode->mdct.kfft[0]->nfft);
|
||||
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
|
||||
fprintf(file, "#ifdef ENABLE_QEXT\n");
|
||||
for (j=0;j<mode->mdct.kfft[0]->nfft;j++)
|
||||
fprintf (file, "{" WORD32 ", " WORD32 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
|
||||
fprintf(file, "#else\n");
|
||||
for (j=0;j<mode->mdct.kfft[0]->nfft;j++)
|
||||
fprintf (file, "{" WORD16 ", " WORD16 "},%c", COEF16(mode->mdct.kfft[0]->twiddles[j].r,16), COEF16(mode->mdct.kfft[0]->twiddles[j].i,16),(j+3)%2==0?'\n':' ');
|
||||
fprintf(file, "#endif\n");
|
||||
#else
|
||||
for (j=0;j<mode->mdct.kfft[0]->nfft;j++)
|
||||
fprintf (file, "{" WORD16 ", " WORD16 "},%c", mode->mdct.kfft[0]->twiddles[j].r, mode->mdct.kfft[0]->twiddles[j].i,(j+3)%2==0?'\n':' ');
|
||||
#endif
|
||||
fprintf (file, "};\n");
|
||||
|
||||
#ifdef OVERRIDE_FFT
|
||||
@ -180,7 +206,16 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
|
||||
fprintf (file, "static const kiss_fft_state fft_state%d_%d_%d = {\n",
|
||||
mode->Fs, mdctSize, k);
|
||||
fprintf (file, "%d, /* nfft */\n", mode->mdct.kfft[k]->nfft);
|
||||
|
||||
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
|
||||
fprintf(file, "#ifdef ENABLE_QEXT\n");
|
||||
fprintf (file, WORD32 ", /* scale */\n", mode->mdct.kfft[k]->scale);
|
||||
fprintf(file, "#else\n");
|
||||
fprintf (file, WORD16 ", /* scale */\n", COEF16(mode->mdct.kfft[k]->scale, 15));
|
||||
fprintf(file, "#endif\n");
|
||||
#else
|
||||
fprintf (file, WORD16 ", /* scale */\n", mode->mdct.kfft[k]->scale);
|
||||
#endif
|
||||
#ifdef FIXED_POINT
|
||||
fprintf (file, "%d, /* scale_shift */\n", mode->mdct.kfft[k]->scale_shift);
|
||||
#endif
|
||||
@ -211,10 +246,22 @@ void dump_modes(FILE *file, CELTMode **modes, int nb_modes)
|
||||
mdct_twiddles_size = mode->mdct.n-(mode->mdct.n/2>>mode->mdct.maxshift);
|
||||
fprintf(file, "#ifndef MDCT_TWIDDLES%d\n", mdctSize);
|
||||
fprintf(file, "#define MDCT_TWIDDLES%d\n", mdctSize);
|
||||
fprintf (file, "static const opus_val16 mdct_twiddles%d[%d] = {\n",
|
||||
fprintf (file, "static const celt_coef mdct_twiddles%d[%d] = {\n",
|
||||
mdctSize, mdct_twiddles_size);
|
||||
|
||||
#if defined(FIXED_POINT) && defined(ENABLE_QEXT)
|
||||
fprintf(file, "#ifdef ENABLE_QEXT\n");
|
||||
for (j=0;j<mdct_twiddles_size;j++)
|
||||
fprintf (file, WORD32 ",%c", mode->mdct.trig[j],(j+6)%5==0?'\n':' ');
|
||||
fprintf(file, "#else\n");
|
||||
for (j=0;j<mdct_twiddles_size;j++)
|
||||
fprintf (file, WORD16 ",%c", COEF16(mode->mdct.trig[j], 16),(j+6)%5==0?'\n':' ');
|
||||
fprintf(file, "#endif\n");
|
||||
#else
|
||||
for (j=0;j<mdct_twiddles_size;j++)
|
||||
fprintf (file, WORD16 ",%c", mode->mdct.trig[j],(j+6)%5==0?'\n':' ');
|
||||
#endif
|
||||
|
||||
fprintf (file, "};\n");
|
||||
|
||||
fprintf(file, "#endif\n");
|
||||
|
21
third_party/opus/src/celt/entdec.c
vendored
21
third_party/opus/src/celt/entdec.c
vendored
@ -195,6 +195,27 @@ int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*Decodes a symbol using an "inverse" CDF table with 16-bit entries.
  The table is scanned from index 0 until the scaled entry no longer exceeds
  the current code value; the index where the scan stops is the symbol.
  The decoder's val and rng are then narrowed to that symbol's interval and
  renormalized.*/
int ec_dec_icdf16(ec_dec *_this,const opus_uint16 *_icdf,unsigned _ftb){
  opus_uint32 step;
  opus_uint32 code;
  opus_uint32 upper;
  opus_uint32 lower;
  int         sym;
  lower=_this->rng;
  code=_this->val;
  step=lower>>_ftb;
  sym=0;
  for(;;){
    /*The previous lower bound becomes the upper bound of the next symbol.*/
    upper=lower;
    lower=IMUL32(step,_icdf[sym]);
    if(code>=lower)break;
    sym++;
  }
  _this->val=code-lower;
  _this->rng=upper-lower;
  ec_dec_normalize(_this);
  return sym;
}
|
||||
|
||||
opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){
|
||||
unsigned ft;
|
||||
unsigned s;
|
||||
|
10
third_party/opus/src/celt/entdec.h
vendored
10
third_party/opus/src/celt/entdec.h
vendored
@ -81,6 +81,16 @@ int ec_dec_bit_logp(ec_dec *_this,unsigned _logp);
|
||||
Return: The decoded symbol s.*/
|
||||
int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
|
||||
|
||||
/*Decodes a symbol given an "inverse" CDF table.
|
||||
No call to ec_dec_update() is necessary after this call.
|
||||
_icdf: The "inverse" CDF, such that symbol s falls in the range
|
||||
[s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb.
|
||||
The values must be monotonically non-increasing, and the last value
|
||||
must be 0.
|
||||
_ftb: The number of bits of precision in the cumulative distribution.
|
||||
Return: The decoded symbol s.*/
|
||||
int ec_dec_icdf16(ec_dec *_this,const opus_uint16 *_icdf,unsigned _ftb);
|
||||
|
||||
/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
|
||||
The bits must have been encoded with ec_enc_uint().
|
||||
No call to ec_dec_update() is necessary after this call.
|
||||
|
11
third_party/opus/src/celt/entenc.c
vendored
11
third_party/opus/src/celt/entenc.c
vendored
@ -172,6 +172,17 @@ void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){
|
||||
ec_enc_normalize(_this);
|
||||
}
|
||||
|
||||
/*Encodes symbol _s using an "inverse" CDF table with 16-bit entries.
  The current range is divided into 1<<_ftb steps. For _s>0 the low end of
  the interval (val) is advanced past all preceding symbols and rng is set to
  the width of symbol _s; for the first symbol only rng shrinks.
  The encoder is renormalized afterwards.*/
void ec_enc_icdf16(ec_enc *_this,int _s,const opus_uint16 *_icdf,unsigned _ftb){
  const opus_uint32 step=_this->rng>>_ftb;
  if(_s<=0){
    _this->rng-=IMUL32(step,_icdf[_s]);
  }
  else{
    const opus_uint32 above=IMUL32(step,_icdf[_s-1]);
    _this->val+=_this->rng-above;
    _this->rng=IMUL32(step,_icdf[_s-1]-_icdf[_s]);
  }
  ec_enc_normalize(_this);
}
|
||||
|
||||
void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){
|
||||
unsigned ft;
|
||||
unsigned fl;
|
||||
|
9
third_party/opus/src/celt/entenc.h
vendored
9
third_party/opus/src/celt/entenc.h
vendored
@ -64,6 +64,15 @@ void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp);
|
||||
_ftb: The number of bits of precision in the cumulative distribution.*/
|
||||
void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
|
||||
|
||||
/*Encodes a symbol given an "inverse" CDF table.
|
||||
_s: The index of the symbol to encode.
|
||||
_icdf: The "inverse" CDF, such that symbol _s falls in the range
|
||||
[_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb.
|
||||
The values must be monotonically non-increasing, and the last value
|
||||
must be 0.
|
||||
_ftb: The number of bits of precision in the cumulative distribution.*/
|
||||
void ec_enc_icdf16(ec_enc *_this,int _s,const opus_uint16 *_icdf,unsigned _ftb);
|
||||
|
||||
/*Encodes a raw unsigned integer in the stream.
|
||||
_fl: The integer to encode.
|
||||
_ft: The number of integers that can be encoded (one more than the max).
|
||||
|
5
third_party/opus/src/celt/fixed_debug.h
vendored
5
third_party/opus/src/celt/fixed_debug.h
vendored
@ -43,6 +43,7 @@ extern opus_int64 celt_mips;
|
||||
|
||||
#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
|
||||
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL32(MULT16_16(SHR32((a),16),SHR((b),16)),1), SHR32(MULT16_16SU(SHR32((a),16),((b)&0x0000ffff)),15)), SHR32(MULT16_16SU(SHR32((b),16),((a)&0x0000ffff)),15))
|
||||
#define MULT32_32_Q32(a,b) ADD32(ADD32(MULT16_16(SHR((a),16),SHR((b),16)), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),16)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),16))
|
||||
|
||||
/** 16x32 multiplication, followed by a 16-bit shift right. Result fits in 32 bits */
|
||||
#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR32((b),16)), SHR32(MULT16_16SU((a),((b)&0x0000ffff)),16))
|
||||
@ -50,7 +51,9 @@ extern opus_int64 celt_mips;
|
||||
#define MULT16_32_P16(a,b) MULT16_32_PX(a,b,16)
|
||||
|
||||
#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
|
||||
#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
|
||||
#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val64)1)<<(bits))))
|
||||
#define GCONST2(x,bits) ((celt_glog)(.5+(x)*(((celt_glog)1)<<(bits))))
|
||||
#define GCONST(x) GCONST2((x),DB_SHIFT)
|
||||
|
||||
#define VERIFY_SHORT(x) ((x)<=32767&&(x)>=-32768)
|
||||
#define VERIFY_INT(x) ((x)<=2147483647LL&&(x)>=-2147483648LL)
|
||||
|
15
third_party/opus/src/celt/fixed_generic.h
vendored
15
third_party/opus/src/celt/fixed_generic.h
vendored
@ -71,11 +71,24 @@
|
||||
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
|
||||
#endif
|
||||
|
||||
/** 32x32 multiplication, followed by a 32-bit shift right. Result fits in 32 bits */
|
||||
#if OPUS_FAST_INT64
|
||||
#define MULT32_32_Q32(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),32))
|
||||
#else
|
||||
#define MULT32_32_Q32(a,b) ADD32(ADD32(MULT16_16(SHR((a),16),SHR((b),16)), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),16)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),16))
|
||||
#endif
|
||||
|
||||
/** Compile-time conversion of float constant to 16-bit value */
|
||||
#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))
|
||||
|
||||
/** Compile-time conversion of float constant to 32-bit value */
|
||||
#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_val32)1)<<(bits))))
|
||||
#define QCONST32(x,bits) ((opus_val32)(.5+(x)*(((opus_int64)1)<<(bits))))
|
||||
|
||||
/** Compile-time conversion of float constant to log gain value */
|
||||
#define GCONST2(x,bits) ((celt_glog)(.5+(x)*(((celt_glog)1)<<(bits))))
|
||||
|
||||
/** Compile-time conversion of float constant to DB_SHIFT log gain value */
|
||||
#define GCONST(x) GCONST2((x),DB_SHIFT)
|
||||
|
||||
/** Negate a 16-bit value */
|
||||
#define NEG16(x) (-(x))
|
||||
|
70
third_party/opus/src/celt/kiss_fft.c
vendored
70
third_party/opus/src/celt/kiss_fft.c
vendored
@ -41,6 +41,10 @@
|
||||
#include "mathops.h"
|
||||
#include "stack_alloc.h"
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.141592653
|
||||
#endif
|
||||
|
||||
/* The guts header contains all the multiplication and addition macros that are defined for
|
||||
complex numbers. It also declares the kf_ internal functions.
|
||||
*/
|
||||
@ -70,8 +74,8 @@ static void kf_bfly2(
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
opus_val16 tw;
|
||||
tw = QCONST16(0.7071067812f, 15);
|
||||
celt_coef tw;
|
||||
tw = QCONST32(0.7071067812f, COEF_SHIFT-1);
|
||||
/* We know that m==4 here because the radix-2 is just after a radix-4 */
|
||||
celt_assert(m==4);
|
||||
for (i=0;i<N;i++)
|
||||
@ -192,7 +196,7 @@ static void kf_bfly3(
|
||||
kiss_fft_cpx * Fout_beg = Fout;
|
||||
#ifdef FIXED_POINT
|
||||
/*epi3.r = -16384;*/ /* Unused */
|
||||
epi3.i = -28378;
|
||||
epi3.i = -QCONST32(0.86602540f, COEF_SHIFT-1);
|
||||
#else
|
||||
epi3 = st->twiddles[fstride*m];
|
||||
#endif
|
||||
@ -249,10 +253,10 @@ static void kf_bfly5(
|
||||
kiss_fft_cpx * Fout_beg = Fout;
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
ya.r = 10126;
|
||||
ya.i = -31164;
|
||||
yb.r = -26510;
|
||||
yb.i = -19261;
|
||||
ya.r = QCONST32(0.30901699f, COEF_SHIFT-1);
|
||||
ya.i = -QCONST32(0.95105652f, COEF_SHIFT-1);
|
||||
yb.r = -QCONST32(0.80901699f, COEF_SHIFT-1);
|
||||
yb.i = -QCONST32(0.58778525f, COEF_SHIFT-1);
|
||||
#else
|
||||
ya = st->twiddles[fstride*m];
|
||||
yb = st->twiddles[fstride*2*m];
|
||||
@ -412,7 +416,12 @@ static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
|
||||
#ifdef FIXED_POINT
|
||||
for (i=0;i<nfft;++i) {
|
||||
opus_val32 phase = -i;
|
||||
#ifdef ENABLE_QEXT
|
||||
twiddles[i].r = (int)MIN32(2147483647, floor(.5+2147483648*cos((2*M_PI/nfft)*phase)));
|
||||
twiddles[i].i = (int)MIN32(2147483647, floor(.5+2147483648*sin((2*M_PI/nfft)*phase)));
|
||||
#else
|
||||
kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
for (i=0;i<nfft;++i) {
|
||||
@ -454,10 +463,17 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
|
||||
st->nfft=nfft;
|
||||
#ifdef FIXED_POINT
|
||||
st->scale_shift = celt_ilog2(st->nfft);
|
||||
# ifdef ENABLE_QEXT
|
||||
if (st->nfft == 1<<st->scale_shift)
|
||||
st->scale = QCONST32(1.0f, 30);
|
||||
else
|
||||
st->scale = (((opus_int64)1073741824<<st->scale_shift)+st->nfft/2)/st->nfft;
|
||||
# else
|
||||
if (st->nfft == 1<<st->scale_shift)
|
||||
st->scale = Q15ONE;
|
||||
else
|
||||
st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
|
||||
# endif
|
||||
#else
|
||||
st->scale = 1.f/nfft;
|
||||
#endif
|
||||
@ -518,7 +534,30 @@ void opus_fft_free(const kiss_fft_state *cfg, int arch)
|
||||
|
||||
#endif /* CUSTOM_MODES */
|
||||
|
||||
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
|
||||
#ifdef FIXED_POINT
|
||||
/* Right-shifts the real and imaginary parts of the N entries of x by
   min(step, *total) bits and subtracts that amount from *total, so that
   repeated calls consume a shift budget without exceeding it.
   A one-bit shift uses SHR32(), larger shifts use PSHR32(); nothing is
   touched when the amount is zero or negative. */
static void fft_downshift(kiss_fft_cpx *x, int N, int *total, int step) {
   int k;
   int amount;
   amount = IMIN(step, *total);
   *total -= amount;
   if (amount <= 0)
      return;
   if (amount == 1) {
      for (k=0;k<N;k++) {
         x[k].r = SHR32(x[k].r, 1);
         x[k].i = SHR32(x[k].i, 1);
      }
      return;
   }
   for (k=0;k<N;k++) {
      x[k].r = PSHR32(x[k].r, amount);
      x[k].i = PSHR32(x[k].i, amount);
   }
}
|
||||
#else
|
||||
#define fft_downshift(x, N, total, step)
|
||||
#endif
|
||||
|
||||
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift))
|
||||
{
|
||||
int m2, m;
|
||||
int p;
|
||||
@ -548,28 +587,33 @@ void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
|
||||
switch (st->factors[2*i])
|
||||
{
|
||||
case 2:
|
||||
fft_downshift(fout, st->nfft, &downshift, 1);
|
||||
kf_bfly2(fout, m, fstride[i]);
|
||||
break;
|
||||
case 4:
|
||||
fft_downshift(fout, st->nfft, &downshift, 2);
|
||||
kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
||||
break;
|
||||
#ifndef RADIX_TWO_ONLY
|
||||
case 3:
|
||||
fft_downshift(fout, st->nfft, &downshift, 2);
|
||||
kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
||||
break;
|
||||
case 5:
|
||||
fft_downshift(fout, st->nfft, &downshift, 3);
|
||||
kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
m = m2;
|
||||
}
|
||||
fft_downshift(fout, st->nfft, &downshift, downshift);
|
||||
}
|
||||
|
||||
void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
||||
{
|
||||
int i;
|
||||
opus_val16 scale;
|
||||
celt_coef scale;
|
||||
#ifdef FIXED_POINT
|
||||
/* Allows us to scale with MULT16_32_Q16(), which is faster than
|
||||
MULT16_32_Q15() on ARM. */
|
||||
@ -582,10 +626,10 @@ void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *f
|
||||
for (i=0;i<st->nfft;i++)
|
||||
{
|
||||
kiss_fft_cpx x = fin[i];
|
||||
fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
|
||||
fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
|
||||
fout[st->bitrev[i]].r = S_MUL2(x.r, scale);
|
||||
fout[st->bitrev[i]].i = S_MUL2(x.i, scale);
|
||||
}
|
||||
opus_fft_impl(st, fout);
|
||||
opus_fft_impl(st, fout ARG_FIXED(scale_shift));
|
||||
}
|
||||
|
||||
|
||||
@ -598,7 +642,7 @@ void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *
|
||||
fout[st->bitrev[i]] = fin[i];
|
||||
for (i=0;i<st->nfft;i++)
|
||||
fout[i].i = -fout[i].i;
|
||||
opus_fft_impl(st, fout);
|
||||
opus_fft_impl(st, fout ARG_FIXED(0));
|
||||
for (i=0;i<st->nfft;i++)
|
||||
fout[i].i = -fout[i].i;
|
||||
}
|
||||
|
18
third_party/opus/src/celt/kiss_fft.h
vendored
18
third_party/opus/src/celt/kiss_fft.h
vendored
@ -50,14 +50,21 @@ extern "C" {
|
||||
#include "arch.h"
|
||||
|
||||
# define kiss_fft_scalar opus_int32
|
||||
# define kiss_twiddle_scalar opus_int16
|
||||
# ifdef ENABLE_QEXT
|
||||
# define COEF_SHIFT 32
|
||||
# else
|
||||
# define COEF_SHIFT 16
|
||||
# endif
|
||||
|
||||
# define kiss_twiddle_scalar celt_coef
|
||||
|
||||
/* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single memory
|
||||
* access, and could benefit from additional alignment.
|
||||
*/
|
||||
#define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32))
|
||||
# define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32))
|
||||
|
||||
#else
|
||||
|
||||
# ifndef kiss_fft_scalar
|
||||
/* default is float */
|
||||
# define kiss_fft_scalar float
|
||||
@ -67,8 +74,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT)
|
||||
#define KISS_TWIDDLE_CPX_ALIGNED \
|
||||
__attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT)))
|
||||
#define KISS_TWIDDLE_CPX_ALIGNED __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT)))
|
||||
#else
|
||||
#define KISS_TWIDDLE_CPX_ALIGNED
|
||||
#endif
|
||||
@ -96,7 +102,7 @@ typedef struct arch_fft_state{
|
||||
|
||||
typedef struct kiss_fft_state{
|
||||
int nfft;
|
||||
opus_val16 scale;
|
||||
celt_coef scale;
|
||||
#ifdef FIXED_POINT
|
||||
int scale_shift;
|
||||
#endif
|
||||
@ -153,7 +159,7 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch);
|
||||
void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
|
||||
void opus_ifft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
|
||||
|
||||
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
|
||||
void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout ARG_FIXED(int downshift));
|
||||
void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout);
|
||||
|
||||
void opus_fft_free(const kiss_fft_state *cfg, int arch);
|
||||
|
101
third_party/opus/src/celt/laplace.c
vendored
101
third_party/opus/src/celt/laplace.c
vendored
@ -132,3 +132,104 @@ int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay)
|
||||
ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768);
|
||||
return val;
|
||||
}
|
||||
|
||||
/* Writes `value` to the range coder in two stages: first a three-way symbol
   (0 = zero, 1 = positive, 2 = negative) whose table is derived from p0,
   then, for non-zero values only, the magnitude minus one as a sequence of
   symbols in [0,7] drawn from a table derived from `decay`. A symbol of 7
   means at least 7 more remain, so another symbol follows. */
void ec_laplace_encode_p0(ec_enc *enc, int value, opus_uint16 p0, opus_uint16 decay)
{
   int sym;
   int k;
   int mag;
   opus_uint16 sign_icdf[3];
   opus_uint16 mag_icdf[8];

   /* The non-zero mass (32768-p0) is split evenly between the two signs. */
   sign_icdf[0] = 32768-p0;
   sign_icdf[1] = sign_icdf[0]/2;
   sign_icdf[2] = 0;
   if (value == 0)
      sym = 0;
   else if (value > 0)
      sym = 1;
   else
      sym = 2;
   ec_enc_icdf16(enc, sym, sign_icdf, 15);
   if (value == 0)
      return;

   /* Each table entry is the previous one scaled by decay (Q15 multiply),
      floored at 7-k so every entry stays strictly above the next one. */
   mag_icdf[0] = IMAX(7, decay);
   for (k=1;k<7;k++)
   {
      mag_icdf[k] = IMAX(7-k, (mag_icdf[k-1] * (opus_int32)decay) >> 15);
   }
   mag_icdf[7] = 0;

   /* The sign symbol already says "non-zero", so code |value|-1. */
   mag = abs(value)-1;
   do {
      ec_enc_icdf16(enc, IMIN(mag, 7), mag_icdf, 15);
      mag -= 7;
   } while (mag >= 0);
}
|
||||
|
||||
/* Reads back a value written by ec_laplace_encode_p0(): a three-way
   zero/positive/negative symbol, then (when non-zero) magnitude symbols in
   [0,7] that are summed until one smaller than 7 is seen.
   Returns the signed value, or 0 when the first symbol is the zero symbol. */
int ec_laplace_decode_p0(ec_dec *dec, opus_uint16 p0, opus_uint16 decay)
{
   int sign;
   int mag;
   int sym;
   int k;
   opus_uint16 sign_icdf[3];
   opus_uint16 mag_icdf[8];

   sign_icdf[0] = 32768-p0;
   sign_icdf[1] = sign_icdf[0]/2;
   sign_icdf[2] = 0;
   sign = ec_dec_icdf16(dec, sign_icdf, 15);
   if (sign == 0)
      return 0;
   /* Symbol 2 stands for a negative value. */
   if (sign == 2)
      sign = -1;

   /* Same table construction as on the encoder side. */
   mag_icdf[0] = IMAX(7, decay);
   for (k=1;k<7;k++)
   {
      mag_icdf[k] = IMAX(7-k, (mag_icdf[k-1] * (opus_int32)decay) >> 15);
   }
   mag_icdf[7] = 0;

   /* Start at 1 because the encoder coded |value|-1; a symbol of 7 is an
      escape that means more symbols follow. */
   mag = 1;
   do {
      sym = ec_dec_icdf16(dec, mag_icdf, 15);
      mag += sym;
   } while (sym == 7);
   return sign*mag;
}
|
||||
|
||||
#if 0

#include <stdio.h>
#include <stdlib.h>
#define NB_VALS 10
#define DATA_SIZE 10000
/* Disabled round-trip self-test: encodes NB_VALS integers with
   ec_laplace_encode_p0(), decodes them with ec_laplace_decode_p0(), and
   prints the values before and after so they can be compared by eye.
   Returns 0 on completion, 1 if the coding buffer cannot be allocated. */
int main(void) {
   ec_enc enc;
   ec_dec dec;
   unsigned char *ptr;
   int i;
   int decay, p0;
   int val[NB_VALS] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
   /*for (i=0;i<NB_VALS;i++) {
      val[i] = -log(rand()/(float)RAND_MAX);
      if (rand()%2) val[i] = -val[i];
   }*/
   p0 = 16000;
   decay = 16000;
   ptr = (unsigned char *)malloc(DATA_SIZE);
   if (ptr == NULL) {
      fprintf(stderr, "failed to allocate %d bytes\n", DATA_SIZE);
      return 1;
   }
   ec_enc_init(&enc,ptr,DATA_SIZE);
   for (i=0;i<NB_VALS;i++) {
      printf("%d ", val[i]);
   }
   printf("\n");
   for (i=0;i<NB_VALS;i++) {
      ec_laplace_encode_p0(&enc, val[i], p0, decay);
   }

   ec_enc_done(&enc);

   ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc));

   for (i=0;i<NB_VALS;i++) {
      val[i] = ec_laplace_decode_p0(&dec, p0, decay);
   }
   for (i=0;i<NB_VALS;i++) {
      printf("%d ", val[i]);
   }
   printf("\n");
   /* The decoder reads from the encoder's buffer, so release it only now. */
   free(ptr);
   return 0;
}

#endif
|
||||
|
9
third_party/opus/src/celt/laplace.h
vendored
9
third_party/opus/src/celt/laplace.h
vendored
@ -26,6 +26,9 @@
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef LAPLACE_H
|
||||
#define LAPLACE_H
|
||||
|
||||
#include "entenc.h"
|
||||
#include "entdec.h"
|
||||
|
||||
@ -46,3 +49,9 @@ void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay);
|
||||
@return Value decoded
|
||||
*/
|
||||
int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay);
|
||||
|
||||
|
||||
int ec_laplace_decode_p0(ec_dec *dec, opus_uint16 p0, opus_uint16 decay);
|
||||
void ec_laplace_encode_p0(ec_enc *enc, int value, opus_uint16 p0, opus_uint16 decay);
|
||||
|
||||
#endif
|
||||
|
16
third_party/opus/src/celt/mathops.c
vendored
16
third_party/opus/src/celt/mathops.c
vendored
@ -67,7 +67,7 @@ unsigned isqrt32(opus_uint32 _val){
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
|
||||
opus_val32 frac_div32(opus_val32 a, opus_val32 b)
|
||||
opus_val32 frac_div32_q29(opus_val32 a, opus_val32 b)
|
||||
{
|
||||
opus_val16 rcp;
|
||||
opus_val32 result, rem;
|
||||
@ -79,6 +79,11 @@ opus_val32 frac_div32(opus_val32 a, opus_val32 b)
|
||||
result = MULT16_32_Q15(rcp, a);
|
||||
rem = PSHR32(a,2)-MULT32_32_Q31(result, b);
|
||||
result = ADD32(result, SHL32(MULT16_32_Q15(rcp, rem),2));
|
||||
return result;
|
||||
}
|
||||
|
||||
opus_val32 frac_div32(opus_val32 a, opus_val32 b) {
|
||||
opus_val32 result = frac_div32_q29(a,b);
|
||||
if (result >= 536870912) /* 2^29 */
|
||||
return 2147483647; /* 2^31 - 1 */
|
||||
else if (result <= -536870912) /* -2^29 */
|
||||
@ -121,7 +126,10 @@ opus_val32 celt_sqrt(opus_val32 x)
|
||||
int k;
|
||||
opus_val16 n;
|
||||
opus_val32 rt;
|
||||
static const opus_val16 C[5] = {23175, 11561, -3011, 1699, -664};
|
||||
/* These coeffs are optimized in fixed-point to minimize both RMS and max error
|
||||
of sqrt(x) over .25<x<1 without exceeding 32767.
|
||||
The RMS error is 3.4e-5 and the max is 8.2e-5. */
|
||||
static const opus_val16 C[6] = {23171, 11574, -2901, 1592, -1002, 336};
|
||||
if (x==0)
|
||||
return 0;
|
||||
else if (x>=1073741824)
|
||||
@ -129,8 +137,8 @@ opus_val32 celt_sqrt(opus_val32 x)
|
||||
k = (celt_ilog2(x)>>1)-7;
|
||||
x = VSHR32(x, 2*k);
|
||||
n = x-32768;
|
||||
rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
|
||||
MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4])))))))));
|
||||
rt = ADD32(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2],
|
||||
MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, ADD16(C[4], MULT16_16_Q15(n, (C[5])))))))))));
|
||||
rt = VSHR32(rt,7-k);
|
||||
return rt;
|
||||
}
|
||||
|
229
third_party/opus/src/celt/mathops.h
vendored
229
third_party/opus/src/celt/mathops.h
vendored
@ -1,7 +1,7 @@
|
||||
/* Copyright (c) 2002-2008 Jean-Marc Valin
|
||||
Copyright (c) 2007-2008 CSIRO
|
||||
Copyright (c) 2007-2009 Xiph.Org Foundation
|
||||
Written by Jean-Marc Valin */
|
||||
Written by Jean-Marc Valin, and Yunho Huh */
|
||||
/**
|
||||
@file mathops.h
|
||||
@brief Various math functions
|
||||
@ -91,6 +91,26 @@ static OPUS_INLINE opus_val32 celt_maxabs16(const opus_val16 *x, int len)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_RES24
|
||||
/* Returns the largest absolute value among x[0..len-1]; 0 when the loop
   does not run (len <= 0). The most positive and most negative samples are
   tracked separately and the negation is applied once at the end. */
static OPUS_INLINE opus_res celt_maxabs_res(const opus_res *x, int len)
{
   int k;
   opus_res lo = 0;
   opus_res hi = 0;
   for (k=0;k<len;k++)
   {
      hi = MAX32(hi, x[k]);
      lo = MIN32(lo, x[k]);
   }
   /* opus_res should never reach such amplitude, so we should be safe. */
   celt_sig_assert(lo != -2147483648);
   return MAX32(hi,-lo);
}
|
||||
#else
|
||||
#define celt_maxabs_res celt_maxabs16
|
||||
#endif
|
||||
|
||||
|
||||
#ifndef OVERRIDE_CELT_MAXABS32
|
||||
#ifdef FIXED_POINT
|
||||
static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
|
||||
@ -120,34 +140,88 @@ static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
|
||||
#define celt_rcp(x) (1.f/(x))
|
||||
#define celt_div(a,b) ((a)/(b))
|
||||
#define frac_div32(a,b) ((float)(a)/(b))
|
||||
#define frac_div32_q29(a,b) frac_div32(a,b)
|
||||
|
||||
#ifdef FLOAT_APPROX
|
||||
/* Calculates the base-2 logarithm (log2(x)) of a number. It is designed for
|
||||
* systems using radix-2 floating-point representation, with the exponent
|
||||
* located at bits 23 to 30 and an offset of 127. Note that special cases like
|
||||
* denormalized numbers, positive/negative infinity, and NaN are not handled.
|
||||
* log2(x) = log2(2^exponent * mantissa)
|
||||
* = exponent + log2(mantissa) */
|
||||
|
||||
/* Note: This assumes radix-2 floating point with the exponent at bits 23..30 and an offset of 127
|
||||
denorm, +/- inf and NaN are *not* handled */
|
||||
/* Log2 x normalization single precision coefficients calculated by
|
||||
* 1 / (1 + 0.125 * index).
|
||||
* Coefficients in Double Precision
|
||||
* double log2_x_norm_coeff[8] = {
|
||||
* 1.0000000000000000000, 8.888888888888888e-01,
|
||||
* 8.000000000000000e-01, 7.272727272727273e-01,
|
||||
* 6.666666666666666e-01, 6.153846153846154e-01,
|
||||
* 5.714285714285714e-01, 5.333333333333333e-01} */
|
||||
static const float log2_x_norm_coeff[8] = {
|
||||
1.000000000000000000000000000f, 8.88888895511627197265625e-01f,
|
||||
8.00000000000000000000000e-01f, 7.27272748947143554687500e-01f,
|
||||
6.66666686534881591796875e-01f, 6.15384638309478759765625e-01f,
|
||||
5.71428596973419189453125e-01f, 5.33333361148834228515625e-01f};
|
||||
|
||||
/* Log2 y normalization single precision coefficients calculated by
|
||||
* log2(1 + 0.125 * index).
|
||||
* Coefficients in Double Precision
|
||||
* double log2_y_norm_coeff[8] = {
|
||||
* 0.0000000000000000000, 1.699250014423124e-01,
|
||||
* 3.219280948873623e-01, 4.594316186372973e-01,
|
||||
* 5.849625007211562e-01, 7.004397181410922e-01,
|
||||
* 8.073549220576041e-01, 9.068905956085185e-01}; */
|
||||
static const float log2_y_norm_coeff[8] = {
|
||||
0.0000000000000000000000000000f, 1.699250042438507080078125e-01f,
|
||||
3.219280838966369628906250e-01f, 4.594316184520721435546875e-01f,
|
||||
5.849624872207641601562500e-01f, 7.004396915435791015625000e-01f,
|
||||
8.073549270629882812500000e-01f, 9.068905711174011230468750e-01f};
|
||||
|
||||
/** Base-2 log approximation (log2(x)). */
|
||||
static OPUS_INLINE float celt_log2(float x)
|
||||
{
|
||||
int integer;
|
||||
float frac;
|
||||
opus_int32 integer;
|
||||
opus_int32 range_idx;
|
||||
union {
|
||||
float f;
|
||||
opus_uint32 i;
|
||||
} in;
|
||||
in.f = x;
|
||||
integer = (in.i>>23)-127;
|
||||
in.i -= (opus_uint32)integer<<23;
|
||||
frac = in.f - 1.5f;
|
||||
frac = -0.41445418f + frac*(0.95909232f
|
||||
+ frac*(-0.33951290f + frac*0.16541097f));
|
||||
return 1+integer+frac;
|
||||
integer = (opus_int32)(in.i>>23)-127;
|
||||
in.i = (opus_int32)in.i - (opus_int32)((opus_uint32)integer<<23);
|
||||
|
||||
/* Normalize the mantissa range from [1, 2] to [1,1.125], and then shift x
|
||||
* by 1.0625 to [-0.0625, 0.0625]. */
|
||||
range_idx = (in.i >> 20) & 0x7;
|
||||
in.f = in.f * log2_x_norm_coeff[range_idx] - 1.0625f;
|
||||
|
||||
/* Polynomial coefficients approximated in the [1, 1.125] range.
|
||||
* Lolremez command: lolremez --degree 4 --range -0.0625:0.0625
|
||||
* "log(x+1.0625)/log(2)"
|
||||
* Coefficients in Double Precision
|
||||
* A0: 8.7462840624502679e-2 A1: 1.3578296070972002
|
||||
* A2: -6.3897703690210047e-1 A3: 4.0197125617419959e-1
|
||||
* A4: -2.8415445877832832e-1 */
|
||||
#define LOG2_COEFF_A0 8.74628424644470214843750000e-02f
|
||||
#define LOG2_COEFF_A1 1.357829570770263671875000000000f
|
||||
#define LOG2_COEFF_A2 -6.3897705078125000000000000e-01f
|
||||
#define LOG2_COEFF_A3 4.01971250772476196289062500e-01f
|
||||
#define LOG2_COEFF_A4 -2.8415444493293762207031250e-01f
|
||||
in.f = LOG2_COEFF_A0 + in.f * (LOG2_COEFF_A1
|
||||
+ in.f * (LOG2_COEFF_A2
|
||||
+ in.f * (LOG2_COEFF_A3
|
||||
+ in.f * (LOG2_COEFF_A4))));
|
||||
return integer + in.f + log2_y_norm_coeff[range_idx];
|
||||
}
|
||||
|
||||
/** Base-2 exponential approximation (2^x). */
|
||||
/* Calculates an approximation of 2^x. The approximation was achieved by
|
||||
* employing a base-2 exponential function and utilizing a Remez approximation
|
||||
* of order 5, ensuring a controlled relative error.
|
||||
* exp2(x) = exp2(integer + fraction)
|
||||
* = exp2(integer) * exp2(fraction) */
|
||||
static OPUS_INLINE float celt_exp2(float x)
|
||||
{
|
||||
int integer;
|
||||
opus_int32 integer;
|
||||
float frac;
|
||||
union {
|
||||
float f;
|
||||
@ -157,10 +231,23 @@ static OPUS_INLINE float celt_exp2(float x)
|
||||
if (integer < -50)
|
||||
return 0;
|
||||
frac = x-integer;
|
||||
/* K0 = 1, K1 = log(2), K2 = 3-4*log(2), K3 = 3*log(2) - 2 */
|
||||
res.f = 0.99992522f + frac * (0.69583354f
|
||||
+ frac * (0.22606716f + 0.078024523f*frac));
|
||||
res.i = (res.i + ((opus_uint32)integer<<23)) & 0x7fffffff;
|
||||
|
||||
/* Polynomial coefficients approximated in the [0, 1] range.
|
||||
* Lolremez command: lolremez --degree 5 --range 0:1
|
||||
* "exp(x*0.693147180559945)" "exp(x*0.693147180559945)"
|
||||
* NOTE: log(2) ~ 0.693147180559945 */
|
||||
#define EXP2_COEFF_A0 9.999999403953552246093750000000e-01f
|
||||
#define EXP2_COEFF_A1 6.931530833244323730468750000000e-01f
|
||||
#define EXP2_COEFF_A2 2.401536107063293457031250000000e-01f
|
||||
#define EXP2_COEFF_A3 5.582631751894950866699218750000e-02f
|
||||
#define EXP2_COEFF_A4 8.989339694380760192871093750000e-03f
|
||||
#define EXP2_COEFF_A5 1.877576694823801517486572265625e-03f
|
||||
res.f = EXP2_COEFF_A0 + frac * (EXP2_COEFF_A1
|
||||
+ frac * (EXP2_COEFF_A2
|
||||
+ frac * (EXP2_COEFF_A3
|
||||
+ frac * (EXP2_COEFF_A4
|
||||
+ frac * (EXP2_COEFF_A5)))));
|
||||
res.i = (opus_uint32)((opus_int32)res.i + (opus_int32)((opus_uint32)integer<<23)) & 0x7fffffff;
|
||||
return res.f;
|
||||
}
|
||||
|
||||
@ -169,6 +256,9 @@ static OPUS_INLINE float celt_exp2(float x)
|
||||
#define celt_exp2(x) ((float)exp(0.6931471805599453094*(x)))
|
||||
#endif
|
||||
|
||||
#define celt_exp2_db celt_exp2
|
||||
#define celt_log2_db celt_log2
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
@ -204,13 +294,13 @@ static OPUS_INLINE opus_val16 celt_log2(opus_val32 x)
|
||||
opus_val16 n, frac;
|
||||
/* -0.41509302963303146, 0.9609890551383969, -0.31836011537636605,
|
||||
0.15530808010959576, -0.08556153059057618 */
|
||||
static const opus_val16 C[5] = {-6801+(1<<(13-DB_SHIFT)), 15746, -5217, 2545, -1401};
|
||||
static const opus_val16 C[5] = {-6801+(1<<(13-10)), 15746, -5217, 2545, -1401};
|
||||
if (x==0)
|
||||
return -32767;
|
||||
i = celt_ilog2(x);
|
||||
n = VSHR32(x,i-15)-32768-16384;
|
||||
frac = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, C[4]))))))));
|
||||
return SHL16(i-13,DB_SHIFT)+SHR16(frac,14-DB_SHIFT);
|
||||
return SHL32(i-13,10)+SHR32(frac,14-10);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -230,6 +320,12 @@ static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
|
||||
frac = SHL16(x, 4);
|
||||
return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
|
||||
}
|
||||
|
||||
#undef D0
|
||||
#undef D1
|
||||
#undef D2
|
||||
#undef D3
|
||||
|
||||
/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
|
||||
static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
|
||||
{
|
||||
@ -244,10 +340,103 @@ static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
|
||||
return VSHR32(EXTEND32(frac), -integer-2);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_QEXT
|
||||
|
||||
/* Calculates the base-2 logarithm of a Q14 input value. The result is returned
|
||||
* in Q(DB_SHIFT). If the input value is 0, the function will output -32.0f. */
|
||||
static OPUS_INLINE opus_val32 celt_log2_db(opus_val32 x) {
|
||||
/* Q30 */
|
||||
static const opus_val32 log2_x_norm_coeff[8] = {
|
||||
1073741824, 954437184, 858993472, 780903168,
|
||||
715827904, 660764224, 613566784, 572662336};
|
||||
/* Q24 */
|
||||
static const opus_val32 log2_y_norm_coeff[8] = {
|
||||
0, 2850868, 5401057, 7707983,
|
||||
9814042, 11751428, 13545168, 15215099};
|
||||
static const opus_val32 LOG2_COEFF_A0 = 1467383; /* Q24 */
|
||||
static const opus_val32 LOG2_COEFF_A1 = 182244800; /* Q27 */
|
||||
static const opus_val32 LOG2_COEFF_A2 = -21440512; /* Q25 */
|
||||
static const opus_val32 LOG2_COEFF_A3 = 107903336; /* Q28 */
|
||||
static const opus_val32 LOG2_COEFF_A4 = -610217024; /* Q31 */
|
||||
|
||||
opus_int32 integer, norm_coeff_idx, tmp;
|
||||
opus_val32 mantissa;
|
||||
if (x==0) {
|
||||
return -536870912; /* -32.0f */
|
||||
}
|
||||
integer = SUB32(celt_ilog2(x), 14); /* Q0 */
|
||||
mantissa = VSHR32(x, integer + 14 - 29); /* Q29 */
|
||||
norm_coeff_idx = SHR32(mantissa, 29 - 3) & 0x7;
|
||||
/* mantissa is in Q28 (29 + Q_NORM_CONST - 31 where Q_NORM_CONST is Q30)
|
||||
* 285212672 (Q28) is 1.0625f. */
|
||||
mantissa = SUB32(MULT32_32_Q31(mantissa, log2_x_norm_coeff[norm_coeff_idx]),
|
||||
285212672);
|
||||
|
||||
/* q_a3(Q28): q_mantissa + q_a4 - 31
|
||||
* q_a2(Q25): q_mantissa + q_a3 - 31
|
||||
* q_a1(Q27): q_mantissa + q_a2 - 31 + 5
|
||||
* q_a0(Q24): q_mantissa + q_a1 - 31
|
||||
* where q_mantissa is Q28 */
|
||||
/* Split evaluation in steps to avoid exploding macro expansion. */
|
||||
tmp = MULT32_32_Q31(mantissa, LOG2_COEFF_A4);
|
||||
tmp = MULT32_32_Q31(mantissa, ADD32(LOG2_COEFF_A3, tmp));
|
||||
tmp = SHL32(MULT32_32_Q31(mantissa, ADD32(LOG2_COEFF_A2, tmp)), 5 /* SHL32 for LOG2_COEFF_A1 */);
|
||||
tmp = MULT32_32_Q31(mantissa, ADD32(LOG2_COEFF_A1, tmp));
|
||||
return ADD32(log2_y_norm_coeff[norm_coeff_idx],
|
||||
ADD32(SHL32(integer, DB_SHIFT),
|
||||
ADD32(LOG2_COEFF_A0, tmp)));
|
||||
}
|
||||
|
||||
/* Calculates exp2 for Q28 within a specific range (0 to 1.0) using fixed-point
|
||||
* arithmetic. The input number must be adjusted for Q DB_SHIFT. */
|
||||
static OPUS_INLINE opus_val32 celt_exp2_db_frac(opus_val32 x)
|
||||
{
|
||||
/* Approximation constants. */
|
||||
static const opus_int32 EXP2_COEFF_A0 = 268435440; /* Q28 */
|
||||
static const opus_int32 EXP2_COEFF_A1 = 744267456; /* Q30 */
|
||||
static const opus_int32 EXP2_COEFF_A2 = 1031451904; /* Q32 */
|
||||
static const opus_int32 EXP2_COEFF_A3 = 959088832; /* Q34 */
|
||||
static const opus_int32 EXP2_COEFF_A4 = 617742720; /* Q36 */
|
||||
static const opus_int32 EXP2_COEFF_A5 = 516104352; /* Q38 */
|
||||
opus_int32 tmp;
|
||||
/* Converts input value from Q24 to Q29. */
|
||||
opus_val32 x_q29 = SHL32(x, 29 - 24);
|
||||
/* Split evaluation in steps to avoid exploding macro expansion. */
|
||||
tmp = ADD32(EXP2_COEFF_A4, MULT32_32_Q31(x_q29, EXP2_COEFF_A5));
|
||||
tmp = ADD32(EXP2_COEFF_A3, MULT32_32_Q31(x_q29, tmp));
|
||||
tmp = ADD32(EXP2_COEFF_A2, MULT32_32_Q31(x_q29, tmp));
|
||||
tmp = ADD32(EXP2_COEFF_A1, MULT32_32_Q31(x_q29, tmp));
|
||||
return ADD32(EXP2_COEFF_A0, MULT32_32_Q31(x_q29, tmp));
|
||||
}
|
||||
|
||||
/* Calculates exp2 for Q16 using fixed-point arithmetic. The input number must
|
||||
* be adjusted for Q DB_SHIFT. */
|
||||
static OPUS_INLINE opus_val32 celt_exp2_db(opus_val32 x)
|
||||
{
|
||||
int integer;
|
||||
opus_val32 frac;
|
||||
integer = SHR32(x,DB_SHIFT);
|
||||
if (integer>14)
|
||||
return 0x7f000000;
|
||||
else if (integer <= -17)
|
||||
return 0;
|
||||
frac = celt_exp2_db_frac(x-SHL32(integer, DB_SHIFT)); /* Q28 */
|
||||
return VSHR32(frac, -integer + 28 - 16); /* Q16 */
|
||||
}
|
||||
#else
|
||||
|
||||
#define celt_log2_db(x) SHL32(EXTEND32(celt_log2(x)), DB_SHIFT-10)
|
||||
#define celt_exp2_db_frac(x) SHL32(celt_exp2_frac(PSHR32(x, DB_SHIFT-10)), 14)
|
||||
#define celt_exp2_db(x) celt_exp2(PSHR32(x, DB_SHIFT-10))
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
opus_val32 celt_rcp(opus_val32 x);
|
||||
|
||||
#define celt_div(a,b) MULT32_32_Q31((opus_val32)(a),celt_rcp(b))
|
||||
|
||||
opus_val32 frac_div32_q29(opus_val32 a, opus_val32 b);
|
||||
opus_val32 frac_div32(opus_val32 a, opus_val32 b);
|
||||
|
||||
#define M1 32767
|
||||
|
83
third_party/opus/src/celt/mdct.c
vendored
83
third_party/opus/src/celt/mdct.c
vendored
@ -57,6 +57,9 @@
|
||||
#include "mips/mdct_mipsr1.h"
|
||||
#endif
|
||||
|
||||
#ifndef M_PI
|
||||
#define M_PI 3.141592653
|
||||
#endif
|
||||
|
||||
#ifdef CUSTOM_MODES
|
||||
|
||||
@ -86,12 +89,12 @@ int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch)
|
||||
{
|
||||
/* We have enough points that sine isn't necessary */
|
||||
#if defined(FIXED_POINT)
|
||||
#if 1
|
||||
#ifndef ENABLE_QEXT
|
||||
for (i=0;i<N2;i++)
|
||||
trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2+16384),N));
|
||||
#else
|
||||
for (i=0;i<N2;i++)
|
||||
trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768*cos(2*M_PI*(i+.125)/N))));
|
||||
trig[i] = (kiss_twiddle_scalar)MAX32(-2147483647,MIN32(2147483647,floor(.5+2147483648*cos(2*M_PI*(i+.125)/N))));
|
||||
#endif
|
||||
#else
|
||||
for (i=0;i<N2;i++)
|
||||
@ -117,7 +120,7 @@ void clt_mdct_clear(mdct_lookup *l, int arch)
|
||||
/* Forward MDCT trashes the input array */
|
||||
#ifndef OVERRIDE_clt_mdct_forward
|
||||
void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
|
||||
const opus_val16 *window, int overlap, int shift, int stride, int arch)
|
||||
const celt_coef *window, int overlap, int shift, int stride, int arch)
|
||||
{
|
||||
int i;
|
||||
int N, N2, N4;
|
||||
@ -125,11 +128,12 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
|
||||
VARDECL(kiss_fft_cpx, f2);
|
||||
const kiss_fft_state *st = l->kfft[shift];
|
||||
const kiss_twiddle_scalar *trig;
|
||||
opus_val16 scale;
|
||||
celt_coef scale;
|
||||
#ifdef FIXED_POINT
|
||||
/* Allows us to scale with MULT16_32_Q16(), which is faster than
|
||||
MULT16_32_Q15() on ARM. */
|
||||
int scale_shift = st->scale_shift-1;
|
||||
int headroom;
|
||||
#endif
|
||||
SAVE_STACK;
|
||||
(void)arch;
|
||||
@ -155,13 +159,13 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
|
||||
const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
|
||||
const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
|
||||
kiss_fft_scalar * OPUS_RESTRICT yp = f;
|
||||
const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
|
||||
const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
|
||||
const celt_coef * OPUS_RESTRICT wp1 = window+(overlap>>1);
|
||||
const celt_coef * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
|
||||
for(i=0;i<((overlap+3)>>2);i++)
|
||||
{
|
||||
/* Real part arranged as -d-cR, Imag part arranged as -b+aR*/
|
||||
*yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);
|
||||
*yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);
|
||||
*yp++ = S_MUL(xp1[N2], *wp2) + S_MUL(*xp2, *wp1);
|
||||
*yp++ = S_MUL(*xp1, *wp1) - S_MUL(xp2[-N2], *wp2);
|
||||
xp1+=2;
|
||||
xp2-=2;
|
||||
wp1+=2;
|
||||
@ -180,8 +184,8 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
|
||||
for(;i<N4;i++)
|
||||
{
|
||||
/* Real part arranged as a-bR, Imag part arranged as -c-dR */
|
||||
*yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);
|
||||
*yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
|
||||
*yp++ = -S_MUL(xp1[-N2], *wp1) + S_MUL(*xp2, *wp2);
|
||||
*yp++ = S_MUL(*xp1, *wp2) + S_MUL(xp2[N2], *wp1);
|
||||
xp1+=2;
|
||||
xp2-=2;
|
||||
wp1+=2;
|
||||
@ -192,6 +196,9 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
|
||||
{
|
||||
kiss_fft_scalar * OPUS_RESTRICT yp = f;
|
||||
const kiss_twiddle_scalar *t = &trig[0];
|
||||
#ifdef FIXED_POINT
|
||||
opus_val32 maxval=1;
|
||||
#endif
|
||||
for(i=0;i<N4;i++)
|
||||
{
|
||||
kiss_fft_cpx yc;
|
||||
@ -203,16 +210,27 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
|
||||
im = *yp++;
|
||||
yr = S_MUL(re,t0) - S_MUL(im,t1);
|
||||
yi = S_MUL(im,t0) + S_MUL(re,t1);
|
||||
/* For QEXT, it's best to scale before the FFT, but otherwise it's best to scale after.
|
||||
For floating-point it doesn't matter. */
|
||||
#ifdef ENABLE_QEXT
|
||||
yc.r = yr;
|
||||
yc.i = yi;
|
||||
yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
|
||||
yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
|
||||
#else
|
||||
yc.r = S_MUL2(yr, scale);
|
||||
yc.i = S_MUL2(yi, scale);
|
||||
#endif
|
||||
#ifdef FIXED_POINT
|
||||
maxval = MAX32(maxval, MAX32(ABS32(yc.r), ABS32(yc.i)));
|
||||
#endif
|
||||
f2[st->bitrev[i]] = yc;
|
||||
}
|
||||
#ifdef FIXED_POINT
|
||||
headroom = IMAX(0, IMIN(scale_shift, 28-celt_ilog2(maxval)));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* N/4 complex FFT, does not downscale anymore */
|
||||
opus_fft_impl(st, f2);
|
||||
opus_fft_impl(st, f2 ARG_FIXED(scale_shift-headroom));
|
||||
|
||||
/* Post-rotate */
|
||||
{
|
||||
@ -225,8 +243,16 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
|
||||
for(i=0;i<N4;i++)
|
||||
{
|
||||
kiss_fft_scalar yr, yi;
|
||||
yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
|
||||
yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
|
||||
kiss_fft_scalar t0, t1;
|
||||
#ifdef ENABLE_QEXT
|
||||
t0 = S_MUL2(t[i], scale);
|
||||
t1 = S_MUL2(t[N4+i], scale);
|
||||
#else
|
||||
t0 = t[i];
|
||||
t1 = t[N4+i];
|
||||
#endif
|
||||
yr = PSHR32(S_MUL(fp->i,t1) - S_MUL(fp->r,t0), headroom);
|
||||
yi = PSHR32(S_MUL(fp->r,t1) + S_MUL(fp->i,t0), headroom);
|
||||
*yp1 = yr;
|
||||
*yp2 = yi;
|
||||
fp++;
|
||||
@ -240,7 +266,7 @@ void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scal
|
||||
|
||||
#ifndef OVERRIDE_clt_mdct_backward
|
||||
void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
|
||||
const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
|
||||
const celt_coef * OPUS_RESTRICT window, int overlap, int shift, int stride, int arch)
|
||||
{
|
||||
int i;
|
||||
int N, N2, N4;
|
||||
@ -269,9 +295,12 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
|
||||
{
|
||||
int rev;
|
||||
kiss_fft_scalar yr, yi;
|
||||
opus_val32 x1, x2;
|
||||
rev = *bitrev++;
|
||||
yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
|
||||
yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
|
||||
x1 = SHL32(*xp1, IMDCT_HEADROOM);
|
||||
x2 = SHL32(*xp2, IMDCT_HEADROOM);
|
||||
yr = ADD32_ovflw(S_MUL(x2, t[i]), S_MUL(x1, t[N4+i]));
|
||||
yi = SUB32_ovflw(S_MUL(x1, t[i]), S_MUL(x2, t[N4+i]));
|
||||
/* We swap real and imag because we use an FFT instead of an IFFT. */
|
||||
yp[2*rev+1] = yr;
|
||||
yp[2*rev] = yi;
|
||||
@ -281,7 +310,7 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
|
||||
}
|
||||
}
|
||||
|
||||
opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
|
||||
opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)) ARG_FIXED(0));
|
||||
|
||||
/* Post-rotate and de-shuffle from both ends of the buffer at once to make
|
||||
it in-place. */
|
||||
@ -301,8 +330,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
|
||||
t0 = t[i];
|
||||
t1 = t[N4+i];
|
||||
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
|
||||
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
|
||||
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
|
||||
yr = PSHR32(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
|
||||
yi = PSHR32(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
|
||||
/* We swap real and imag because we're using an FFT instead of an IFFT. */
|
||||
re = yp1[1];
|
||||
im = yp1[0];
|
||||
@ -312,8 +341,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
|
||||
t0 = t[(N4-i-1)];
|
||||
t1 = t[(N2-i-1)];
|
||||
/* We'd scale up by 2 here, but instead it's done when mixing the windows */
|
||||
yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
|
||||
yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
|
||||
yr = PSHR32(ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1)), IMDCT_HEADROOM);
|
||||
yi = PSHR32(SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0)), IMDCT_HEADROOM);
|
||||
yp1[0] = yr;
|
||||
yp0[1] = yi;
|
||||
yp0 += 2;
|
||||
@ -325,16 +354,16 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
|
||||
{
|
||||
kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
|
||||
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
|
||||
const opus_val16 * OPUS_RESTRICT wp1 = window;
|
||||
const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
|
||||
const celt_coef * OPUS_RESTRICT wp1 = window;
|
||||
const celt_coef * OPUS_RESTRICT wp2 = window+overlap-1;
|
||||
|
||||
for(i = 0; i < overlap/2; i++)
|
||||
{
|
||||
kiss_fft_scalar x1, x2;
|
||||
x1 = *xp1;
|
||||
x2 = *yp1;
|
||||
*yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
|
||||
*xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
|
||||
*yp1++ = SUB32_ovflw(S_MUL(x2, *wp2), S_MUL(x1, *wp1));
|
||||
*xp1-- = ADD32_ovflw(S_MUL(x2, *wp1), S_MUL(x1, *wp2));
|
||||
wp1++;
|
||||
wp2--;
|
||||
}
|
||||
|
11
third_party/opus/src/celt/mdct.h
vendored
11
third_party/opus/src/celt/mdct.h
vendored
@ -57,6 +57,9 @@ typedef struct {
|
||||
#include "arm/mdct_arm.h"
|
||||
#endif
|
||||
|
||||
/* There should be 2 bits of headroom in the IMDCT which we can take
|
||||
advantage of to maximize accuracy. */
|
||||
#define IMDCT_HEADROOM 2
|
||||
|
||||
int clt_mdct_init(mdct_lookup *l,int N, int maxshift, int arch);
|
||||
void clt_mdct_clear(mdct_lookup *l, int arch);
|
||||
@ -64,14 +67,14 @@ void clt_mdct_clear(mdct_lookup *l, int arch);
|
||||
/** Compute a forward MDCT and scale by 4/N, trashes the input array */
|
||||
void clt_mdct_forward_c(const mdct_lookup *l, kiss_fft_scalar *in,
|
||||
kiss_fft_scalar * OPUS_RESTRICT out,
|
||||
const opus_val16 *window, int overlap,
|
||||
const celt_coef *window, int overlap,
|
||||
int shift, int stride, int arch);
|
||||
|
||||
/** Compute a backward MDCT (no scaling) and performs weighted overlap-add
|
||||
(scales implicitly by 1/2) */
|
||||
void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in,
|
||||
kiss_fft_scalar * OPUS_RESTRICT out,
|
||||
const opus_val16 * OPUS_RESTRICT window,
|
||||
const celt_coef * OPUS_RESTRICT window,
|
||||
int overlap, int shift, int stride, int arch);
|
||||
|
||||
#if !defined(OVERRIDE_OPUS_MDCT)
|
||||
@ -80,7 +83,7 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in,
|
||||
|
||||
extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
|
||||
const mdct_lookup *l, kiss_fft_scalar *in,
|
||||
kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
|
||||
kiss_fft_scalar * OPUS_RESTRICT out, const celt_coef *window,
|
||||
int overlap, int shift, int stride, int arch);
|
||||
|
||||
#define clt_mdct_forward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
|
||||
@ -90,7 +93,7 @@ extern void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(
|
||||
|
||||
extern void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(
|
||||
const mdct_lookup *l, kiss_fft_scalar *in,
|
||||
kiss_fft_scalar * OPUS_RESTRICT out, const opus_val16 *window,
|
||||
kiss_fft_scalar * OPUS_RESTRICT out, const celt_coef *window,
|
||||
int overlap, int shift, int stride, int arch);
|
||||
|
||||
#define clt_mdct_backward(_l, _in, _out, _window, _overlap, _shift, _stride, _arch) \
|
||||
|
13
third_party/opus/src/celt/meson.build
vendored
13
third_party/opus/src/celt/meson.build
vendored
@ -6,6 +6,8 @@ celt_sse2_sources = sources['CELT_SOURCES_SSE2']
|
||||
|
||||
celt_sse4_1_sources = sources['CELT_SOURCES_SSE4_1']
|
||||
|
||||
celt_avx2_sources = sources['CELT_SOURCES_AVX2']
|
||||
|
||||
celt_neon_intr_sources = sources['CELT_SOURCES_ARM_NEON_INTR']
|
||||
|
||||
celt_static_libs = []
|
||||
@ -14,7 +16,7 @@ if host_cpu_family in ['x86', 'x86_64'] and opus_conf.has('OPUS_HAVE_RTCD')
|
||||
celt_sources += sources['CELT_SOURCES_X86_RTCD']
|
||||
endif
|
||||
|
||||
foreach intr_name : ['sse', 'sse2', 'sse4_1', 'neon_intr']
|
||||
foreach intr_name : ['sse', 'sse2', 'sse4_1', 'avx2', 'neon_intr']
|
||||
have_intr = get_variable('have_' + intr_name)
|
||||
if not have_intr
|
||||
continue
|
||||
@ -41,14 +43,7 @@ if host_cpu_family in ['arm', 'aarch64'] and have_arm_intrinsics_or_asm
|
||||
celt_sources += sources['CELT_SOURCES_ARM_NE10']
|
||||
endif
|
||||
if opus_arm_external_asm
|
||||
arm2gnu = [find_program('arm/arm2gnu.pl')] + arm2gnu_args
|
||||
celt_sources_arm_asm = configure_file(input: 'arm/celt_pitch_xcorr_arm.s',
|
||||
output: '@BASENAME@-gnu.S',
|
||||
command: arm2gnu + ['@INPUT@'],
|
||||
capture: true)
|
||||
celt_arm_armopts_s = configure_file(input: 'arm/armopts.s.in',
|
||||
output: 'arm/armopts.s',
|
||||
configuration: opus_conf)
|
||||
subdir('arm')
|
||||
celt_static_libs += static_library('celt-armasm',
|
||||
celt_arm_armopts_s, celt_sources_arm_asm,
|
||||
install: false)
|
||||
|
6
third_party/opus/src/celt/mips/celt_mipsr1.h
vendored
6
third_party/opus/src/celt/mips/celt_mipsr1.h
vendored
@ -27,8 +27,8 @@
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __CELT_MIPSR1_H__
|
||||
#define __CELT_MIPSR1_H__
|
||||
#ifndef CELT_MIPSR1_H__
|
||||
#define CELT_MIPSR1_H__
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
@ -149,4 +149,4 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* __CELT_MIPSR1_H__ */
|
||||
#endif /* CELT_MIPSR1_H__ */
|
||||
|
6
third_party/opus/src/celt/mips/mdct_mipsr1.h
vendored
6
third_party/opus/src/celt/mips/mdct_mipsr1.h
vendored
@ -38,8 +38,8 @@
|
||||
MDCT implementation in FFMPEG, but has differences in signs, ordering
|
||||
and scaling in many places.
|
||||
*/
|
||||
#ifndef __MDCT_MIPSR1_H__
|
||||
#define __MDCT_MIPSR1_H__
|
||||
#ifndef MDCT_MIPSR1_H__
|
||||
#define MDCT_MIPSR1_H__
|
||||
|
||||
#ifndef SKIP_CONFIG_H
|
||||
#ifdef HAVE_CONFIG_H
|
||||
@ -285,4 +285,4 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif /* __MDCT_MIPSR1_H__ */
|
||||
#endif /* MDCT_MIPSR1_H__ */
|
||||
|
6
third_party/opus/src/celt/mips/vq_mipsr1.h
vendored
6
third_party/opus/src/celt/mips/vq_mipsr1.h
vendored
@ -26,8 +26,8 @@
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __VQ_MIPSR1_H__
|
||||
#define __VQ_MIPSR1_H__
|
||||
#ifndef VQ_MIPSR1_H__
|
||||
#define VQ_MIPSR1_H__
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
@ -113,4 +113,4 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
|
||||
/*return celt_sqrt(E);*/
|
||||
}
|
||||
|
||||
#endif /* __VQ_MIPSR1_H__ */
|
||||
#endif /* VQ_MIPSR1_H__ */
|
||||
|
9
third_party/opus/src/celt/modes.c
vendored
9
third_party/opus/src/celt/modes.c
vendored
@ -230,7 +230,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
|
||||
#ifdef CUSTOM_MODES
|
||||
CELTMode *mode=NULL;
|
||||
int res;
|
||||
opus_val16 *window;
|
||||
celt_coef *window;
|
||||
opus_int16 *logN;
|
||||
int LM;
|
||||
int arch = opus_select_arch();
|
||||
@ -370,7 +370,7 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
|
||||
if (mode->allocVectors==NULL)
|
||||
goto failure;
|
||||
|
||||
window = (opus_val16*)opus_alloc(mode->overlap*sizeof(opus_val16));
|
||||
window = (celt_coef*)opus_alloc(mode->overlap*sizeof(*window));
|
||||
if (window==NULL)
|
||||
goto failure;
|
||||
|
||||
@ -378,8 +378,13 @@ CELTMode *opus_custom_mode_create(opus_int32 Fs, int frame_size, int *error)
|
||||
for (i=0;i<mode->overlap;i++)
|
||||
window[i] = Q15ONE*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap));
|
||||
#else
|
||||
# ifdef ENABLE_QEXT
|
||||
for (i=0;i<mode->overlap;i++)
|
||||
window[i] = MIN32(2147483647, 2147483648*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap)));
|
||||
# else
|
||||
for (i=0;i<mode->overlap;i++)
|
||||
window[i] = MIN32(32767,floor(.5+32768.*sin(.5*M_PI* sin(.5*M_PI*(i+.5)/mode->overlap) * sin(.5*M_PI*(i+.5)/mode->overlap))));
|
||||
# endif
|
||||
#endif
|
||||
mode->window = window;
|
||||
|
||||
|
2
third_party/opus/src/celt/modes.h
vendored
2
third_party/opus/src/celt/modes.h
vendored
@ -66,7 +66,7 @@ struct OpusCustomMode {
|
||||
const unsigned char *allocVectors; /**< Number of bits in each band for several rates */
|
||||
const opus_int16 *logN;
|
||||
|
||||
const opus_val16 *window;
|
||||
const celt_coef *window;
|
||||
mdct_lookup mdct;
|
||||
PulseCache cache;
|
||||
};
|
||||
|
459
third_party/opus/src/celt/opus_custom_demo.c
vendored
459
third_party/opus/src/celt/opus_custom_demo.c
vendored
@ -39,172 +39,413 @@
|
||||
|
||||
#define MAX_PACKET 1275
|
||||
|
||||
static void print_usage(char **argv) {
|
||||
fprintf (stderr, "Usage: %s [-e | -d] <rate> <channels> <frame size> "
|
||||
" [<bytes per packet>] [options] "
|
||||
"<input> <output>\n", argv[0]);
|
||||
fprintf (stderr, " -e encode only (default is encode and decode)\n");
|
||||
fprintf (stderr, " -d decode only (default is encode and decode)\n");
|
||||
fprintf (stderr, " <bytes per packet>: required only when encoding\n");
|
||||
fprintf (stderr, "options:\n");
|
||||
fprintf (stderr, " -16 format is 16-bit little-endian (default)\n");
|
||||
fprintf (stderr, " -24 format is 24-bit little-endian\n");
|
||||
fprintf (stderr, " -f32 format is 32-bit float little-endian\n");
|
||||
fprintf (stderr, " -complexity <0-10> optional only when encoding\n");
|
||||
fprintf (stderr, " -loss <percentage> encoding (robsutness setting) and decoding (simulating loss)\n");
|
||||
}
|
||||
|
||||
static void int_to_char(opus_uint32 i, unsigned char ch[4])
|
||||
{
|
||||
ch[0] = i>>24;
|
||||
ch[1] = (i>>16)&0xFF;
|
||||
ch[2] = (i>>8)&0xFF;
|
||||
ch[3] = i&0xFF;
|
||||
}
|
||||
|
||||
static opus_uint32 char_to_int(unsigned char ch[4])
|
||||
{
|
||||
return ((opus_uint32)ch[0]<<24) | ((opus_uint32)ch[1]<<16)
|
||||
| ((opus_uint32)ch[2]<< 8) | (opus_uint32)ch[3];
|
||||
}
|
||||
|
||||
#define check_encoder_option(decode_only, opt) do {if (decode_only) {fprintf(stderr, "option %s is only for encoding\n", opt); goto failure;}} while(0)
|
||||
#define check_decoder_option(encode_only, opt) do {if (encode_only) {fprintf(stderr, "option %s is only for decoding\n", opt); goto failure;}} while(0)
|
||||
|
||||
#define FORMAT_S16_LE 0
|
||||
#define FORMAT_S24_LE 1
|
||||
#define FORMAT_F32_LE 2
|
||||
|
||||
static const int format_size[3] = {2, 3, 4};
|
||||
|
||||
typedef union {
|
||||
opus_int32 i;
|
||||
float f;
|
||||
} float_bits;
|
||||
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
int err;
|
||||
int ret=1;
|
||||
int args;
|
||||
opus_uint32 enc_final_range;
|
||||
opus_uint32 dec_final_range;
|
||||
int encode_only=0, decode_only=0;
|
||||
char *inFile, *outFile;
|
||||
FILE *fin, *fout;
|
||||
FILE *fin=NULL, *fout=NULL;
|
||||
OpusCustomMode *mode=NULL;
|
||||
OpusCustomEncoder *enc;
|
||||
OpusCustomDecoder *dec;
|
||||
OpusCustomEncoder *enc=NULL;
|
||||
OpusCustomDecoder *dec=NULL;
|
||||
int len;
|
||||
opus_int32 frame_size, channels, rate;
|
||||
int bytes_per_packet;
|
||||
int format=FORMAT_S16_LE;
|
||||
int bytes_per_packet=0;
|
||||
unsigned char data[MAX_PACKET];
|
||||
int complexity;
|
||||
#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
|
||||
int complexity=-1;
|
||||
float percent_loss = -1;
|
||||
int i;
|
||||
#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
|
||||
double rmsd = 0;
|
||||
#endif
|
||||
int count = 0;
|
||||
opus_int32 skip;
|
||||
opus_int16 *in, *out;
|
||||
if (argc != 9 && argc != 8 && argc != 7)
|
||||
opus_int32 *in=NULL, *out=NULL;
|
||||
unsigned char *fbytes=NULL;
|
||||
args = 1;
|
||||
if (argc < 7)
|
||||
{
|
||||
fprintf (stderr, "Usage: test_opus_custom <rate> <channels> <frame size> "
|
||||
" <bytes per packet> [<complexity> [packet loss rate]] "
|
||||
"<input> <output>\n");
|
||||
return 1;
|
||||
print_usage(argv);
|
||||
goto failure;
|
||||
}
|
||||
if (strcmp(argv[args], "-e")==0)
|
||||
{
|
||||
encode_only = 1;
|
||||
args++;
|
||||
} else if (strcmp(argv[args], "-d")==0)
|
||||
{
|
||||
decode_only = 1;
|
||||
args++;
|
||||
}
|
||||
|
||||
rate = (opus_int32)atol(argv[args]);
|
||||
args++;
|
||||
|
||||
if (rate != 8000 && rate != 12000
|
||||
&& rate != 16000 && rate != 24000
|
||||
&& rate != 48000)
|
||||
{
|
||||
fprintf(stderr, "Supported sampling rates are 8000, 12000, "
|
||||
"16000, 24000 and 48000.\n");
|
||||
goto failure;
|
||||
}
|
||||
|
||||
channels = atoi(argv[args]);
|
||||
args++;
|
||||
|
||||
if (channels < 1 || channels > 2)
|
||||
{
|
||||
fprintf(stderr, "Opus_demo supports only 1 or 2 channels.\n");
|
||||
goto failure;
|
||||
}
|
||||
|
||||
frame_size = atoi(argv[args]);
|
||||
args++;
|
||||
|
||||
if (!decode_only)
|
||||
{
|
||||
bytes_per_packet = (opus_int32)atol(argv[args]);
|
||||
args++;
|
||||
if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET)
|
||||
{
|
||||
fprintf (stderr, "bytes per packet must be between 0 and %d\n",
|
||||
MAX_PACKET);
|
||||
goto failure;
|
||||
}
|
||||
}
|
||||
|
||||
rate = (opus_int32)atol(argv[1]);
|
||||
channels = atoi(argv[2]);
|
||||
frame_size = atoi(argv[3]);
|
||||
mode = opus_custom_mode_create(rate, frame_size, NULL);
|
||||
if (mode == NULL)
|
||||
{
|
||||
fprintf(stderr, "failed to create a mode\n");
|
||||
return 1;
|
||||
goto failure;
|
||||
}
|
||||
|
||||
bytes_per_packet = atoi(argv[4]);
|
||||
if (bytes_per_packet < 0 || bytes_per_packet > MAX_PACKET)
|
||||
while( args < argc - 2 ) {
|
||||
/* process command line options */
|
||||
if( strcmp( argv[ args ], "-complexity" ) == 0 ) {
|
||||
check_encoder_option(decode_only, "-complexity");
|
||||
args++;
|
||||
complexity=atoi(argv[args]);
|
||||
args++;
|
||||
} else if( strcmp( argv[ args ], "-loss" ) == 0 ) {
|
||||
args++;
|
||||
percent_loss = atof(argv[args]);
|
||||
args++;
|
||||
} else if( strcmp( argv[ args ], "-16" ) == 0 ) {
|
||||
format = FORMAT_S16_LE;
|
||||
args++;
|
||||
} else if( strcmp( argv[ args ], "-24" ) == 0 ) {
|
||||
format = FORMAT_S24_LE;
|
||||
args++;
|
||||
} else if( strcmp( argv[ args ], "-f32" ) == 0 ) {
|
||||
format = FORMAT_F32_LE;
|
||||
args++;
|
||||
} else {
|
||||
printf( "Error: unrecognized setting: %s\n\n", argv[ args ] );
|
||||
print_usage( argv );
|
||||
goto failure;
|
||||
}
|
||||
}
|
||||
if (!decode_only) {
|
||||
enc = opus_custom_encoder_create(mode, channels, &err);
|
||||
if (err != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err));
|
||||
goto failure;
|
||||
}
|
||||
if (complexity >= 0)
|
||||
{
|
||||
opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity));
|
||||
}
|
||||
if (percent_loss >= 0) {
|
||||
opus_custom_encoder_ctl(enc, OPUS_SET_PACKET_LOSS_PERC((int)percent_loss));
|
||||
}
|
||||
}
|
||||
if (!encode_only) {
|
||||
dec = opus_custom_decoder_create(mode, channels, &err);
|
||||
if (err != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err));
|
||||
goto failure;
|
||||
}
|
||||
opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip));
|
||||
}
|
||||
if (argc-args != 2)
|
||||
{
|
||||
fprintf (stderr, "bytes per packet must be between 0 and %d\n",
|
||||
MAX_PACKET);
|
||||
return 1;
|
||||
print_usage(argv);
|
||||
goto failure;
|
||||
}
|
||||
|
||||
inFile = argv[argc-2];
|
||||
fin = fopen(inFile, "rb");
|
||||
if (!fin)
|
||||
{
|
||||
fprintf (stderr, "Could not open input file %s\n", argv[argc-2]);
|
||||
return 1;
|
||||
goto failure;
|
||||
}
|
||||
outFile = argv[argc-1];
|
||||
fout = fopen(outFile, "wb+");
|
||||
if (!fout)
|
||||
{
|
||||
fprintf (stderr, "Could not open output file %s\n", argv[argc-1]);
|
||||
fclose(fin);
|
||||
return 1;
|
||||
goto failure;
|
||||
}
|
||||
|
||||
enc = opus_custom_encoder_create(mode, channels, &err);
|
||||
if (err != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to create the encoder: %s\n", opus_strerror(err));
|
||||
fclose(fin);
|
||||
fclose(fout);
|
||||
return 1;
|
||||
}
|
||||
dec = opus_custom_decoder_create(mode, channels, &err);
|
||||
if (err != 0)
|
||||
{
|
||||
fprintf(stderr, "Failed to create the decoder: %s\n", opus_strerror(err));
|
||||
fclose(fin);
|
||||
fclose(fout);
|
||||
return 1;
|
||||
}
|
||||
opus_custom_decoder_ctl(dec, OPUS_GET_LOOKAHEAD(&skip));
|
||||
|
||||
if (argc>7)
|
||||
{
|
||||
complexity=atoi(argv[5]);
|
||||
opus_custom_encoder_ctl(enc,OPUS_SET_COMPLEXITY(complexity));
|
||||
}
|
||||
|
||||
in = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
|
||||
out = (opus_int16*)malloc(frame_size*channels*sizeof(opus_int16));
|
||||
in = (opus_int32*)malloc(frame_size*channels*sizeof(opus_int32));
|
||||
out = (opus_int32*)malloc(frame_size*channels*sizeof(opus_int32));
|
||||
fbytes = (unsigned char*)malloc(frame_size*channels*4);
|
||||
|
||||
while (!feof(fin))
|
||||
{
|
||||
int ret;
|
||||
err = fread(in, sizeof(short), frame_size*channels, fin);
|
||||
if (feof(fin))
|
||||
break;
|
||||
len = opus_custom_encode(enc, in, frame_size, data, bytes_per_packet);
|
||||
if (len <= 0)
|
||||
fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len));
|
||||
|
||||
/* This is for simulating bit errors */
|
||||
#if 0
|
||||
int errors = 0;
|
||||
int eid = 0;
|
||||
/* This simulates random bit error */
|
||||
for (i=0;i<len*8;i++)
|
||||
int lost = 0;
|
||||
if (decode_only)
|
||||
{
|
||||
if (rand()%atoi(argv[8])==0)
|
||||
{
|
||||
if (i<64)
|
||||
unsigned char ch[4];
|
||||
size_t num_read = fread(ch, 1, 4, fin);
|
||||
if (num_read!=4)
|
||||
break;
|
||||
len = char_to_int(ch);
|
||||
if (len>MAX_PACKET || len<0)
|
||||
{
|
||||
fprintf(stderr, "Invalid payload length: %d\n",len);
|
||||
break;
|
||||
}
|
||||
num_read = fread(ch, 1, 4, fin);
|
||||
if (num_read!=4)
|
||||
break;
|
||||
enc_final_range = char_to_int(ch);
|
||||
num_read = fread(data, 1, len, fin);
|
||||
if (num_read!=(size_t)len)
|
||||
{
|
||||
fprintf(stderr, "Ran out of input, "
|
||||
"expecting %d bytes got %d\n",
|
||||
len,(int)num_read);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
err = fread(fbytes, format_size[format], frame_size*channels, fin);
|
||||
if (feof(fin))
|
||||
break;
|
||||
if (format == FORMAT_S16_LE) {
|
||||
for(i=0;i<frame_size*channels;i++)
|
||||
{
|
||||
errors++;
|
||||
eid = i;
|
||||
opus_int32 s;
|
||||
s=fbytes[2*i+1]<<8|fbytes[2*i];
|
||||
s=((s&0xFFFF)^0x8000)-0x8000;
|
||||
in[i]=s*256;
|
||||
}
|
||||
} else if (format == FORMAT_S24_LE) {
|
||||
for(i=0;i<frame_size*channels;i++)
|
||||
{
|
||||
opus_int32 s;
|
||||
s=fbytes[3*i+2]<<16|fbytes[3*i+1]<<8|fbytes[3*i];
|
||||
s=((s&0xFFFFFF)^0x800000)-0x800000;
|
||||
in[i]=s;
|
||||
}
|
||||
} else if (format == FORMAT_F32_LE) {
|
||||
for(i=0;i<frame_size*channels;i++)
|
||||
{
|
||||
float_bits s;
|
||||
s.i=fbytes[4*i+3]<<24|fbytes[4*i+2]<<16|fbytes[4*i+1]<<8|fbytes[4*i];
|
||||
in[i]=(int)floor(.5 + s.f*8388608);
|
||||
}
|
||||
data[i/8] ^= 1<<(7-(i%8));
|
||||
}
|
||||
len = opus_custom_encode24(enc, in, frame_size, data, bytes_per_packet);
|
||||
opus_custom_encoder_ctl(enc, OPUS_GET_FINAL_RANGE(&enc_final_range));
|
||||
if (len <= 0)
|
||||
fprintf (stderr, "opus_custom_encode() failed: %s\n", opus_strerror(len));
|
||||
}
|
||||
if (errors == 1)
|
||||
data[eid/8] ^= 1<<(7-(eid%8));
|
||||
else if (errors%2 == 1)
|
||||
data[rand()%8] ^= 1<<rand()%8;
|
||||
|
||||
if (encode_only)
|
||||
{
|
||||
unsigned char int_field[4];
|
||||
int_to_char(len, int_field);
|
||||
if (fwrite(int_field, 1, 4, fout) != 4) {
|
||||
fprintf(stderr, "Error writing.\n");
|
||||
goto failure;
|
||||
}
|
||||
int_to_char(enc_final_range, int_field);
|
||||
if (fwrite(int_field, 1, 4, fout) != 4) {
|
||||
fprintf(stderr, "Error writing.\n");
|
||||
goto failure;
|
||||
}
|
||||
if (fwrite(data, 1, len, fout) != (unsigned)len) {
|
||||
fprintf(stderr, "Error writing.\n");
|
||||
goto failure;
|
||||
}
|
||||
} else {
|
||||
/* This is for simulating bit errors */
|
||||
#if 0
|
||||
int errors = 0;
|
||||
int eid = 0;
|
||||
/* This simulates random bit error */
|
||||
for (i=0;i<len*8;i++)
|
||||
{
|
||||
if (rand()%atoi(argv[8])==0)
|
||||
{
|
||||
if (i<64)
|
||||
{
|
||||
errors++;
|
||||
eid = i;
|
||||
}
|
||||
data[i/8] ^= 1<<(7-(i%8));
|
||||
}
|
||||
}
|
||||
if (errors == 1)
|
||||
data[eid/8] ^= 1<<(7-(eid%8));
|
||||
else if (errors%2 == 1)
|
||||
data[rand()%8] ^= 1<<rand()%8;
|
||||
#endif
|
||||
|
||||
#if 1 /* Set to zero to use the encoder's output instead */
|
||||
/* This is to simulate packet loss */
|
||||
if (argc==9 && rand()%1000<atoi(argv[argc-3]))
|
||||
/*if (errors && (errors%2==0))*/
|
||||
ret = opus_custom_decode(dec, NULL, len, out, frame_size);
|
||||
else
|
||||
ret = opus_custom_decode(dec, data, len, out, frame_size);
|
||||
if (ret < 0)
|
||||
fprintf(stderr, "opus_custom_decode() failed: %s\n", opus_strerror(ret));
|
||||
/* This is to simulate packet loss */
|
||||
lost = percent_loss != 0 && (float)rand()/RAND_MAX<.01*percent_loss;
|
||||
if (lost)
|
||||
/*if (errors && (errors%2==0))*/
|
||||
ret = opus_custom_decode24(dec, NULL, len, out, frame_size);
|
||||
else
|
||||
ret = opus_custom_decode24(dec, data, len, out, frame_size);
|
||||
opus_custom_decoder_ctl(dec, OPUS_GET_FINAL_RANGE(&dec_final_range));
|
||||
if (ret < 0)
|
||||
fprintf(stderr, "opus_custom_decode() failed: %s\n", opus_strerror(ret));
|
||||
#else
|
||||
for (i=0;i<ret*channels;i++)
|
||||
out[i] = in[i];
|
||||
for (i=0;i<ret*channels;i++)
|
||||
out[i] = in[i];
|
||||
#endif
|
||||
#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
|
||||
for (i=0;i<ret*channels;i++)
|
||||
{
|
||||
rmsd += (in[i]-out[i])*1.0*(in[i]-out[i]);
|
||||
/*out[i] -= in[i];*/
|
||||
}
|
||||
if (!encode_only && !decode_only)
|
||||
{
|
||||
for (i=0;i<ret*channels;i++)
|
||||
{
|
||||
rmsd += (in[i]-out[i])*1.0*(in[i]-out[i]);
|
||||
/*out[i] -= in[i];*/
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (format == FORMAT_S16_LE) {
|
||||
for(i=0;i<(ret-skip)*channels;i++)
|
||||
{
|
||||
opus_int32 s;
|
||||
s=(out[i+(skip*channels)]+128)>>8;
|
||||
if (s > 32767) s = 32767;
|
||||
if (s < -32767) s = -32767;
|
||||
fbytes[2*i]=s&0xFF;
|
||||
fbytes[2*i+1]=(s>>8)&0xFF;
|
||||
}
|
||||
} else if (format == FORMAT_S24_LE) {
|
||||
for(i=0;i<(ret-skip)*channels;i++)
|
||||
{
|
||||
opus_int32 s;
|
||||
s=out[i+(skip*channels)];
|
||||
if (s > 8388607) s = 8388607;
|
||||
if (s < -8388607) s = -8388607;
|
||||
fbytes[3*i]=s&0xFF;
|
||||
fbytes[3*i+1]=(s>>8)&0xFF;
|
||||
fbytes[3*i+2]=(s>>16)&0xFF;
|
||||
}
|
||||
} else if (format == FORMAT_F32_LE) {
|
||||
for(i=0;i<(ret-skip)*channels;i++)
|
||||
{
|
||||
float_bits s;
|
||||
s.f=out[i+(skip*channels)]*(1.f/8388608.f);
|
||||
fbytes[4*i]=s.i&0xFF;
|
||||
fbytes[4*i+1]=(s.i>>8)&0xFF;
|
||||
fbytes[4*i+2]=(s.i>>16)&0xFF;
|
||||
fbytes[4*i+3]=(s.i>>24)&0xFF;
|
||||
}
|
||||
}
|
||||
fwrite(fbytes, format_size[format], (ret-skip)*channels, fout);
|
||||
}
|
||||
|
||||
/* compare final range encoder rng values of encoder and decoder */
|
||||
if( enc_final_range!=0 && !encode_only
|
||||
&& !lost
|
||||
&& dec_final_range != enc_final_range ) {
|
||||
fprintf (stderr, "Error: Range coder state mismatch "
|
||||
"between encoder and decoder "
|
||||
"in frame %ld: 0x%8lx vs 0x%8lx\n",
|
||||
(long)count,
|
||||
(unsigned long)enc_final_range,
|
||||
(unsigned long)dec_final_range);
|
||||
goto failure;
|
||||
}
|
||||
|
||||
count++;
|
||||
fwrite(out+skip*channels, sizeof(short), (ret-skip)*channels, fout);
|
||||
skip = 0;
|
||||
}
|
||||
PRINT_MIPS(stderr);
|
||||
|
||||
opus_custom_encoder_destroy(enc);
|
||||
opus_custom_decoder_destroy(dec);
|
||||
fclose(fin);
|
||||
fclose(fout);
|
||||
opus_custom_mode_destroy(mode);
|
||||
free(in);
|
||||
free(out);
|
||||
ret = EXIT_SUCCESS;
|
||||
#if !(defined (FIXED_POINT) && !defined(CUSTOM_MODES)) && defined(RESYNTH)
|
||||
if (rmsd > 0)
|
||||
if (!encode_only && !decode_only)
|
||||
{
|
||||
rmsd = sqrt(rmsd/(1.0*frame_size*channels*count));
|
||||
fprintf (stderr, "Error: encoder doesn't match decoder\n");
|
||||
fprintf (stderr, "RMS mismatch is %f\n", rmsd);
|
||||
return 1;
|
||||
} else {
|
||||
fprintf (stderr, "Encoder matches decoder!!\n");
|
||||
if (rmsd > 0)
|
||||
{
|
||||
rmsd = sqrt(rmsd/(1.0*frame_size*channels*count));
|
||||
fprintf (stderr, "Error: encoder doesn't match decoder\n");
|
||||
fprintf (stderr, "RMS mismatch is %f\n", rmsd);
|
||||
ret = 1;
|
||||
} else {
|
||||
fprintf (stderr, "Encoder matches decoder!!\n");
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
failure:
|
||||
/* Cleanup after ourselves. */
|
||||
if (enc) opus_custom_encoder_destroy(enc);
|
||||
if (dec) opus_custom_decoder_destroy(dec);
|
||||
if (fin) fclose(fin);
|
||||
if (fout) fclose(fout);
|
||||
if (mode) opus_custom_mode_destroy(mode);
|
||||
if (in) free(in);
|
||||
if (out) free(out);
|
||||
if (fbytes) free(fbytes);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
14
third_party/opus/src/celt/os_support.h
vendored
14
third_party/opus/src/celt/os_support.h
vendored
@ -41,7 +41,7 @@
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
|
||||
/** Opus wrapper for malloc(). To do your own dynamic allocation replace this function, opus_realloc, and opus_free */
|
||||
#ifndef OVERRIDE_OPUS_ALLOC
|
||||
static OPUS_INLINE void *opus_alloc (size_t size)
|
||||
{
|
||||
@ -49,7 +49,15 @@ static OPUS_INLINE void *opus_alloc (size_t size)
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
|
||||
#ifndef OVERRIDE_OPUS_REALLOC
|
||||
static OPUS_INLINE void *opus_realloc (void *ptr, size_t size)
|
||||
{
|
||||
return realloc(ptr, size);
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Used only for non-threadsafe pseudostack.
|
||||
If desired, this can always return the same area of memory rather than allocating a new one every time. */
|
||||
#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
|
||||
static OPUS_INLINE void *opus_alloc_scratch (size_t size)
|
||||
{
|
||||
@ -58,7 +66,7 @@ static OPUS_INLINE void *opus_alloc_scratch (size_t size)
|
||||
}
|
||||
#endif
|
||||
|
||||
/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
|
||||
/** Opus wrapper for free(). To do your own dynamic allocation replace this function, opus_realloc, and opus_free */
|
||||
#ifndef OVERRIDE_OPUS_FREE
|
||||
static OPUS_INLINE void opus_free (void *ptr)
|
||||
{
|
||||
|
13
third_party/opus/src/celt/pitch.c
vendored
13
third_party/opus/src/celt/pitch.c
vendored
@ -262,7 +262,16 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
|
||||
for (i=0;i<max_pitch-3;i+=4)
|
||||
{
|
||||
opus_val32 sum[4]={0,0,0,0};
|
||||
xcorr_kernel(_x, _y+i, sum, len, arch);
|
||||
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
|
||||
{
|
||||
opus_val32 sum_c[4]={0,0,0,0};
|
||||
xcorr_kernel_c(_x, _y+i, sum_c, len);
|
||||
#endif
|
||||
xcorr_kernel(_x, _y+i, sum, len, arch);
|
||||
#if defined(OPUS_CHECK_ASM) && defined(FIXED_POINT)
|
||||
celt_assert(memcmp(sum, sum_c, sizeof(sum)) == 0);
|
||||
}
|
||||
#endif
|
||||
xcorr[i]=sum[0];
|
||||
xcorr[i+1]=sum[1];
|
||||
xcorr[i+2]=sum[2];
|
||||
@ -427,7 +436,7 @@ static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy
|
||||
den = celt_rsqrt_norm(x2y2);
|
||||
g = MULT16_32_Q15(den, xy);
|
||||
g = VSHR32(g, (shift>>1)-1);
|
||||
return EXTRACT16(MIN32(g, Q15ONE));
|
||||
return EXTRACT16(MAX32(-Q15ONE, MIN32(g, Q15ONE)));
|
||||
}
|
||||
#else
|
||||
static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
|
||||
|
11
third_party/opus/src/celt/pitch.h
vendored
11
third_party/opus/src/celt/pitch.h
vendored
@ -189,4 +189,15 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
|
||||
# define celt_pitch_xcorr celt_pitch_xcorr_c
|
||||
#endif
|
||||
|
||||
#ifdef NON_STATIC_COMB_FILTER_CONST_C
|
||||
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
|
||||
opus_val16 g10, opus_val16 g11, opus_val16 g12);
|
||||
#endif
|
||||
|
||||
#ifndef OVERRIDE_COMB_FILTER_CONST
|
||||
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
|
||||
((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
109
third_party/opus/src/celt/quant_bands.c
vendored
109
third_party/opus/src/celt/quant_bands.c
vendored
@ -139,25 +139,25 @@ static const unsigned char e_prob_model[4][2][42] = {
|
||||
|
||||
static const unsigned char small_energy_icdf[3]={2,1,0};
|
||||
|
||||
static opus_val32 loss_distortion(const opus_val16 *eBands, opus_val16 *oldEBands, int start, int end, int len, int C)
|
||||
static opus_val32 loss_distortion(const celt_glog *eBands, celt_glog *oldEBands, int start, int end, int len, int C)
|
||||
{
|
||||
int c, i;
|
||||
opus_val32 dist = 0;
|
||||
c=0; do {
|
||||
for (i=start;i<end;i++)
|
||||
{
|
||||
opus_val16 d = SUB16(SHR16(eBands[i+c*len], 3), SHR16(oldEBands[i+c*len], 3));
|
||||
celt_glog d = PSHR32(SUB32(eBands[i+c*len], oldEBands[i+c*len]), DB_SHIFT-7);
|
||||
dist = MAC16_16(dist, d,d);
|
||||
}
|
||||
} while (++c<C);
|
||||
return MIN32(200,SHR32(dist,2*DB_SHIFT-6));
|
||||
return MIN32(200,SHR32(dist,14));
|
||||
}
|
||||
|
||||
static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
|
||||
const opus_val16 *eBands, opus_val16 *oldEBands,
|
||||
const celt_glog *eBands, celt_glog *oldEBands,
|
||||
opus_int32 budget, opus_int32 tell,
|
||||
const unsigned char *prob_model, opus_val16 *error, ec_enc *enc,
|
||||
int C, int LM, int intra, opus_val16 max_decay, int lfe)
|
||||
const unsigned char *prob_model, celt_glog *error, ec_enc *enc,
|
||||
int C, int LM, int intra, celt_glog max_decay, int lfe)
|
||||
{
|
||||
int i, c;
|
||||
int badness = 0;
|
||||
@ -184,29 +184,28 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
|
||||
int bits_left;
|
||||
int qi, qi0;
|
||||
opus_val32 q;
|
||||
opus_val16 x;
|
||||
celt_glog x;
|
||||
opus_val32 f, tmp;
|
||||
opus_val16 oldE;
|
||||
opus_val16 decay_bound;
|
||||
celt_glog oldE;
|
||||
celt_glog decay_bound;
|
||||
x = eBands[i+c*m->nbEBands];
|
||||
oldE = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]);
|
||||
oldE = MAXG(-GCONST(9.f), oldEBands[i+c*m->nbEBands]);
|
||||
#ifdef FIXED_POINT
|
||||
f = SHL32(EXTEND32(x),7) - PSHR32(MULT16_16(coef,oldE), 8) - prev[c];
|
||||
f = x - MULT16_32_Q15(coef,oldE) - prev[c];
|
||||
/* Rounding to nearest integer here is really important! */
|
||||
qi = (f+QCONST32(.5f,DB_SHIFT+7))>>(DB_SHIFT+7);
|
||||
decay_bound = EXTRACT16(MAX32(-QCONST16(28.f,DB_SHIFT),
|
||||
SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay)));
|
||||
qi = (f+QCONST32(.5f,DB_SHIFT))>>DB_SHIFT;
|
||||
decay_bound = MAXG(-GCONST(28.f), SUB32((opus_val32)oldEBands[i+c*m->nbEBands],max_decay));
|
||||
#else
|
||||
f = x-coef*oldE-prev[c];
|
||||
/* Rounding to nearest integer here is really important! */
|
||||
qi = (int)floor(.5f+f);
|
||||
decay_bound = MAX16(-QCONST16(28.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]) - max_decay;
|
||||
decay_bound = MAXG(-GCONST(28.f), oldEBands[i+c*m->nbEBands]) - max_decay;
|
||||
#endif
|
||||
/* Prevent the energy from going down too quickly (e.g. for bands
|
||||
that have just one bin) */
|
||||
if (qi < 0 && x < decay_bound)
|
||||
{
|
||||
qi += (int)SHR16(SUB16(decay_bound,x), DB_SHIFT);
|
||||
qi += (int)SHR32(SUB32(decay_bound,x), DB_SHIFT);
|
||||
if (qi > 0)
|
||||
qi = 0;
|
||||
}
|
||||
@ -243,30 +242,30 @@ static int quant_coarse_energy_impl(const CELTMode *m, int start, int end,
|
||||
}
|
||||
else
|
||||
qi = -1;
|
||||
error[i+c*m->nbEBands] = PSHR32(f,7) - SHL16(qi,DB_SHIFT);
|
||||
error[i+c*m->nbEBands] = f - SHL32(qi,DB_SHIFT);
|
||||
badness += abs(qi0-qi);
|
||||
q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
|
||||
|
||||
tmp = PSHR32(MULT16_16(coef,oldE),8) + prev[c] + SHL32(q,7);
|
||||
tmp = MULT16_32_Q15(coef,oldE) + prev[c] + q;
|
||||
#ifdef FIXED_POINT
|
||||
tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
|
||||
tmp = MAX32(-GCONST(28.f), tmp);
|
||||
#endif
|
||||
oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
|
||||
prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
|
||||
oldEBands[i+c*m->nbEBands] = tmp;
|
||||
prev[c] = prev[c] + q - MULT16_32_Q15(beta,q);
|
||||
} while (++c < C);
|
||||
}
|
||||
return lfe ? 0 : badness;
|
||||
}
|
||||
|
||||
void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
|
||||
const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
|
||||
opus_val16 *error, ec_enc *enc, int C, int LM, int nbAvailableBytes,
|
||||
const celt_glog *eBands, celt_glog *oldEBands, opus_uint32 budget,
|
||||
celt_glog *error, ec_enc *enc, int C, int LM, int nbAvailableBytes,
|
||||
int force_intra, opus_val32 *delayedIntra, int two_pass, int loss_rate, int lfe)
|
||||
{
|
||||
int intra;
|
||||
opus_val16 max_decay;
|
||||
VARDECL(opus_val16, oldEBands_intra);
|
||||
VARDECL(opus_val16, error_intra);
|
||||
celt_glog max_decay;
|
||||
VARDECL(celt_glog, oldEBands_intra);
|
||||
VARDECL(celt_glog, error_intra);
|
||||
ec_enc enc_start_state;
|
||||
opus_uint32 tell;
|
||||
int badness1=0;
|
||||
@ -282,21 +281,21 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
|
||||
if (tell+3 > budget)
|
||||
two_pass = intra = 0;
|
||||
|
||||
max_decay = QCONST16(16.f,DB_SHIFT);
|
||||
max_decay = GCONST(16.f);
|
||||
if (end-start>10)
|
||||
{
|
||||
#ifdef FIXED_POINT
|
||||
max_decay = MIN32(max_decay, SHL32(EXTEND32(nbAvailableBytes),DB_SHIFT-3));
|
||||
max_decay = SHL32(MIN32(SHR32(max_decay,DB_SHIFT-3), EXTEND32(nbAvailableBytes)),DB_SHIFT-3);
|
||||
#else
|
||||
max_decay = MIN32(max_decay, .125f*nbAvailableBytes);
|
||||
#endif
|
||||
}
|
||||
if (lfe)
|
||||
max_decay = QCONST16(3.f,DB_SHIFT);
|
||||
max_decay = GCONST(3.f);
|
||||
enc_start_state = *enc;
|
||||
|
||||
ALLOC(oldEBands_intra, C*m->nbEBands, opus_val16);
|
||||
ALLOC(error_intra, C*m->nbEBands, opus_val16);
|
||||
ALLOC(oldEBands_intra, C*m->nbEBands, celt_glog);
|
||||
ALLOC(error_intra, C*m->nbEBands, celt_glog);
|
||||
OPUS_COPY(oldEBands_intra, oldEBands, C*m->nbEBands);
|
||||
|
||||
if (two_pass || intra)
|
||||
@ -358,7 +357,7 @@ void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
|
||||
RESTORE_STACK;
|
||||
}
|
||||
|
||||
void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C)
|
||||
void quant_fine_energy(const CELTMode *m, int start, int end, celt_glog *oldEBands, celt_glog *error, int *fine_quant, ec_enc *enc, int C)
|
||||
{
|
||||
int i, c;
|
||||
|
||||
@ -371,10 +370,10 @@ void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBa
|
||||
c=0;
|
||||
do {
|
||||
int q2;
|
||||
opus_val16 offset;
|
||||
celt_glog offset;
|
||||
#ifdef FIXED_POINT
|
||||
/* Has to be without rounding */
|
||||
q2 = (error[i+c*m->nbEBands]+QCONST16(.5f,DB_SHIFT))>>(DB_SHIFT-fine_quant[i]);
|
||||
q2 = (error[i+c*m->nbEBands]+GCONST(.5f))>>(DB_SHIFT-fine_quant[i]);
|
||||
#else
|
||||
q2 = (int)floor((error[i+c*m->nbEBands]+.5f)*frac);
|
||||
#endif
|
||||
@ -384,7 +383,7 @@ void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBa
|
||||
q2 = 0;
|
||||
ec_enc_bits(enc, q2, fine_quant[i]);
|
||||
#ifdef FIXED_POINT
|
||||
offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
|
||||
offset = SUB32(VSHR32(2*q2+1, fine_quant[i]-DB_SHIFT+1), GCONST(.5f));
|
||||
#else
|
||||
offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
|
||||
#endif
|
||||
@ -395,7 +394,7 @@ void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBa
|
||||
}
|
||||
}
|
||||
|
||||
void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C)
|
||||
void quant_energy_finalise(const CELTMode *m, int start, int end, celt_glog *oldEBands, celt_glog *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C)
|
||||
{
|
||||
int i, prio, c;
|
||||
|
||||
@ -409,11 +408,11 @@ void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *ol
|
||||
c=0;
|
||||
do {
|
||||
int q2;
|
||||
opus_val16 offset;
|
||||
celt_glog offset;
|
||||
q2 = error[i+c*m->nbEBands]<0 ? 0 : 1;
|
||||
ec_enc_bits(enc, q2, 1);
|
||||
#ifdef FIXED_POINT
|
||||
offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
|
||||
offset = SHR32(SHL32(q2,DB_SHIFT)-GCONST(.5f),fine_quant[i]+1);
|
||||
#else
|
||||
offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
|
||||
#endif
|
||||
@ -425,7 +424,7 @@ void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *ol
|
||||
}
|
||||
}
|
||||
|
||||
void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM)
|
||||
void unquant_coarse_energy(const CELTMode *m, int start, int end, celt_glog *oldEBands, int intra, ec_dec *dec, int C, int LM)
|
||||
{
|
||||
const unsigned char *prob_model = e_prob_model[LM][intra];
|
||||
int i, c;
|
||||
@ -479,18 +478,18 @@ void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *ol
|
||||
qi = -1;
|
||||
q = (opus_val32)SHL32(EXTEND32(qi),DB_SHIFT);
|
||||
|
||||
oldEBands[i+c*m->nbEBands] = MAX16(-QCONST16(9.f,DB_SHIFT), oldEBands[i+c*m->nbEBands]);
|
||||
tmp = PSHR32(MULT16_16(coef,oldEBands[i+c*m->nbEBands]),8) + prev[c] + SHL32(q,7);
|
||||
oldEBands[i+c*m->nbEBands] = MAXG(-GCONST(9.f), oldEBands[i+c*m->nbEBands]);
|
||||
tmp = MULT16_32_Q15(coef,oldEBands[i+c*m->nbEBands]) + prev[c] + q;
|
||||
#ifdef FIXED_POINT
|
||||
tmp = MAX32(-QCONST32(28.f, DB_SHIFT+7), tmp);
|
||||
tmp = MAX32(-GCONST(28.f), tmp);
|
||||
#endif
|
||||
oldEBands[i+c*m->nbEBands] = PSHR32(tmp, 7);
|
||||
prev[c] = prev[c] + SHL32(q,7) - MULT16_16(beta,PSHR32(q,8));
|
||||
oldEBands[i+c*m->nbEBands] = tmp;
|
||||
prev[c] = prev[c] + q - MULT16_32_Q15(beta,q);
|
||||
} while (++c < C);
|
||||
}
|
||||
}
|
||||
|
||||
void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C)
|
||||
void unquant_fine_energy(const CELTMode *m, int start, int end, celt_glog *oldEBands, int *fine_quant, ec_dec *dec, int C)
|
||||
{
|
||||
int i, c;
|
||||
/* Decode finer resolution */
|
||||
@ -501,10 +500,10 @@ void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldE
|
||||
c=0;
|
||||
do {
|
||||
int q2;
|
||||
opus_val16 offset;
|
||||
celt_glog offset;
|
||||
q2 = ec_dec_bits(dec, fine_quant[i]);
|
||||
#ifdef FIXED_POINT
|
||||
offset = SUB16(SHR32(SHL32(EXTEND32(q2),DB_SHIFT)+QCONST16(.5f,DB_SHIFT),fine_quant[i]),QCONST16(.5f,DB_SHIFT));
|
||||
offset = SUB32(VSHR32(2*q2+1, fine_quant[i]-DB_SHIFT+1), GCONST(.5f));
|
||||
#else
|
||||
offset = (q2+.5f)*(1<<(14-fine_quant[i]))*(1.f/16384) - .5f;
|
||||
#endif
|
||||
@ -513,7 +512,7 @@ void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldE
|
||||
}
|
||||
}
|
||||
|
||||
void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C)
|
||||
void unquant_energy_finalise(const CELTMode *m, int start, int end, celt_glog *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C)
|
||||
{
|
||||
int i, prio, c;
|
||||
|
||||
@ -527,10 +526,10 @@ void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *
|
||||
c=0;
|
||||
do {
|
||||
int q2;
|
||||
opus_val16 offset;
|
||||
celt_glog offset;
|
||||
q2 = ec_dec_bits(dec, 1);
|
||||
#ifdef FIXED_POINT
|
||||
offset = SHR16(SHL16(q2,DB_SHIFT)-QCONST16(.5f,DB_SHIFT),fine_quant[i]+1);
|
||||
offset = SHR32(SHL32(q2,DB_SHIFT)-GCONST(.5f),fine_quant[i]+1);
|
||||
#else
|
||||
offset = (q2-.5f)*(1<<(14-fine_quant[i]-1))*(1.f/16384);
|
||||
#endif
|
||||
@ -542,7 +541,7 @@ void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *
|
||||
}
|
||||
|
||||
void amp2Log2(const CELTMode *m, int effEnd, int end,
|
||||
celt_ener *bandE, opus_val16 *bandLogE, int C)
|
||||
celt_ener *bandE, celt_glog *bandLogE, int C)
|
||||
{
|
||||
int c, i;
|
||||
c=0;
|
||||
@ -550,14 +549,14 @@ void amp2Log2(const CELTMode *m, int effEnd, int end,
|
||||
for (i=0;i<effEnd;i++)
|
||||
{
|
||||
bandLogE[i+c*m->nbEBands] =
|
||||
celt_log2(bandE[i+c*m->nbEBands])
|
||||
- SHL16((opus_val16)eMeans[i],6);
|
||||
celt_log2_db(bandE[i+c*m->nbEBands])
|
||||
- SHL32((celt_glog)eMeans[i],DB_SHIFT-4);
|
||||
#ifdef FIXED_POINT
|
||||
/* Compensate for bandE[] being Q12 but celt_log2() taking a Q14 input. */
|
||||
bandLogE[i+c*m->nbEBands] += QCONST16(2.f, DB_SHIFT);
|
||||
bandLogE[i+c*m->nbEBands] += GCONST(2.f);
|
||||
#endif
|
||||
}
|
||||
for (i=effEnd;i<end;i++)
|
||||
bandLogE[c*m->nbEBands+i] = -QCONST16(14.f,DB_SHIFT);
|
||||
bandLogE[c*m->nbEBands+i] = -GCONST(14.f);
|
||||
} while (++c < C);
|
||||
}
|
||||
|
18
third_party/opus/src/celt/quant_bands.h
vendored
18
third_party/opus/src/celt/quant_bands.h
vendored
@ -42,25 +42,25 @@ extern const opus_val16 eMeans[25];
|
||||
#endif
|
||||
|
||||
void amp2Log2(const CELTMode *m, int effEnd, int end,
|
||||
celt_ener *bandE, opus_val16 *bandLogE, int C);
|
||||
celt_ener *bandE, celt_glog *bandLogE, int C);
|
||||
|
||||
void log2Amp(const CELTMode *m, int start, int end,
|
||||
celt_ener *eBands, const opus_val16 *oldEBands, int C);
|
||||
celt_ener *eBands, const celt_glog *oldEBands, int C);
|
||||
|
||||
void quant_coarse_energy(const CELTMode *m, int start, int end, int effEnd,
|
||||
const opus_val16 *eBands, opus_val16 *oldEBands, opus_uint32 budget,
|
||||
opus_val16 *error, ec_enc *enc, int C, int LM,
|
||||
const celt_glog *eBands, celt_glog *oldEBands, opus_uint32 budget,
|
||||
celt_glog *error, ec_enc *enc, int C, int LM,
|
||||
int nbAvailableBytes, int force_intra, opus_val32 *delayedIntra,
|
||||
int two_pass, int loss_rate, int lfe);
|
||||
|
||||
void quant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, ec_enc *enc, int C);
|
||||
void quant_fine_energy(const CELTMode *m, int start, int end, celt_glog *oldEBands, celt_glog *error, int *fine_quant, ec_enc *enc, int C);
|
||||
|
||||
void quant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, opus_val16 *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C);
|
||||
void quant_energy_finalise(const CELTMode *m, int start, int end, celt_glog *oldEBands, celt_glog *error, int *fine_quant, int *fine_priority, int bits_left, ec_enc *enc, int C);
|
||||
|
||||
void unquant_coarse_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int intra, ec_dec *dec, int C, int LM);
|
||||
void unquant_coarse_energy(const CELTMode *m, int start, int end, celt_glog *oldEBands, int intra, ec_dec *dec, int C, int LM);
|
||||
|
||||
void unquant_fine_energy(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, ec_dec *dec, int C);
|
||||
void unquant_fine_energy(const CELTMode *m, int start, int end, celt_glog *oldEBands, int *fine_quant, ec_dec *dec, int C);
|
||||
|
||||
void unquant_energy_finalise(const CELTMode *m, int start, int end, opus_val16 *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C);
|
||||
void unquant_energy_finalise(const CELTMode *m, int start, int end, celt_glog *oldEBands, int *fine_quant, int *fine_priority, int bits_left, ec_dec *dec, int C);
|
||||
|
||||
#endif /* QUANT_BANDS */
|
||||
|
2
third_party/opus/src/celt/rate.c
vendored
2
third_party/opus/src/celt/rate.c
vendored
@ -189,7 +189,7 @@ void compute_pulse_cache(CELTMode *m, int LM)
|
||||
/* Offset the number of qtheta bits by log2(N)/2
|
||||
+ QTHETA_OFFSET compared to their "fair share" of
|
||||
total/N */
|
||||
offset = ((m->logN[j]+((LM0+k)<<BITRES))>>1)-QTHETA_OFFSET;
|
||||
offset = ((m->logN[j]+(opus_int32)((opus_uint32)(LM0+k)<<BITRES))>>1)-QTHETA_OFFSET;
|
||||
/* The number of qtheta bits we'll allocate if the remainder
|
||||
is to be max_bits.
|
||||
The average measured cost for theta is 0.89701 times qb,
|
||||
|
3
third_party/opus/src/celt/stack_alloc.h
vendored
3
third_party/opus/src/celt/stack_alloc.h
vendored
@ -140,8 +140,9 @@ extern char *global_stack_top;
|
||||
|
||||
#else
|
||||
|
||||
#include "arch.h"
|
||||
#define ALIGN(stack, size) ((stack) += ((size) - (long)(stack)) & ((size) - 1))
|
||||
#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/sizeof(char)),(stack)+=(size)*(sizeof(type)/sizeof(char)),(type*)((stack)-(size)*(sizeof(type)/sizeof(char))))
|
||||
#define PUSH(stack, size, type) (ALIGN((stack),sizeof(type)/(sizeof(char))),(void)(((int)((size)*(sizeof(type)/(sizeof(char)))) <= (scratch_ptr)+GLOBAL_STACK_SIZE-(stack))?0:CELT_FATAL("pseudostack overflow")),(stack)+=(size)*(sizeof(type)/(sizeof(char))),(type*)((stack)-(size)*(sizeof(type)/(sizeof(char)))))
|
||||
#if 0 /* Set this to 1 to instrument pseudostack usage */
|
||||
#define RESTORE_STACK (printf("%ld %s:%d\n", global_stack-scratch_ptr, __FILE__, __LINE__),global_stack = _saved_stack)
|
||||
#else
|
||||
|
1159
third_party/opus/src/celt/static_modes_fixed.h
vendored
1159
third_party/opus/src/celt/static_modes_fixed.h
vendored
File diff suppressed because it is too large
Load Diff
@ -157,5 +157,6 @@ int main(void){
|
||||
/*printf("\n");*/
|
||||
}
|
||||
}
|
||||
RESTORE_STACK;
|
||||
return 0;
|
||||
}
|
||||
|
@ -176,5 +176,6 @@ int main(int argc,char ** argv)
|
||||
test1d(480,1,arch);
|
||||
#endif
|
||||
}
|
||||
RESTORE_STACK;
|
||||
return ret;
|
||||
}
|
||||
|
@ -89,5 +89,6 @@ int main(void)
|
||||
}
|
||||
|
||||
free(ptr);
|
||||
RESTORE_STACK;
|
||||
return ret;
|
||||
}
|
||||
|
117
third_party/opus/src/celt/tests/test_unit_mathops.c
vendored
117
third_party/opus/src/celt/tests/test_unit_mathops.c
vendored
@ -1,6 +1,7 @@
|
||||
/* Copyright (c) 2008-2011 Xiph.Org Foundation, Mozilla Corporation,
|
||||
Gregory Maxwell
|
||||
Written by Jean-Marc Valin, Gregory Maxwell, and Timothy B. Terriberry */
|
||||
Written by Jean-Marc Valin, Gregory Maxwell, Timothy B. Terriberry,
|
||||
and Yunho Huh */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
@ -41,6 +42,8 @@
|
||||
|
||||
#ifdef FIXED_POINT
|
||||
#define WORD "%d"
|
||||
/* Test-only conversions between an integer holding a fixed-point value with
   q fractional bits and a double. Every macro argument is parenthesized so
   that expression arguments (e.g. "a + b" or "c ? 1 : 2") expand correctly. */
#define FIX_INT_TO_DOUBLE(x,q) ((double)(x) / (double)(1L << (q)))
#define DOUBLE_TO_FIX_INT(x,q) ((double)(x) * (double)(1L << (q)))
|
||||
#else
|
||||
#define WORD "%f"
|
||||
#endif
|
||||
@ -143,45 +146,107 @@ void testbitexactlog2tan(void)
|
||||
void testlog2(void)
|
||||
{
|
||||
float x;
|
||||
float error_threshold = 2.2e-06;
|
||||
float max_error = 0;
|
||||
for (x=0.001f;x<1677700.0;x+=(x/8.0))
|
||||
{
|
||||
float error = fabs((1.442695040888963387*log(x))-celt_log2(x));
|
||||
if (error>0.0009)
|
||||
if (max_error < error)
|
||||
{
|
||||
fprintf (stderr, "celt_log2 failed: fabs((1.442695040888963387*log(x))-celt_log2(x))>0.001 (x = %f, error = %f)\n", x,error);
|
||||
max_error = error;
|
||||
}
|
||||
|
||||
if (error > error_threshold)
|
||||
{
|
||||
fprintf (stderr,
|
||||
"celt_log2 failed: "
|
||||
"fabs((1.442695040888963387*log(x))-celt_log2(x))>%15.25e "
|
||||
"(x = %f, error = %15.25e)\n", error_threshold, x, error);
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
fprintf (stdout, "celt_log2 max_error: %15.25e\n", max_error);
|
||||
}
|
||||
|
||||
void testexp2(void)
|
||||
{
|
||||
float x;
|
||||
float error_threshold = 2.3e-07;
|
||||
float max_error = 0;
|
||||
for (x=-11.0;x<24.0;x+=0.0007f)
|
||||
{
|
||||
float error = fabs(x-(1.442695040888963387*log(celt_exp2(x))));
|
||||
if (error>0.0002)
|
||||
if (max_error < error)
|
||||
{
|
||||
fprintf (stderr, "celt_exp2 failed: fabs(x-(1.442695040888963387*log(celt_exp2(x))))>0.0005 (x = %f, error = %f)\n", x,error);
|
||||
max_error = error;
|
||||
}
|
||||
|
||||
if (error > error_threshold)
|
||||
{
|
||||
fprintf (stderr,
|
||||
"celt_exp2 failed: "
|
||||
"fabs(x-(1.442695040888963387*log(celt_exp2(x))))>%15.25e "
|
||||
"(x = %f, error = %15.25e)\n", error_threshold, x, error);
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
fprintf (stdout, "celt_exp2 max_error: %15.25e\n", max_error);
|
||||
}
|
||||
|
||||
void testexp2log2(void)
|
||||
{
|
||||
float x;
|
||||
float error_threshold = 2.0e-06;
|
||||
float max_error = 0;
|
||||
for (x=-11.0;x<24.0;x+=0.0007f)
|
||||
{
|
||||
float error = fabs(x-(celt_log2(celt_exp2(x))));
|
||||
if (error>0.001)
|
||||
if (max_error < error)
|
||||
{
|
||||
fprintf (stderr, "celt_log2/celt_exp2 failed: fabs(x-(celt_log2(celt_exp2(x))))>0.001 (x = %f, error = %f)\n", x,error);
|
||||
max_error = error;
|
||||
}
|
||||
|
||||
if (error > error_threshold)
|
||||
{
|
||||
fprintf (stderr,
|
||||
"celt_log2/celt_exp2 failed: "
|
||||
"fabs(x-(celt_log2(celt_exp2(x))))>%15.25e "
|
||||
"(x = %f, error = %15.25e)\n", error_threshold, x, error);
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
fprintf (stdout, "celt_exp2, celt_log2 max_error: %15.25e\n", max_error);
|
||||
}
|
||||
#else
|
||||
|
||||
/* Compares the fixed-point celt_log2_db() against a double-precision
   log2() reference (1.4426950... * log(x)).
   The input x is interpreted with q_input fractional bits and the result of
   celt_log2_db() with DB_SHIFT fractional bits. Any sample whose absolute
   error exceeds error_threshold is reported on stderr and sets the
   file-level `ret` flag to 1. The largest error seen is printed on stdout.
   The whole body is compiled out unless ENABLE_QEXT is defined. */
void testlog2_db(void)
{
#if defined(ENABLE_QEXT)
   /* celt_log2_db test */
   float error = -1;
   float max_error = -2;
   float error_threshold = 2.e-07;
   opus_int32 x = 0;
   int q_input = 14;
   /* Geometric sweep: each step grows x by 1/8 of its current value. */
   for (x = 8; x < 1073741824; x += (x >> 3))
   {
      error = fabs((1.442695040888963387*log(FIX_INT_TO_DOUBLE(x, q_input))) -
                   FIX_INT_TO_DOUBLE(celt_log2_db(x), DB_SHIFT));
      if (error > max_error)
      {
         max_error = error;
      }
      if (error > error_threshold)
      {
         /* Report x in the same Q format (q_input) that was used to compute
            the reference value, so the printed input matches the failure. */
         fprintf(stderr, "celt_log2_db failed: error: [%.5e > %.5e] (x = %f)\n",
                 error, error_threshold, FIX_INT_TO_DOUBLE(x, q_input));
         ret = 1;
      }
   }
   fprintf(stdout, "celt_log2_db max_error: %.7e\n", max_error);
#endif  /* defined(ENABLE_QEXT) */
}
|
||||
|
||||
void testlog2(void)
|
||||
{
|
||||
opus_val32 x;
|
||||
@ -211,6 +276,42 @@ void testexp2(void)
|
||||
}
|
||||
}
|
||||
|
||||
/* Sweeps fx over [-32, 15) in steps of 0.0007, quantizes it to DB_SHIFT
   fractional bits and checks celt_exp2_db() (result read with 16 fractional
   bits) against exp(ln(2) * quantized input).
   A sample fails only when its absolute error exceeds BOTH the absolute
   tolerance and the tolerance relative to the reference value; failures are
   printed on stderr and set the file-level `ret` flag to 1.
   The whole body is compiled out unless ENABLE_QEXT is defined. */
void testexp2_db(void)
{
#if defined(ENABLE_QEXT)
   float err = -1;
   float abs_tol = FIX_INT_TO_DOUBLE(2, 16);
   float rel_tol = -2;
   float fx;
   float quantized;
   opus_val32 fixed_in;

   fx = -32.0;
   while (fx < 15.0)
   {
      double reference;

      /* Round-trip through the fixed-point format so the reference is
         evaluated at the value actually handed to celt_exp2_db(). */
      fixed_in = DOUBLE_TO_FIX_INT(fx, DB_SHIFT);
      quantized = FIX_INT_TO_DOUBLE(fixed_in, DB_SHIFT);

      reference = (exp(0.6931471805599453094 * quantized));
      err = fabs(reference -
                 FIX_INT_TO_DOUBLE(celt_exp2_db(fixed_in), 16));

      rel_tol = 1.24e-7 * reference;
      if (err > abs_tol)
      {
         if (err > rel_tol)
         {
            fprintf(stderr,
                    "celt_exp2_db failed: "
                    "absolute_error: [%.5e > %.5e] "
                    "relative_error: [%.5e > %.5e] (x = %f)\n",
                    err, abs_tol,
                    err, rel_tol, quantized);
            ret = 1;
         }
      }
      fx += 0.0007;
   }
#endif  /* defined(ENABLE_QEXT) */
}
|
||||
|
||||
void testexp2log2(void)
|
||||
{
|
||||
opus_val32 x;
|
||||
@ -261,6 +362,8 @@ int main(void)
|
||||
testexp2log2();
|
||||
#ifdef FIXED_POINT
|
||||
testilog2();
|
||||
testlog2_db();
|
||||
testexp2_db();
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
@ -109,7 +109,7 @@ void test1d(int nfft,int isinverse,int arch)
|
||||
kiss_fft_scalar *in;
|
||||
kiss_fft_scalar *in_copy;
|
||||
kiss_fft_scalar *out;
|
||||
opus_val16 *window;
|
||||
celt_coef *window;
|
||||
int k;
|
||||
|
||||
#ifdef CUSTOM_MODES
|
||||
@ -133,14 +133,18 @@ void test1d(int nfft,int isinverse,int arch)
|
||||
in = (kiss_fft_scalar*)malloc(buflen);
|
||||
in_copy = (kiss_fft_scalar*)malloc(buflen);
|
||||
out = (kiss_fft_scalar*)malloc(buflen);
|
||||
window = (opus_val16*)malloc(sizeof(opus_val16)*nfft/2);
|
||||
window = (celt_coef*)malloc(sizeof(*window)*nfft/2);
|
||||
|
||||
for (k=0;k<nfft;++k) {
|
||||
in[k] = (rand() % 32768) - 16384;
|
||||
}
|
||||
|
||||
for (k=0;k<nfft/2;++k) {
|
||||
#ifdef ENABLE_QEXT
|
||||
window[k] = Q31ONE;
|
||||
#else
|
||||
window[k] = Q15ONE;
|
||||
#endif
|
||||
}
|
||||
for (k=0;k<nfft;++k) {
|
||||
in[k] *= 32768;
|
||||
@ -224,5 +228,6 @@ int main(int argc,char ** argv)
|
||||
test1d(1920,1,arch);
|
||||
#endif
|
||||
}
|
||||
RESTORE_STACK;
|
||||
return ret;
|
||||
}
|
||||
|
@ -82,5 +82,6 @@ int main(void)
|
||||
test_rotation(23, 5);
|
||||
test_rotation(50, 3);
|
||||
test_rotation(80, 1);
|
||||
RESTORE_STACK;
|
||||
return ret;
|
||||
}
|
||||
|
12
third_party/opus/src/celt/vq.c
vendored
12
third_party/opus/src/celt/vq.c
vendored
@ -119,7 +119,7 @@ void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
|
||||
/** Takes the pitch vector and the decoded residual vector, computes the gain
|
||||
that will give ||p+g*y||=1 and mixes the residual with the pitch. */
|
||||
static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT X,
|
||||
int N, opus_val32 Ryy, opus_val16 gain)
|
||||
int N, opus_val32 Ryy, opus_val32 gain)
|
||||
{
|
||||
int i;
|
||||
#ifdef FIXED_POINT
|
||||
@ -132,7 +132,7 @@ static void normalise_residual(int * OPUS_RESTRICT iy, celt_norm * OPUS_RESTRICT
|
||||
k = celt_ilog2(Ryy)>>1;
|
||||
#endif
|
||||
t = VSHR32(Ryy, 2*(k-7));
|
||||
g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
|
||||
g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
|
||||
|
||||
i=0;
|
||||
do
|
||||
@ -328,7 +328,7 @@ opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
|
||||
}
|
||||
|
||||
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
|
||||
opus_val16 gain, int resynth, int arch)
|
||||
opus_val32 gain, int resynth, int arch)
|
||||
{
|
||||
VARDECL(int, iy);
|
||||
opus_val16 yy;
|
||||
@ -361,7 +361,7 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
|
||||
/** Decode pulse vector and combine the result with the pitch vector to produce
|
||||
the final normalised signal in the current band. */
|
||||
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
|
||||
ec_dec *dec, opus_val16 gain)
|
||||
ec_dec *dec, opus_val32 gain)
|
||||
{
|
||||
opus_val32 Ryy;
|
||||
unsigned collapse_mask;
|
||||
@ -380,7 +380,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
|
||||
}
|
||||
|
||||
#ifndef OVERRIDE_renormalise_vector
|
||||
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
|
||||
void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch)
|
||||
{
|
||||
int i;
|
||||
#ifdef FIXED_POINT
|
||||
@ -395,7 +395,7 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
|
||||
k = celt_ilog2(E)>>1;
|
||||
#endif
|
||||
t = VSHR32(E, 2*(k-7));
|
||||
g = MULT16_16_P15(celt_rsqrt_norm(t),gain);
|
||||
g = MULT32_32_Q31(celt_rsqrt_norm(t),gain);
|
||||
|
||||
xptr = X;
|
||||
for (i=0;i<N;i++)
|
||||
|
6
third_party/opus/src/celt/vq.h
vendored
6
third_party/opus/src/celt/vq.h
vendored
@ -60,7 +60,7 @@ opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch);
|
||||
* @ret A mask indicating which blocks in the band received pulses
|
||||
*/
|
||||
unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
|
||||
opus_val16 gain, int resynth, int arch);
|
||||
opus_val32 gain, int resynth, int arch);
|
||||
|
||||
/** Algebraic pulse decoder
|
||||
* @param X Decoded normalised spectrum (returned)
|
||||
@ -70,9 +70,9 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
|
||||
* @ret A mask indicating which blocks in the band received pulses
|
||||
*/
|
||||
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
|
||||
ec_dec *dec, opus_val16 gain);
|
||||
ec_dec *dec, opus_val32 gain);
|
||||
|
||||
void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch);
|
||||
void renormalise_vector(celt_norm *X, int N, opus_val32 gain, int arch);
|
||||
|
||||
int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch);
|
||||
|
||||
|
13
third_party/opus/src/celt/x86/celt_lpc_sse4_1.c
vendored
13
third_party/opus/src/celt/x86/celt_lpc_sse4_1.c
vendored
@ -64,9 +64,16 @@ void celt_fir_sse4_1(const opus_val16 *x,
|
||||
{
|
||||
opus_val32 sums[4] = {0};
|
||||
__m128i vecSum, vecX;
|
||||
|
||||
xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
|
||||
|
||||
#if defined(OPUS_CHECK_ASM)
|
||||
{
|
||||
opus_val32 sums_c[4] = {0};
|
||||
xcorr_kernel_c(rnum, x+i-ord, sums_c, ord);
|
||||
#endif
|
||||
xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
|
||||
#if defined(OPUS_CHECK_ASM)
|
||||
celt_assert(memcmp(sums, sums_c, sizeof(sums)) == 0);
|
||||
}
|
||||
#endif
|
||||
vecSum = _mm_loadu_si128((__m128i *)sums);
|
||||
vecSum = _mm_add_epi32(vecSum, vecNoA);
|
||||
vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
|
||||
|
101
third_party/opus/src/celt/x86/pitch_avx.c
vendored
Normal file
101
third_party/opus/src/celt/x86/pitch_avx.c
vendored
Normal file
@ -0,0 +1,101 @@
|
||||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "x86cpu.h"
|
||||
#include "pitch.h"
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(FIXED_POINT)
|
||||
|
||||
/* Like the "regular" xcorr_kernel(), but computes 8 results at a time.
   On return, sum[k] = x[0]*y[k] + x[1]*y[k+1] + ... + x[len-1]*y[k+len-1]
   for k = 0..7. The previous contents of sum[] are overwritten.
   The full-width loop loads y up to index i+14, so y must provide 7 floats
   beyond the len samples paired with x. The tail (len not a multiple of 8)
   is handled with masked loads covering only the remaining len-i lanes.
   All declarations are kept ahead of the first statement so the function
   also builds as C89, like the other functions in this chunk. */
static void xcorr_kernel_avx(const float *x, const float *y, float sum[8], int len)
{
    __m256 xsum0, xsum1, xsum2, xsum3, xsum4, xsum5, xsum6, xsum7;
    __m256 x0;
    int i;
    xsum7 = xsum6 = xsum5 = xsum4 = xsum3 = xsum2 = xsum1 = xsum0 = _mm256_setzero_ps();
    /* Compute 8 inner products using partial sums. */
    for (i=0;i<len-7;i+=8)
    {
        x0 = _mm256_loadu_ps(x+i);
        xsum0 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i  ), xsum0);
        xsum1 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+1), xsum1);
        xsum2 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+2), xsum2);
        xsum3 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+3), xsum3);
        xsum4 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+4), xsum4);
        xsum5 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+5), xsum5);
        xsum6 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+6), xsum6);
        xsum7 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+7), xsum7);
    }
    if (i != len) {
        /* Sliding window over 7 all-ones followed by 8 zeros: starting at
           offset 7-(len-i) yields exactly len-i enabled lanes. */
        static const int mask[15] = {-1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
        __m256i m;
        m = _mm256_loadu_si256((__m256i*)(void*)(mask + 7+i-len));
        x0 = _mm256_maskload_ps(x+i, m);
        xsum0 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i  , m), xsum0);
        xsum1 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+1, m), xsum1);
        xsum2 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+2, m), xsum2);
        xsum3 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+3, m), xsum3);
        xsum4 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+4, m), xsum4);
        xsum5 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+5, m), xsum5);
        xsum6 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+6, m), xsum6);
        xsum7 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+7, m), xsum7);
    }
    /* 8 horizontal adds. */
    /* Compute [0 4] [1 5] [2 6] [3 7] */
    xsum0 = _mm256_add_ps(_mm256_permute2f128_ps(xsum0, xsum4, 2<<4), _mm256_permute2f128_ps(xsum0, xsum4, 1 | (3<<4)));
    xsum1 = _mm256_add_ps(_mm256_permute2f128_ps(xsum1, xsum5, 2<<4), _mm256_permute2f128_ps(xsum1, xsum5, 1 | (3<<4)));
    xsum2 = _mm256_add_ps(_mm256_permute2f128_ps(xsum2, xsum6, 2<<4), _mm256_permute2f128_ps(xsum2, xsum6, 1 | (3<<4)));
    xsum3 = _mm256_add_ps(_mm256_permute2f128_ps(xsum3, xsum7, 2<<4), _mm256_permute2f128_ps(xsum3, xsum7, 1 | (3<<4)));
    /* Compute [0 1 4 5] [2 3 6 7] */
    xsum0 = _mm256_hadd_ps(xsum0, xsum1);
    xsum1 = _mm256_hadd_ps(xsum2, xsum3);
    /* Compute [0 1 2 3 4 5 6 7] */
    xsum0 = _mm256_hadd_ps(xsum0, xsum1);
    _mm256_storeu_ps(sum, xsum0);
}
|
||||
|
||||
/* Fills xcorr[0..max_pitch-1] with the inner product of _x and _y shifted by
   each lag, over len samples. Lags are processed eight at a time through
   xcorr_kernel_avx(); any leftover lags (fewer than eight) fall back to
   celt_inner_prod(), one lag per call. */
void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch)
{
    int lag;
    (void)arch;
    celt_assert(max_pitch>0);

    /* Blocks of eight consecutive lags. */
    lag = 0;
    while (lag < max_pitch-7)
    {
        xcorr_kernel_avx(_x, _y+lag, xcorr+lag, len);
        lag += 8;
    }

    /* Remaining lags, one at a time. */
    while (lag < max_pitch)
    {
        xcorr[lag] = celt_inner_prod(_x, _y+lag, len, arch);
        lag++;
    }
}
|
||||
|
||||
#endif
|
40
third_party/opus/src/celt/x86/pitch_sse.h
vendored
40
third_party/opus/src/celt/x86/pitch_sse.h
vendored
@ -131,12 +131,6 @@ extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
|
||||
|
||||
#define OVERRIDE_DUAL_INNER_PROD
|
||||
#define OVERRIDE_COMB_FILTER_CONST
|
||||
|
||||
#undef dual_inner_prod
|
||||
#undef comb_filter_const
|
||||
|
||||
void dual_inner_prod_sse(const opus_val16 *x,
|
||||
const opus_val16 *y01,
|
||||
const opus_val16 *y02,
|
||||
@ -154,13 +148,17 @@ void comb_filter_const_sse(opus_val32 *y,
|
||||
|
||||
|
||||
#if defined(OPUS_X86_PRESUME_SSE)
|
||||
#define OVERRIDE_DUAL_INNER_PROD
|
||||
#define OVERRIDE_COMB_FILTER_CONST
|
||||
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
|
||||
((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
|
||||
|
||||
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
|
||||
((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
|
||||
#else
|
||||
#elif defined(OPUS_HAVE_RTCD)
|
||||
|
||||
#define OVERRIDE_DUAL_INNER_PROD
|
||||
#define OVERRIDE_COMB_FILTER_CONST
|
||||
extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const opus_val16 *x,
|
||||
const opus_val16 *y01,
|
||||
@ -187,6 +185,32 @@ extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
|
||||
#define NON_STATIC_COMB_FILTER_CONST_C
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch);
|
||||
|
||||
#if defined(OPUS_X86_PRESUME_AVX2)
|
||||
|
||||
#define OVERRIDE_PITCH_XCORR
|
||||
# define celt_pitch_xcorr celt_pitch_xcorr_avx2
|
||||
|
||||
#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
|
||||
#define OVERRIDE_PITCH_XCORR
|
||||
extern void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const float *_x,
|
||||
const float *_y,
|
||||
float *xcorr,
|
||||
int len,
|
||||
int max_pitch,
|
||||
int arch
|
||||
);
|
||||
|
||||
#define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
|
||||
((*PITCH_XCORR_IMPL[(arch) & OPUS_ARCHMASK])(_x, _y, xcorr, len, max_pitch, arch))
|
||||
|
||||
|
||||
#endif /* OPUS_X86_PRESUME_AVX2 && !OPUS_HAVE_RTCD */
|
||||
|
||||
#endif /* OPUS_X86_MAY_HAVE_SSE && !FIXED_POINT */
|
||||
|
||||
#endif
|
||||
|
6
third_party/opus/src/celt/x86/vq_sse.h
vendored
6
third_party/opus/src/celt/x86/vq_sse.h
vendored
@ -28,16 +28,18 @@
|
||||
#define VQ_SSE_H
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
|
||||
#define OVERRIDE_OP_PVQ_SEARCH
|
||||
|
||||
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);
|
||||
|
||||
#if defined(OPUS_X86_PRESUME_SSE2)
|
||||
|
||||
#define OVERRIDE_OP_PVQ_SEARCH
|
||||
#define op_pvq_search(x, iy, K, N, arch) \
|
||||
(op_pvq_search_sse2(x, iy, K, N, arch))
|
||||
|
||||
#else
|
||||
#elif defined(OPUS_HAVE_RTCD)
|
||||
|
||||
#define OVERRIDE_OP_PVQ_SEARCH
|
||||
extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
|
||||
celt_norm *_X, int *iy, int K, int N, int arch);
|
||||
|
||||
|
8
third_party/opus/src/celt/x86/vq_sse2.c
vendored
8
third_party/opus/src/celt/x86/vq_sse2.c
vendored
@ -75,7 +75,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
|
||||
sums = _mm_add_ps(sums, x4);
|
||||
/* Clear y and iy in case we don't do the projection. */
|
||||
_mm_storeu_ps(&y[j], _mm_setzero_ps());
|
||||
_mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
|
||||
_mm_storeu_si128((__m128i*)(void*)&iy[j], _mm_setzero_si128());
|
||||
_mm_storeu_ps(&X[j], x4);
|
||||
_mm_storeu_ps(&signy[j], s4);
|
||||
}
|
||||
@ -116,7 +116,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
|
||||
rx4 = _mm_mul_ps(x4, rcp4);
|
||||
iy4 = _mm_cvttps_epi32(rx4);
|
||||
pulses_sum = _mm_add_epi32(pulses_sum, iy4);
|
||||
_mm_storeu_si128((__m128i*)&iy[j], iy4);
|
||||
_mm_storeu_si128((__m128i*)(void*)&iy[j], iy4);
|
||||
y4 = _mm_cvtepi32_ps(iy4);
|
||||
xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
|
||||
yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
|
||||
@ -205,10 +205,10 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
|
||||
{
|
||||
__m128i y4;
|
||||
__m128i s4;
|
||||
y4 = _mm_loadu_si128((__m128i*)&iy[j]);
|
||||
y4 = _mm_loadu_si128((__m128i*)(void*)&iy[j]);
|
||||
s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
|
||||
y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
|
||||
_mm_storeu_si128((__m128i*)&iy[j], y4);
|
||||
_mm_storeu_si128((__m128i*)(void*)&iy[j], y4);
|
||||
}
|
||||
RESTORE_STACK;
|
||||
return yy;
|
||||
|
47
third_party/opus/src/celt/x86/x86_arch_macros.h
vendored
Normal file
47
third_party/opus/src/celt/x86/x86_arch_macros.h
vendored
Normal file
@ -0,0 +1,47 @@
|
||||
/* Copyright (c) 2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* When compiling with MSVC (_MSC_VER defined), define the GCC-style feature
   macros __SSE__, __SSE2__ and __SSE4_1__ for every instruction set whose
   OPUS_X86_MAY_HAVE_* macro is set, unless the macro is already defined.
   NOTE(review): presumably MSVC does not predefine these macros itself and
   code elsewhere tests them -- confirm against the including sources. */
#ifdef _MSC_VER
|
||||
|
||||
# ifdef OPUS_X86_MAY_HAVE_SSE
|
||||
# ifndef __SSE__
|
||||
# define __SSE__
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifdef OPUS_X86_MAY_HAVE_SSE2
|
||||
# ifndef __SSE2__
|
||||
# define __SSE2__
|
||||
# endif
|
||||
# endif
|
||||
|
||||
# ifdef OPUS_X86_MAY_HAVE_SSE4_1
|
||||
# ifndef __SSE4_1__
|
||||
# define __SSE4_1__
|
||||
# endif
|
||||
# endif
|
||||
|
||||
#endif /* _MSC_VER */
|
20
third_party/opus/src/celt/x86/x86_celt_map.c
vendored
20
third_party/opus/src/celt/x86/x86_celt_map.c
vendored
@ -90,6 +90,26 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
|
||||
|
||||
# else
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)
|
||||
|
||||
void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const float *_x,
|
||||
const float *_y,
|
||||
float *xcorr,
|
||||
int len,
|
||||
int max_pitch,
|
||||
int arch
|
||||
) = {
|
||||
celt_pitch_xcorr_c, /* non-sse */
|
||||
celt_pitch_xcorr_c,
|
||||
celt_pitch_xcorr_c,
|
||||
celt_pitch_xcorr_c,
|
||||
MAY_HAVE_AVX2(celt_pitch_xcorr)
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
|
||||
|
||||
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
|
||||
|
16
third_party/opus/src/celt/x86/x86cpu.c
vendored
16
third_party/opus/src/celt/x86/x86cpu.c
vendored
@ -39,7 +39,7 @@
|
||||
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
@ -105,7 +105,7 @@ typedef struct CPU_Feature{
|
||||
int HW_SSE2;
|
||||
int HW_SSE41;
|
||||
/* SIMD: 256-bit */
|
||||
int HW_AVX;
|
||||
int HW_AVX2;
|
||||
} CPU_Feature;
|
||||
|
||||
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
|
||||
@ -121,13 +121,19 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
|
||||
cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
|
||||
cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
|
||||
cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
|
||||
cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
|
||||
cpu_feature->HW_AVX2 = (info[2] & (1 << 28)) != 0 && (info[2] & (1 << 12)) != 0;
|
||||
if (cpu_feature->HW_AVX2 && nIds >= 7) {
|
||||
cpuid(info, 7);
|
||||
cpu_feature->HW_AVX2 = cpu_feature->HW_AVX2 && (info[1] & (1 << 5)) != 0;
|
||||
} else {
|
||||
cpu_feature->HW_AVX2 = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
cpu_feature->HW_SSE = 0;
|
||||
cpu_feature->HW_SSE2 = 0;
|
||||
cpu_feature->HW_SSE41 = 0;
|
||||
cpu_feature->HW_AVX = 0;
|
||||
cpu_feature->HW_AVX2 = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -157,7 +163,7 @@ static int opus_select_arch_impl(void)
|
||||
}
|
||||
arch++;
|
||||
|
||||
if (!cpu_feature.HW_AVX)
|
||||
if (!cpu_feature.HW_AVX2)
|
||||
{
|
||||
return arch;
|
||||
}
|
||||
|
63
third_party/opus/src/celt/x86/x86cpu.h
vendored
63
third_party/opus/src/celt/x86/x86cpu.h
vendored
@ -46,28 +46,67 @@
|
||||
# define MAY_HAVE_SSE4_1(name) name ## _c
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_X86_MAY_HAVE_AVX)
|
||||
# define MAY_HAVE_AVX(name) name ## _avx
|
||||
# if defined(OPUS_X86_MAY_HAVE_AVX2)
|
||||
# define MAY_HAVE_AVX2(name) name ## _avx2
|
||||
# else
|
||||
# define MAY_HAVE_AVX(name) name ## _c
|
||||
# define MAY_HAVE_AVX2(name) name ## _c
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_HAVE_RTCD)
|
||||
# if defined(OPUS_HAVE_RTCD) && \
|
||||
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
|
||||
(defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
|
||||
int opus_select_arch(void);
|
||||
# endif
|
||||
|
||||
# if defined(OPUS_X86_MAY_HAVE_SSE2)
|
||||
# include "opus_defines.h"
|
||||
|
||||
/*MOVD should not impose any alignment restrictions, but the C standard does,
|
||||
and UBSan will report errors if we actually make unaligned accesses.
|
||||
Use this to work around those restrictions (which should hopefully all get
|
||||
optimized to a single MOVD instruction).*/
|
||||
#define OP_LOADU_EPI32(x) \
|
||||
(int)((*(unsigned char *)(x) | *((unsigned char *)(x) + 1) << 8U |\
|
||||
*((unsigned char *)(x) + 2) << 16U | (opus_uint32)*((unsigned char *)(x) + 3) << 24U))
|
||||
optimized to a single MOVD instruction).
|
||||
GCC implemented _mm_loadu_si32() since GCC 11; HOWEVER, there is a bug!
|
||||
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99754
|
||||
LLVM implemented _mm_loadu_si32() since Clang 8.0, however the
|
||||
__clang_major__ version number macro is unreliable, as vendors
|
||||
(specifically, Apple) will use different numbering schemes than upstream.
|
||||
Clang's advice is "use feature detection", but they do not provide feature
|
||||
detection support for specific SIMD functions.
|
||||
We follow the approach from the SIMDe project and instead detect unrelated
|
||||
features that should be available in the version we want (see
|
||||
<https://github.com/simd-everywhere/simde/blob/master/simde/simde-detect-clang.h>).*/
|
||||
# if defined(__clang__)
|
||||
# if __has_warning("-Wextra-semi-stmt") || \
|
||||
__has_builtin(__builtin_rotateleft32)
|
||||
# define OPUS_CLANG_8 (1)
|
||||
# endif
|
||||
# endif
|
||||
# if !defined(_MSC_VER) && !OPUS_GNUC_PREREQ(11,3) && !defined(OPUS_CLANG_8)
|
||||
# include <string.h>
|
||||
# include <emmintrin.h>
|
||||
|
||||
#define OP_CVTEPI8_EPI32_M32(x) \
|
||||
(_mm_cvtepi8_epi32(_mm_cvtsi32_si128(OP_LOADU_EPI32(x))))
|
||||
# ifdef _mm_loadu_si32
|
||||
# undef _mm_loadu_si32
|
||||
# endif
|
||||
# define _mm_loadu_si32 WORKAROUND_mm_loadu_si32
|
||||
/* Stand-in for _mm_loadu_si32(): copies sizeof(int) bytes from mem_addr into
   a local int with memcpy(), so mem_addr is never dereferenced as an int
   pointer, then returns that value through _mm_cvtsi32_si128(). */
static inline __m128i WORKAROUND_mm_loadu_si32(void const* mem_addr) {
|
||||
int val;
|
||||
memcpy(&val, mem_addr, sizeof(val));
|
||||
return _mm_cvtsi32_si128(val);
|
||||
}
|
||||
# elif defined(_MSC_VER)
|
||||
/* MSVC needs this for _mm_loadu_si32 */
|
||||
# include <immintrin.h>
|
||||
# endif
|
||||
|
||||
#define OP_CVTEPI16_EPI32_M64(x) \
|
||||
(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
|
||||
# define OP_CVTEPI8_EPI32_M32(x) \
|
||||
(_mm_cvtepi8_epi32(_mm_loadu_si32(x)))
|
||||
|
||||
# define OP_CVTEPI16_EPI32_M64(x) \
|
||||
(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(void*)(x))))
|
||||
|
||||
# endif
|
||||
|
||||
#endif
|
||||
|
5
third_party/opus/src/cmake/OpusConfig.cmake
vendored
5
third_party/opus/src/cmake/OpusConfig.cmake
vendored
@ -102,7 +102,10 @@ if(MINGW)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT MSVC)
|
||||
if(MSVC)
|
||||
# move cosmetic warnings to level 4
|
||||
add_compile_options(/w44244 /w44305 /w44267)
|
||||
else()
|
||||
set(WARNING_LIST -Wall -W -Wstrict-prototypes -Wextra -Wcast-align -Wnested-externs -Wshadow)
|
||||
include(CheckCCompilerFlag)
|
||||
foreach(WARNING_FLAG ${WARNING_LIST})
|
||||
|
22
third_party/opus/src/cmake/OpusFunctions.cmake
vendored
22
third_party/opus/src/cmake/OpusFunctions.cmake
vendored
@ -47,10 +47,12 @@ function(check_flag NAME FLAG)
|
||||
endfunction()
|
||||
|
||||
include(CheckIncludeFile)
|
||||
# function to check if compiler supports SSE, SSE2, SSE4.1 and AVX if target
|
||||
# systems may not have SSE support then use OPUS_MAY_HAVE_SSE option if target
|
||||
# system is guaranteed to have SSE support then OPUS_PRESUME_SSE can be used to
|
||||
# skip SSE runtime check
|
||||
|
||||
# This function determines if the compiler has support for SSE, SSE2, SSE4.1, AVX,
|
||||
# AVX2 and FMA. Should the target systems potentially lack SSE support, the
|
||||
# OPUS_MAY_HAVE_SSE option is recommended for use. If, however, the target system is
|
||||
# assured to support SSE, the OPUS_PRESUME_SSE option can be employed, thus
|
||||
# eliminating the necessity for an SSE runtime check.
|
||||
function(opus_detect_sse COMPILER_SUPPORT_SIMD)
|
||||
message(STATUS "Check SIMD support by compiler")
|
||||
check_include_file(xmmintrin.h HAVE_XMMINTRIN_H) # SSE1
|
||||
@ -111,20 +113,20 @@ function(opus_detect_sse COMPILER_SUPPORT_SIMD)
|
||||
PARENT_SCOPE)
|
||||
endif()
|
||||
|
||||
check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX
|
||||
check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX2
|
||||
if(HAVE_IMMINTRIN_H)
|
||||
if(MSVC)
|
||||
check_flag(AVX /arch:AVX)
|
||||
check_flag(AVX2 /arch:AVX2)
|
||||
else()
|
||||
check_flag(AVX -mavx)
|
||||
check_flag(AVX2 -mavx2 -mfma -mavx)
|
||||
endif()
|
||||
else()
|
||||
set(AVX_SUPPORTED
|
||||
set(AVX2_SUPPORTED
|
||||
0
|
||||
PARENT_SCOPE)
|
||||
endif()
|
||||
|
||||
if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX_SUPPORTED)
|
||||
if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX2_SUPPORTED)
|
||||
set(COMPILER_SUPPORT_SIMD 1 PARENT_SCOPE)
|
||||
else()
|
||||
message(STATUS "No SIMD support in compiler")
|
||||
@ -215,7 +217,7 @@ function(get_opus_sources SOURCE_GROUP MAKE_FILE SOURCES)
|
||||
if(${list_length} LESS 1)
|
||||
message(
|
||||
FATAL_ERROR
|
||||
"No files parsed succesfully from ${SOURCE_GROUP} in ${MAKE_FILE}")
|
||||
"No files parsed successfully from ${SOURCE_GROUP} in ${MAKE_FILE}")
|
||||
endif()
|
||||
|
||||
# remove trailing whitespaces
|
||||
|
22
third_party/opus/src/cmake/OpusSources.cmake
vendored
22
third_party/opus/src/cmake/OpusSources.cmake
vendored
@ -13,6 +13,8 @@ get_opus_sources(SILK_SOURCES_X86_RTCD silk_sources.mk silk_sources_x86_rtcd)
|
||||
get_opus_sources(SILK_SOURCES_SSE4_1 silk_sources.mk silk_sources_sse4_1)
|
||||
get_opus_sources(SILK_SOURCES_FIXED_SSE4_1 silk_sources.mk
|
||||
silk_sources_fixed_sse4_1)
|
||||
get_opus_sources(SILK_SOURCES_AVX2 silk_sources.mk silk_sources_avx2)
|
||||
get_opus_sources(SILK_SOURCES_FLOAT_AVX2 silk_sources.mk silk_sources_float_avx2)
|
||||
get_opus_sources(SILK_SOURCES_ARM_RTCD silk_sources.mk silk_sources_arm_rtcd)
|
||||
get_opus_sources(SILK_SOURCES_ARM_NEON_INTR silk_sources.mk
|
||||
silk_sources_arm_neon_intr)
|
||||
@ -29,6 +31,7 @@ get_opus_sources(CELT_SOURCES_X86_RTCD celt_sources.mk celt_sources_x86_rtcd)
|
||||
get_opus_sources(CELT_SOURCES_SSE celt_sources.mk celt_sources_sse)
|
||||
get_opus_sources(CELT_SOURCES_SSE2 celt_sources.mk celt_sources_sse2)
|
||||
get_opus_sources(CELT_SOURCES_SSE4_1 celt_sources.mk celt_sources_sse4_1)
|
||||
get_opus_sources(CELT_SOURCES_AVX2 celt_sources.mk celt_sources_avx2)
|
||||
get_opus_sources(CELT_SOURCES_ARM_RTCD celt_sources.mk celt_sources_arm_rtcd)
|
||||
get_opus_sources(CELT_SOURCES_ARM_ASM celt_sources.mk celt_sources_arm_asm)
|
||||
get_opus_sources(CELT_AM_SOURCES_ARM_ASM celt_sources.mk
|
||||
@ -37,13 +40,32 @@ get_opus_sources(CELT_SOURCES_ARM_NEON_INTR celt_sources.mk
|
||||
celt_sources_arm_neon_intr)
|
||||
get_opus_sources(CELT_SOURCES_ARM_NE10 celt_sources.mk celt_sources_arm_ne10)
|
||||
|
||||
get_opus_sources(DEEP_PLC_HEAD lpcnet_headers.mk deep_plc_headers)
|
||||
get_opus_sources(DRED_HEAD lpcnet_headers.mk dred_headers)
|
||||
get_opus_sources(OSCE_HEAD lpcnet_headers.mk osce_headers)
|
||||
get_opus_sources(DEEP_PLC_SOURCES lpcnet_sources.mk deep_plc_sources)
|
||||
get_opus_sources(DRED_SOURCES lpcnet_sources.mk dred_sources)
|
||||
get_opus_sources(OSCE_SOURCES lpcnet_sources.mk osce_sources)
|
||||
get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd)
|
||||
get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2)
|
||||
get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)
|
||||
get_opus_sources(DNN_SOURCES_AVX2 lpcnet_sources.mk dnn_sources_avx2)
|
||||
get_opus_sources(DNN_SOURCES_NEON lpcnet_sources.mk dnn_sources_arm_neon)
|
||||
get_opus_sources(DNN_SOURCES_DOTPROD lpcnet_sources.mk dnn_sources_arm_dotprod)
|
||||
|
||||
get_opus_sources(opus_demo_SOURCES Makefile.am opus_demo_sources)
|
||||
get_opus_sources(opus_custom_demo_SOURCES Makefile.am opus_custom_demo_sources)
|
||||
get_opus_sources(opus_compare_SOURCES Makefile.am opus_compare_sources)
|
||||
get_opus_sources(tests_test_opus_api_SOURCES Makefile.am test_opus_api_sources)
|
||||
get_opus_sources(tests_test_opus_encode_SOURCES Makefile.am
|
||||
test_opus_encode_sources)
|
||||
get_opus_sources(tests_test_opus_extensions_SOURCES Makefile.am
|
||||
test_opus_extensions_sources)
|
||||
get_opus_sources(tests_test_opus_decode_SOURCES Makefile.am
|
||||
test_opus_decode_sources)
|
||||
get_opus_sources(tests_test_opus_padding_SOURCES Makefile.am
|
||||
test_opus_padding_sources)
|
||||
get_opus_sources(tests_test_opus_dred_SOURCES Makefile.am
|
||||
test_opus_dred_sources)
|
||||
get_opus_sources(tests_test_opus_custom_SOURCES Makefile.am
|
||||
test_opus_custom_sources)
|
||||
|
132
third_party/opus/src/cmake/README.md
vendored
Normal file
132
third_party/opus/src/cmake/README.md
vendored
Normal file
@ -0,0 +1,132 @@
|
||||
# Using CMake for the Opus Project
|
||||
|
||||
This guide provides instructions for using CMake to build the Opus project with various configuration options. CMake is a widely used build system generator that helps manage the build process across different platforms.
|
||||
|
||||
Note: Please keep in mind that software documentation can sometimes go out of date as new versions are released. It is always recommended to refer to the official CMake documentation for the most up-to-date and accurate information. You can find the official CMake documentation at [cmake.org/documentation](https://cmake.org/documentation/).
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Before proceeding, make sure you have the following prerequisites installed:
|
||||
|
||||
- CMake
|
||||
- Git (optional, but recommended for version control integration)
|
||||
- Working C compiler
|
||||
|
||||
## Build Instructions
|
||||
|
||||
Follow the steps below to build the Opus project using CMake:
|
||||
|
||||
1. Clone the Opus repository using Git:
|
||||
|
||||
```shell
|
||||
git clone https://gitlab.xiph.org/xiph/opus
|
||||
```
|
||||
|
||||
2. Create a build directory within the Opus repository:
|
||||
|
||||
```shell
|
||||
cd opus
|
||||
mkdir build
|
||||
cd build
|
||||
```
|
||||
|
||||
3. Configure the build with CMake. You can set the desired configuration options using CMake's `-D` flag. Here are some available options:
|
||||
|
||||
- `OPUS_BUILD_SHARED_LIBRARY`: build shared library.
|
||||
- `OPUS_BUILD_TESTING`: build tests.
|
||||
- `OPUS_BUILD_PROGRAMS`: build programs.
|
||||
- `OPUS_CUSTOM_MODES`: enable non-Opus modes, e.g. 44.1 kHz & 2^n frames.
|
||||
|
||||
For example, to build the programs and the tests, use the following command:
|
||||
|
||||
```shell
|
||||
cmake .. -DOPUS_BUILD_PROGRAMS=ON -DOPUS_BUILD_TESTING=ON
|
||||
```
|
||||
|
||||
4. Build the Opus project:
|
||||
|
||||
```shell
|
||||
cmake --build .
|
||||
```
|
||||
|
||||
5. After a successful build, you can find the compiled Opus library and associated files in the build directory.
|
||||
|
||||
## Testing with CTest
|
||||
|
||||
Opus provides a comprehensive test suite to ensure the functionality and correctness of the project. You can execute the tests using CTest, a part of the CMake build system. CTest allows for automated testing and provides useful features for managing and evaluating the test results.
|
||||
|
||||
To run the Opus tests using CTest, follow these steps:
|
||||
|
||||
1. Navigate to the build directory after configuring and building the project with CMake:
|
||||
|
||||
```shell
|
||||
cd build
|
||||
```
|
||||
|
||||
2. Execute the tests using CTest:
|
||||
|
||||
```shell
|
||||
ctest
|
||||
```
|
||||
|
||||
Note: On Windows, you need to specify which configuration to test:
|
||||
|
||||
```shell
|
||||
ctest -C Debug
|
||||
```
|
||||
|
||||
## Platform Support and Bug Reporting
|
||||
|
||||
CMake aims to provide broad platform support, allowing the Opus project to be built and used on major operating systems and platforms. The supported platforms include:
|
||||
|
||||
- Windows
|
||||
- macOS
|
||||
- Linux
|
||||
- Android
|
||||
- iOS
|
||||
|
||||
CMake achieves platform support by generating platform-specific build files (e.g., Makefiles, Visual Studio projects) based on the target platform. This allows developers to build and configure the Opus project consistently across different operating systems and environments.
|
||||
|
||||
While CMake strives to ensure compatibility and stability across platforms, bugs or issues may still arise in specific configurations. If you encounter any problems during the configuration process or while building the Opus project, we encourage you to file an issue in the [project's issue tracker](https://gitlab.xiph.org/xiph/opus/-/issues).
|
||||
|
||||
When reporting an issue, please provide the following information to help us understand and reproduce the configuration problem effectively:
|
||||
|
||||
1. Detailed description of the issue, including any error messages or unexpected behavior observed.
|
||||
2. Steps to reproduce the problem, including the CMake command and any specific configuration options used.
|
||||
3. Operating system and version (e.g., Windows 10, macOS Big Sur, Ubuntu 20.04).
|
||||
4. CMake version (e.g., CMake 3.21.1).
|
||||
5. Any relevant information about the platform, toolchain, or dependencies used.
|
||||
6. Additional context or details that might assist in troubleshooting the issue.
|
||||
|
||||
By providing thorough information when reporting configuration issues, you contribute to improving the Opus project's compatibility and reliability across different platforms.
|
||||
|
||||
We appreciate your help in identifying and addressing any configuration-related problems, ensuring a better experience for all users of the Opus project.
|
||||
|
||||
## Platform Specific Examples
|
||||
|
||||
Note: Examples can go out of date. Always refer to the official documentation for the latest reference.
|
||||
|
||||
### Cross compiling for Android
|
||||
|
||||
```shell
|
||||
cmake .. -DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a
|
||||
```
|
||||
|
||||
For more information about cross compiling for Android, you can refer to the [Cross compiling for Android documentation](https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html#cross-compiling-for-android).
|
||||
|
||||
### Cross compiling for iOS
|
||||
|
||||
```shell
|
||||
cmake .. -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64
|
||||
```
|
||||
|
||||
For more information about cross compilation for iOS, you can refer to the [Cross compiling for iOS documentation](https://cmake.org/cmake/help/latest/manual/cmake-toolchains.7.html#cross-compiling-for-ios-tvos-or-watchos).
|
||||
|
||||
|
||||
### Windows Visual Studio
|
||||
|
||||
```shell
|
||||
cmake .. -G "Visual Studio 17 2022" -A x64
|
||||
```
|
||||
|
||||
For more information about the Visual Studio generator options and additional customization, you can refer to the [Visual Studio Generator documentation](https://cmake.org/cmake/help/latest/generator/Visual%20Studio%2017%202022.html).
|
31
third_party/opus/src/cmake/cpu_info_by_asm.c
vendored
Normal file
31
third_party/opus/src/cmake/cpu_info_by_asm.c
vendored
Normal file
@ -0,0 +1,31 @@
|
||||
#include <stdio.h>
|
||||
/* Executes the x86 CPUID instruction through GCC-style inline assembly.
   NOTE(review): this file lives under cmake/, so it is presumably a
   configure-time probe whose only purpose is to show that the toolchain
   accepts this asm -- confirm against the CMake scripts.
   Returns 0 unconditionally. */
int main(void) {
  unsigned int CPUInfo0;
  unsigned int CPUInfo1;
  unsigned int CPUInfo2;
  unsigned int CPUInfo3;
  /* CPUID leaf placed in EAX; initialized so the asm input operand never
     reads an indeterminate value. */
  unsigned int InfoType = 0;
#if defined(__i386__) && defined(__PIC__)
  /* %ebx is PIC register in 32-bit, so mustn't clobber it. */
  __asm__ __volatile__ (
    "xchg %%ebx, %1\n"
    "cpuid\n"
    "xchg %%ebx, %1\n":
    "=a" (CPUInfo0),
    "=r" (CPUInfo1),
    "=c" (CPUInfo2),
    "=d" (CPUInfo3) :
    "0" (InfoType), "2" (0)
  );
#else
  __asm__ __volatile__ (
    "cpuid":
    "=a" (CPUInfo0),
    "=b" (CPUInfo1),
    "=c" (CPUInfo2),
    "=d" (CPUInfo3) :
    "0" (InfoType), "2" (0)
  );
#endif
  /* The register values are not inspected; mark them used to keep
     set-but-unused diagnostics quiet. */
  (void)CPUInfo0;
  (void)CPUInfo1;
  (void)CPUInfo2;
  (void)CPUInfo3;
  return 0;
}
|
9
third_party/opus/src/cmake/cpu_info_by_c.c
vendored
Normal file
9
third_party/opus/src/cmake/cpu_info_by_c.c
vendored
Normal file
@ -0,0 +1,9 @@
|
||||
#include <cpuid.h>
|
||||
/* Calls __get_cpuid_count() from <cpuid.h> for leaf InfoType, subleaf 0, and
   returns its result as the process exit status.
   NOTE(review): this file lives under cmake/, so it is presumably a
   configure-time probe that only needs to compile and link -- confirm
   against the CMake scripts. */
int main(void) {
  unsigned int CPUInfo0;
  unsigned int CPUInfo1;
  unsigned int CPUInfo2;
  unsigned int CPUInfo3;
  /* Leaf to query; initialized so an indeterminate value is never passed. */
  unsigned int InfoType = 0;
  return __get_cpuid_count(InfoType, 0, &CPUInfo0, &CPUInfo1, &CPUInfo2, &CPUInfo3);
}
|
204
third_party/opus/src/configure.ac
vendored
204
third_party/opus/src/configure.ac
vendored
@ -22,9 +22,9 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
|
||||
# For libtool.
|
||||
dnl Please update these for releases.
|
||||
OPUS_LT_CURRENT=8
|
||||
OPUS_LT_REVISION=0
|
||||
OPUS_LT_AGE=8
|
||||
OPUS_LT_CURRENT=10
|
||||
OPUS_LT_REVISION=1
|
||||
OPUS_LT_AGE=10
|
||||
|
||||
AC_SUBST(OPUS_LT_CURRENT)
|
||||
AC_SUBST(OPUS_LT_REVISION)
|
||||
@ -151,6 +151,14 @@ AS_IF([test "$enable_float_api" = "no"],[
|
||||
AC_DEFINE([DISABLE_FLOAT_API], [1], [Do not build the float API])
|
||||
])
|
||||
|
||||
AC_ARG_ENABLE([fixed-res24],
|
||||
[AS_HELP_STRING([--enable-fixed-res24], [Use 24-bit internal resolution for fixed-point implementation])],,
|
||||
[enable_fixed_res24=no])
|
||||
|
||||
AS_IF([test "$enable_fixed_res24" = "yes"],[
|
||||
AC_DEFINE([ENABLE_RES24], [1], [24-bit internal resolution for fixed-point])
|
||||
])
|
||||
|
||||
AC_ARG_ENABLE([custom-modes],
|
||||
[AS_HELP_STRING([--enable-custom-modes], [enable non-Opus modes, e.g. 44.1 kHz & 2^n frames])],,
|
||||
[enable_custom_modes=no])
|
||||
@ -162,12 +170,39 @@ AS_IF([test "$enable_custom_modes" = "yes"],[
|
||||
|
||||
AM_CONDITIONAL([CUSTOM_MODES], [test "$enable_custom_modes" = "yes"])
|
||||
|
||||
AC_ARG_ENABLE([dred],
|
||||
[AS_HELP_STRING([--enable-dred], [Use Deep REDundancy (DRED)])],,
|
||||
[enable_dred=no])
|
||||
|
||||
AS_IF([test "$enable_dred" = "yes"],[
|
||||
AC_DEFINE([ENABLE_DRED], [1], [DRED])
|
||||
])
|
||||
AM_CONDITIONAL([ENABLE_DRED], [test "$enable_dred" = "yes"])
|
||||
|
||||
AC_ARG_ENABLE([deep-plc],
|
||||
[AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],,
|
||||
[enable_deep_plc=no])
|
||||
|
||||
AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
|
||||
AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC])
|
||||
])
|
||||
AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
|
||||
|
||||
AC_ARG_ENABLE([lossgen],
|
||||
[AS_HELP_STRING([--enable-lossgen], [Build opus_demo with packet loss simulator])],,
|
||||
[enable_lossgen=no])
|
||||
|
||||
AS_IF([test "$enable_lossgen" = "yes"],[
|
||||
AC_DEFINE([ENABLE_LOSSGEN], [1], [LOSSGEN])
|
||||
])
|
||||
AM_CONDITIONAL([ENABLE_LOSSGEN], [test "$enable_lossgen" = "yes"])
|
||||
|
||||
has_float_approx=no
|
||||
#case "$host_cpu" in
|
||||
#i[[3456]]86 | x86_64 | powerpc64 | powerpc32 | ia64)
|
||||
# has_float_approx=yes
|
||||
# ;;
|
||||
#esac
|
||||
case "$host_cpu" in
|
||||
i[[3456]]86 | x86_64 | arm* | aarch64* | powerpc64 | powerpc32 | ia64)
|
||||
has_float_approx=yes
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_ARG_ENABLE([float-approx],
|
||||
[AS_HELP_STRING([--enable-float-approx], [enable fast approximations for floating point])],
|
||||
@ -202,7 +237,7 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
|
||||
case $host_cpu in
|
||||
arm*)
|
||||
dnl Currently we only have asm for fixed-point
|
||||
AS_IF([test "$enable_float" != "yes"],[
|
||||
#AS_IF([test "$enable_float" != "yes"],[
|
||||
cpu_arm=yes
|
||||
AC_DEFINE([OPUS_ARM_ASM], [], [Make use of ARM asm optimization])
|
||||
AS_GCC_INLINE_ASSEMBLY(
|
||||
@ -316,6 +351,18 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
|
||||
)
|
||||
])
|
||||
AC_SUBST(OPUS_ARM_MAY_HAVE_NEON)
|
||||
AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],[
|
||||
AC_DEFINE(OPUS_ARM_MAY_HAVE_DOTPROD, 1,
|
||||
[Define if compiler supports DOTPROD instructions])
|
||||
AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"], [
|
||||
AC_DEFINE(OPUS_ARM_PRESUME_DOTPROD, 1,
|
||||
[Define if binary requires DOTPROD instruction support])
|
||||
asm_optimization="$asm_optimization (DOTPROD)"
|
||||
],
|
||||
[rtcd_support="$rtcd_support (DOTPROD)"]
|
||||
)
|
||||
])
|
||||
AC_SUBST(OPUS_ARM_MAY_HAVE_DOTPROD)
|
||||
dnl Make sure turning on RTCD gets us at least one
|
||||
dnl instruction set.
|
||||
AS_IF([test x"$rtcd_support" != x""],
|
||||
@ -336,7 +383,7 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
|
||||
[*** ARM assembly requires perl -- disabling optimizations])
|
||||
asm_optimization="(missing perl dependency for ARM)"
|
||||
])
|
||||
])
|
||||
#])
|
||||
;;
|
||||
esac
|
||||
],[
|
||||
@ -352,13 +399,14 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
|
||||
AM_CONDITIONAL([HAVE_SSE], [false])
|
||||
AM_CONDITIONAL([HAVE_SSE2], [false])
|
||||
AM_CONDITIONAL([HAVE_SSE4_1], [false])
|
||||
AM_CONDITIONAL([HAVE_AVX], [false])
|
||||
AM_CONDITIONAL([HAVE_AVX2], [false])
|
||||
|
||||
m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse])
|
||||
m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
|
||||
m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
|
||||
m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx])
|
||||
m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -mavx2])
|
||||
m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
|
||||
m4_define([DEFAULT_ARM_DOTPROD_INTR_CFLAGS], ["-march=armv8.2-a+dotprod"])
|
||||
# With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
|
||||
# -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
|
||||
# this option will break things.
|
||||
@ -374,14 +422,16 @@ AS_CASE([$host],
|
||||
AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
|
||||
AC_ARG_VAR([ARM_DOTPROD_INTR_CFLAGS], [C compiler flags to compile ARM DOTPROD intrinsics @<:@default=]DEFAULT_ARM_DOTPROD_INTR_CFLAGS[@:>@])
|
||||
|
||||
AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
|
||||
AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
|
||||
AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
|
||||
AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")])
|
||||
AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")])
|
||||
AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
|
||||
AS_VAR_SET_IF([ARM_DOTPROD_INTR_CFLAGS], [], [AS_VAR_SET([ARM_DOTPROD_INTR_CFLAGS], ["DEFAULT_ARM_DOTPROD_INTR_CFLAGS"])])
|
||||
|
||||
AC_DEFUN([OPUS_PATH_NE10],
|
||||
[
|
||||
@ -525,6 +575,46 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
|
||||
intrinsics_support="$intrinsics_support (NEON [Aarch64])"
|
||||
])
|
||||
|
||||
OPUS_CHECK_INTRINSICS(
|
||||
[Aarch64 dotprod],
|
||||
[$ARM_DOTPROD_INTR_CFLAGS],
|
||||
[OPUS_ARM_MAY_HAVE_DOTPROD],
|
||||
[OPUS_ARM_PRESUME_DOTPROD],
|
||||
[[#include <arm_neon.h>
|
||||
]],
|
||||
[[
|
||||
static int32x4_t acc;
|
||||
static int8x16_t a, b;
|
||||
acc = vdotq_s32(acc, a, b);
|
||||
]]
|
||||
)
|
||||
AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
|
||||
[
|
||||
dnl Export the flags from ARM_DOTPROD_INTR_CFLAGS, the variable declared with
dnl AC_ARG_VAR and passed to OPUS_CHECK_INTRINSICS for the dotprod check.
OPUS_ARM_DOTPROD_INTR_CFLAGS="$ARM_DOTPROD_INTR_CFLAGS"
|
||||
AC_SUBST([OPUS_ARM_DOTPROD_INTR_CFLAGS])
|
||||
]
|
||||
)
|
||||
|
||||
AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],
|
||||
[
|
||||
AC_DEFINE([OPUS_ARM_MAY_HAVE_DOTPROD], 1, [Compiler supports Aarch64 DOTPROD Intrinsics])
|
||||
intrinsics_support="$intrinsics_support (DOTPROD)"
|
||||
|
||||
AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"],
|
||||
[
|
||||
AC_DEFINE([OPUS_ARM_PRESUME_DOTPROD], 1, [Define if binary requires Aarch64 dotprod Intrinsics])
|
||||
intrinsics_support="$intrinsics_support (DOTPROD [Aarch64])"
|
||||
])
|
||||
|
||||
AS_IF([test x"$enable_rtcd" != x"no" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
|
||||
[AS_IF([test x"$rtcd_support" = x"no"],
|
||||
[rtcd_support="ARM (DOTPROD Intrinsics)"],
|
||||
[rtcd_support="$rtcd_support (DOTPROD Intrinsics)"])])
|
||||
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
AS_IF([test x"$intrinsics_support" = x""],
|
||||
[intrinsics_support=no],
|
||||
[intrinsics_support="ARM$intrinsics_support"])
|
||||
@ -601,24 +691,31 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
|
||||
]
|
||||
)
|
||||
OPUS_CHECK_INTRINSICS(
|
||||
[AVX],
|
||||
[$X86_AVX_CFLAGS],
|
||||
[OPUS_X86_MAY_HAVE_AVX],
|
||||
[OPUS_X86_PRESUME_AVX],
|
||||
[AVX2],
|
||||
[$X86_AVX2_CFLAGS],
|
||||
[OPUS_X86_MAY_HAVE_AVX2],
|
||||
[OPUS_X86_PRESUME_AVX2],
|
||||
[[#include <immintrin.h>
|
||||
#include <time.h>
|
||||
]],
|
||||
[[
|
||||
unsigned char utest[[16]] = {1};
|
||||
__m256 mtest;
|
||||
__m256i mtest1;
|
||||
__m256i mtest2;
|
||||
mtest = _mm256_set1_ps((float)time(NULL));
|
||||
mtest = _mm256_addsub_ps(mtest, mtest);
|
||||
return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
|
||||
mtest = _mm256_fmadd_ps(mtest, mtest, mtest);
|
||||
mtest1 = _mm256_set_m128i(_mm_loadu_si64(utest), _mm_loadu_si64(utest));
|
||||
mtest2 =
|
||||
_mm256_cvtepi16_epi32(_mm_loadu_si128(utest));
|
||||
return _mm256_extract_epi16(_mm256_xor_si256(
|
||||
_mm256_xor_si256(mtest1, mtest2), _mm256_cvttps_epi32(mtest)), 0);
|
||||
]]
|
||||
)
|
||||
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
|
||||
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1" && test x"$OPUS_X86_PRESUME_AVX2" != x"1"],
|
||||
[
|
||||
OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS"
|
||||
AC_SUBST([OPUS_X86_AVX_CFLAGS])
|
||||
OPUS_X86_AVX2_CFLAGS="$X86_AVX2_CFLAGS"
|
||||
AC_SUBST([OPUS_X86_AVX2_CFLAGS])
|
||||
]
|
||||
)
|
||||
AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""])
|
||||
@ -660,17 +757,17 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
|
||||
[
|
||||
AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics])
|
||||
])
|
||||
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"],
|
||||
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"],
|
||||
[
|
||||
AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics])
|
||||
intrinsics_support="$intrinsics_support AVX"
|
||||
AC_DEFINE([OPUS_X86_MAY_HAVE_AVX2], 1, [Compiler supports X86 AVX2 Intrinsics])
|
||||
intrinsics_support="$intrinsics_support AVX2"
|
||||
|
||||
AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"],
|
||||
[AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])],
|
||||
[rtcd_support="$rtcd_support AVX"])
|
||||
AS_IF([test x"$OPUS_X86_PRESUME_AVX2" = x"1"],
|
||||
[AC_DEFINE([OPUS_X86_PRESUME_AVX2], 1, [Define if binary requires AVX2 intrinsics support])],
|
||||
[rtcd_support="$rtcd_support AVX2"])
|
||||
],
|
||||
[
|
||||
AC_MSG_WARN([Compiler does not support AVX intrinsics])
|
||||
AC_MSG_WARN([Compiler does not support AVX2 intrinsics])
|
||||
])
|
||||
|
||||
AS_IF([test x"$intrinsics_support" = x""],
|
||||
@ -742,6 +839,8 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
|
||||
])
|
||||
|
||||
AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
|
||||
AM_CONDITIONAL([HAVE_ARM_DOTPROD],
|
||||
[test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"])
|
||||
AM_CONDITIONAL([HAVE_ARM_NEON_INTR],
|
||||
[test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"])
|
||||
AM_CONDITIONAL([HAVE_ARM_NE10],
|
||||
@ -753,8 +852,8 @@ AM_CONDITIONAL([HAVE_SSE2],
|
||||
[test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"])
|
||||
AM_CONDITIONAL([HAVE_SSE4_1],
|
||||
[test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"])
|
||||
AM_CONDITIONAL([HAVE_AVX],
|
||||
[test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"])
|
||||
AM_CONDITIONAL([HAVE_AVX2],
|
||||
[test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"])
|
||||
|
||||
AM_CONDITIONAL([HAVE_RTCD],
|
||||
[test x"$enable_rtcd" = x"yes" -a x"$rtcd_support" != x"no"])
|
||||
@ -813,6 +912,47 @@ AS_IF([test "$enable_doc" = "yes"], [
|
||||
HAVE_DOXYGEN=no
|
||||
])
|
||||
|
||||
AC_ARG_ENABLE([dot-product],
|
||||
AS_HELP_STRING([--disable-dot-product], [Disable dot product implementation]),,
|
||||
enable_dot_product=yes)
|
||||
|
||||
AS_IF([test "$enable_dot_product" = "no"], [
|
||||
AC_DEFINE([DISABLE_DOT_PROD], [1], [Disable dot product instructions])
|
||||
])
|
||||
|
||||
AC_ARG_ENABLE([dnn-debug-float],
|
||||
AS_HELP_STRING([--enable-dnn-debug-float], [Use floating-point DNN computation everywhere]),,
|
||||
enable_dnn_debug_float=no)
|
||||
|
||||
AS_IF([test "$enable_dnn_debug_float" = "no"], [
|
||||
AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float])
|
||||
])
|
||||
|
||||
dnl --enable-osce-training-data: dump OSCE feature data for SILK enhancement.
dnl The default below is assigned to enable_osce_training_data, which is the
dnl variable read by the AS_IF tests and the ENABLE_OSCE conditional that follow.
AC_ARG_ENABLE([osce-training-data],
   AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
   [enable_osce_training_data=no]
)
|
||||
|
||||
AS_IF([test "$enable_osce_training_data" = "yes"], [
|
||||
AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
|
||||
])
|
||||
|
||||
AC_MSG_CHECKING([argument osce training data])
|
||||
AS_IF([test "$enable_osce_training_data" = "yes"], [
|
||||
AC_MSG_RESULT([yes])
|
||||
], [AC_MSG_RESULT([no])])
|
||||
|
||||
AC_ARG_ENABLE([osce],
|
||||
AS_HELP_STRING([--enable-osce], [enables speech coding enhancement]),,
|
||||
[enable_osce=no]
|
||||
)
|
||||
|
||||
AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
|
||||
AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
|
||||
])
|
||||
|
||||
AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
|
||||
|
||||
AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])
|
||||
|
||||
AC_ARG_ENABLE([extra-programs],
|
||||
|
24
third_party/opus/src/dnn/LPCNet.yml
vendored
Normal file
24
third_party/opus/src/dnn/LPCNet.yml
vendored
Normal file
@ -0,0 +1,24 @@
|
||||
#
|
||||
# install
|
||||
# conda env create -f=LPCNet.yml
|
||||
#
|
||||
# update
|
||||
# conda env update -f=LPCNet.yml
|
||||
#
|
||||
# activate
|
||||
# conda activate LPCNet
|
||||
#
|
||||
# remove
|
||||
# conda remove --name LPCNet --all
|
||||
#
|
||||
name: LPCNet
|
||||
channels:
|
||||
- anaconda
|
||||
- conda-forge
|
||||
dependencies:
|
||||
- keras==2.2.4
|
||||
- python>=3.6
|
||||
- tensorflow-gpu==1.12.0
|
||||
- cudatoolkit
|
||||
- h5py
|
||||
- numpy
|
1
third_party/opus/src/dnn/README
vendored
Normal file
1
third_party/opus/src/dnn/README
vendored
Normal file
@ -0,0 +1 @@
|
||||
See README.md
|
126
third_party/opus/src/dnn/README.md
vendored
Normal file
126
third_party/opus/src/dnn/README.md
vendored
Normal file
@ -0,0 +1,126 @@
|
||||
# LPCNet
|
||||
|
||||
Low complexity implementation of the WaveRNN-based LPCNet algorithm, as described in:
|
||||
|
||||
- J.-M. Valin, J. Skoglund, [LPCNet: Improving Neural Speech Synthesis Through Linear Prediction](https://jmvalin.ca/papers/lpcnet_icassp2019.pdf), *Proc. International Conference on Acoustics, Speech and Signal Processing (ICASSP)*, arXiv:1810.11846, 2019.
|
||||
- J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [Neural Speech Synthesis on a Shoestring: Improving the Efficiency of LPCNet](https://jmvalin.ca/papers/improved_lpcnet.pdf), *Proc. ICASSP*, arxiv:2106.04129, 2022.
|
||||
- K. Subramani, J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [End-to-end LPCNet: A Neural Vocoder With Fully-Differentiable LPC Estimation](https://jmvalin.ca/papers/lpcnet_end2end.pdf), *Proc. INTERSPEECH*, arxiv:2202.11301, 2022.
|
||||
|
||||
For coding/PLC applications of LPCNet, see:
|
||||
|
||||
- J.-M. Valin, J. Skoglund, [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://jmvalin.ca/papers/lpcnet_codec.pdf), *Proc. INTERSPEECH*, arxiv:1903.12087, 2019.
|
||||
- J. Skoglund, J.-M. Valin, [Improving Opus Low Bit Rate Quality with Neural Speech Synthesis](https://jmvalin.ca/papers/opusnet.pdf), *Proc. INTERSPEECH*, arxiv:1905.04628, 2020.
|
||||
- J.-M. Valin, A. Mustafa, C. Montgomery, T.B. Terriberry, M. Klingbeil, P. Smaragdis, A. Krishnaswamy, [Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model](https://jmvalin.ca/papers/lpcnet_plc.pdf), *Proc. INTERSPEECH*, arxiv:2205.05785, 2022.
|
||||
- J.-M. Valin, J. Büthe, A. Mustafa, [Low-Bitrate Redundancy Coding of Speech Using a Rate-Distortion-Optimized Variational Autoencoder](https://jmvalin.ca/papers/valin_dred.pdf), *Proc. ICASSP*, arXiv:2212.04453, 2023. ([blog post](https://www.amazon.science/blog/neural-encoding-enables-more-efficient-recovery-of-lost-audio-packets))
|
||||
|
||||
# Introduction
|
||||
|
||||
Work in progress software for researching low CPU complexity algorithms for speech synthesis and compression by applying Linear Prediction techniques to WaveRNN. High quality speech can be synthesised on regular CPUs (around 3 GFLOP) with SIMD support (SSE2, SSSE3, AVX, AVX2/FMA, NEON currently supported). The code also supports very low bitrate compression at 1.6 kb/s.
|
||||
|
||||
The BSD licensed software is written in C and Python/Keras. For training, a GTX 1080 Ti or better is recommended.
|
||||
|
||||
This software is an open source starting point for LPCNet/WaveRNN-based speech synthesis and coding.
|
||||
|
||||
# Using the existing software
|
||||
|
||||
You can build the code using:
|
||||
|
||||
```
|
||||
./autogen.sh
|
||||
./configure
|
||||
make
|
||||
```
|
||||
Note that the autogen.sh script is used when building from Git and will automatically download the latest model
|
||||
(models are too large to put in Git). By default, LPCNet will attempt to use 8-bit dot product instructions on AVX\*/Neon to
|
||||
speed up inference. To disable that (e.g. to avoid quantization effects when retraining), add --disable-dot-product to the
|
||||
configure script. LPCNet does not yet have a complete implementation for some of the integer operations on the ARMv7
|
||||
architecture so for now you will also need --disable-dot-product to successfully compile on 32-bit ARM.
|
||||
|
||||
It is highly recommended to set the CFLAGS environment variable to enable AVX or NEON *prior* to running configure, otherwise
|
||||
no vectorization will take place and the code will be very slow. On a recent x86 CPU, something like
|
||||
```
|
||||
export CFLAGS='-Ofast -g -march=native'
|
||||
```
|
||||
should work. On ARM, you can enable Neon with:
|
||||
```
|
||||
export CFLAGS='-Ofast -g -mfpu=neon'
|
||||
```
|
||||
While not strictly required, the -Ofast flag will help with auto-vectorization, especially for dot products that
|
||||
cannot be optimized without -ffast-math (which -Ofast enables). Additionally, -falign-loops=32 has been shown to
|
||||
help on x86.
|
||||
|
||||
You can test the capabilities of LPCNet using the lpcnet\_demo application. To encode a file:
|
||||
```
|
||||
./lpcnet_demo -encode input.pcm compressed.bin
|
||||
```
|
||||
where input.pcm is a 16-bit (machine endian) PCM file sampled at 16 kHz. The raw compressed data (no header)
|
||||
is written to compressed.bin and consists of 8 bytes per 40-ms packet.
|
||||
|
||||
To decode:
|
||||
```
|
||||
./lpcnet_demo -decode compressed.bin output.pcm
|
||||
```
|
||||
where output.pcm is also 16-bit, 16 kHz PCM.
|
||||
|
||||
Alternatively, you can run the uncompressed analysis/synthesis using -features
|
||||
instead of -encode and -synthesis instead of -decode.
|
||||
The same functionality is available in the form of a library. See include/lpcnet.h for the API.
|
||||
|
||||
To try packet loss concealment (PLC), you first need a PLC model, which you can get with:
|
||||
```
|
||||
./download_model.sh plc-3b1eab4
|
||||
```
|
||||
or (for the PLC challenge submission):
|
||||
```
|
||||
./download_model.sh plc_challenge
|
||||
```
|
||||
PLC can be tested with:
|
||||
```
|
||||
./lpcnet_demo -plc_file noncausal_dc error_pattern.txt input.pcm output.pcm
|
||||
```
|
||||
where error_pattern.txt is a text file with one entry per 20-ms packet, with 1 meaning "packet lost" and 0 meaning "packet not lost".
|
||||
noncausal_dc is the non-causal (5-ms look-ahead) mode with special handling for DC offsets. It's also possible to use "noncausal", "causal",
|
||||
or "causal_dc".
|
||||
|
||||
# Training a new model
|
||||
|
||||
This codebase is also meant for research and it is possible to train new models. These are the steps to do that:
|
||||
|
||||
1. Set up a Keras system with GPU.
|
||||
|
||||
1. Generate training data:
|
||||
```
|
||||
./dump_data -train input.s16 features.f32 data.s16
|
||||
```
|
||||
where the first file contains 16 kHz 16-bit raw PCM audio (no header) and the other files are output files. This program makes several passes over the data with different filters to generate a large amount of training data.
|
||||
|
||||
1. Now that you have your files, train with:
|
||||
```
|
||||
python3 training_tf2/train_lpcnet.py features.f32 data.s16 model_name
|
||||
```
|
||||
and it will generate an h5 file for each iteration, with model\_name as prefix. If it stops with a
|
||||
"Failed to allocate RNN reserve space" message try specifying a smaller --batch-size for train\_lpcnet.py.
|
||||
|
||||
1. You can synthesise speech with Python and your GPU card (very slow):
|
||||
```
|
||||
./dump_data -test test_input.s16 test_features.f32
|
||||
./training_tf2/test_lpcnet.py lpcnet_model_name.h5 test_features.f32 test.s16
|
||||
```
|
||||
|
||||
1. Or with C on a CPU (C inference is much faster):
|
||||
First extract the model files nnet\_data.h and nnet\_data.c
|
||||
```
|
||||
./training_tf2/dump_lpcnet.py lpcnet_model_name.h5
|
||||
```
|
||||
and move the generated nnet\_data.\* files to the src/ directory.
|
||||
Then you just need to rebuild the software and use lpcnet\_demo as explained above.
|
||||
|
||||
# Speech Material for Training
|
||||
|
||||
Suitable training material can be obtained from [Open Speech and Language Resources](https://www.openslr.org/). See the datasets.txt file for details on suitable training data.
|
||||
|
||||
# Reading Further
|
||||
|
||||
1. [LPCNet: DSP-Boosted Neural Speech Synthesis](https://people.xiph.org/~jm/demo/lpcnet/)
|
||||
1. [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://people.xiph.org/~jm/demo/lpcnet_codec/)
|
||||
1. Sample model files (check compatibility): https://media.xiph.org/lpcnet/data/
|
449
third_party/opus/src/dnn/adaconvtest.c
vendored
Normal file
449
third_party/opus/src/dnn/adaconvtest.c
vendored
Normal file
@ -0,0 +1,449 @@
|
||||
#include "lace_data.h"
|
||||
#include "nolace_data.h"
|
||||
#include "osce.h"
|
||||
#include "nndsp.h"
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
extern const WeightArray lacelayers_arrays[];
|
||||
extern const WeightArray nolacelayers_arrays[];
|
||||
|
||||
void adaconv_compare(
|
||||
const char * prefix,
|
||||
int num_frames,
|
||||
AdaConvState* hAdaConv,
|
||||
LinearLayer *kernel_layer,
|
||||
LinearLayer *gain_layer,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int in_channels,
|
||||
int out_channels,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float shape_gain
|
||||
)
|
||||
{
|
||||
char feature_file[256];
|
||||
char x_in_file[256];
|
||||
char x_out_file[256];
|
||||
char message[512];
|
||||
int i_frame, i_sample;
|
||||
float mse;
|
||||
float features[512];
|
||||
float x_in[512];
|
||||
float x_out_ref[512];
|
||||
float x_out[512];
|
||||
float window[40];
|
||||
|
||||
init_adaconv_state(hAdaConv);
|
||||
compute_overlap_window(window, 40);
|
||||
|
||||
FILE *f_features, *f_x_in, *f_x_out;
|
||||
|
||||
strcpy(feature_file, prefix);
|
||||
strcat(feature_file, "_features.f32");
|
||||
f_features = fopen(feature_file, "r");
|
||||
if (f_features == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", feature_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_in_file, prefix);
|
||||
strcat(x_in_file, "_x_in.f32");
|
||||
f_x_in = fopen(x_in_file, "r");
|
||||
if (f_x_in == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_in_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_out_file, prefix);
|
||||
strcat(x_out_file, "_x_out.f32");
|
||||
f_x_out = fopen(x_out_file, "r");
|
||||
if (f_x_out == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_out_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i_frame = 0; i_frame < num_frames; i_frame ++)
|
||||
{
|
||||
if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_in, sizeof(float), frame_size * in_channels, f_x_in) != frame_size * in_channels)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_out_ref, sizeof(float), frame_size * out_channels, f_x_out) != frame_size * out_channels)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim,
|
||||
frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding,
|
||||
filter_gain_a, filter_gain_b, shape_gain, window, 0);
|
||||
|
||||
mse = 0;
|
||||
for (i_sample = 0; i_sample < frame_size * out_channels; i_sample ++)
|
||||
{
|
||||
mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
|
||||
}
|
||||
mse = sqrt(mse / (frame_size * out_channels));
|
||||
printf("rmse[%d] %f\n", i_frame, mse);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void adacomb_compare(
|
||||
const char * prefix,
|
||||
int num_frames,
|
||||
AdaCombState* hAdaComb,
|
||||
LinearLayer *kernel_layer,
|
||||
LinearLayer *gain_layer,
|
||||
LinearLayer *global_gain_layer,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int overlap_size,
|
||||
int kernel_size,
|
||||
int left_padding,
|
||||
float filter_gain_a,
|
||||
float filter_gain_b,
|
||||
float log_gain_limit
|
||||
)
|
||||
{
|
||||
char feature_file[256];
|
||||
char x_in_file[256];
|
||||
char p_in_file[256];
|
||||
char x_out_file[256];
|
||||
char message[512];
|
||||
int i_frame, i_sample;
|
||||
float mse;
|
||||
float features[512];
|
||||
float x_in[512];
|
||||
float x_out_ref[512];
|
||||
float x_out[512];
|
||||
int pitch_lag;
|
||||
float window[40];
|
||||
|
||||
init_adacomb_state(hAdaComb);
|
||||
compute_overlap_window(window, 40);
|
||||
|
||||
FILE *f_features, *f_x_in, *f_p_in, *f_x_out;
|
||||
|
||||
strcpy(feature_file, prefix);
|
||||
strcat(feature_file, "_features.f32");
|
||||
f_features = fopen(feature_file, "r");
|
||||
if (f_features == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", feature_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_in_file, prefix);
|
||||
strcat(x_in_file, "_x_in.f32");
|
||||
f_x_in = fopen(x_in_file, "r");
|
||||
if (f_x_in == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_in_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(p_in_file, prefix);
|
||||
strcat(p_in_file, "_p_in.s32");
|
||||
f_p_in = fopen(p_in_file, "r");
|
||||
if (f_p_in == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", p_in_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_out_file, prefix);
|
||||
strcat(x_out_file, "_x_out.f32");
|
||||
f_x_out = fopen(x_out_file, "r");
|
||||
if (f_x_out == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_out_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i_frame = 0; i_frame < num_frames; i_frame ++)
|
||||
{
|
||||
if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(&pitch_lag, sizeof(int), 1, f_p_in) != 1)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, p_in_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer,
|
||||
pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0);
|
||||
|
||||
|
||||
mse = 0;
|
||||
for (i_sample = 0; i_sample < frame_size; i_sample ++)
|
||||
{
|
||||
mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
|
||||
}
|
||||
mse = sqrt(mse / (frame_size));
|
||||
printf("rmse[%d] %f\n", i_frame, mse);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void adashape_compare(
|
||||
const char * prefix,
|
||||
int num_frames,
|
||||
AdaShapeState* hAdaShape,
|
||||
LinearLayer *alpha1,
|
||||
LinearLayer *alpha2,
|
||||
int feature_dim,
|
||||
int frame_size,
|
||||
int avg_pool_k
|
||||
)
|
||||
{
|
||||
char feature_file[256];
|
||||
char x_in_file[256];
|
||||
char x_out_file[256];
|
||||
char message[512];
|
||||
int i_frame, i_sample;
|
||||
float mse;
|
||||
float features[512];
|
||||
float x_in[512];
|
||||
float x_out_ref[512];
|
||||
float x_out[512];
|
||||
|
||||
init_adashape_state(hAdaShape);
|
||||
|
||||
FILE *f_features, *f_x_in, *f_x_out;
|
||||
|
||||
strcpy(feature_file, prefix);
|
||||
strcat(feature_file, "_features.f32");
|
||||
f_features = fopen(feature_file, "r");
|
||||
if (f_features == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", feature_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_in_file, prefix);
|
||||
strcat(x_in_file, "_x_in.f32");
|
||||
f_x_in = fopen(x_in_file, "r");
|
||||
if (f_x_in == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_in_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
strcpy(x_out_file, prefix);
|
||||
strcat(x_out_file, "_x_out.f32");
|
||||
f_x_out = fopen(x_out_file, "r");
|
||||
if (f_x_out == NULL)
|
||||
{
|
||||
sprintf(message, "could not open file %s", x_out_file);
|
||||
perror(message);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i_frame = 0; i_frame < num_frames; i_frame ++)
|
||||
{
|
||||
if (fread(features, sizeof(float), feature_dim, f_features) != feature_dim)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, feature_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_in, sizeof(float), frame_size, f_x_in) != frame_size)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_in_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (fread(x_out_ref, sizeof(float), frame_size, f_x_out) != frame_size)
|
||||
{
|
||||
fprintf(stderr, "could not read frame %d from %s\n", i_frame, x_out_file);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim,
|
||||
frame_size, avg_pool_k, 0);
|
||||
|
||||
mse = 0;
|
||||
for (i_sample = 0; i_sample < frame_size; i_sample ++)
|
||||
{
|
||||
mse += pow(x_out_ref[i_sample] - x_out[i_sample], 2);
|
||||
}
|
||||
mse = sqrt(mse / (frame_size));
|
||||
printf("rmse[%d] %f\n", i_frame, mse);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
LACELayers hLACE;
|
||||
NOLACELayers hNoLACE;
|
||||
|
||||
AdaConvState hAdaConv;
|
||||
AdaCombState hAdaComb;
|
||||
AdaShapeState hAdaShape;
|
||||
|
||||
init_adaconv_state(&hAdaConv);
|
||||
|
||||
init_lacelayers(&hLACE, lacelayers_arrays);
|
||||
init_nolacelayers(&hNoLACE, nolacelayers_arrays);
|
||||
|
||||
printf("\ntesting lace.af1 (1 in, 1 out)...\n");
|
||||
adaconv_compare(
|
||||
"testvectors/lace_af1",
|
||||
5,
|
||||
&hAdaConv,
|
||||
&hLACE.lace_af1_kernel,
|
||||
&hLACE.lace_af1_gain,
|
||||
LACE_AF1_FEATURE_DIM,
|
||||
LACE_AF1_FRAME_SIZE,
|
||||
LACE_AF1_OVERLAP_SIZE,
|
||||
LACE_AF1_IN_CHANNELS,
|
||||
LACE_AF1_OUT_CHANNELS,
|
||||
LACE_AF1_KERNEL_SIZE,
|
||||
LACE_AF1_LEFT_PADDING,
|
||||
LACE_AF1_FILTER_GAIN_A,
|
||||
LACE_AF1_FILTER_GAIN_B,
|
||||
LACE_AF1_SHAPE_GAIN
|
||||
);
|
||||
|
||||
|
||||
printf("\ntesting nolace.af1 (1 in, 2 out)...\n");
|
||||
adaconv_compare(
|
||||
"testvectors/nolace_af1",
|
||||
5,
|
||||
&hAdaConv,
|
||||
&hNoLACE.nolace_af1_kernel,
|
||||
&hNoLACE.nolace_af1_gain,
|
||||
NOLACE_AF1_FEATURE_DIM,
|
||||
NOLACE_AF1_FRAME_SIZE,
|
||||
NOLACE_AF1_OVERLAP_SIZE,
|
||||
NOLACE_AF1_IN_CHANNELS,
|
||||
NOLACE_AF1_OUT_CHANNELS,
|
||||
NOLACE_AF1_KERNEL_SIZE,
|
||||
NOLACE_AF1_LEFT_PADDING,
|
||||
NOLACE_AF1_FILTER_GAIN_A,
|
||||
NOLACE_AF1_FILTER_GAIN_B,
|
||||
NOLACE_AF1_SHAPE_GAIN
|
||||
);
|
||||
|
||||
|
||||
printf("testing nolace.af4 (2 in, 1 out)...\n");
|
||||
adaconv_compare(
|
||||
"testvectors/nolace_af4",
|
||||
5,
|
||||
&hAdaConv,
|
||||
&hNoLACE.nolace_af4_kernel,
|
||||
&hNoLACE.nolace_af4_gain,
|
||||
NOLACE_AF4_FEATURE_DIM,
|
||||
NOLACE_AF4_FRAME_SIZE,
|
||||
NOLACE_AF4_OVERLAP_SIZE,
|
||||
NOLACE_AF4_IN_CHANNELS,
|
||||
NOLACE_AF4_OUT_CHANNELS,
|
||||
NOLACE_AF4_KERNEL_SIZE,
|
||||
NOLACE_AF4_LEFT_PADDING,
|
||||
NOLACE_AF4_FILTER_GAIN_A,
|
||||
NOLACE_AF4_FILTER_GAIN_B,
|
||||
NOLACE_AF4_SHAPE_GAIN
|
||||
);
|
||||
|
||||
printf("\ntesting nolace.af2 (2 in, 2 out)...\n");
|
||||
adaconv_compare(
|
||||
"testvectors/nolace_af2",
|
||||
5,
|
||||
&hAdaConv,
|
||||
&hNoLACE.nolace_af2_kernel,
|
||||
&hNoLACE.nolace_af2_gain,
|
||||
NOLACE_AF2_FEATURE_DIM,
|
||||
NOLACE_AF2_FRAME_SIZE,
|
||||
NOLACE_AF2_OVERLAP_SIZE,
|
||||
NOLACE_AF2_IN_CHANNELS,
|
||||
NOLACE_AF2_OUT_CHANNELS,
|
||||
NOLACE_AF2_KERNEL_SIZE,
|
||||
NOLACE_AF2_LEFT_PADDING,
|
||||
NOLACE_AF2_FILTER_GAIN_A,
|
||||
NOLACE_AF2_FILTER_GAIN_B,
|
||||
NOLACE_AF2_SHAPE_GAIN
|
||||
);
|
||||
|
||||
printf("\ntesting lace.cf1...\n");
|
||||
adacomb_compare(
|
||||
"testvectors/lace_cf1",
|
||||
5,
|
||||
&hAdaComb,
|
||||
&hLACE.lace_cf1_kernel,
|
||||
&hLACE.lace_cf1_gain,
|
||||
&hLACE.lace_cf1_global_gain,
|
||||
LACE_CF1_FEATURE_DIM,
|
||||
LACE_CF1_FRAME_SIZE,
|
||||
LACE_CF1_OVERLAP_SIZE,
|
||||
LACE_CF1_KERNEL_SIZE,
|
||||
LACE_CF1_LEFT_PADDING,
|
||||
LACE_CF1_FILTER_GAIN_A,
|
||||
LACE_CF1_FILTER_GAIN_B,
|
||||
LACE_CF1_LOG_GAIN_LIMIT
|
||||
);
|
||||
|
||||
printf("\ntesting nolace.tdshape1...\n");
|
||||
adashape_compare(
|
||||
"testvectors/nolace_tdshape1",
|
||||
5,
|
||||
&hAdaShape,
|
||||
&hNoLACE.nolace_tdshape1_alpha1,
|
||||
&hNoLACE.nolace_tdshape1_alpha2,
|
||||
NOLACE_TDSHAPE1_FEATURE_DIM,
|
||||
NOLACE_TDSHAPE1_FRAME_SIZE,
|
||||
NOLACE_TDSHAPE1_AVG_POOL_K
|
||||
);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* gcc -DVAR_ARRAYS -DENABLE_OSCE -I ../include -I ../silk -I . -I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */
|
88
third_party/opus/src/dnn/arm/arm_dnn_map.c
vendored
Normal file
88
third_party/opus/src/dnn/arm/arm_dnn_map.c
vendored
Normal file
@ -0,0 +1,88 @@
|
||||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "arm/armcpu.h"
|
||||
#include "nnet.h"
|
||||
|
||||
#if defined(OPUS_HAVE_RTCD)
|
||||
|
||||
#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) && !defined(OPUS_ARM_PRESUME_DOTPROD))
|
||||
|
||||
/* Function-pointer table for compute_linear with OPUS_ARCHMASK + 1 slots.
   The first three slots use the generic C implementation; the slots tagged
   "neon" and "dotprod" are filled through MAY_HAVE_NEON()/MAY_HAVE_DOTPROD().
   NOTE(review): those macros are defined outside this file; presumably they
   resolve to the SIMD variant when it is compiled in and to the C version
   otherwise -- confirm. */
void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const LinearLayer *linear,
|
||||
float *out,
|
||||
const float *in
|
||||
) = {
|
||||
compute_linear_c, /* default */
|
||||
compute_linear_c,
|
||||
compute_linear_c,
|
||||
MAY_HAVE_NEON(compute_linear), /* neon */
|
||||
MAY_HAVE_DOTPROD(compute_linear) /* dotprod */
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
|
||||
|
||||
/* Function-pointer table for compute_activation with OPUS_ARCHMASK + 1
   slots. The first three slots use the generic C implementation; the slots
   tagged "neon" and "dotprod" are filled through
   MAY_HAVE_NEON()/MAY_HAVE_DOTPROD().
   NOTE(review): those macros are defined outside this file; presumably they
   resolve to the SIMD variant when it is compiled in and to the C version
   otherwise -- confirm. */
void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
|
||||
float *output,
|
||||
const float *input,
|
||||
int N,
|
||||
int activation
|
||||
) = {
|
||||
compute_activation_c, /* default */
|
||||
compute_activation_c,
|
||||
compute_activation_c,
|
||||
MAY_HAVE_NEON(compute_activation), /* neon */
|
||||
MAY_HAVE_DOTPROD(compute_activation) /* dotprod */
|
||||
};
|
||||
|
||||
/* Function-pointer table for compute_conv2d with OPUS_ARCHMASK + 1 slots.
   The first three slots use the generic C implementation; the slots tagged
   "neon" and "dotprod" are filled through MAY_HAVE_NEON()/MAY_HAVE_DOTPROD().
   NOTE(review): those macros are defined outside this file; presumably they
   resolve to the SIMD variant when it is compiled in and to the C version
   otherwise -- confirm. */
void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const Conv2dLayer *conv,
|
||||
float *out,
|
||||
float *mem,
|
||||
const float *in,
|
||||
int height,
|
||||
int hstride,
|
||||
int activation
|
||||
) = {
|
||||
compute_conv2d_c, /* default */
|
||||
compute_conv2d_c,
|
||||
compute_conv2d_c,
|
||||
MAY_HAVE_NEON(compute_conv2d), /* neon */
|
||||
MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod */
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
104
third_party/opus/src/dnn/arm/dnn_arm.h
vendored
Normal file
104
third_party/opus/src/dnn/arm/dnn_arm.h
vendored
Normal file
@ -0,0 +1,104 @@
|
||||
/* Copyright (c) 2011-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DNN_ARM_H
|
||||
#define DNN_ARM_H
|
||||
|
||||
#include "cpu_support.h"
|
||||
#include "opus_types.h"
|
||||
|
||||
/* ARM-specific variants of the DNN primitives. Each primitive is declared
   in a "_neon" and a "_dotprod" flavour with the same parameter list.
   NOTE(review): LinearLayer and Conv2dLayer are not declared by the headers
   included above; presumably the including file provides them first --
   confirm. */
void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
|
||||
void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
|
||||
|
||||
void compute_activation_neon(float *output, const float *input, int N, int activation);
|
||||
void compute_activation_dotprod(float *output, const float *input, int N, int activation);
|
||||
|
||||
void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
|
||||
void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
|
||||
|
||||
#if defined(OPUS_ARM_PRESUME_DOTPROD)
|
||||
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_dotprod(linear, out, in))
|
||||
|
||||
#elif defined(OPUS_ARM_PRESUME_NEON_INTR) && !defined(OPUS_ARM_MAY_HAVE_DOTPROD)
|
||||
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_neon(linear, out, in))
|
||||
|
||||
#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
|
||||
|
||||
extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const LinearLayer *linear,
|
||||
float *out,
|
||||
const float *in
|
||||
);
|
||||
#define OVERRIDE_COMPUTE_LINEAR
|
||||
#define compute_linear(linear, out, in, arch) \
|
||||
((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(OPUS_ARM_PRESUME_NEON)
|
||||
|
||||
#define OVERRIDE_COMPUTE_ACTIVATION
|
||||
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))
|
||||
#define OVERRIDE_COMPUTE_CONV2D
|
||||
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))
|
||||
|
||||
#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
|
||||
|
||||
extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
|
||||
float *output,
|
||||
const float *input,
|
||||
int N,
|
||||
int activation
|
||||
);
|
||||
#define OVERRIDE_COMPUTE_ACTIVATION
|
||||
#define compute_activation(output, input, N, activation, arch) \
|
||||
((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
|
||||
|
||||
|
||||
extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
|
||||
const Conv2dLayer *conv,
|
||||
float *out,
|
||||
float *mem,
|
||||
const float *in,
|
||||
int height,
|
||||
int hstride,
|
||||
int activation
|
||||
);
|
||||
#define OVERRIDE_COMPUTE_CONV2D
|
||||
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
|
||||
((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* DNN_ARM_H */
|
38
third_party/opus/src/dnn/arm/nnet_dotprod.c
vendored
Normal file
38
third_party/opus/src/dnn/arm/nnet_dotprod.c
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

/* Fail the build early if this translation unit is compiled without the
   compiler advertising the Arm dot-product extension. */
#ifndef __ARM_FEATURE_DOTPROD
#error nnet_dotprod.c is being compiled without DOTPROD enabled
#endif

/* NOTE(review): RTCD_ARCH is presumably read by nnet_arch.h to build the
   _dotprod-suffixed kernel names -- confirm in nnet_arch.h. */
#define RTCD_ARCH dotprod

#include "nnet_arch.h"
|
38
third_party/opus/src/dnn/arm/nnet_neon.c
vendored
Normal file
38
third_party/opus/src/dnn/arm/nnet_neon.c
vendored
Normal file
@ -0,0 +1,38 @@
|
||||
/* Copyright (c) 2018-2019 Mozilla
|
||||
2023 Amazon */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

/* Fail the build early if this translation unit is compiled without the
   compiler advertising Neon support (either spelling of the macro). */
#if !(defined(__ARM_NEON__) || defined(__ARM_NEON))
#error nnet_neon.c is being compiled without Neon enabled
#endif

/* NOTE(review): RTCD_ARCH is presumably read by nnet_arch.h to build the
   _neon-suffixed kernel names -- confirm in nnet_arch.h. */
#define RTCD_ARCH neon

#include "nnet_arch.h"
|
246
third_party/opus/src/dnn/burg.c
vendored
Normal file
246
third_party/opus/src/dnn/burg.c
vendored
Normal file
@ -0,0 +1,246 @@
|
||||
/***********************************************************************
|
||||
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "arch.h"
|
||||
#include "burg.h"
|
||||
|
||||
#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
|
||||
#define SILK_MAX_ORDER_LPC 16
|
||||
#define FIND_LPC_COND_FAC 1e-5f
|
||||
|
||||
/* Sum of squares of a float array, accumulated in double precision.
 *   data     : input samples (read only)
 *   dataSize : number of samples to sum; values <= 0 yield 0.0
 * Returns the sum of data[i]^2 over i in [0, dataSize).
 * Each product is formed in double (one operand is cast before the multiply),
 * so float rounding does not accumulate across the sum. */
static double silk_energy_FLP(
    const float *data,
    int         dataSize
)
{
    int    i;
    double result;

    /* 4x unrolled loop */
    result = 0.0;
    for( i = 0; i < dataSize - 3; i += 4 ) {
        result += data[ i + 0 ] * (double)data[ i + 0 ] +
                  data[ i + 1 ] * (double)data[ i + 1 ] +
                  data[ i + 2 ] * (double)data[ i + 2 ] +
                  data[ i + 3 ] * (double)data[ i + 3 ];
    }

    /* add any remaining products */
    for( ; i < dataSize; i++ ) {
        result += data[ i ] * (double)data[ i ];
    }

    /* A sum of squares can only be negative if the input was not a number. */
    assert( result >= 0.0 );
    return result;
}
|
||||
|
||||
/* Inner product of two float arrays, accumulated in double precision.
 *   data1, data2 : input vectors (read only), at least dataSize samples each
 *   dataSize     : number of samples; values <= 0 yield 0.0
 * Returns the sum of data1[i]*data2[i] over i in [0, dataSize). */
static double silk_inner_product_FLP(
    const float *data1,
    const float *data2,
    int         dataSize
)
{
    int    i;
    double result;

    /* 4x unrolled loop */
    result = 0.0;
    for( i = 0; i < dataSize - 3; i += 4 ) {
        result += data1[ i + 0 ] * (double)data2[ i + 0 ] +
                  data1[ i + 1 ] * (double)data2[ i + 1 ] +
                  data1[ i + 2 ] * (double)data2[ i + 2 ] +
                  data1[ i + 3 ] * (double)data2[ i + 3 ];
    }

    /* add any remaining products */
    for( ; i < dataSize; i++ ) {
        result += data1[ i ] * (double)data2[ i ];
    }

    return result;
}
|
||||
|
||||
|
||||
/* Compute reflection coefficients from input signal */
|
||||
float silk_burg_analysis( /* O returns residual energy */
|
||||
float A[], /* O prediction coefficients (length order) */
|
||||
const float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
|
||||
const float minInvGain, /* I minimum inverse prediction gain */
|
||||
const int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
|
||||
const int nb_subfr, /* I number of subframes stacked in x */
|
||||
const int D /* I order */
|
||||
)
|
||||
{
|
||||
int k, n, s, reached_max_gain;
|
||||
double C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
|
||||
const float *x_ptr;
|
||||
double C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
|
||||
double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
|
||||
double Af[ SILK_MAX_ORDER_LPC ];
|
||||
|
||||
assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
|
||||
|
||||
/* Compute autocorrelations, added over subframes */
|
||||
C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
|
||||
memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
x_ptr = x + s * subfr_length;
|
||||
for( n = 1; n < D + 1; n++ ) {
|
||||
C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
|
||||
}
|
||||
}
|
||||
memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
|
||||
|
||||
/* Initialize */
|
||||
CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
|
||||
invGain = 1.0f;
|
||||
reached_max_gain = 0;
|
||||
for( n = 0; n < D; n++ ) {
|
||||
/* Update first row of correlation matrix (without first element) */
|
||||
/* Update last row of correlation matrix (without last element, stored in reversed order) */
|
||||
/* Update C * Af */
|
||||
/* Update C * flipud(Af) (stored in reversed order) */
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
x_ptr = x + s * subfr_length;
|
||||
tmp1 = x_ptr[ n ];
|
||||
tmp2 = x_ptr[ subfr_length - n - 1 ];
|
||||
for( k = 0; k < n; k++ ) {
|
||||
C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
|
||||
C_last_row[ k ] -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
|
||||
Atmp = Af[ k ];
|
||||
tmp1 += x_ptr[ n - k - 1 ] * Atmp;
|
||||
tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
|
||||
}
|
||||
for( k = 0; k <= n; k++ ) {
|
||||
CAf[ k ] -= tmp1 * x_ptr[ n - k ];
|
||||
CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
|
||||
}
|
||||
}
|
||||
tmp1 = C_first_row[ n ];
|
||||
tmp2 = C_last_row[ n ];
|
||||
for( k = 0; k < n; k++ ) {
|
||||
Atmp = Af[ k ];
|
||||
tmp1 += C_last_row[ n - k - 1 ] * Atmp;
|
||||
tmp2 += C_first_row[ n - k - 1 ] * Atmp;
|
||||
}
|
||||
CAf[ n + 1 ] = tmp1;
|
||||
CAb[ n + 1 ] = tmp2;
|
||||
|
||||
/* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
|
||||
num = CAb[ n + 1 ];
|
||||
nrg_b = CAb[ 0 ];
|
||||
nrg_f = CAf[ 0 ];
|
||||
for( k = 0; k < n; k++ ) {
|
||||
Atmp = Af[ k ];
|
||||
num += CAb[ n - k ] * Atmp;
|
||||
nrg_b += CAb[ k + 1 ] * Atmp;
|
||||
nrg_f += CAf[ k + 1 ] * Atmp;
|
||||
}
|
||||
assert( nrg_f > 0.0 );
|
||||
assert( nrg_b > 0.0 );
|
||||
|
||||
/* Calculate the next order reflection (parcor) coefficient */
|
||||
rc = -2.0 * num / ( nrg_f + nrg_b );
|
||||
assert( rc > -1.0 && rc < 1.0 );
|
||||
|
||||
/* Update inverse prediction gain */
|
||||
tmp1 = invGain * ( 1.0 - rc * rc );
|
||||
if( tmp1 <= minInvGain ) {
|
||||
/* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
|
||||
rc = sqrt( 1.0 - minInvGain / invGain );
|
||||
if( num > 0 ) {
|
||||
/* Ensure adjusted reflection coefficients has the original sign */
|
||||
rc = -rc;
|
||||
}
|
||||
invGain = minInvGain;
|
||||
reached_max_gain = 1;
|
||||
} else {
|
||||
invGain = tmp1;
|
||||
}
|
||||
|
||||
/* Update the AR coefficients */
|
||||
for( k = 0; k < (n + 1) >> 1; k++ ) {
|
||||
tmp1 = Af[ k ];
|
||||
tmp2 = Af[ n - k - 1 ];
|
||||
Af[ k ] = tmp1 + rc * tmp2;
|
||||
Af[ n - k - 1 ] = tmp2 + rc * tmp1;
|
||||
}
|
||||
Af[ n ] = rc;
|
||||
|
||||
if( reached_max_gain ) {
|
||||
/* Reached max prediction gain; set remaining coefficients to zero and exit loop */
|
||||
for( k = n + 1; k < D; k++ ) {
|
||||
Af[ k ] = 0.0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* Update C * Af and C * Ab */
|
||||
for( k = 0; k <= n + 1; k++ ) {
|
||||
tmp1 = CAf[ k ];
|
||||
CAf[ k ] += rc * CAb[ n - k + 1 ];
|
||||
CAb[ n - k + 1 ] += rc * tmp1;
|
||||
}
|
||||
}
|
||||
|
||||
if( reached_max_gain ) {
|
||||
/* Convert to float */
|
||||
for( k = 0; k < D; k++ ) {
|
||||
A[ k ] = (float)( -Af[ k ] );
|
||||
}
|
||||
/* Subtract energy of preceding samples from C0 */
|
||||
for( s = 0; s < nb_subfr; s++ ) {
|
||||
C0 -= silk_energy_FLP( x + s * subfr_length, D );
|
||||
}
|
||||
/* Approximate residual energy */
|
||||
nrg_f = C0 * invGain;
|
||||
} else {
|
||||
/* Compute residual energy and store coefficients as float */
|
||||
nrg_f = CAf[ 0 ];
|
||||
tmp1 = 1.0;
|
||||
for( k = 0; k < D; k++ ) {
|
||||
Atmp = Af[ k ];
|
||||
nrg_f += CAf[ k + 1 ] * Atmp;
|
||||
tmp1 += Atmp * Atmp;
|
||||
A[ k ] = (float)(-Atmp);
|
||||
}
|
||||
nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
|
||||
}
|
||||
|
||||
/* Return residual energy */
|
||||
return MAX32(0, (float)nrg_f);
|
||||
}
|
41
third_party/opus/src/dnn/burg.h
vendored
Normal file
41
third_party/opus/src/dnn/burg.h
vendored
Normal file
@ -0,0 +1,41 @@
|
||||
/***********************************************************************
|
||||
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
- Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of Internet Society, IETF or IETF Trust, nor the
|
||||
names of specific contributors, may be used to endorse or promote
|
||||
products derived from this software without specific prior written
|
||||
permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
***********************************************************************/
|
||||
|
||||
#ifndef BURG_H
#define BURG_H


/* Burg-style linear prediction analysis over nb_subfr stacked subframes of
   subfr_length samples each. Writes D prediction coefficients to A and
   returns the residual energy. */
float silk_burg_analysis(              /* O    returns residual energy                                     */
    float       A[],                   /* O    prediction coefficients (length order)                      */
    const float x[],                   /* I    input signal, length: nb_subfr*(D+L_sub)                    */
    const float minInvGain,            /* I    minimum inverse prediction gain                             */
    const int   subfr_length,          /* I    input signal subframe length (incl. D preceding samples)    */
    const int   nb_subfr,              /* I    number of subframes stacked in x                            */
    const int   D                      /* I    order                                                       */
);

#endif /* BURG_H */
|
56
third_party/opus/src/dnn/common.h
vendored
Normal file
56
third_party/opus/src/dnn/common.h
vendored
Normal file
@ -0,0 +1,56 @@
|
||||
|
||||
|
||||
#ifndef COMMON_H
|
||||
#define COMMON_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "opus_defines.h"
|
||||
|
||||
#define LOG256 5.5451774445f
|
||||
/* Fast approximation of log2(x).
 *
 * The float is reinterpreted as an integer to pull out its exponent, the
 * exponent is then removed so that the remaining value lies in [1, 2), and a
 * cubic polynomial centred on 1.5 approximates the logarithm of that value.
 *
 * Assumes float is IEEE-754 binary32 and int is 32 bits wide, and that x is
 * positive: with the sign bit set, (in.i>>23)-127 is no longer the exponent. */
static OPUS_INLINE float log2_approx(float x)
{
   int integer;
   float frac;
   union {
      float f;
      int i;
   } in;
   in.f = x;
   /* Unbiased exponent; negative whenever x < 1. */
   integer = (in.i>>23)-127;
   /* Remove the exponent so in.f lands in [1, 2). A multiplication is used
      rather than integer<<23 because left-shifting a negative int is
      undefined behaviour, and integer is negative for every x < 1. */
   in.i -= integer * (1<<23);
   frac = in.f - 1.5f;
   frac = -0.41445418f + frac*(0.95909232f
          + frac*(-0.33951290f + frac*0.16541097f));
   return 1+integer+frac;
}
|
||||
|
||||
#define log_approx(x) (0.69315f*log2_approx(x))
|
||||
|
||||
static OPUS_INLINE float ulaw2lin(float u)
|
||||
{
|
||||
float s;
|
||||
float scale_1 = 32768.f/255.f;
|
||||
u = u - 128.f;
|
||||
s = u >= 0.f ? 1.f : -1.f;
|
||||
u = fabs(u);
|
||||
return s*scale_1*(exp(u/128.*LOG256)-1);
|
||||
}
|
||||
|
||||
static OPUS_INLINE int lin2ulaw(float x)
|
||||
{
|
||||
float u;
|
||||
float scale = 255.f/32768.f;
|
||||
int s = x >= 0 ? 1 : -1;
|
||||
x = fabs(x);
|
||||
u = (s*(128*log_approx(1+scale*x)/LOG256));
|
||||
u = 128 + u;
|
||||
if (u < 0) u = 0;
|
||||
if (u > 255) u = 255;
|
||||
return (int)floor(.5 + u);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#endif
|
173
third_party/opus/src/dnn/datasets.txt
vendored
Normal file
173
third_party/opus/src/dnn/datasets.txt
vendored
Normal file
@ -0,0 +1,173 @@
|
||||
The following datasets can be used to train a language-independent LPCNet model.
|
||||
A good choice is to include all the data from these datasets, except for
|
||||
hi_fi_tts for which only a small subset is recommended (since it's very large
|
||||
but has few speakers). Note that this data typically needs to be resampled
|
||||
before it can be used.
|
||||
|
||||
https://www.openslr.org/resources/30/si_lk.tar.gz
|
||||
https://www.openslr.org/resources/32/af_za.tar.gz
|
||||
https://www.openslr.org/resources/32/st_za.tar.gz
|
||||
https://www.openslr.org/resources/32/tn_za.tar.gz
|
||||
https://www.openslr.org/resources/32/xh_za.tar.gz
|
||||
https://www.openslr.org/resources/37/bn_bd.zip
|
||||
https://www.openslr.org/resources/37/bn_in.zip
|
||||
https://www.openslr.org/resources/41/jv_id_female.zip
|
||||
https://www.openslr.org/resources/41/jv_id_male.zip
|
||||
https://www.openslr.org/resources/42/km_kh_male.zip
|
||||
https://www.openslr.org/resources/43/ne_np_female.zip
|
||||
https://www.openslr.org/resources/44/su_id_female.zip
|
||||
https://www.openslr.org/resources/44/su_id_male.zip
|
||||
https://www.openslr.org/resources/61/es_ar_female.zip
|
||||
https://www.openslr.org/resources/61/es_ar_male.zip
|
||||
https://www.openslr.org/resources/63/ml_in_female.zip
|
||||
https://www.openslr.org/resources/63/ml_in_male.zip
|
||||
https://www.openslr.org/resources/64/mr_in_female.zip
|
||||
https://www.openslr.org/resources/65/ta_in_female.zip
|
||||
https://www.openslr.org/resources/65/ta_in_male.zip
|
||||
https://www.openslr.org/resources/66/te_in_female.zip
|
||||
https://www.openslr.org/resources/66/te_in_male.zip
|
||||
https://www.openslr.org/resources/69/ca_es_female.zip
|
||||
https://www.openslr.org/resources/69/ca_es_male.zip
|
||||
https://www.openslr.org/resources/70/en_ng_female.zip
|
||||
https://www.openslr.org/resources/70/en_ng_male.zip
|
||||
https://www.openslr.org/resources/71/es_cl_female.zip
|
||||
https://www.openslr.org/resources/71/es_cl_male.zip
|
||||
https://www.openslr.org/resources/72/es_co_female.zip
|
||||
https://www.openslr.org/resources/72/es_co_male.zip
|
||||
https://www.openslr.org/resources/73/es_pe_female.zip
|
||||
https://www.openslr.org/resources/73/es_pe_male.zip
|
||||
https://www.openslr.org/resources/74/es_pr_female.zip
|
||||
https://www.openslr.org/resources/75/es_ve_female.zip
|
||||
https://www.openslr.org/resources/75/es_ve_male.zip
|
||||
https://www.openslr.org/resources/76/eu_es_female.zip
|
||||
https://www.openslr.org/resources/76/eu_es_male.zip
|
||||
https://www.openslr.org/resources/77/gl_es_female.zip
|
||||
https://www.openslr.org/resources/77/gl_es_male.zip
|
||||
https://www.openslr.org/resources/78/gu_in_female.zip
|
||||
https://www.openslr.org/resources/78/gu_in_male.zip
|
||||
https://www.openslr.org/resources/79/kn_in_female.zip
|
||||
https://www.openslr.org/resources/79/kn_in_male.zip
|
||||
https://www.openslr.org/resources/80/my_mm_female.zip
|
||||
https://www.openslr.org/resources/83/irish_english_male.zip
|
||||
https://www.openslr.org/resources/83/midlands_english_female.zip
|
||||
https://www.openslr.org/resources/83/midlands_english_male.zip
|
||||
https://www.openslr.org/resources/83/northern_english_female.zip
|
||||
https://www.openslr.org/resources/83/northern_english_male.zip
|
||||
https://www.openslr.org/resources/83/scottish_english_female.zip
|
||||
https://www.openslr.org/resources/83/scottish_english_male.zip
|
||||
https://www.openslr.org/resources/83/southern_english_female.zip
|
||||
https://www.openslr.org/resources/83/southern_english_male.zip
|
||||
https://www.openslr.org/resources/83/welsh_english_female.zip
|
||||
https://www.openslr.org/resources/83/welsh_english_male.zip
|
||||
https://www.openslr.org/resources/86/yo_ng_female.zip
|
||||
https://www.openslr.org/resources/86/yo_ng_male.zip
|
||||
https://www.openslr.org/resources/109/hi_fi_tts_v0.tar.gz
|
||||
|
||||
The corresponding citations for all these datasets are:
|
||||
|
||||
@inproceedings{demirsahin-etal-2020-open,
|
||||
title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},
|
||||
author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},
|
||||
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
|
||||
month = may,
|
||||
year = {2020},
|
||||
pages = {6532--6541},
|
||||
address = {Marseille, France},
|
||||
publisher = {European Language Resources Association (ELRA)},
|
||||
url = {https://www.aclweb.org/anthology/2020.lrec-1.804},
|
||||
ISBN = {979-10-95546-34-4},
|
||||
}
|
||||
@inproceedings{kjartansson-etal-2020-open,
|
||||
title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},
|
||||
author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},
|
||||
booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},
|
||||
year = {2020},
|
||||
pages = {21--27},
|
||||
month = may,
|
||||
address = {Marseille, France},
|
||||
publisher = {European Language Resources Association (ELRA)},
|
||||
url = {https://www.aclweb.org/anthology/2020.sltu-1.3},
|
||||
ISBN = {979-10-95546-35-1},
|
||||
}
|
||||
|
||||
|
||||
@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,
|
||||
title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},
|
||||
author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin, Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},
|
||||
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
|
||||
year = {2020},
|
||||
month = may,
|
||||
address = {Marseille, France},
|
||||
publisher = {European Language Resources Association (ELRA)},
|
||||
url = {https://www.aclweb.org/anthology/2020.lrec-1.801},
|
||||
pages = {6504--6513},
|
||||
ISBN = {979-10-95546-34-4},
|
||||
}
|
||||
@inproceedings{he-etal-2020-open,
|
||||
title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and Telugu Speech Synthesis Systems}},
|
||||
author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin, Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},
|
||||
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
|
||||
month = may,
|
||||
year = {2020},
|
||||
address = {Marseille, France},
|
||||
publisher = {European Language Resources Association (ELRA)},
|
||||
pages = {6494--6503},
|
||||
url = {https://www.aclweb.org/anthology/2020.lrec-1.800},
|
||||
ISBN = "{979-10-95546-34-4}",
|
||||
}
|
||||
|
||||
|
||||
@inproceedings{kjartansson-etal-tts-sltu2018,
|
||||
title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese, Khmer, Nepali, Sinhala, and Sundanese}},
|
||||
author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu De Silva and Supheakmungkol Sarin},
|
||||
booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},
|
||||
year = {2018},
|
||||
address = {Gurugram, India},
|
||||
month = aug,
|
||||
pages = {66--70},
|
||||
URL = {http://dx.doi.org/10.21437/SLTU.2018-14}
|
||||
}
|
||||
|
||||
|
||||
@inproceedings{oo-etal-2020-burmese,
|
||||
title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application to Text-to-Speech}},
|
||||
author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin, Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},
|
||||
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
|
||||
month = may,
|
||||
year = {2020},
|
||||
pages = "6328--6339",
|
||||
address = {Marseille, France},
|
||||
publisher = {European Language Resources Association (ELRA)},
|
||||
url = {https://www.aclweb.org/anthology/2020.lrec-1.777},
|
||||
ISBN = {979-10-95546-34-4},
|
||||
}
|
||||
@inproceedings{van-niekerk-etal-2017,
|
||||
title = {{Rapid development of TTS corpora for four South African languages}},
|
||||
author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson and Martin Jansche and Linne Ha},
|
||||
booktitle = {Proc. Interspeech 2017},
|
||||
pages = {2178--2182},
|
||||
address = {Stockholm, Sweden},
|
||||
month = aug,
|
||||
year = {2017},
|
||||
URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}
|
||||
}
|
||||
|
||||
@inproceedings{gutkin-et-al-yoruba2020,
|
||||
title = {{Developing an Open-Source Corpus of Yoruba Speech}},
|
||||
author = {Alexander Gutkin and I{\c{s}}{\i}n Demir{\c{s}}ahin and Oddur Kjartansson and Clara Rivera and K\d{\'o}lá Túb\d{\`o}sún},
|
||||
booktitle = {Proceedings of Interspeech 2020},
|
||||
pages = {404--408},
|
||||
month = {October},
|
||||
year = {2020},
|
||||
address = {Shanghai, China},
|
||||
publisher = {International Speech and Communication Association (ISCA)},
|
||||
doi = {10.21437/Interspeech.2020-1096},
|
||||
url = {http://dx.doi.org/10.21437/Interspeech.2020-1096},
|
||||
}
|
||||
|
||||
@article{bakhturina2021hi,
|
||||
title={{Hi-Fi Multi-Speaker English TTS Dataset}},
|
||||
author={Bakhturina, Evelina and Lavrukhin, Vitaly and Ginsburg, Boris and Zhang, Yang},
|
||||
journal={arXiv preprint arXiv:2104.01497},
|
||||
year={2021}
|
||||
}
|
9
third_party/opus/src/dnn/download_model.bat
vendored
Executable file
9
third_party/opus/src/dnn/download_model.bat
vendored
Executable file
@ -0,0 +1,9 @@
|
||||
@echo off
rem Fetch (unless already present) and unpack an Opus model archive.
rem %1 is the suffix of the archive name: opus_data-%1.tar.gz
set model=opus_data-%1.tar.gz

rem Quote the file name so an argument containing spaces or shell
rem metacharacters is treated as one path.
if not exist "%model%" (
    echo Downloading latest model
    powershell -Command "(New-Object System.Net.WebClient).DownloadFile('https://media.xiph.org/opus/models/%model%', '%model%')"
)

tar -xvzf "%model%"
|
30
third_party/opus/src/dnn/download_model.sh
vendored
Executable file
30
third_party/opus/src/dnn/download_model.sh
vendored
Executable file
@ -0,0 +1,30 @@
|
||||
#!/bin/sh
# Fetch (unless already present), verify and unpack an Opus model archive.
#
# Usage: download_model.sh <sha256>
# The single argument is used twice: as the suffix of the archive name
# (opus_data-<arg>.tar.gz) and as the SHA-256 digest the archive must match.
set -e

model=opus_data-$1.tar.gz

if [ ! -f "$model" ]; then
        echo "Downloading latest model"
        wget "https://media.xiph.org/opus/models/$model"
fi

# command -v prints the resolved path on success; only its exit status is
# needed here, so the output is discarded.
if command -v sha256sum >/dev/null 2>&1
then
   echo "Validating checksum"
   checksum="$1"
   checksum2=$(sha256sum "$model" | awk '{print $1}')
   if [ "$checksum" != "$checksum2" ]
   then
      echo "Aborting due to mismatching checksums. This could be caused by a corrupted download of $model."
      echo "Consider deleting local copy of $model and running this script again."
      exit 1
   else
      echo "checksums match"
   fi
else
   echo "Could not find sha256 sum; skipping verification. Please verify manually that sha256 hash of ${model} matches ${1}."
fi



tar xvomf "$model"
|
44
third_party/opus/src/dnn/dred_coding.c
vendored
Normal file
44
third_party/opus/src/dnn/dred_coding.c
vendored
Normal file
@ -0,0 +1,44 @@
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jean-Marc Valin */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "celt/entenc.h"
|
||||
#include "os_support.h"
|
||||
#include "dred_config.h"
|
||||
#include "dred_coding.h"
|
||||
|
||||
/* Returns the quantizer level for index i: q0 plus a rounded increment of
   (slope*i)/16, where the slope is looked up from dQ, capped at qmax.
   dQ is used directly as an index into an 8-entry table. */
int compute_quantizer(int q0, int dQ, int qmax, int i) {
  /* Per-index slope in 1/16 units, selected by dQ. */
  static const int slope_table[8] = {0, 2, 3, 4, 6, 8, 12, 16};
  const int increment = (slope_table[dQ]*i + 8)/16;
  const int level = q0 + increment;

  if (level > qmax) {
    return qmax;
  }
  return level;
}
|
36
third_party/opus/src/dnn/dred_coding.h
vendored
Normal file
36
third_party/opus/src/dnn/dred_coding.h
vendored
Normal file
@ -0,0 +1,36 @@
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DRED_CODING_H
|
||||
#define DRED_CODING_H
|
||||
|
||||
#include "opus_types.h"
|
||||
#include "entcode.h"
|
||||
|
||||
/* Returns the quantizer level for index i: q0 plus a rounded, dQ-dependent
   increment that grows with i, never exceeding qmax. */
int compute_quantizer(int q0, int dQ, int qmax, int i);
|
||||
|
||||
#endif
|
54
third_party/opus/src/dnn/dred_config.h
vendored
Normal file
54
third_party/opus/src/dnn/dred_config.h
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef DRED_CONFIG_H
|
||||
#define DRED_CONFIG_H
|
||||
|
||||
/* Change this once DRED gets an extension number assigned. */
|
||||
#define DRED_EXTENSION_ID 126
|
||||
|
||||
/* Remove these two completely once DRED gets an extension number assigned. */
|
||||
#define DRED_EXPERIMENTAL_VERSION 10
|
||||
#define DRED_EXPERIMENTAL_BYTES 2
|
||||
|
||||
|
||||
#define DRED_MIN_BYTES 8
|
||||
|
||||
/* These partly duplicate values defined in dred_rdovae_constants.h. */
|
||||
#define DRED_SILK_ENCODER_DELAY (79+12-80)
|
||||
#define DRED_FRAME_SIZE 160
|
||||
#define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE))
|
||||
#define DRED_MAX_DATA_SIZE 1000
|
||||
#define DRED_ENC_Q0 6
|
||||
#define DRED_ENC_Q1 15
|
||||
|
||||
/* Covers 1.04 seconds so we can cover one second after the lookahead. */
|
||||
#define DRED_MAX_LATENTS 26
|
||||
#define DRED_NUM_REDUNDANCY_FRAMES (2*DRED_MAX_LATENTS)
|
||||
#define DRED_MAX_FRAMES (4*DRED_MAX_LATENTS)
|
||||
|
||||
#endif
|
129
third_party/opus/src/dnn/dred_decoder.c
vendored
Normal file
129
third_party/opus/src/dnn/dred_decoder.c
vendored
Normal file
@ -0,0 +1,129 @@
|
||||
/* Copyright (c) 2022 Amazon
|
||||
Written by Jan Buethe */
|
||||
/*
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include "os_support.h"
|
||||
#include "dred_decoder.h"
|
||||
#include "dred_coding.h"
|
||||
#include "celt/entdec.h"
|
||||
#include "celt/laplace.h"
|
||||
#include "dred_rdovae_stats_data.h"
|
||||
#include "dred_rdovae_constants.h"
|
||||
|
||||
/* Decodes dim values from the range decoder into x.
   For each position k, a symbol is read with ec_laplace_decode_p0() using
   p0[k] and r[k] (both shifted left by 7), unless r[k] is 0 or p0[k] is 255,
   in which case nothing is read and the symbol is 0. The output is
   symbol*256/scale[k], with a scale of 0 treated as 1. */
static void dred_decode_latents(ec_dec *dec, float *x, const opus_uint8 *scale, const opus_uint8 *r, const opus_uint8 *p0, int dim) {
    int k;
    for (k = 0; k < dim; k++) {
        int symbol = 0;
        int divisor = scale[k];
        if (r[k] != 0 && p0[k] != 255) {
            symbol = ec_laplace_decode_p0(dec, p0[k]<<7, r[k]<<7);
        }
        /* Avoid dividing by zero when the table holds a zero scale. */
        if (divisor == 0) {
            divisor = 1;
        }
        x[k] = symbol*256.f/divisor;
    }
}
|
||||
|
||||
/* Decodes a DRED payload: the header (quantizer parameters and offset), the
   initial state, then as many latent vectors as are requested and still fit
   in the payload.

   dec                 Receives dred_offset, state, latents and nb_latents;
                       process_stage is set to 1.
   bytes, num_bytes    Range-coded payload and its length in bytes.
   min_feature_frames  Limits the work: the latent loop counter stops at
                       (min_feature_frames+1)/2 or DRED_NUM_REDUNDANCY_FRAMES,
                       whichever is smaller.
   dred_frame_offset   Added to the offset decoded from the payload.

   Returns the number of latent vectors decoded (also stored in
   dec->nb_latents). */
int dred_ec_decode(OpusDRED *dec, const opus_uint8 *bytes, int num_bytes, int min_feature_frames, int dred_frame_offset)
{
  ec_dec ec;
  int q_level;
  int i;
  int offset;
  int q0;
  int dQ;
  int qmax;
  int state_qoffset;
  int extra_offset;

  /* The loop below advances two redundancy frames per latent vector, so an
     odd number of redundancy frames makes no sense. */
  celt_assert(DRED_NUM_REDUNDANCY_FRAMES % 2 == 0);

  /* decode initial state and initialize RDOVAE decoder */
  ec_dec_init(&ec, (unsigned char*)bytes, num_bytes);
  q0 = ec_dec_uint(&ec, 16);
  dQ = ec_dec_uint(&ec, 8);
  /* A one-bit flag signals an additional offset coded in multiples of 32. */
  if (ec_dec_uint(&ec, 2)) extra_offset = 32*ec_dec_uint(&ec, 256);
  else extra_offset = 0;
  /* Compute total offset, including DRED position in a multiframe packet. */
  dec->dred_offset = 16 - ec_dec_uint(&ec, 32) - extra_offset + dred_frame_offset;
  qmax = 15;
  if (q0 < 14 && dQ > 0) {
    int nvals;
    int ft;
    int s;
    /* The distribution for the dQmax symbol is split evenly between zero
       (which implies qmax == 15) and larger values, with the probability of
       all larger values being uniform.
       This is equivalent to coding 1 bit to decide if the maximum is less than
       15 followed by a uint to decide the actual value if it is less than
       15, but combined into a single symbol. */
    nvals = 15 - (q0 + 1);
    ft = 2*nvals;
    s = ec_decode(&ec, ft);
    if (s >= nvals) {
      qmax = q0 + (s - nvals) + 1;
      ec_dec_update(&ec, s, s + 1, ft);
    }
    else {
      ec_dec_update(&ec, 0, nvals, ft);
    }
  }
  state_qoffset = q0*DRED_STATE_DIM;
  dred_decode_latents(
      &ec,
      dec->state,
      dred_state_quant_scales_q8 + state_qoffset,
      dred_state_r_q8 + state_qoffset,
      dred_state_p0_q8 + state_qoffset,
      DRED_STATE_DIM);

  /* decode newest to oldest and store oldest to newest */
  for (i = 0; i < IMIN(DRED_NUM_REDUNDANCY_FRAMES, (min_feature_frames+1)/2); i += 2)
  {
      /* FIXME: Figure out how to avoid missing a last frame that would take up < 8 bits. */
      if (8*num_bytes - ec_tell(&ec) <= 7)
         break;
      /* i counts redundancy frames in steps of two; i/2 is the latent index. */
      q_level = compute_quantizer(q0, dQ, qmax, i/2);
      offset = q_level*DRED_LATENT_DIM;
      dred_decode_latents(
          &ec,
          &dec->latents[(i/2)*DRED_LATENT_DIM],
          dred_latent_quant_scales_q8 + offset,
          dred_latent_r_q8 + offset,
          dred_latent_p0_q8 + offset,
          DRED_LATENT_DIM
          );
  }
  dec->process_stage = 1;
  dec->nb_latents = i/2;
  return i/2;
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user