diff --git a/AutoCoverTool/readme.txt b/AutoCoverTool/readme.txt
new file mode 100644
index 0000000..6ce7935
--- /dev/null
+++ b/AutoCoverTool/readme.txt
@@ -0,0 +1,224 @@
+Auto Cover Tool
+1. Train a voice (timbre) model
+2. Given an audio file, convert it into the chosen voice
+
+    ----data                // all data lives here
+    ----ref                 // dependency code lives here
+        ----music_remover   // vocal/accompaniment separation tool
+        ----so-vits-svc     // voice training and conversion tool
+    ----script              // scripts live here
+
+
+3. Layout of data:
+    ---train_users              // datasets used for training
+        ---zjl                  // one folder per voice
+            ---src              // source data (folder)
+            ---vocals           // data after vocal separation (folder)
+                ---speaker0     // sliced data (folder)
+            ---slice_resample   // resampled, normalized data (folder)
+                ---speaker0     // resampled, normalized data (folder)
+            ---filelists        // file lists required for training
+                ---train.txt
+                ---val.txt
+                ---test.txt
+            ---config           // configuration
+                ---config.json  // configuration file
+    ---inf_users                // data used at inference time
+        ---song1                // one folder per input song
+            ---src              // source data (file)
+            ---vocals           // separated vocal file
+            ---acc              // separated accompaniment file
+        ---song2
+        ...
+    ---out_data                 // inference output
+        ---song1                // one folder per song
+            ---song1_zjl_vocals // resulting vocals for song + voice
+        ---song2
+        ....
+    ---final_data               // final results
+        ---song1                // one folder per song
+            ---song1_zjl_mix    // mixdown of the song + voice vocals
+
+---------------------------------------------------------------------->>>>
+Quick start, training:
+    Put your data under data/train_users/xxx/src as .ogg files.
+    Then run script/train.sh xxx; the trained models are written to data/train_users/xxx/logs/32k.
+---------------------------------------------------------------------->>>>
+
+Training setup:
+Environment variables:
+export LD_LIBRARY_PATH=/data/gpu_env_common/env/anaconda3/envs/so_vits_svc/lib:$LD_LIBRARY_PATH
+export PATH=$PATH:/data/gpu_env_common/env/bin/ffmpeg/bin
+export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs
+
+# Training pipeline
+1. Collect the data into data/train_users/zjl/src
+2. Extract vocals, slice, and keep the top 80 slices by loudness
+/data/gpu_env_common/env/anaconda3/envs/demucs/bin/python script/get_vocals_for_train.py zjl
+3. Resample
+/data/gpu_env_common/env/anaconda3/envs/so_vits_svc/bin/python ref/so-vits-svc/resample.py --in_dir=/data/rsync/jianli.yang/AutoCoverTool/data/train_users/zjl --out_dir2=/data/rsync/jianli.yang/AutoCoverTool/data/train_users/zjl/slice_resample
+4. Generate the configuration file
+/data/gpu_env_common/env/anaconda3/envs/so_vits_svc/bin/python ref/so-vits-svc/preprocess_flist_config.py --source_dir=/data/rsync/jianli.yang/AutoCoverTool/data/train_users/zjl/slice_resample --train_list=/data/rsync/jianli.yang/AutoCoverTool/data/train_users/zjl/filelists/train.txt --val_list=/data/rsync/jianli.yang/AutoCoverTool/data/train_users/zjl/filelists/val.txt --test_list=/data/rsync/jianli.yang/AutoCoverTool/data/train_users/zjl/filelists/test.txt --config_path=/data/rsync/jianli.yang/AutoCoverTool/data/train_users/zjl/config/config.json
+5. Preprocess: extract hubert and f0 features
+/data/gpu_env_common/env/anaconda3/envs/so_vits_svc/bin/python ref/so-vits-svc/preprocess_hubert_f0.py --in_dir=/data/rsync/jianli.yang/AutoCoverTool/data/train_users/zjl/slice_resample
+6. Copy the pretrained checkpoints into the logs folder
+mkdir -p data/train_users/zjl/logs/32k
+cp -r data/models/G_0.pth data/train_users/zjl/logs/32k
+cp -r data/models/D_0.pth data/train_users/zjl/logs/32k
+7. Train
+/data/gpu_env_common/env/anaconda3/envs/so_vits_svc/bin/python ref/so-vits-svc/train.py -c data/train_users/zjl/config/config.json -m 32k -l data/train_users/zjl/logs
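+
+For reference, steps 2-7 can be driven from one Python script. This is a minimal, untested
+sketch that reuses the interpreter paths documented above; the helper name
+run_training_pipeline and its user argument are illustrative, not part of the existing scripts:
+
+    import subprocess
+
+    DEMUCS_PY = "/data/gpu_env_common/env/anaconda3/envs/demucs/bin/python"
+    SVC_PY = "/data/gpu_env_common/env/anaconda3/envs/so_vits_svc/bin/python"
+    ROOT = "/data/rsync/jianli.yang/AutoCoverTool"
+
+    def run_training_pipeline(user):
+        base = f"{ROOT}/data/train_users/{user}"
+        # 2. vocal extraction + slicing (keeps the top-80 slices by loudness)
+        subprocess.run([DEMUCS_PY, "script/get_vocals_for_train.py", user], check=True)
+        # 3. resample the slices
+        subprocess.run([SVC_PY, "ref/so-vits-svc/resample.py",
+                        f"--in_dir={base}", f"--out_dir2={base}/slice_resample"], check=True)
+        # 4. generate train/val/test lists and config.json
+        subprocess.run([SVC_PY, "ref/so-vits-svc/preprocess_flist_config.py",
+                        f"--source_dir={base}/slice_resample",
+                        f"--train_list={base}/filelists/train.txt",
+                        f"--val_list={base}/filelists/val.txt",
+                        f"--test_list={base}/filelists/test.txt",
+                        f"--config_path={base}/config/config.json"], check=True)
+        # 5. extract hubert / f0 features
+        subprocess.run([SVC_PY, "ref/so-vits-svc/preprocess_hubert_f0.py",
+                        f"--in_dir={base}/slice_resample"], check=True)
+        # 6. seed training with the pretrained G/D checkpoints
+        subprocess.run(["mkdir", "-p", f"{base}/logs/32k"], check=True)
+        subprocess.run(["cp", "-r", "data/models/G_0.pth", f"{base}/logs/32k"], check=True)
+        subprocess.run(["cp", "-r", "data/models/D_0.pth", f"{base}/logs/32k"], check=True)
+        # 7. train
+        subprocess.run([SVC_PY, "ref/so-vits-svc/train.py",
+                        "-c", f"{base}/config/config.json",
+                        "-m", "32k", "-l", f"{base}/logs"], check=True)
+
+    run_training_pipeline("zjl")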
+# Inference pipeline
+1. Put the input at data/inf_users/pfdyt/src.mp3
+2. Extract vocals and accompaniment
+/data/gpu_env_common/env/anaconda3/envs/demucs/bin/python script/get_vocals_for_inference.py data/inf_users/test_2/src.mp3 data/inf_users/test_2
+3. Run the voice conversion
+/data/gpu_env_common/env/anaconda3/envs/so_vits_svc/bin/python ref/so-vits-svc/inference_main.py data/train_users/zjl/logs/32k/G_2000.pth data/train_users/zjl/config/config.json data/inf_users/pfdyt/vocal_32.wav data/out_data/pfdyt/pfdyt_zjl.wav
+4. Collect the ratio statistics and discard audio whose ratio is too high
+/data/gpu_env_common/env/anaconda3/envs/th1_9_3_9/bin/python script/process_one.py
+5. Denoise and volume-stretch the audio that passes the check
+/opt/soft/bin/denoise_exe in_wav out_wav
+/opt/soft/bin/draw_volume in_wav ref_wav out_wav
+
+6. Manual step: use the accentize/Chameleon plugin to copy the reverb of the original wav and adjust the loudness
+7. Replace again using the manually produced data
+/data/gpu_env_common/env/anaconda3/envs/th1_9_3_9/bin/python script/process_one.py
+8. Mix the replaced result with the accompaniment to get the final track
+/opt/soft/bin/simple_mixer in_wav acc_path mix_path
+
+
+9. Resample the dry vocals
+ffmpeg -i data/out_data/pfdyt/pfdyt_zjl.wav -ar 44100 -ac 2 data/out_data/pfdyt/pfdyt_zjl_44_2.wav
+10. Final mixdown
+/data/rsync/jianli.yang/AutoCoverTool/data/bin/mixer data/models/impluse_im_plus_wet2.wav data/out_data/pfdyt/pfdyt_zjl_44_2.wav data/inf_users/pfdyt/acc.wav data/final_data/pfdyt_zjl_44_2_mix.wav
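+
+Likewise, the automatic parts of the inference chain (steps 1-3 and 9-10; the ratio check,
+denoising, and the manual reverb step are left out) can be chained from Python. A minimal,
+untested sketch; run_inference_pipeline and its arguments are illustrative names:
+
+    import subprocess
+
+    DEMUCS_PY = "/data/gpu_env_common/env/anaconda3/envs/demucs/bin/python"
+    SVC_PY = "/data/gpu_env_common/env/anaconda3/envs/so_vits_svc/bin/python"
+    MIXER = "/data/rsync/jianli.yang/AutoCoverTool/data/bin/mixer"
+    IMPULSE = "data/models/impluse_im_plus_wet2.wav"
+
+    def run_inference_pipeline(song, user, checkpoint):
+        inf = f"data/inf_users/{song}"
+        out = f"data/out_data/{song}/{song}_{user}.wav"
+        out44 = out.replace(".wav", "_44_2.wav")
+        # 2. split the source song into vocals and accompaniment
+        subprocess.run([DEMUCS_PY, "script/get_vocals_for_inference.py",
+                        f"{inf}/src.mp3", inf], check=True)
+        # 3. convert the separated vocals into the target voice
+        subprocess.run([SVC_PY, "ref/so-vits-svc/inference_main.py",
+                        checkpoint, f"data/train_users/{user}/config/config.json",
+                        f"{inf}/vocal_32.wav", out], check=True)
+        # 9. resample the converted dry vocals to 44.1 kHz stereo
+        subprocess.run(["ffmpeg", "-i", out, "-ar", "44100", "-ac", "2", out44], check=True)
+        # 10. final mixdown: impulse response + vocals + accompaniment
+        subprocess.run([MIXER, IMPULSE, out44, f"{inf}/acc.wav",
+                        f"data/final_data/{song}_{user}_44_2_mix.wav"], check=True)
+
+    run_inference_pipeline("pfdyt", "zjl", "data/train_users/zjl/logs/32k/G_2000.pth")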
+
+
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step1/0.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step1/1.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step1/2.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step1/3.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step1/4.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step1/5.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step1/6.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step1/7.zip
+
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step2/0.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step2/1.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step3/2.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step4/3.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step5/4.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step6/5.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step7/6.zip
+https://av-audit-sync-bj-1256122840.cos.ap-beijing.myqcloud.com/tmp/fanchang/step8/7.zip
+
+
+// Fetch the original-song data from the editing library:
+select task_url,starmaker_songid from starmaker_musicbook.silence where starmaker_songid in
+
+611752105026189342
+611752105030433779
+611752105029689090
+611752105021285282
+611752105030419624
+611752105030419633
+611752105030104548
+611752105029990849
+611752105029993297
+611752105030047424
+611752105030419688
+611752105023434557
+611752105024429936
+611752105027557408
+611752105024250202
+611752105027302268
+611752105026707760
+611752105022345104
+611752105024678976
+611752105024679221
+611752105020378620
+611752105022667231
+611752105023811083
+611752105023623965
+611752105022745595
+611752105020290695
+611752105028683824
+611752105020411654
+611752105020286501
+611752105020387015
+611752105020382559
+611752105030077711
+611752105019423720
+611752105020256284
+611752105020357112
+611752105024628047
+611752105020282612
+611752105020351134
+611752105020336950
+611752105022736204
+611752105020290639
+611752105021442406
+611752105020286443
+611752105024953316
+611752105020282613
+611752105024714646
+611752105022647082
+611752105027188746
+611752105022770952
+611752105020417488
+611752105025104181
+611752105022735101
+611752105023532439
+611752105022842477
+611752105028650636
+611752105022842004
+611752105029954168
+611752105020417688
+611752105020336946
+611752105020394297
+611752105026946178
+611752105020343687
+611752105024676794
+611752105020390950
+611752105020286433
+611752105026771723
+611752105022446809
+611752105020350988
+611752105025510149
+611752105020394121
+611752105021442417
+611752105020256227
+611752105025231610
+611752105021453011
+611752105020325137
+611752105027047993
+611752105021330812
+611752105021375100
+611752105021273980
+611752105024786030
+611752105027189453
+611752105020548211
+611752105020286446
+611752105020376320
+611752105020563523
+611752105027588072
+611752105022389596
+611752105020315368
+611752105020343699
+611752105029954089
+611752105026523547
+611752105029955214
+611752105020315328
+611752105020350990
+611752105021332759
diff --git a/AutoCoverTool/ref/music_remover/demucs/CODE_OF_CONDUCT.md b/AutoCoverTool/ref/music_remover/demucs/CODE_OF_CONDUCT.md
new file mode 100644
index 0000000..f049d4c
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/CODE_OF_CONDUCT.md
@@ -0,0 +1,76 @@
+# Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to make participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, sex characteristics, gender identity and expression,
+level of experience, education, socio-economic status, nationality, personal
+appearance, race, religion, or sexual identity and orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies within all project spaces, and it also applies when
+an individual is representing the project or its community in public spaces.
+Examples of representing a project or community include using an official
+project e-mail address, posting via an official social media account, or acting
+as an appointed representative at an online or offline event. Representation of
+a project may be further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at <opensource-conduct@fb.com>. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
+For answers to common questions about this code of conduct, see
+https://www.contributor-covenant.org/faq
diff --git a/AutoCoverTool/ref/music_remover/demucs/CONTRIBUTING.md b/AutoCoverTool/ref/music_remover/demucs/CONTRIBUTING.md
new file mode 100644
index 0000000..f14f4af
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/CONTRIBUTING.md
@@ -0,0 +1,23 @@
+# Contributing to Demucs
+
+## Pull Requests
+
+In order to accept your pull request, we need you to submit a CLA. You only need
+to do this once to work on any of Facebook's open source projects.
+
+Complete your CLA here: <https://code.facebook.com/cla>
+
+Demucs is the implementation of a research paper.
+Therefore, we do not plan on accepting many pull requests for new features.
+We certainly welcome them for bug fixes.
+
+
+## Issues
+
+We use GitHub issues to track public bugs. Please ensure your description is
+clear and has sufficient instructions to be able to reproduce the issue.
+
+
+## License
+By contributing to this repository, you agree that your contributions will be licensed
+under the LICENSE file in the root directory of this source tree.
diff --git a/AutoCoverTool/ref/music_remover/demucs/Demucs.ipynb b/AutoCoverTool/ref/music_remover/demucs/Demucs.ipynb
new file mode 100644
index 0000000..9ebcfd5
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/Demucs.ipynb
@@ -0,0 +1,153 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "Be9yoh-ILfRr"
+   },
+   "source": [
+    "# Hybrid Demucs\n",
+    "\n",
+    "Feel free to use the Colab version:\n",
+    "https://colab.research.google.com/drive/1dC9nVxk3V_VPjUADsnFu8EiT-xnU1tGH?usp=sharing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 139
+    },
+    "colab_type": "code",
+    "executionInfo": {
+     "elapsed": 12277,
+     "status": "ok",
+     "timestamp": 1583778134659,
+     "user": {
+      "displayName": "Marllus Lustosa",
+      "photoUrl": "https://lh3.googleusercontent.com/a-/AOh14GgLl2RbW64ZyWz3Y8IBku0zhHCMnt7fz7fEl0LTdA=s64",
+      "userId": "14811735256675200480"
+     },
+     "user_tz": 180
+    },
+    "id": "kOjIPLlzhPfn",
+    "outputId": "c75f17ec-b576-4105-bc5b-c2ac9c1018a3"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install -U demucs\n",
+    "# or for local development, if you have a clone of Demucs\n",
+    "# pip install -e ."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {},
+    "colab_type": "code",
+    "id": "5lYOzKKCKAbJ"
+   },
+   "outputs": [],
+   "source": [
+    "# You can use the `demucs` command line to separate tracks\n",
+    "!demucs test.mp3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# You can also directly load the pretrained models,\n",
+    "# for instance the MDX 2021 winning model of Track A:\n",
+    "from demucs import pretrained\n",
+    "model = pretrained.get_model('mdx')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Because `model` is a bag of 4 models, you cannot directly call it on your data,\n",
+    "# but `apply_model` will know what to do with it.\n",
+    "import torch\n",
+    "from demucs.apply import apply_model\n",
+    "x = torch.randn(1, 2, 44100 * 10)  # ten seconds of white noise for the demo\n",
+    "out = apply_model(model, x)[0]  # shape is [S, C, T] with S the number of sources\n",
+    "\n",
+    "# So let's see where all the white noise content is going.\n",
+    "for name, source in zip(model.sources, out):\n",
+    "    print(name, source.std() / x.std())\n",
+    "# The outputs are quite weird, to be fair; not what I would have expected."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Now let's take a single model from the bag, and test it on a pure cosine.\n",
+    "freq = 440  # in Hz\n",
+    "sr = model.samplerate\n",
+    "t = torch.arange(10 * sr).float() / sr\n",
+    "x = torch.cos(2 * 3.1416 * freq * t).expand(1, 2, -1)\n",
+    "sub_model = model.models[3]\n",
+    "out = sub_model(x)[0]\n",
+    "\n",
+    "# Same question: where does it go?\n",
+    "for name, source in zip(model.sources, out):\n",
+    "    print(name, source.std() / x.std())\n",
+    "    \n",
+    "# Well, now it makes much more sense: all the energy is going\n",
+    "# into the `other` source.\n",
+    "# Feel free to try a lower pitch (try 80 Hz) to see what happens!"
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# For training or more fun, refer to the Demucs README on our repo\n", + "# https://github.com/facebookresearch/demucs/tree/main/demucs" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "authorship_tag": "ABX9TyM9xpVr1M86NRcjtQ7g9tCx", + "collapsed_sections": [], + "name": "Demucs.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.8" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/AutoCoverTool/ref/music_remover/demucs/LICENSE b/AutoCoverTool/ref/music_remover/demucs/LICENSE new file mode 100644 index 0000000..5797855 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) Meta, Inc. and its affiliates. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/demucs/MANIFEST.in b/AutoCoverTool/ref/music_remover/demucs/MANIFEST.in
new file mode 100644
index 0000000..96e5f54
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/MANIFEST.in
@@ -0,0 +1,13 @@
+recursive-exclude env *
+recursive-include conf *.yaml
+include Makefile
+include LICENSE
+include demucs.png
+include outputs.tar.gz
+include test.mp3
+include requirements.txt
+include requirements_minimal.txt
+include mypy.ini
+include demucs/py.typed
+include demucs/remote/*.txt
+include demucs/remote/*.yaml
diff --git a/AutoCoverTool/ref/music_remover/demucs/Makefile b/AutoCoverTool/ref/music_remover/demucs/Makefile
new file mode 100644
index 0000000..344786c
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/Makefile
@@ -0,0 +1,32 @@
+all: linter tests
+
+linter:
+	flake8 demucs
+	mypy demucs
+
+tests: test_train test_eval
+
+test_train: tests/musdb
+	_DORA_TEST_PATH=/tmp/demucs python3 -m dora run --clear \
+	  dset.musdb=./tests/musdb dset.segment=4 dset.shift=2 epochs=2 model=demucs \
+	  demucs.depth=2 demucs.channels=4 test.sdr=false misc.num_workers=0 test.workers=0 \
+	  test.shifts=0
+
+test_eval:
+	python3 -m demucs -n demucs_unittest test.mp3
+	python3 -m demucs -n demucs_unittest --two-stems=vocals test.mp3
+	python3 -m demucs -n demucs_unittest --mp3 test.mp3
+	python3 -m demucs -n demucs_unittest --int24 --clip-mode clamp test.mp3
+
+tests/musdb:
+	test -e tests || mkdir tests
+	python3 -c 'import musdb; musdb.DB("tests/tmp", download=True)'
+	musdbconvert tests/tmp tests/musdb
+
+dist:
+	python3 setup.py sdist
+
+clean:
+	rm -r dist build *.egg-info
+
+.PHONY: linter dist test_train test_eval
diff --git a/AutoCoverTool/ref/music_remover/demucs/README.md b/AutoCoverTool/ref/music_remover/demucs/README.md
new file mode 100644
index 0000000..365c03e
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/README.md
@@ -0,0 +1,290 @@
+# Demucs Music Source Separation
+
+[![Support Ukraine](https://img.shields.io/badge/Support-Ukraine-FFD500?style=flat&labelColor=005BBB)](https://opensource.fb.com/support-ukraine)
+![tests badge](https://github.com/facebookresearch/demucs/workflows/tests/badge.svg)
+![linter badge](https://github.com/facebookresearch/demucs/workflows/linter/badge.svg)
+
+
+This is the 4th release of Demucs (v4), featuring Hybrid Transformer based source separation.
+**For the classic Hybrid Demucs (v3):** [go to this commit][demucs_v3].
+If you are experiencing issues and want the old Demucs back, please file an issue, and then you can get back to v3 with
+`git checkout v3`. You can also go back to [Demucs v2][demucs_v2].
+
+
+Demucs is a state-of-the-art music source separation model, currently capable of separating
+drums, bass, and vocals from the rest of the accompaniment.
+Demucs is based on a U-Net convolutional architecture inspired by [Wave-U-Net][waveunet].
+The v4 version features [Hybrid Transformer Demucs][htdemucs], a hybrid spectrogram/waveform separation model using Transformers.
+It is based on [Hybrid Demucs][hybrid_paper] (also provided in this repo), with the innermost layers
+replaced by a cross-domain Transformer Encoder. This Transformer uses self-attention within each domain,
+and cross-attention across domains.
+The model achieves an SDR of 9.00 dB on the MUSDB HQ test set. Moreover, when using sparse attention
+kernels to extend its receptive field and per-source fine-tuning, we achieve a state-of-the-art 9.20 dB of SDR.
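+
+As a reminder, SDR is the signal-to-distortion ratio between a reference source `s` and its
+estimate `s_hat`, in dB; at its core it is
+
+```latex
+\mathrm{SDR}(s, \hat{s}) = 10 \log_{10} \frac{\lVert s \rVert^2}{\lVert s - \hat{s} \rVert^2}
+```
+
+so higher is better; see the papers cited below for the exact evaluation protocol.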
+
+Samples are available [on our sample page](https://ai.honu.io/papers/htdemucs/index.html).
+Check out [our paper][htdemucs] for more information.
+It has been trained on the [MUSDB HQ][musdb] dataset + an extra training dataset of 800 songs.
+This model separates drums, bass, vocals, and other stems for any song.
+
+
+As Hybrid Transformer Demucs is brand new, it is not activated by default; you can activate it in the usual
+commands described hereafter with `-n htdemucs_ft`.
+The single, non fine-tuned model is provided as `-n htdemucs`, and the retrained baseline
+as `-n hdemucs_mmi`. The Sparse Hybrid Transformer model described in our paper is not provided, as it
+requires custom CUDA code that is not ready for release yet.
+
+
+<p align="center">
+<img src="./demucs.png" alt="Schema representing the structure of Hybrid Transformer Demucs,
+    with a dual U-Net structure, one branch for the temporal domain,
+    and one branch for the spectral domain. There is a cross-domain Transformer between the Encoders and Decoders."></p>
+
+
+
+## Important news if you are already using Demucs
+
+See the [release notes](./docs/release.md) for more details.
+
+- 16/11/2022: Added the new Hybrid Transformer Demucs models,
+  along with support for the [torchaudio implementation of HDemucs](https://pytorch.org/audio/stable/tutorials/hybrid_demucs_tutorial.html).
+- 30/08/2022: added reproducibility and ablation grids, along with an updated version of the paper.
+- 17/08/2022: Releasing v3.0.5: Set split segment length to reduce memory. Compatible with PyTorch 1.12.
+- 24/02/2022: Releasing v3.0.4: split into two stems (i.e. karaoke mode).
+  Export as float32 or int24.
+- 17/12/2021: Releasing v3.0.3: bug fixes (thanks @keunwoochoi), memory drastically
+  reduced on GPU (thanks @famzah) and new multi-core evaluation on CPU (`-j` flag).
+- 12/11/2021: Releasing **Demucs v3** with hybrid domain separation. Strong improvements
+  on all sources. This is the model that won the Sony MDX challenge.
+- 11/05/2021: Adding support for MusDB-HQ and arbitrary wav sets, for the MDX challenge. For more information
+on joining the challenge with Demucs, see [the Demucs MDX instructions](docs/mdx.md).
+- 28/04/2021: **Demucs v2**, with extra augmentation and DiffQ based quantization.
+  **EVERYTHING WILL BREAK**, please restart from scratch following the instructions hereafter.
+  This version also adds overlap between prediction frames, with linear transition from one to the next,
+  which should prevent sudden changes at frame boundaries. Also, Demucs is now on PyPI, so for separation
+  only, installation is as easy as `pip install demucs` :)
+- 13/04/2020: **Demucs released under MIT**: We are happy to release Demucs under the MIT licence.
+  We hope that this will broaden the impact of this research to new applications.
+
+
+## Comparison with other models
+
+We provide hereafter a summary of the different metrics presented in the paper.
+You can also compare Hybrid Demucs (v3), [KUIELAB-MDX-Net][kuielab], [Spleeter][spleeter], Open-Unmix, Demucs (v1), and Conv-Tasnet on one of my favorite
+songs on my [soundcloud playlist][soundcloud].
+
+### Comparison of accuracy
+
+`Overall SDR` is the mean of the SDR for each of the 4 sources, `MOS Quality` is a rating from 1 to 5
+of the naturalness and absence of artifacts given by human listeners (5 = no artifacts), `MOS Contamination`
+is a rating from 1 to 5 with 5 being zero contamination by other sources. We refer the reader to our [paper][hybrid_paper]
+for more details.
+
+| Model | Domain | Extra data? | Overall SDR | MOS Quality | MOS Contamination |
+|------------------------------|-------------|-------------|-------------|-------------|-------------------|
+| [Wave-U-Net][waveunet] | waveform | no | 3.2 | - | - |
+| [Open-Unmix][openunmix] | spectrogram | no | 5.3 | - | - |
+| [D3Net][d3net] | spectrogram | no | 6.0 | - | - |
+| [Conv-Tasnet][demucs_v2] | waveform | no | 5.7 | - | - |
+| [Demucs (v2)][demucs_v2] | waveform | no | 6.3 | 2.37 | 2.36 |
+| [ResUNetDecouple+][decouple] | spectrogram | no | 6.7 | - | - |
+| [KUIELAB-MDX-Net][kuielab] | hybrid | no | 7.5 | **2.86** | 2.55 |
+| [Band-Split RNN][bandsplit] | spectrogram | no | **8.2** | - | - |
+| **Hybrid Demucs (v3)** | hybrid | no | 7.7 | **2.83** | **3.04** |
+| [MMDenseLSTM][mmdenselstm] | spectrogram | 804 songs | 6.0 | - | - |
+| [D3Net][d3net] | spectrogram | 1.5k songs | 6.7 | - | - |
+| [Spleeter][spleeter] | spectrogram | 25k songs | 5.9 | - | - |
+| [Band-Split RNN][bandsplit] | spectrogram | 1.7k (mixes only) | **9.0** | - | - |
+| **HT Demucs f.t. (v4)** | hybrid | 800 songs | **9.0** | - | - |
+
+
+
+## Requirements
+
+You will need at least Python 3.7. See `requirements_minimal.txt` for requirements for separation only,
+and `environment-[cpu|cuda].yml` (or `requirements.txt`) if you want to train a new model.
+
+### For Windows users
+
+Every time you see `python3`, replace it with `python.exe`. You should always run commands from the
+Anaconda console.
+
+### For musicians
+
+If you just want to use Demucs to separate tracks, you can install it with
+
+```bash
+python3 -m pip install -U demucs
+```
+
+For bleeding edge versions, you can install directly from this repo using
+```bash
+python3 -m pip install -U git+https://github.com/facebookresearch/demucs#egg=demucs
+```
+
+**For Hybrid Transformer Demucs,** you must install the bleeding edge version and use either
+`-n htdemucs` or `-n htdemucs_ft`.
+
+Advanced OS support is provided on the following pages; **you must read the page for your OS before posting an issue**:
+- **If you are using Windows:** [Windows support](docs/windows.md).
+- **If you are using MAC OS X:** [Mac OS X support](docs/mac.md).
+- **If you are using Linux:** [Linux support](docs/linux.md).
+
+### For machine learning scientists
+
+If you have Anaconda installed, you can run from the root of this repository:
+
+```bash
+conda env update -f environment-cpu.yml  # if you don't have GPUs
+conda env update -f environment-cuda.yml # if you have GPUs
+conda activate demucs
+pip install -e .
+```
+
+This will create a `demucs` environment with all the dependencies installed.
+
+You will also need to install [soundstretch/soundtouch](https://www.surina.net/soundtouch/soundstretch.html): on Mac OSX you can do `brew install sound-touch`,
+and on Ubuntu `sudo apt-get install soundstretch`. This is used for the
+pitch/tempo augmentation.
+
+
+### Running in Docker
+
+Thanks to @xserrat, there is now a Docker image definition ready for using Demucs. This can ensure all libraries are correctly installed without interfering with the host OS. See his repo [Docker Facebook Demucs](https://github.com/xserrat/docker-facebook-demucs) for more information.
+
+
+### Running from Colab
+
+I made a Colab to easily separate tracks with Demucs. Note that
+transfer speeds with Colab are a bit slow for large media files,
+but it will allow you to use Demucs without installing anything.
+
+[Demucs on Google Colab](https://colab.research.google.com/drive/1dC9nVxk3V_VPjUADsnFu8EiT-xnU1tGH?usp=sharing)
+
+### Web Demo
+
+Integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See demo: [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/akhaliq/demucs)
+
+### Graphical Interface
+
+@CarlGao4 has released a GUI for Demucs: [CarlGao4/Demucs-Gui](https://github.com/CarlGao4/Demucs-Gui). Downloads for Windows and macOS are available [here](https://github.com/CarlGao4/Demucs-Gui/releases). Use the [FossHub mirror](https://fosshub.com/Demucs-GUI.html) to speed up your download.
+
+@Anjok07 is providing a self-contained GUI in [UVR (Ultimate Vocal Remover)](https://github.com/facebookresearch/demucs/issues/334) that supports Demucs.
+
+### Other providers
+
+Audiostrip provides free online separation with Demucs on their website [https://audiostrip.co.uk/](https://audiostrip.co.uk/).
+[MVSep](https://mvsep.com/) also provides free online separation; select `Demucs3 model B` for the best quality.
+
+Spleeter.io provides free online separation with Demucs on their website [https://www.spleeter.io/demucs](https://www.spleeter.io/demucs).
+
+
+## Separating tracks
+
+In order to try Demucs, you can just run from any folder (as long as you properly installed it)
+
+```bash
+demucs PATH_TO_AUDIO_FILE_1 [PATH_TO_AUDIO_FILE_2 ...] # for Demucs
+# If you used `pip install --user` you might need to replace demucs with python3 -m demucs
+python3 -m demucs --mp3 --mp3-bitrate BITRATE PATH_TO_AUDIO_FILE_1 # output files saved as MP3
+# If your filename contains spaces, don't forget to quote it!
+demucs "my music/my favorite track.mp3"
+# You can select different models with `-n`. mdx_q is the quantized model: smaller, but maybe a bit less accurate.
+demucs -n mdx_q myfile.mp3
+# If you only want to separate vocals out of an audio file, use `--two-stems=vocals` (you can also set it to drums or bass)
+demucs --two-stems=vocals myfile.mp3
+```
+
+
+If you have a GPU but run out of memory, please use `--segment SEGMENT` to reduce the length of each split. `SEGMENT` should be an integer; not less than 10 is recommended (the bigger the number, the more memory is required, but quality may increase). Creating an environment variable `PYTORCH_NO_CUDA_MEMORY_CACHING=1` is also helpful. If this still does not help, please add `-d cpu` to the command line. See the section hereafter for more details on the memory requirements for GPU acceleration.
+
+Separated tracks are stored in the `separated/MODEL_NAME/TRACK_NAME` folder. There you will find four stereo wav files sampled at 44.1 kHz: `drums.wav`, `bass.wav`,
+`other.wav`, `vocals.wav` (or `.mp3` if you used the `--mp3` option).
+
+All audio formats supported by `torchaudio` can be processed (e.g. wav, mp3, flac, ogg/vorbis on Linux/Mac OS X, etc.). On Windows, `torchaudio` has limited support, so we rely on `ffmpeg`, which should support pretty much anything.
+Audio is resampled on the fly if necessary.
+The output will be a wave file encoded as int16.
+You can save as float32 wav files with `--float32`, or 24-bit integer wav with `--int24`.
+You can pass `--mp3` to save as mp3 instead, and set the bitrate with `--mp3-bitrate` (default is 320 kbps).
+
+It can happen that the output needs clipping, in particular due to some separation artifacts.
+Demucs will automatically rescale each output stem so as to avoid clipping. This can however break
+the relative volume between stems. If instead you prefer hard clipping, pass `--clip-mode clamp`.
+You can also try to reduce the volume of the input mixture before feeding it to Demucs.
+
+
+Other pre-trained models can be selected with the `-n` flag.
+The list of pre-trained models is:
+- `htdemucs`: first version of Hybrid Transformer Demucs. Trained on MusDB + 800 songs.
+- `htdemucs_ft`: fine-tuned version of `htdemucs`; separation will take 4 times longer
+  but might be a bit better. Same training set as `htdemucs`.
+- `hdemucs_mmi`: Hybrid Demucs v3, retrained on MusDB + 800 songs.
+- `mdx`: trained only on MusDB HQ, winning model on track A at the [MDX][mdx] challenge.
+- `mdx_extra`: trained with extra training data (including the MusDB test set), ranked 2nd on track B
+  of the [MDX][mdx] challenge.
+- `mdx_q`, `mdx_extra_q`: quantized versions of the previous models. Smaller download and storage
+  but quality can be slightly worse. `mdx_extra_q` is the default model used.
+- `SIG`: where `SIG` is a single model from the [model zoo](docs/training.md#model-zoo).
+
+The `--two-stems=vocals` option allows separating vocals from the rest (e.g. karaoke mode).
+`vocals` can be changed into any source in the selected model.
+This will mix the files after separating the mix fully, so this won't be faster or use less memory.
+
+The `--shifts=SHIFTS` option performs multiple predictions with random shifts of the input (a.k.a. the *shift trick*) and averages them. This makes prediction `SHIFTS` times
+slower. Don't use it unless you have a GPU.
+
+The `--overlap` option controls the amount of overlap between prediction windows. The default is 0.25 (i.e. 25%), which is probably fine.
+It can probably be reduced to 0.1 to improve speed a bit.
+
+
+The `-j` flag allows specifying a number of parallel jobs (e.g. `demucs -j 2 myfile.mp3`).
+This will multiply the RAM used by the same amount, so be careful!
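+
+Everything above can also be driven from Python. The bundled `Demucs.ipynb` notebook does this
+with `demucs.pretrained` and `demucs.apply.apply_model`; a condensed sketch (the random tensor
+is a stand-in for ten seconds of real stereo audio at 44.1 kHz):
+
+```python
+import torch
+from demucs import pretrained
+from demucs.apply import apply_model
+
+model = pretrained.get_model('htdemucs')  # any name from the list above
+x = torch.randn(1, 2, 44100 * 10)         # [batch, channels, samples]
+out = apply_model(model, x)[0]            # [S, C, T], one entry per source
+for name, source in zip(model.sources, out):
+    print(name, source.shape)
+```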
+
+### Memory requirements for GPU acceleration
+
+If you want to use GPU acceleration, you will need at least 3GB of RAM on your GPU for `demucs`. However, about 7GB of RAM will be required if you use the default arguments. Add `--segment SEGMENT` to change the size of each split. If you only have 3GB of memory, set `SEGMENT` to 8 (though quality may be worse if this argument is too small). Creating an environment variable `PYTORCH_NO_CUDA_MEMORY_CACHING=1` can help users with even less RAM, such as 2GB (I separated a 4-minute track using only 1.5GB), but this will make the separation slower.
+
+If you do not have enough memory on your GPU, simply add `-d cpu` to the command line to use the CPU. With Demucs, processing time should be roughly equal to 1.5 times the duration of the track.
+
+
+## Training Demucs
+
+If you want to train (Hybrid) Demucs, please follow the [training doc](docs/training.md).
+
+## MDX Challenge reproduction
+
+In order to reproduce the results from the Track A and Track B submissions, check out the [MDX Hybrid Demucs submission repo][mdx_submission].
+
+
+
+## How to cite
+
+```
+@inproceedings{defossez2021hybrid,
+  title={Hybrid Spectrogram and Waveform Source Separation},
+  author={D{\'e}fossez, Alexandre},
+  booktitle={Proceedings of the ISMIR 2021 Workshop on Music Source Separation},
+  year={2021}
+}
+```
+
+## License
+
+Demucs is released under the MIT license as found in the [LICENSE](LICENSE) file.
+ +[hybrid_paper]: https://arxiv.org/abs/2111.03600 +[waveunet]: https://github.com/f90/Wave-U-Net +[musdb]: https://sigsep.github.io/datasets/musdb.html +[openunmix]: https://github.com/sigsep/open-unmix-pytorch +[mmdenselstm]: https://arxiv.org/abs/1805.02410 +[demucs_v2]: https://github.com/facebookresearch/demucs/tree/v2 +[demucs_v3]: https://github.com/facebookresearch/demucs/tree/v3 +[spleeter]: https://github.com/deezer/spleeter +[soundcloud]: https://soundcloud.com/honualx/sets/source-separation-in-the-waveform-domain +[d3net]: https://arxiv.org/abs/2010.01733 +[mdx]: https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021 +[kuielab]: https://github.com/kuielab/mdx-net-submission +[decouple]: https://arxiv.org/abs/2109.05418 +[mdx_submission]: https://github.com/adefossez/mdx21_demucs +[bandsplit]: https://arxiv.org/abs/2209.15174 +[htdemucs]: https://arxiv.org/abs/2211.08553 diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/config.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/config.yaml new file mode 100644 index 0000000..0ea477e --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/conf/config.yaml @@ -0,0 +1,302 @@ +defaults: + - _self_ + - dset: musdb44 + - svd: default + - variant: default + - override hydra/hydra_logging: colorlog + - override hydra/job_logging: colorlog + +dummy: +dset: + musdb: /checkpoint/defossez/datasets/musdbhq + musdb_samplerate: 44100 + wav: # path to custom wav dataset + wav2: # second custom wav dataset + segment: 11 + shift: 1 + train_valid: false + full_cv: true + samplerate: 44100 + channels: 2 + normalize: true + metadata: ./metadata + sources: ['drums', 'bass', 'other', 'vocals'] + valid_samples: # valid dataset size + +test: + save: False + best: True + workers: 2 + every: 20 + split: true + shifts: 1 + overlap: 0.25 + sdr: true + metric: 'loss' # metric used for best model selection on the valid set, can also be nsdr + nonhq: # path to non hq MusDB for evaluation + +epochs: 360 +batch_size: 64 +max_batches: # limit the number of batches per epoch, useful for debugging + # or if your dataset is gigantic. +optim: + lr: 3e-4 + momentum: 0.9 + beta2: 0.999 + loss: l1 # l1 or mse + optim: adam + weight_decay: 0 + clip_grad: 0 + +seed: 42 +debug: false +valid_apply: true +flag: +save_every: +weights: [1., 1., 1., 1.] # weights over each source for the training/valid loss. + +augment: + shift_same: false + repitch: + proba: 0.2 + max_tempo: 12 + remix: + proba: 1 + group_size: 4 + scale: + proba: 1 + min: 0.25 + max: 1.25 + flip: true + +continue_from: # continue from other XP, give the XP Dora signature. +continue_pretrained: # signature of a pretrained XP, this cannot be a bag of models. +pretrained_repo: # repo for pretrained model (default is official AWS) +continue_best: true +continue_opt: false + +misc: + num_workers: 10 + num_prints: 4 + show: false + verbose: false + +# List of decay for EMA at batch or epoch level, e.g. 0.999. +# Batch level EMA are kept on GPU for speed. +ema: + epoch: [] + batch: [] + +use_train_segment: true # to remove +model_segment: # override the segment parameter for the model, usually 4 times the training segment. +model: demucs # see demucs/train.py for the possibilities, and config for each model hereafter. 
+demucs: # see demucs/demucs.py for a detailed description + # Channels + channels: 64 + growth: 2 + # Main structure + depth: 6 + rewrite: true + lstm_layers: 0 + # Convolutions + kernel_size: 8 + stride: 4 + context: 1 + # Activations + gelu: true + glu: true + # Normalization + norm_groups: 4 + norm_starts: 4 + # DConv residual branch + dconv_depth: 2 + dconv_mode: 1 # 1 = branch in encoder, 2 = in decoder, 3 = in both. + dconv_comp: 4 + dconv_attn: 4 + dconv_lstm: 4 + dconv_init: 1e-4 + # Pre/post treatment + resample: true + normalize: false + # Weight init + rescale: 0.1 + +hdemucs: # see demucs/hdemucs.py for a detailed description + # Channels + channels: 48 + channels_time: + growth: 2 + # STFT + nfft: 4096 + wiener_iters: 0 + end_iters: 0 + wiener_residual: false + cac: true + # Main structure + depth: 6 + rewrite: true + hybrid: true + hybrid_old: false + # Frequency Branch + multi_freqs: [] + multi_freqs_depth: 3 + freq_emb: 0.2 + emb_scale: 10 + emb_smooth: true + # Convolutions + kernel_size: 8 + stride: 4 + time_stride: 2 + context: 1 + context_enc: 0 + # normalization + norm_starts: 4 + norm_groups: 4 + # DConv residual branch + dconv_mode: 1 + dconv_depth: 2 + dconv_comp: 4 + dconv_attn: 4 + dconv_lstm: 4 + dconv_init: 1e-3 + # Weight init + rescale: 0.1 + +# Torchaudio implementation of HDemucs +torch_hdemucs: +# Channels + channels: 48 + growth: 2 + # STFT + nfft: 4096 + # Main structure + depth: 6 + freq_emb: 0.2 + emb_scale: 10 + emb_smooth: true + # Convolutions + kernel_size: 8 + stride: 4 + time_stride: 2 + context: 1 + context_enc: 0 + # normalization + norm_starts: 4 + norm_groups: 4 + # DConv residual branch + dconv_depth: 2 + dconv_comp: 4 + dconv_attn: 4 + dconv_lstm: 4 + dconv_init: 1e-3 + +htdemucs: # see demucs/htdemucs.py for a detailed description + # Channels + channels: 48 + channels_time: + growth: 2 + # STFT + nfft: 4096 + wiener_iters: 0 + end_iters: 0 + wiener_residual: false + cac: true + # Main structure + depth: 4 + rewrite: true + # Frequency Branch + multi_freqs: [] + multi_freqs_depth: 3 + freq_emb: 0.2 + emb_scale: 10 + emb_smooth: true + # Convolutions + kernel_size: 8 + stride: 4 + time_stride: 2 + context: 1 + context_enc: 0 + # normalization + norm_starts: 4 + norm_groups: 4 + # DConv residual branch + dconv_mode: 1 + dconv_depth: 2 + dconv_comp: 8 + dconv_init: 1e-3 + # Before the Transformer + bottom_channels: 0 + # CrossTransformer + # ------ Common to all + # Regular parameters + t_layers: 5 + t_hidden_scale: 4.0 + t_heads: 8 + t_dropout: 0.0 + t_layer_scale: True + t_gelu: True + # ------------- Positional Embedding + t_emb: sin + t_max_positions: 10000 # for the scaled embedding + t_max_period: 10000.0 + t_weight_pos_embed: 1.0 + t_cape_mean_normalize: True + t_cape_augment: True + t_cape_glob_loc_scale: [5000.0, 1.0, 1.4] + t_sin_random_shift: 0 + # ------------- norm before a transformer encoder + t_norm_in: True + t_norm_in_group: False + # ------------- norm inside the encoder + t_group_norm: False + t_norm_first: True + t_norm_out: True + # ------------- optim + t_weight_decay: 0.0 + t_lr: + # ------------- sparsity + t_sparse_self_attn: False + t_sparse_cross_attn: False + t_mask_type: diag + t_mask_random_seed: 42 + t_sparse_attn_window: 400 + t_global_window: 100 + t_sparsity: 0.95 + t_auto_sparsity: False + # Cross Encoder First (False) + t_cross_first: False + # Weight init + rescale: 0.1 + +svd: # see svd.py for documentation + penalty: 0 + min_size: 0.1 + dim: 1 + niters: 2 + powm: false + proba: 1 + conv_only: false + 
convtr: false + bs: 1 + +quant: # quantization hyper params + diffq: # diffq penalty, typically 1e-4 or 3e-4 + qat: # use QAT with a fixed number of bits (not as good as diffq) + min_size: 0.2 + group_size: 8 + +dora: + dir: outputs + exclude: ["misc.*", "slurm.*", 'test.reval', 'flag'] + +slurm: + time: 4320 + constraint: volta32gb + setup: ['module load cudnn/v8.4.1.50-cuda.11.6 NCCL/2.11.4-6-cuda.11.6 cuda/11.6'] + +# Hydra config +hydra: + job_logging: + formatters: + colorlog: + datefmt: "%m-%d %H:%M:%S" diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/dset/aetl.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/dset/aetl.yaml new file mode 100644 index 0000000..7c98316 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/conf/dset/aetl.yaml @@ -0,0 +1,19 @@ +# @package _global_ + +# automix dataset with Musdb, extra training data and the test set of Musdb. +# This used even more remixes than auto_extra_test. +dset: + wav: /checkpoint/defossez/datasets/aetl + samplerate: 44100 + channels: 2 +epochs: 320 +max_batches: 500 + +augment: + shift_same: true + scale: + proba: 0. + remix: + proba: 0 + repitch: + proba: 0 diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/dset/auto_extra_test.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/dset/auto_extra_test.yaml new file mode 100644 index 0000000..056183a --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/conf/dset/auto_extra_test.yaml @@ -0,0 +1,18 @@ +# @package _global_ + +# automix dataset with Musdb, extra training data and the test set of Musdb. +dset: + wav: /checkpoint/defossez/datasets/automix_extra_test2 + samplerate: 44100 + channels: 2 +epochs: 320 +max_batches: 500 + +augment: + shift_same: true + scale: + proba: 0. + remix: + proba: 0 + repitch: + proba: 0 diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/dset/auto_mus.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/dset/auto_mus.yaml new file mode 100644 index 0000000..9a2d9df --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/conf/dset/auto_mus.yaml @@ -0,0 +1,20 @@ +# @package _global_ + +# Automix dataset based on musdb train set. 
+dset:
+  wav: /checkpoint/defossez/datasets/automix_musdb
+  samplerate: 44100
+  channels: 2
+epochs: 360
+max_batches: 300
+test:
+  every: 4
+
+augment:
+  shift_same: true
+  scale:
+    proba: 0.5
+  remix:
+    proba: 0
+  repitch:
+    proba: 0
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/dset/extra44.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/dset/extra44.yaml
new file mode 100644
index 0000000..f0adc46
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/dset/extra44.yaml
@@ -0,0 +1,8 @@
+# @package _global_
+
+# Musdb + extra tracks
+dset:
+  wav: /checkpoint/defossez/datasets/allstems_44/
+  samplerate: 44100
+  channels: 2
+epochs: 320
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/dset/extra_mmi_goodclean.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/dset/extra_mmi_goodclean.yaml
new file mode 100644
index 0000000..fe47bcf
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/dset/extra_mmi_goodclean.yaml
@@ -0,0 +1,12 @@
+# @package _global_
+
+# Musdb + extra tracks
+dset:
+  wav: /checkpoint/defossez/datasets/allstems_44/
+  wav2: /checkpoint/defossez/datasets/mmi44_goodclean
+  samplerate: 44100
+  channels: 2
+  wav2_weight: null
+  wav2_valid: false
+  valid_samples: 100
+epochs: 1200
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/dset/extra_test.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/dset/extra_test.yaml
new file mode 100644
index 0000000..1e7d05a
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/dset/extra_test.yaml
@@ -0,0 +1,12 @@
+# @package _global_
+
+# Musdb + extra tracks + test set from musdb.
+dset:
+  wav: /checkpoint/defossez/datasets/allstems_test_44/
+  samplerate: 44100
+  channels: 2
+epochs: 320
+max_batches: 700
+test:
+  sdr: false
+  every: 500
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/dset/musdb44.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/dset/musdb44.yaml
new file mode 100644
index 0000000..c562346
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/dset/musdb44.yaml
@@ -0,0 +1,5 @@
+# @package _global_
+
+dset:
+  samplerate: 44100
+  channels: 2
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/svd/base.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/svd/base.yaml
new file mode 100644
index 0000000..e4de868
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/svd/base.yaml
@@ -0,0 +1,14 @@
+# @package _global_
+
+svd:
+  penalty: 0
+  min_size: 1
+  dim: 50
+  niters: 4
+  powm: false
+  proba: 1
+  conv_only: false
+  convtr: false # ideally this should be true, but some models were trained with this set to false.
+
+optim:
+  beta2: 0.9998
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/svd/base2.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/svd/base2.yaml
new file mode 100644
index 0000000..b88a751
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/svd/base2.yaml
@@ -0,0 +1,14 @@
+# @package _global_
+
+svd:
+  penalty: 0
+  min_size: 1
+  dim: 100
+  niters: 4
+  powm: false
+  proba: 1
+  conv_only: false
+  convtr: true
+
+optim:
+  beta2: 0.9998
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/svd/default.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/svd/default.yaml
new file mode 100644
index 0000000..03bfe3d
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/svd/default.yaml
@@ -0,0 +1 @@
+# @package _global_
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/variant/default.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/variant/default.yaml
new file mode 100644
index 0000000..03bfe3d
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/variant/default.yaml
@@ -0,0 +1 @@
+# @package _global_
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/variant/example.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/variant/example.yaml
new file mode 100644
index 0000000..9b38aec
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/variant/example.yaml
@@ -0,0 +1,5 @@
+# @package _global_
+
+model: hdemucs
+hdemucs:
+  channels: 32
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/demucs/conf/variant/finetune.yaml b/AutoCoverTool/ref/music_remover/demucs/conf/variant/finetune.yaml
new file mode 100644
index 0000000..c3ea21e
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/conf/variant/finetune.yaml
@@ -0,0 +1,19 @@
+# @package _global_
+
+epochs: 4
+batch_size: 16
+optim:
+  lr: 0.0006
+test:
+  every: 1
+  sdr: false
+dset:
+  segment: 28
+  shift: 2
+
+augment:
+  scale:
+    proba: 0
+  shift_same: true
+  remix:
+    proba: 0
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/PKG-INFO b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/PKG-INFO
new file mode 100644
index 0000000..2e97a2d
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/PKG-INFO
@@ -0,0 +1,307 @@
+Metadata-Version: 2.1
+Name: demucs
+Version: 4.0.0a1
+Summary: Music source separation in the waveform domain.
+Home-page: https://github.com/facebookresearch/demucs
+Author: Alexandre Défossez
+Author-email: defossez@fb.com
+License: MIT License
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Topic :: Multimedia :: Sound/Audio
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Requires-Python: >=3.7.0
+Description-Content-Type: text/markdown
+Provides-Extra: dev
+License-File: LICENSE
+
+
+# Demucs Music Source Separation
+
+[![Support Ukraine](https://img.shields.io/badge/Support-Ukraine-FFD500?style=flat&labelColor=005BBB)](https://opensource.fb.com/support-ukraine)
+![tests badge](https://github.com/facebookresearch/demucs/workflows/tests/badge.svg)
+![linter badge](https://github.com/facebookresearch/demucs/workflows/linter/badge.svg)
+
+
+This is the 4th release of Demucs (v4), featuring Hybrid Transformer based source separation.
+**For the classic Hybrid Demucs (v3):** [go to this commit][demucs_v3].
+If you are experiencing issues and want the old Demucs back, please file an issue, and then you can get back to v3 with
+`git checkout v3`. You can also go back to [Demucs v2][demucs_v2].
+
+
+Demucs is a state-of-the-art music source separation model, currently capable of separating
+drums, bass, and vocals from the rest of the accompaniment.
+Demucs is based on a U-Net convolutional architecture inspired by [Wave-U-Net][waveunet].
+The v4 version features [Hybrid Transformer Demucs][htdemucs], a hybrid spectrogram/waveform separation model using Transformers.
+It is based on [Hybrid Demucs][hybrid_paper] (also provided in this repo), with the innermost layers
+replaced by a cross-domain Transformer Encoder. This Transformer uses self-attention within each domain,
+and cross-attention across domains.
+The model achieves an SDR of 9.00 dB on the MUSDB HQ test set. Moreover, when using sparse attention
+kernels to extend its receptive field and per-source fine-tuning, we achieve a state-of-the-art 9.20 dB of SDR.
+
+Samples are available [on our sample page](https://ai.honu.io/papers/htdemucs/index.html).
+Check out [our paper][htdemucs] for more information.
+It has been trained on the [MUSDB HQ][musdb] dataset + an extra training dataset of 800 songs.
+This model separates drums, bass, vocals, and other stems for any song.
+
+
+As Hybrid Transformer Demucs is brand new, it is not activated by default; you can activate it in the usual
+commands described hereafter with `-n htdemucs_ft`.
+The single, non fine-tuned model is provided as `-n htdemucs`, and the retrained baseline
+as `-n hdemucs_mmi`. The Sparse Hybrid Transformer model described in our paper is not provided, as it
+requires custom CUDA code that is not ready for release yet.
+
+
+<p align="center">
+<img src="./demucs.png" alt="Schema representing the structure of Hybrid Transformer Demucs,
+    with a dual U-Net structure, one branch for the temporal domain,
+    and one branch for the spectral domain. There is a cross-domain Transformer between the Encoders and Decoders."></p>
+
+
+
+## Important news if you are already using Demucs
+
+See the [release notes](./docs/release.md) for more details.
+
+- 16/11/2022: Added the new Hybrid Transformer Demucs models,
+  along with support for the [torchaudio implementation of HDemucs](https://pytorch.org/audio/stable/tutorials/hybrid_demucs_tutorial.html).
+- 30/08/2022: added reproducibility and ablation grids, along with an updated version of the paper.
+- 17/08/2022: Releasing v3.0.5: Set split segment length to reduce memory. Compatible with PyTorch 1.12.
+- 24/02/2022: Releasing v3.0.4: split into two stems (i.e. karaoke mode).
+  Export as float32 or int24.
+- 17/12/2021: Releasing v3.0.3: bug fixes (thanks @keunwoochoi), memory drastically
+  reduced on GPU (thanks @famzah) and new multi-core evaluation on CPU (`-j` flag).
+- 12/11/2021: Releasing **Demucs v3** with hybrid domain separation. Strong improvements
+  on all sources. This is the model that won the Sony MDX challenge.
+- 11/05/2021: Adding support for MusDB-HQ and arbitrary wav sets, for the MDX challenge. For more information
+on joining the challenge with Demucs, see [the Demucs MDX instructions](docs/mdx.md).
+- 28/04/2021: **Demucs v2**, with extra augmentation and DiffQ based quantization.
+  **EVERYTHING WILL BREAK**, please restart from scratch following the instructions hereafter.
+  This version also adds overlap between prediction frames, with linear transition from one to the next,
+  which should prevent sudden changes at frame boundaries. Also, Demucs is now on PyPI, so for separation
+  only, installation is as easy as `pip install demucs` :)
+- 13/04/2020: **Demucs released under MIT**: We are happy to release Demucs under the MIT licence.
+  We hope that this will broaden the impact of this research to new applications.
+
+
+## Comparison with other models
+
+We provide hereafter a summary of the different metrics presented in the paper.
+You can also compare Hybrid Demucs (v3), [KUIELAB-MDX-Net][kuielab], [Spleeter][spleeter], Open-Unmix, Demucs (v1), and Conv-Tasnet on one of my favorite
+songs on my [soundcloud playlist][soundcloud].
+
+### Comparison of accuracy
+
+`Overall SDR` is the mean of the SDR for each of the 4 sources, `MOS Quality` is a rating from 1 to 5
+of the naturalness and absence of artifacts given by human listeners (5 = no artifacts), `MOS Contamination`
+is a rating from 1 to 5 with 5 being zero contamination by other sources. We refer the reader to our [paper][hybrid_paper]
+for more details.
+
+| Model | Domain | Extra data? | Overall SDR | MOS Quality | MOS Contamination |
+|------------------------------|-------------|-------------|-------------|-------------|-------------------|
+| [Wave-U-Net][waveunet] | waveform | no | 3.2 | - | - |
+| [Open-Unmix][openunmix] | spectrogram | no | 5.3 | - | - |
+| [D3Net][d3net] | spectrogram | no | 6.0 | - | - |
+| [Conv-Tasnet][demucs_v2] | waveform | no | 5.7 | - | - |
+| [Demucs (v2)][demucs_v2] | waveform | no | 6.3 | 2.37 | 2.36 |
+| [ResUNetDecouple+][decouple] | spectrogram | no | 6.7 | - | - |
+| [KUIELAB-MDX-Net][kuielab] | hybrid | no | 7.5 | **2.86** | 2.55 |
+| [Band-Split RNN][bandsplit] | spectrogram | no | **8.2** | - | - |
+| **Hybrid Demucs (v3)** | hybrid | no | 7.7 | **2.83** | **3.04** |
+| [MMDenseLSTM][mmdenselstm] | spectrogram | 804 songs | 6.0 | - | - |
+| [D3Net][d3net] | spectrogram | 1.5k songs | 6.7 | - | - |
+| [Spleeter][spleeter] | spectrogram | 25k songs | 5.9 | - | - |
+| [Band-Split RNN][bandsplit] | spectrogram | 1.7k (mixes only) | **9.0** | - | - |
+| **HT Demucs f.t. (v4)** | hybrid | 800 songs | **9.0** | - | - |
+
+
+
+## Requirements
+
+You will need at least Python 3.7. See `requirements_minimal.txt` for requirements for separation only,
+and `environment-[cpu|cuda].yml` (or `requirements.txt`) if you want to train a new model.
+
+### For Windows users
+
+Every time you see `python3`, replace it with `python.exe`. You should always run commands from the
+Anaconda console.
+
+### For musicians
+
+If you just want to use Demucs to separate tracks, you can install it with
+
+```bash
+python3 -m pip install -U demucs
+```
+
+For bleeding edge versions, you can install directly from this repo using
+```bash
+python3 -m pip install -U git+https://github.com/facebookresearch/demucs#egg=demucs
+```
+
+**For Hybrid Transformer Demucs,** you must install the bleeding edge version and use either
+`-n htdemucs` or `-n htdemucs_ft`.
+
+Advanced OS support is provided on the following pages; **you must read the page for your OS before posting an issue**:
+- **If you are using Windows:** [Windows support](docs/windows.md).
+- **If you are using MAC OS X:** [Mac OS X support](docs/mac.md).
+- **If you are using Linux:** [Linux support](docs/linux.md).
+
+### For machine learning scientists
+
+If you have Anaconda installed, you can run from the root of this repository:
+
+```bash
+conda env update -f environment-cpu.yml  # if you don't have GPUs
+conda env update -f environment-cuda.yml # if you have GPUs
+conda activate demucs
+pip install -e .
+```
+
+This will create a `demucs` environment with all the dependencies installed.
+
+You will also need to install [soundstretch/soundtouch](https://www.surina.net/soundtouch/soundstretch.html): on Mac OSX you can do `brew install sound-touch`,
+and on Ubuntu `sudo apt-get install soundstretch`. This is used for the
+pitch/tempo augmentation.
+
+
+### Running in Docker
+
+Thanks to @xserrat, there is now a Docker image definition ready for using Demucs. This can ensure all libraries are correctly installed without interfering with the host OS. See his repo [Docker Facebook Demucs](https://github.com/xserrat/docker-facebook-demucs) for more information.
+
+
+### Running from Colab
+
+I made a Colab to easily separate tracks with Demucs. Note that
+transfer speeds with Colab are a bit slow for large media files,
+but it will allow you to use Demucs without installing anything.
+
+The `--two-stems=vocals` option allows separating the vocals from the rest (e.g. karaoke mode).
+`vocals` can be changed to any source in the selected model.
+This will mix the files after fully separating the mix, so this won't be faster or use less memory.
+
+The `--shifts=SHIFTS` option performs multiple predictions with random shifts (a.k.a. the *shift trick*) of the input and averages them. This makes prediction `SHIFTS` times
+slower. Don't use it unless you have a GPU.
+
+The `--overlap` option controls the amount of overlap between prediction windows. The default is 0.25 (i.e. 25%), which is probably fine.
+It can probably be reduced to 0.1 to speed things up a bit.
+
+
+The `-j` flag allows specifying a number of parallel jobs (e.g. `demucs -j 2 myfile.mp3`).
+This will multiply RAM usage by the same amount, so be careful!
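+
+Putting these options together, a hedged example (the values are illustrative, not tuned recommendations):
+
+```bash
+# Average 2 randomly shifted predictions (2x slower), reduce window overlap
+# to 10%, and use 2 parallel workers (roughly 2x the RAM).
+demucs --shifts=2 --overlap=0.1 -j 2 myfile.mp3
+```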
+
+### Memory requirements for GPU acceleration
+
+If you want to use GPU acceleration, you will need at least 3 GB of RAM on your GPU for `demucs`. However, about 7 GB of RAM will be required if you use the default arguments. Add `--segment SEGMENT` to change the size of each split. If you only have 3 GB of memory, set `SEGMENT` to 8 (though quality may be worse if this argument is too small). Setting the environment variable `PYTORCH_NO_CUDA_MEMORY_CACHING=1` can help users with even less memory, such as 2 GB (I separated a 4-minute track using only 1.5 GB), but this will make the separation slower.
+
+If you do not have enough memory on your GPU, simply add `-d cpu` to the command line to use the CPU. With Demucs, processing time should be roughly equal to 1.5 times the duration of the track.
+
+
+## Training Demucs
+
+If you want to train (Hybrid) Demucs, please follow the [training doc](docs/training.md).
+
+## MDX Challenge reproduction
+
+In order to reproduce the results from the Track A and Track B submissions, check out the [MDX Hybrid Demucs submission repo][mdx_submission].
+
+
+
+## How to cite
+
+```
+@inproceedings{defossez2021hybrid,
+  title={Hybrid Spectrogram and Waveform Source Separation},
+  author={D{\'e}fossez, Alexandre},
+  booktitle={Proceedings of the ISMIR 2021 Workshop on Music Source Separation},
+  year={2021}
+}
+```
+
+## License
+
+Demucs is released under the MIT license as found in the [LICENSE](LICENSE) file.
+ +[hybrid_paper]: https://arxiv.org/abs/2111.03600 +[waveunet]: https://github.com/f90/Wave-U-Net +[musdb]: https://sigsep.github.io/datasets/musdb.html +[openunmix]: https://github.com/sigsep/open-unmix-pytorch +[mmdenselstm]: https://arxiv.org/abs/1805.02410 +[demucs_v2]: https://github.com/facebookresearch/demucs/tree/v2 +[demucs_v3]: https://github.com/facebookresearch/demucs/tree/v3 +[spleeter]: https://github.com/deezer/spleeter +[soundcloud]: https://soundcloud.com/honualx/sets/source-separation-in-the-waveform-domain +[d3net]: https://arxiv.org/abs/2010.01733 +[mdx]: https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021 +[kuielab]: https://github.com/kuielab/mdx-net-submission +[decouple]: https://arxiv.org/abs/2109.05418 +[mdx_submission]: https://github.com/adefossez/mdx21_demucs +[bandsplit]: https://arxiv.org/abs/2209.15174 +[htdemucs]: https://arxiv.org/abs/2211.08553 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/SOURCES.txt b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/SOURCES.txt new file mode 100644 index 0000000..e6ad754 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/SOURCES.txt @@ -0,0 +1,68 @@ +LICENSE +MANIFEST.in +Makefile +README.md +demucs.png +mypy.ini +outputs.tar.gz +requirements.txt +requirements_minimal.txt +setup.cfg +setup.py +test.mp3 +conf/config.yaml +conf/dset/aetl.yaml +conf/dset/auto_extra_test.yaml +conf/dset/auto_mus.yaml +conf/dset/extra44.yaml +conf/dset/extra_mmi_goodclean.yaml +conf/dset/extra_test.yaml +conf/dset/musdb44.yaml +conf/svd/base.yaml +conf/svd/base2.yaml +conf/svd/default.yaml +conf/variant/default.yaml +conf/variant/example.yaml +conf/variant/finetune.yaml +demucs/__init__.py +demucs/__main__.py +demucs/apply.py +demucs/audio.py +demucs/augment.py +demucs/demucs.py +demucs/distrib.py +demucs/ema.py +demucs/evaluate.py +demucs/hdemucs.py +demucs/htdemucs.py +demucs/pretrained.py +demucs/py.typed +demucs/repitch.py +demucs/repo.py +demucs/separate.py +demucs/solver.py +demucs/spec.py +demucs/states.py +demucs/svd.py +demucs/train.py +demucs/transformer.py +demucs/utils.py +demucs/wav.py +demucs/wdemucs.py +demucs.egg-info/PKG-INFO +demucs.egg-info/SOURCES.txt +demucs.egg-info/dependency_links.txt +demucs.egg-info/entry_points.txt +demucs.egg-info/requires.txt +demucs.egg-info/top_level.txt +demucs/remote/files.txt +demucs/remote/hdemucs_mmi.yaml +demucs/remote/htdemucs.yaml +demucs/remote/htdemucs_ft.yaml +demucs/remote/mdx.yaml +demucs/remote/mdx_extra.yaml +demucs/remote/mdx_extra_q.yaml +demucs/remote/mdx_q.yaml +demucs/remote/repro_mdx_a.yaml +demucs/remote/repro_mdx_a_hybrid_only.yaml +demucs/remote/repro_mdx_a_time_only.yaml \ No newline at end of file diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/dependency_links.txt b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/entry_points.txt b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/entry_points.txt new file mode 100644 index 0000000..3def845 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +demucs = demucs.separate:main diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/requires.txt 
b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/requires.txt
new file mode 100644
index 0000000..012c2cb
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/requires.txt
@@ -0,0 +1,32 @@
+dora-search
+diffq>=0.2.1
+einops
+julius>=0.2.3
+lameenc>=1.2
+openunmix
+pyyaml
+torch>=1.8.1
+torchaudio>=0.8
+tqdm
+
+[dev]
+dora-search
+diffq>=0.2.1
+einops
+flake8
+hydra-colorlog>=1.1
+hydra-core>=1.1
+julius>=0.2.3
+lameenc>=1.2
+museval
+mypy
+openunmix
+pyyaml
+submitit
+torch>=1.8.1
+torchaudio>=0.8
+tqdm
+treetable
+
+[dev:sys_platform == "win32"]
+soundfile>=0.10.3
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/top_level.txt b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/top_level.txt
new file mode 100644
index 0000000..b35ba4c
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs.egg-info/top_level.txt
@@ -0,0 +1 @@
+demucs
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs.png b/AutoCoverTool/ref/music_remover/demucs/demucs.png
new file mode 100644
index 0000000..d043f64
Binary files /dev/null and b/AutoCoverTool/ref/music_remover/demucs/demucs.png differ
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/__init__.py b/AutoCoverTool/ref/music_remover/demucs/demucs/__init__.py
new file mode 100644
index 0000000..5578946
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+__version__ = "4.0.0a1"
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/__main__.py b/AutoCoverTool/ref/music_remover/demucs/demucs/__main__.py
new file mode 100644
index 0000000..2171e17
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/__main__.py
@@ -0,0 +1,10 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+from .separate import main
+
+if __name__ == '__main__':
+    main()
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/apply.py b/AutoCoverTool/ref/music_remover/demucs/demucs/apply.py
new file mode 100644
index 0000000..def1729
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/apply.py
@@ -0,0 +1,245 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Code to apply a model to a mix. It will handle chunking with overlaps and
+interpolation between chunks, as well as the "shift trick".
+"""
+from concurrent.futures import ThreadPoolExecutor
+import random
+import typing as tp
+
+import torch as th
+from torch import nn
+from torch.nn import functional as F
+import tqdm
+
+from .demucs import Demucs
+from .hdemucs import HDemucs
+from .utils import center_trim, DummyPoolExecutor
+
+Model = tp.Union[Demucs, HDemucs]
+
+
+class BagOfModels(nn.Module):
+    def __init__(self, models: tp.List[Model],
+                 weights: tp.Optional[tp.List[tp.List[float]]] = None,
+                 segment: tp.Optional[float] = None):
+        """
+        Represents a bag of models with specific weights.
+        You should call `apply_model` rather than calling the forward directly, for
+        optimal performance.
+
+        Args:
+            models (list[nn.Module]): list of Demucs/HDemucs models.
+            weights (list[list[float]]): list of weights. If None, assumed to
+                be all ones, otherwise it should be a list of N lists (N number of models),
+                each containing S floats (S number of sources).
+            segment (None or float): overrides the `segment` attribute of each model
+                (this is performed inplace, be careful if you reuse the models passed).
+        """
+        super().__init__()
+        assert len(models) > 0
+        first = models[0]
+        for other in models:
+            assert other.sources == first.sources
+            assert other.samplerate == first.samplerate
+            assert other.audio_channels == first.audio_channels
+            if segment is not None:
+                other.segment = segment
+
+        self.audio_channels = first.audio_channels
+        self.samplerate = first.samplerate
+        self.sources = first.sources
+        self.models = nn.ModuleList(models)
+
+        if weights is None:
+            weights = [[1. for _ in first.sources] for _ in models]
+        else:
+            assert len(weights) == len(models)
+            for weight in weights:
+                assert len(weight) == len(first.sources)
+        self.weights = weights
+
+    def forward(self, x):
+        raise NotImplementedError("Call `apply_model` on this.")
+
+
+class TensorChunk:
+    def __init__(self, tensor, offset=0, length=None):
+        total_length = tensor.shape[-1]
+        assert offset >= 0
+        assert offset < total_length
+
+        if length is None:
+            length = total_length - offset
+        else:
+            length = min(total_length - offset, length)
+
+        if isinstance(tensor, TensorChunk):
+            self.tensor = tensor.tensor
+            self.offset = offset + tensor.offset
+        else:
+            self.tensor = tensor
+            self.offset = offset
+        self.length = length
+        self.device = tensor.device
+
+    @property
+    def shape(self):
+        shape = list(self.tensor.shape)
+        shape[-1] = self.length
+        return shape
+
+    def padded(self, target_length):
+        delta = target_length - self.length
+        total_length = self.tensor.shape[-1]
+        assert delta >= 0
+
+        start = self.offset - delta // 2
+        end = start + target_length
+
+        correct_start = max(0, start)
+        correct_end = min(total_length, end)
+
+        pad_left = correct_start - start
+        pad_right = end - correct_end
+
+        out = F.pad(self.tensor[..., correct_start:correct_end], (pad_left, pad_right))
+        assert out.shape[-1] == target_length
+        return out
+
+
+def tensor_chunk(tensor_or_chunk):
+    if isinstance(tensor_or_chunk, TensorChunk):
+        return tensor_or_chunk
+    else:
+        assert isinstance(tensor_or_chunk, th.Tensor)
+        return TensorChunk(tensor_or_chunk)
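+
+# Usage sketch for `apply_model` below (an illustration, not upstream docs):
+# assuming a pretrained (bag of) model(s), e.g. loaded via `demucs.pretrained.get_model`,
+# and `mix` a (batch, channels, time) tensor sampled at `model.samplerate`:
+#
+#     from demucs.pretrained import get_model
+#     model = get_model('htdemucs')
+#     out = apply_model(model, mix, shifts=1, split=True, overlap=0.25, device='cuda')
+#     # `out` has shape (batch, len(model.sources), channels, time).
+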
+def apply_model(model, mix, shifts=1, split=True,
+                overlap=0.25, transition_power=1., progress=False, device=None,
+                num_workers=0, pool=None):
+    """
+    Apply model to a given mixture.
+
+    Args:
+        shifts (int): if > 0, will shift `mix` in time by a random amount between 0 and 0.5 sec
+            and apply the opposite shift to the output. This is repeated `shifts` times and
+            all predictions are averaged. This effectively makes the model time equivariant
+            and improves SDR by up to 0.2 points.
+        split (bool): if True, the input will be broken down into 8-second chunks
+            and predictions will be performed individually on each and concatenated.
+            Useful for models with a large memory footprint like Tasnet.
+        progress (bool): if True, show a progress bar (requires split=True)
+        device (torch.device, str, or None): if provided, device on which to
+            execute the computation, otherwise `mix.device` is assumed.
+            When `device` is different from `mix.device`, only local computations will
+            be on `device`, while the entire tracks will be stored on `mix.device`.
+    """
+    if device is None:
+        device = mix.device
+    else:
+        device = th.device(device)
+    if pool is None:
+        if num_workers > 0 and device.type == 'cpu':
+            pool = ThreadPoolExecutor(num_workers)
+        else:
+            pool = DummyPoolExecutor()
+    kwargs = {
+        'shifts': shifts,
+        'split': split,
+        'overlap': overlap,
+        'transition_power': transition_power,
+        'progress': progress,
+        'device': device,
+        'pool': pool,
+    }
+    if isinstance(model, BagOfModels):
+        # Special treatment for a bag of models.
+        # We explicitly apply `apply_model` multiple times so that the random shifts
+        # are different for each model.
+        estimates = 0
+        totals = [0] * len(model.sources)
+        for sub_model, weight in zip(model.models, model.weights):
+            original_model_device = next(iter(sub_model.parameters())).device
+            sub_model.to(device)
+
+            out = apply_model(sub_model, mix, **kwargs)
+            sub_model.to(original_model_device)
+            for k, inst_weight in enumerate(weight):
+                out[:, k, :, :] *= inst_weight
+                totals[k] += inst_weight
+            estimates += out
+            del out
+
+        for k in range(estimates.shape[1]):
+            estimates[:, k, :, :] /= totals[k]
+        return estimates
+
+    model.to(device)
+    model.eval()
+    assert transition_power >= 1, "transition_power < 1 leads to weird behavior."
+    batch, channels, length = mix.shape
+    if shifts:
+        kwargs['shifts'] = 0
+        max_shift = int(0.5 * model.samplerate)
+        mix = tensor_chunk(mix)
+        padded_mix = mix.padded(length + 2 * max_shift)
+        out = 0
+        for _ in range(shifts):
+            # TODO: a random offset is used here, so repeated runs give different outputs.
+            # When verifying the code you can hard-code it first, e.g.
+            # offset = 2312
+            offset = random.randint(0, max_shift)
+            shifted = TensorChunk(padded_mix, offset, length + max_shift - offset)
+            shifted_out = apply_model(model, shifted, **kwargs)
+            out += shifted_out[..., max_shift - offset:]
+        out /= shifts
+        return out
+    elif split:
+        kwargs['split'] = False
+        out = th.zeros(batch, len(model.sources), channels, length, device=mix.device)
+        sum_weight = th.zeros(length, device=mix.device)
+        segment = int(model.samplerate * model.segment)
+        stride = int((1 - overlap) * segment)
+        offsets = range(0, length, stride)
+        scale = float(format(stride / model.samplerate, ".2f"))
+        # We start from a triangle shaped weight, with maximal weight in the middle
+        # of the segment. Then we normalize and take to the power `transition_power`.
+        # Large values of transition power will lead to sharper transitions.
+        weight = th.cat([th.arange(1, segment // 2 + 1, device=device),
+                         th.arange(segment - segment // 2, 0, -1, device=device)])
+        assert len(weight) == segment
+        # If the overlap < 50%, this will translate to a linear transition when
+        # transition_power is 1.
+ weight = (weight / weight.max())**transition_power + futures = [] + for offset in offsets: + chunk = TensorChunk(mix, offset, segment) + future = pool.submit(apply_model, model, chunk, **kwargs) + futures.append((future, offset)) + offset += segment + if progress: + futures = tqdm.tqdm(futures, unit_scale=scale, ncols=120, unit='seconds') + for future, offset in futures: + chunk_out = future.result() + chunk_length = chunk_out.shape[-1] + out[..., offset:offset + segment] += (weight[:chunk_length] * chunk_out).to(mix.device) + sum_weight[offset:offset + segment] += weight[:chunk_length].to(mix.device) + assert sum_weight.min() > 0 + out /= sum_weight + return out + else: + if hasattr(model, 'valid_length'): + valid_length = model.valid_length(length) + else: + valid_length = length + mix = tensor_chunk(mix) + padded_mix = mix.padded(valid_length).to(device) + with th.no_grad(): + out = model(padded_mix) + return center_trim(out, length) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/audio.py b/AutoCoverTool/ref/music_remover/demucs/demucs/audio.py new file mode 100644 index 0000000..ae3bfb7 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/audio.py @@ -0,0 +1,257 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +import json +import subprocess as sp +from pathlib import Path + +import lameenc +import julius +import numpy as np +import torch +import torchaudio as ta + +from .utils import temp_filenames + + +def _read_info(path): + stdout_data = sp.check_output([ + 'ffprobe', "-loglevel", "panic", + str(path), '-print_format', 'json', '-show_format', '-show_streams' + ]) + return json.loads(stdout_data.decode('utf-8')) + + +class AudioFile: + """ + Allows to read audio from any format supported by ffmpeg, as well as resampling or + converting to mono on the fly. See :method:`read` for more details. + """ + def __init__(self, path: Path): + self.path = Path(path) + self._info = None + + def __repr__(self): + features = [("path", self.path)] + features.append(("samplerate", self.samplerate())) + features.append(("channels", self.channels())) + features.append(("streams", len(self))) + features_str = ", ".join(f"{name}={value}" for name, value in features) + return f"AudioFile({features_str})" + + @property + def info(self): + if self._info is None: + self._info = _read_info(self.path) + return self._info + + @property + def duration(self): + return float(self.info['format']['duration']) + + @property + def _audio_streams(self): + return [ + index for index, stream in enumerate(self.info["streams"]) + if stream["codec_type"] == "audio" + ] + + def __len__(self): + return len(self._audio_streams) + + def channels(self, stream=0): + return int(self.info['streams'][self._audio_streams[stream]]['channels']) + + def samplerate(self, stream=0): + return int(self.info['streams'][self._audio_streams[stream]]['sample_rate']) + + def read(self, + seek_time=None, + duration=None, + streams=slice(None), + samplerate=None, + channels=None, + temp_folder=None): + """ + Slightly more efficient implementation than stempeg, + in particular, this will extract all stems at once + rather than having to loop over one file multiple times + for each stream. + + Args: + seek_time (float): seek time in seconds or None if no seeking is needed. + duration (float): duration in seconds to extract or None to extract until the end. 
+        streams (slice, int or list): streams to extract, can be a single int, a list or
+            a slice. If it is a slice or list, the output will be of size [S, C, T]
+            with S the number of streams, C the number of channels and T the number of samples.
+            If it is an int, the output will be [C, T].
+        samplerate (int): if provided, will resample on the fly. If None, no resampling will
+            be done. The original sampling rate can be obtained with :method:`samplerate`.
+        channels (int): if 1, will convert to mono. We do not rely on ffmpeg for that
+            as ffmpeg automatically scales by +3dB to conserve volume when playing on speakers.
+            See https://sound.stackexchange.com/a/42710.
+            Our definition of mono is simply the average of the two channels. Any other
+            value will be ignored.
+        temp_folder (str or Path or None): temporary folder to use for decoding.
+
+
+        """
+        streams = np.array(range(len(self)))[streams]
+        single = not isinstance(streams, np.ndarray)
+        if single:
+            streams = [streams]
+
+        if duration is None:
+            target_size = None
+            query_duration = None
+        else:
+            target_size = int((samplerate or self.samplerate()) * duration)
+            query_duration = float((target_size + 1) / (samplerate or self.samplerate()))
+
+        with temp_filenames(len(streams)) as filenames:
+            command = ['ffmpeg', '-y']
+            command += ['-loglevel', 'panic']
+            if seek_time:
+                command += ['-ss', str(seek_time)]
+            command += ['-i', str(self.path)]
+            for stream, filename in zip(streams, filenames):
+                command += ['-map', f'0:{self._audio_streams[stream]}']
+                if query_duration is not None:
+                    command += ['-t', str(query_duration)]
+                command += ['-threads', '1']
+                command += ['-f', 'f32le']
+                if samplerate is not None:
+                    command += ['-ar', str(samplerate)]
+                command += [filename]
+
+            sp.run(command, check=True)
+            wavs = []
+            for filename in filenames:
+                wav = np.fromfile(filename, dtype=np.float32)
+                wav = torch.from_numpy(wav)
+                wav = wav.view(-1, self.channels()).t()
+                if channels is not None:
+                    wav = convert_audio_channels(wav, channels)
+                if target_size is not None:
+                    wav = wav[..., :target_size]
+                wavs.append(wav)
+        wav = torch.stack(wavs, dim=0)
+        if single:
+            wav = wav[0]
+        return wav
+
+
+def convert_audio_channels(wav, channels=2):
+    """Convert audio to the given number of channels."""
+    *shape, src_channels, length = wav.shape
+    if src_channels == channels:
+        pass
+    elif channels == 1:
+        # Case 1:
+        # The caller asked for 1-channel audio, but the stream has multiple
+        # channels, downmix all channels.
+        wav = wav.mean(dim=-2, keepdim=True)
+    elif src_channels == 1:
+        # Case 2:
+        # The caller asked for multiple channels, but the input file has
+        # a single channel, replicate the audio over all channels.
+        wav = wav.expand(*shape, channels, length)
+    elif src_channels >= channels:
+        # Case 3:
+        # The caller asked for multiple channels, and the input file has
+        # more channels than requested. In that case return the first channels.
+        wav = wav[..., :channels, :]
+    else:
+        # Case 4: What is a reasonable choice here?
+ raise ValueError('The audio file has less channels than requested but is not mono.') + return wav + + +def convert_audio(wav, from_samplerate, to_samplerate, channels): + """Convert audio from a given samplerate to a target one and target number of channels.""" + wav = convert_audio_channels(wav, channels) + return julius.resample_frac(wav, from_samplerate, to_samplerate) + + +def i16_pcm(wav): + """Convert audio to 16 bits integer PCM format.""" + if wav.dtype.is_floating_point: + return (wav.clamp_(-1, 1) * (2**15 - 1)).short() + else: + return wav + + +def f32_pcm(wav): + """Convert audio to float 32 bits PCM format.""" + if wav.dtype.is_floating_point: + return wav + else: + return wav.float() / (2**15 - 1) + + +def as_dtype_pcm(wav, dtype): + """Convert audio to either f32 pcm or i16 pcm depending on the given dtype.""" + if wav.dtype.is_floating_point: + return f32_pcm(wav) + else: + return i16_pcm(wav) + + +def encode_mp3(wav, path, samplerate=44100, bitrate=320, verbose=False): + """Save given audio as mp3. This should work on all OSes.""" + C, T = wav.shape + wav = i16_pcm(wav) + encoder = lameenc.Encoder() + encoder.set_bit_rate(bitrate) + encoder.set_in_sample_rate(samplerate) + encoder.set_channels(C) + encoder.set_quality(2) # 2-highest, 7-fastest + if not verbose: + encoder.silence() + wav = wav.data.cpu() + wav = wav.transpose(0, 1).numpy() + mp3_data = encoder.encode(wav.tobytes()) + mp3_data += encoder.flush() + with open(path, "wb") as f: + f.write(mp3_data) + + +def prevent_clip(wav, mode='rescale'): + """ + different strategies for avoiding raw clipping. + """ + assert wav.dtype.is_floating_point, "too late for clipping" + if mode == 'rescale': + wav = wav / max(1.01 * wav.abs().max(), 1) + elif mode == 'clamp': + wav = wav.clamp(-0.99, 0.99) + elif mode == 'tanh': + wav = torch.tanh(wav) + else: + raise ValueError(f"Invalid mode {mode}") + return wav + + +def save_audio(wav, path, samplerate, bitrate=320, clip='rescale', + bits_per_sample=16, as_float=False): + """Save audio file, automatically preventing clipping if necessary + based on the given `clip` strategy. If the path ends in `.mp3`, this + will save as mp3 with the given `bitrate`. + """ + wav = prevent_clip(wav, mode=clip) + path = Path(path) + suffix = path.suffix.lower() + if suffix == ".mp3": + encode_mp3(wav, path, samplerate, bitrate) + elif suffix == ".wav": + if as_float: + bits_per_sample = 32 + encoding = 'PCM_F' + else: + encoding = 'PCM_S' + ta.save(str(path), wav, sample_rate=samplerate, + encoding=encoding, bits_per_sample=bits_per_sample) + else: + raise ValueError(f"Invalid suffix for path: {suffix}") diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/augment.py b/AutoCoverTool/ref/music_remover/demucs/demucs/augment.py new file mode 100644 index 0000000..2e4df78 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/augment.py @@ -0,0 +1,111 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +"""Data augmentations. +""" + +import random +import torch as th +from torch import nn + + +class Shift(nn.Module): + """ + Randomly shift audio in time by up to `shift` samples. 
+    """
+    def __init__(self, shift=8192, same=False):
+        super().__init__()
+        self.shift = shift
+        self.same = same
+
+    def forward(self, wav):
+        batch, sources, channels, time = wav.size()
+        length = time - self.shift
+        if self.shift > 0:
+            if not self.training:
+                wav = wav[..., :length]
+            else:
+                srcs = 1 if self.same else sources
+                offsets = th.randint(self.shift, [batch, srcs, 1, 1], device=wav.device)
+                offsets = offsets.expand(-1, sources, channels, -1)
+                indexes = th.arange(length, device=wav.device)
+                wav = wav.gather(3, indexes + offsets)
+        return wav
+
+
+class FlipChannels(nn.Module):
+    """
+    Flip left-right channels.
+    """
+    def forward(self, wav):
+        batch, sources, channels, time = wav.size()
+        if self.training and wav.size(2) == 2:
+            left = th.randint(2, (batch, sources, 1, 1), device=wav.device)
+            left = left.expand(-1, -1, -1, time)
+            right = 1 - left
+            wav = th.cat([wav.gather(2, left), wav.gather(2, right)], dim=2)
+        return wav
+
+
+class FlipSign(nn.Module):
+    """
+    Random sign flip.
+    """
+    def forward(self, wav):
+        batch, sources, channels, time = wav.size()
+        if self.training:
+            signs = th.randint(2, (batch, sources, 1, 1), device=wav.device, dtype=th.float32)
+            wav = wav * (2 * signs - 1)
+        return wav
+
+
+class Remix(nn.Module):
+    """
+    Shuffle sources to make new mixes.
+    """
+    def __init__(self, proba=1, group_size=4):
+        """
+        Shuffle sources within one batch.
+        Each batch is divided into groups of size `group_size`, and shuffling is done within
+        each group separately. This allows keeping the same probability distribution no matter
+        the number of GPUs. Without this grouping, using more GPUs would lead to a higher
+        probability of keeping two sources from the same track together, which can impact
+        performance.
+        """
+        super().__init__()
+        self.proba = proba
+        self.group_size = group_size
+
+    def forward(self, wav):
+        batch, streams, channels, time = wav.size()
+        device = wav.device
+
+        if self.training and random.random() < self.proba:
+            group_size = self.group_size or batch
+            if batch % group_size != 0:
+                raise ValueError(f"Batch size {batch} must be divisible by group size {group_size}")
+            groups = batch // group_size
+            wav = wav.view(groups, group_size, streams, channels, time)
+            permutations = th.argsort(th.rand(groups, group_size, streams, 1, 1, device=device),
+                                      dim=1)
+            wav = wav.gather(1, permutations.expand(-1, -1, -1, channels, time))
+            wav = wav.view(batch, streams, channels, time)
+        return wav
+
+
+class Scale(nn.Module):
+    def __init__(self, proba=1., min=0.25, max=1.25):
+        super().__init__()
+        self.proba = proba
+        self.min = min
+        self.max = max
+
+    def forward(self, wav):
+        batch, streams, channels, time = wav.size()
+        device = wav.device
+        if self.training and random.random() < self.proba:
+            scales = th.empty(batch, streams, 1, 1, device=device).uniform_(self.min, self.max)
+            wav *= scales
+        return wav
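+
+# Usage sketch (an illustration, not upstream docs): during training these modules
+# are composed and applied to a (batch, sources, channels, time) tensor of stems,
+# with the mixture rebuilt by summing over the sources (see train.py/solver.py):
+#
+#     augment = nn.Sequential(Shift(8192), FlipChannels(), FlipSign(), Remix(), Scale())
+#     sources = augment(sources)
+#     mix = sources.sum(dim=1)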
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/demucs.py b/AutoCoverTool/ref/music_remover/demucs/demucs/demucs.py
new file mode 100644
index 0000000..967c833
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/demucs.py
@@ -0,0 +1,447 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import math
+import typing as tp
+
+import julius
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from .states import capture_init
+from .utils import center_trim, unfold
+from .transformer import LayerScale
+
+
+class BLSTM(nn.Module):
+    """
+    BiLSTM with the same number of hidden units as the input dim.
+    If `max_steps` is not None, the input will be split into overlapping
+    chunks and the LSTM applied separately on each chunk.
+    """
+    def __init__(self, dim, layers=1, max_steps=None, skip=False):
+        super().__init__()
+        assert max_steps is None or max_steps % 4 == 0
+        self.max_steps = max_steps
+        self.lstm = nn.LSTM(bidirectional=True, num_layers=layers, hidden_size=dim, input_size=dim)
+        self.linear = nn.Linear(2 * dim, dim)
+        self.skip = skip
+
+    def forward(self, x):
+        B, C, T = x.shape
+        y = x
+        framed = False
+        if self.max_steps is not None and T > self.max_steps:
+            width = self.max_steps
+            stride = width // 2
+            frames = unfold(x, width, stride)
+            nframes = frames.shape[2]
+            framed = True
+            x = frames.permute(0, 2, 1, 3).reshape(-1, C, width)
+
+        x = x.permute(2, 0, 1)
+
+        x = self.lstm(x)[0]
+        x = self.linear(x)
+        x = x.permute(1, 2, 0)
+        if framed:
+            out = []
+            frames = x.reshape(B, -1, C, width)
+            limit = stride // 2
+            for k in range(nframes):
+                if k == 0:
+                    out.append(frames[:, k, :, :-limit])
+                elif k == nframes - 1:
+                    out.append(frames[:, k, :, limit:])
+                else:
+                    out.append(frames[:, k, :, limit:-limit])
+            out = torch.cat(out, -1)
+            out = out[..., :T]
+            x = out
+        if self.skip:
+            x = x + y
+        return x
+
+
+def rescale_conv(conv, reference):
+    """Rescale the initial weight scale. It is unclear why it helps but it certainly does.
+    """
+    std = conv.weight.std().detach()
+    scale = (std / reference)**0.5
+    conv.weight.data /= scale
+    if conv.bias is not None:
+        conv.bias.data /= scale
+
+
+def rescale_module(module, reference):
+    for sub in module.modules():
+        if isinstance(sub, (nn.Conv1d, nn.ConvTranspose1d, nn.Conv2d, nn.ConvTranspose2d)):
+            rescale_conv(sub, reference)
+
+
+class DConv(nn.Module):
+    """
+    New residual branches in each encoder layer.
+    This alternates dilated convolutions, potentially with LSTMs and attention.
+    Also, before entering each residual branch, the dimension is projected on a smaller subspace,
+    e.g. of dim `channels // compress`.
+    """
+    def __init__(self, channels: int, compress: float = 4, depth: int = 2, init: float = 1e-4,
+                 norm=True, attn=False, heads=4, ndecay=4, lstm=False, gelu=True,
+                 kernel=3, dilate=True):
+        """
+        Args:
+            channels: input/output channels for residual branch.
+            compress: amount of channel compression inside the branch.
+            depth: number of layers in the residual branch. Each layer has its own
+                projection, and potentially LSTM and attention.
+            init: initial scale for LayerNorm.
+            norm: use GroupNorm.
+            attn: use LocalAttention.
+            heads: number of heads for the LocalAttention.
+            ndecay: number of decay controls in the LocalAttention.
+            lstm: use LSTM.
+            gelu: use GELU activation.
+            kernel: kernel size for the (dilated) convolutions.
+            dilate: if true, use dilation, increasing with the depth.
+        """
+
+        super().__init__()
+        assert kernel % 2 == 1
+        self.channels = channels
+        self.compress = compress
+        self.depth = abs(depth)
+        dilate = depth > 0
+
+        norm_fn: tp.Callable[[int], nn.Module]
+        norm_fn = lambda d: nn.Identity()  # noqa
+        if norm:
+            norm_fn = lambda d: nn.GroupNorm(1, d)  # noqa
+
+        hidden = int(channels / compress)
+
+        act: tp.Type[nn.Module]
+        if gelu:
+            act = nn.GELU
+        else:
+            act = nn.ReLU
+
+        self.layers = nn.ModuleList([])
+        for d in range(self.depth):
+            dilation = 2 ** d if dilate else 1
+            padding = dilation * (kernel // 2)
+            mods = [
+                nn.Conv1d(channels, hidden, kernel, dilation=dilation, padding=padding),
+                norm_fn(hidden), act(),
+                nn.Conv1d(hidden, 2 * channels, 1),
+                norm_fn(2 * channels), nn.GLU(1),
+                LayerScale(channels, init),
+            ]
+            if attn:
+                mods.insert(3, LocalState(hidden, heads=heads, ndecay=ndecay))
+            if lstm:
+                mods.insert(3, BLSTM(hidden, layers=2, max_steps=200, skip=True))
+            layer = nn.Sequential(*mods)
+            self.layers.append(layer)
+
+    def forward(self, x):
+        for layer in self.layers:
+            x = x + layer(x)
+        return x
+
+
+class LocalState(nn.Module):
+    """Local state allows attention based only on data (no positional embedding),
+    while setting a constraint on the time window (e.g. decaying penalty term).
+
+    Also includes a failed experiment with trying to provide some frequency-based attention.
+    """
+    def __init__(self, channels: int, heads: int = 4, nfreqs: int = 0, ndecay: int = 4):
+        super().__init__()
+        assert channels % heads == 0, (channels, heads)
+        self.heads = heads
+        self.nfreqs = nfreqs
+        self.ndecay = ndecay
+        self.content = nn.Conv1d(channels, channels, 1)
+        self.query = nn.Conv1d(channels, channels, 1)
+        self.key = nn.Conv1d(channels, channels, 1)
+        if nfreqs:
+            self.query_freqs = nn.Conv1d(channels, heads * nfreqs, 1)
+        if ndecay:
+            self.query_decay = nn.Conv1d(channels, heads * ndecay, 1)
+            # Initialize decay close to zero (there is a sigmoid), for maximum initial window.
+            self.query_decay.weight.data *= 0.01
+            assert self.query_decay.bias is not None  # stupid type checker
+            self.query_decay.bias.data[:] = -2
+        self.proj = nn.Conv1d(channels + heads * nfreqs, channels, 1)
+
+    def forward(self, x):
+        B, C, T = x.shape
+        heads = self.heads
+        indexes = torch.arange(T, device=x.device, dtype=x.dtype)
+        # left index are keys, right index are queries
+        delta = indexes[:, None] - indexes[None, :]
+
+        queries = self.query(x).view(B, heads, -1, T)
+        keys = self.key(x).view(B, heads, -1, T)
+        # t are keys, s are queries
+        dots = torch.einsum("bhct,bhcs->bhts", keys, queries)
+        dots /= keys.shape[2]**0.5
+        if self.nfreqs:
+            periods = torch.arange(1, self.nfreqs + 1, device=x.device, dtype=x.dtype)
+            freq_kernel = torch.cos(2 * math.pi * delta / periods.view(-1, 1, 1))
+            freq_q = self.query_freqs(x).view(B, heads, -1, T) / self.nfreqs ** 0.5
+            dots += torch.einsum("fts,bhfs->bhts", freq_kernel, freq_q)
+        if self.ndecay:
+            decays = torch.arange(1, self.ndecay + 1, device=x.device, dtype=x.dtype)
+            decay_q = self.query_decay(x).view(B, heads, -1, T)
+            decay_q = torch.sigmoid(decay_q) / 2
+            decay_kernel = - decays.view(-1, 1, 1) * delta.abs() / self.ndecay**0.5
+            dots += torch.einsum("fts,bhfs->bhts", decay_kernel, decay_q)
+
+        # Kill self reference.
+        dots.masked_fill_(torch.eye(T, device=dots.device, dtype=torch.bool), -100)
+        weights = torch.softmax(dots, dim=2)
+
+        content = self.content(x).view(B, heads, -1, T)
+        result = torch.einsum("bhts,bhct->bhcs", weights, content)
+        if self.nfreqs:
+            time_sig = torch.einsum("bhts,fts->bhfs", weights, freq_kernel)
+            result = torch.cat([result, time_sig], 2)
+        result = result.reshape(B, -1, T)
+        return x + self.proj(result)
+
+
+class Demucs(nn.Module):
+    @capture_init
+    def __init__(self,
+                 sources,
+                 # Channels
+                 audio_channels=2,
+                 channels=64,
+                 growth=2.,
+                 # Main structure
+                 depth=6,
+                 rewrite=True,
+                 lstm_layers=0,
+                 # Convolutions
+                 kernel_size=8,
+                 stride=4,
+                 context=1,
+                 # Activations
+                 gelu=True,
+                 glu=True,
+                 # Normalization
+                 norm_starts=4,
+                 norm_groups=4,
+                 # DConv residual branch
+                 dconv_mode=1,
+                 dconv_depth=2,
+                 dconv_comp=4,
+                 dconv_attn=4,
+                 dconv_lstm=4,
+                 dconv_init=1e-4,
+                 # Pre/post processing
+                 normalize=True,
+                 resample=True,
+                 # Weight init
+                 rescale=0.1,
+                 # Metadata
+                 samplerate=44100,
+                 segment=4 * 10):
+        """
+        Args:
+            sources (list[str]): list of source names.
+            audio_channels (int): stereo or mono.
+            channels (int): first convolution channels.
+            depth (int): number of layers in the encoder and in the decoder.
+            growth (float): multiply (resp. divide) the number of channels by that
+                for each layer of the encoder (resp. decoder).
+            rewrite (bool): add 1x1 convolution to each layer.
+            lstm_layers (int): number of lstm layers, 0 = no lstm. Deactivated
+                by default, as this is now replaced by the smaller and faster LSTMs
+                in the DConv branches.
+            kernel_size (int): kernel size for convolutions.
+            stride (int): stride for convolutions.
+            context (int): kernel size of the convolution in the
+                decoder before the transposed convolution. If > 1,
+                will provide some context from neighboring time steps.
+            gelu: use GELU activation function.
+            glu (bool): use glu instead of ReLU for the 1x1 rewrite conv.
+            norm_starts: layer at which group norm starts being used.
+                decoder layers are numbered in reverse order.
+            norm_groups: number of groups for group norm.
+            dconv_mode: if 1: dconv in encoder only, 2: decoder only, 3: both.
+            dconv_depth: depth of residual DConv branch.
+            dconv_comp: compression of DConv branch.
+            dconv_attn: adds attention layers in DConv branch starting at this layer.
+            dconv_lstm: adds a LSTM layer in DConv branch starting at this layer.
+            dconv_init: initial scale for the DConv branch LayerScale.
+            normalize (bool): normalizes the input audio on the fly, and scales back
+                the output by the same amount.
+            resample (bool): upsample x2 the input and downsample /2 the output.
+            rescale (float): rescale initial weights of convolutions
+                to get their standard deviation closer to `rescale`.
+            samplerate (int): stored as meta information for easing
+                future evaluations of the model.
+            segment (float): duration of the chunks of audio to ideally evaluate the model on.
+                This is used by `demucs.apply.apply_model`.
+        """
+
+        super().__init__()
+        self.audio_channels = audio_channels
+        self.sources = sources
+        self.kernel_size = kernel_size
+        self.context = context
+        self.stride = stride
+        self.depth = depth
+        self.resample = resample
+        self.channels = channels
+        self.normalize = normalize
+        self.samplerate = samplerate
+        self.segment = segment
+        self.encoder = nn.ModuleList()
+        self.decoder = nn.ModuleList()
+        self.skip_scales = nn.ModuleList()
+
+        if glu:
+            activation = nn.GLU(dim=1)
+            ch_scale = 2
+        else:
+            activation = nn.ReLU()
+            ch_scale = 1
+        if gelu:
+            act2 = nn.GELU
+        else:
+            act2 = nn.ReLU
+
+        in_channels = audio_channels
+        padding = 0
+        for index in range(depth):
+            norm_fn = lambda d: nn.Identity()  # noqa
+            if index >= norm_starts:
+                norm_fn = lambda d: nn.GroupNorm(norm_groups, d)  # noqa
+
+            encode = []
+            encode += [
+                nn.Conv1d(in_channels, channels, kernel_size, stride),
+                norm_fn(channels),
+                act2(),
+            ]
+            attn = index >= dconv_attn
+            lstm = index >= dconv_lstm
+            if dconv_mode & 1:
+                encode += [DConv(channels, depth=dconv_depth, init=dconv_init,
+                                 compress=dconv_comp, attn=attn, lstm=lstm)]
+            if rewrite:
+                encode += [
+                    nn.Conv1d(channels, ch_scale * channels, 1),
+                    norm_fn(ch_scale * channels), activation]
+            self.encoder.append(nn.Sequential(*encode))
+
+            decode = []
+            if index > 0:
+                out_channels = in_channels
+            else:
+                out_channels = len(self.sources) * audio_channels
+            if rewrite:
+                decode += [
+                    nn.Conv1d(channels, ch_scale * channels, 2 * context + 1, padding=context),
+                    norm_fn(ch_scale * channels), activation]
+            if dconv_mode & 2:
+                decode += [DConv(channels, depth=dconv_depth, init=dconv_init,
+                                 compress=dconv_comp, attn=attn, lstm=lstm)]
+            decode += [nn.ConvTranspose1d(channels, out_channels,
+                                          kernel_size, stride, padding=padding)]
+            if index > 0:
+                decode += [norm_fn(out_channels), act2()]
+            self.decoder.insert(0, nn.Sequential(*decode))
+            in_channels = channels
+            channels = int(growth * channels)
+
+        channels = in_channels
+        if lstm_layers:
+            self.lstm = BLSTM(channels, lstm_layers)
+        else:
+            self.lstm = None
+
+        if rescale:
+            rescale_module(self, reference=rescale)
+
+    def valid_length(self, length):
+        """
+        Return the nearest valid length to use with the model so that
+        there are no time steps left over in a convolution, e.g. for all
+        layers, size of the input - kernel_size % stride = 0.
+
+        Note that inputs are automatically padded if necessary to ensure that the output
+        has the same length as the input.
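+
+        For example (a worked illustration, not part of the upstream docstring):
+        with the defaults kernel_size=8, stride=4, depth=6 and resample=True,
+        a 1-second 44.1 kHz input is first doubled to 88200 samples, the encoder
+        reduces it to 21 time steps, the decoder expands back to 91476 samples,
+        and halving gives valid_length(44100) == 45738, i.e. 1638 samples of padding.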
+ """ + if self.resample: + length *= 2 + + for _ in range(self.depth): + length = math.ceil((length - self.kernel_size) / self.stride) + 1 + length = max(1, length) + + for idx in range(self.depth): + length = (length - 1) * self.stride + self.kernel_size + + if self.resample: + length = math.ceil(length / 2) + return int(length) + + def forward(self, mix): + x = mix + length = x.shape[-1] + + if self.normalize: + mono = mix.mean(dim=1, keepdim=True) + mean = mono.mean(dim=-1, keepdim=True) + std = mono.std(dim=-1, keepdim=True) + x = (x - mean) / (1e-5 + std) + else: + mean = 0 + std = 1 + + delta = self.valid_length(length) - length + x = F.pad(x, (delta // 2, delta - delta // 2)) + + if self.resample: + x = julius.resample_frac(x, 1, 2) + + saved = [] + for encode in self.encoder: + x = encode(x) + saved.append(x) + + if self.lstm: + x = self.lstm(x) + + for decode in self.decoder: + skip = saved.pop(-1) + skip = center_trim(skip, x) + x = decode(x + skip) + + if self.resample: + x = julius.resample_frac(x, 2, 1) + x = x * std + mean + x = center_trim(x, length) + x = x.view(x.size(0), len(self.sources), self.audio_channels, x.size(-1)) + return x + + def load_state_dict(self, state, strict=True): + # fix a mismatch with previous generation Demucs models. + for idx in range(self.depth): + for a in ['encoder', 'decoder']: + for b in ['bias', 'weight']: + new = f'{a}.{idx}.3.{b}' + old = f'{a}.{idx}.2.{b}' + if old in state and new not in state: + state[new] = state.pop(old) + super().load_state_dict(state, strict=strict) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/distrib.py b/AutoCoverTool/ref/music_remover/demucs/demucs/distrib.py new file mode 100644 index 0000000..370822e --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/distrib.py @@ -0,0 +1,100 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +"""Distributed training utilities. 
+""" +import logging +import pickle + +import numpy as np +import torch +from torch.utils.data.distributed import DistributedSampler +from torch.utils.data import DataLoader, Subset +from torch.nn.parallel.distributed import DistributedDataParallel + +from dora import distrib as dora_distrib + +logger = logging.getLogger(__name__) +rank = 0 +world_size = 1 + + +def init(): + global rank, world_size + if not torch.distributed.is_initialized(): + dora_distrib.init() + rank = dora_distrib.rank() + world_size = dora_distrib.world_size() + + +def average(metrics, count=1.): + if isinstance(metrics, dict): + keys, values = zip(*sorted(metrics.items())) + values = average(values, count) + return dict(zip(keys, values)) + if world_size == 1: + return metrics + tensor = torch.tensor(list(metrics) + [1], device='cuda', dtype=torch.float32) + tensor *= count + torch.distributed.all_reduce(tensor, op=torch.distributed.ReduceOp.SUM) + return (tensor[:-1] / tensor[-1]).cpu().numpy().tolist() + + +def wrap(model): + if world_size == 1: + return model + else: + return DistributedDataParallel( + model, + # find_unused_parameters=True, + device_ids=[torch.cuda.current_device()], + output_device=torch.cuda.current_device()) + + +def barrier(): + if world_size > 1: + torch.distributed.barrier() + + +def share(obj=None, src=0): + if world_size == 1: + return obj + size = torch.empty(1, device='cuda', dtype=torch.long) + if rank == src: + dump = pickle.dumps(obj) + size[0] = len(dump) + torch.distributed.broadcast(size, src=src) + # size variable is now set to the length of pickled obj in all processes + + if rank == src: + buffer = torch.from_numpy(np.frombuffer(dump, dtype=np.uint8).copy()).cuda() + else: + buffer = torch.empty(size[0].item(), device='cuda', dtype=torch.uint8) + torch.distributed.broadcast(buffer, src=src) + # buffer variable is now set to pickled obj in all processes + + if rank != src: + obj = pickle.loads(buffer.cpu().numpy().tobytes()) + logger.debug(f"Shared object of size {len(buffer)}") + return obj + + +def loader(dataset, *args, shuffle=False, klass=DataLoader, **kwargs): + """ + Create a dataloader properly in case of distributed training. + If a gradient is going to be computed you must set `shuffle=True`. + """ + if world_size == 1: + return klass(dataset, *args, shuffle=shuffle, **kwargs) + + if shuffle: + # train means we will compute backward, we use DistributedSampler + sampler = DistributedSampler(dataset) + # We ignore shuffle, DistributedSampler already shuffles + return klass(dataset, *args, **kwargs, sampler=sampler) + else: + # We make a manual shard, as DistributedSampler otherwise replicate some examples + dataset = Subset(dataset, list(range(rank, len(dataset), world_size))) + return klass(dataset, *args, shuffle=shuffle, **kwargs) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/ema.py b/AutoCoverTool/ref/music_remover/demucs/demucs/ema.py new file mode 100644 index 0000000..41e615a --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/ema.py @@ -0,0 +1,66 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +# Inspired from https://github.com/rwightman/pytorch-image-models +from contextlib import contextmanager + +import torch + +from .states import swap_state + + +class ModelEMA: + """ + Perform EMA on a model. You can switch to the EMA weights temporarily + with the `swap` method. 
+ + ema = ModelEMA(model) + with ema.swap(): + # compute valid metrics with averaged model. + """ + def __init__(self, model, decay=0.9999, unbias=True, device='cpu'): + self.decay = decay + self.model = model + self.state = {} + self.count = 0 + self.device = device + self.unbias = unbias + + self._init() + + def _init(self): + for key, val in self.model.state_dict().items(): + if val.dtype != torch.float32: + continue + device = self.device or val.device + if key not in self.state: + self.state[key] = val.detach().to(device, copy=True) + + def update(self): + if self.unbias: + self.count = self.count * self.decay + 1 + w = 1 / self.count + else: + w = 1 - self.decay + for key, val in self.model.state_dict().items(): + if val.dtype != torch.float32: + continue + device = self.device or val.device + self.state[key].mul_(1 - w) + self.state[key].add_(val.detach().to(device), alpha=w) + + @contextmanager + def swap(self): + with swap_state(self.model, self.state): + yield + + def state_dict(self): + return {'state': self.state, 'count': self.count} + + def load_state_dict(self, state): + self.count = state['count'] + for k, v in state['state'].items(): + self.state[k].copy_(v) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/evaluate.py b/AutoCoverTool/ref/music_remover/demucs/demucs/evaluate.py new file mode 100755 index 0000000..24e2994 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/evaluate.py @@ -0,0 +1,174 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +"""Test time evaluation, either using the original SDR from [Vincent et al. 2006] +or the newest SDR definition from the MDX 2021 competition (this one will +be reported as `nsdr` for `new sdr`). +""" + +from concurrent import futures +import logging + +from dora.log import LogProgress +import numpy as np +import musdb +import museval +import torch as th + +from .apply import apply_model +from .audio import convert_audio, save_audio +from . import distrib +from .utils import DummyPoolExecutor + + +logger = logging.getLogger(__name__) + + +def new_sdr(references, estimates): + """ + Compute the SDR according to the MDX challenge definition. + Adapted from AIcrowd/music-demixing-challenge-starter-kit (MIT license) + """ + assert references.dim() == 4 + assert estimates.dim() == 4 + delta = 1e-7 # avoid numerical errors + num = th.sum(th.square(references), dim=(2, 3)) + den = th.sum(th.square(references - estimates), dim=(2, 3)) + num += delta + den += delta + scores = 10 * th.log10(num / den) + return scores + + +def eval_track(references, estimates, win, hop, compute_sdr=True): + references = references.transpose(1, 2).double() + estimates = estimates.transpose(1, 2).double() + + new_scores = new_sdr(references.cpu()[None], estimates.cpu()[None])[0] + + if not compute_sdr: + return None, new_scores + else: + references = references.numpy() + estimates = estimates.numpy() + scores = museval.metrics.bss_eval( + references, estimates, + compute_permutation=False, + window=win, + hop=hop, + framewise_filters=False, + bsseval_sources_version=False)[:-1] + return scores, new_scores + + +def evaluate(solver, compute_sdr=False): + """ + Evaluate model using museval. + compute_sdr=False means using only the MDX definition of the SDR, which + is much faster to evaluate. 
+ """ + + args = solver.args + + output_dir = solver.folder / "results" + output_dir.mkdir(exist_ok=True, parents=True) + json_folder = solver.folder / "results/test" + json_folder.mkdir(exist_ok=True, parents=True) + + # we load tracks from the original musdb set + if args.test.nonhq is None: + test_set = musdb.DB(args.dset.musdb, subsets=["test"], is_wav=True) + else: + test_set = musdb.DB(args.test.nonhq, subsets=["test"], is_wav=False) + src_rate = args.dset.musdb_samplerate + + eval_device = 'cpu' + + model = solver.model + win = int(1. * model.samplerate) + hop = int(1. * model.samplerate) + + indexes = range(distrib.rank, len(test_set), distrib.world_size) + indexes = LogProgress(logger, indexes, updates=args.misc.num_prints, + name='Eval') + pendings = [] + + pool = futures.ProcessPoolExecutor if args.test.workers else DummyPoolExecutor + with pool(args.test.workers) as pool: + for index in indexes: + track = test_set.tracks[index] + + mix = th.from_numpy(track.audio).t().float() + if mix.dim() == 1: + mix = mix[None] + mix = mix.to(solver.device) + ref = mix.mean(dim=0) # mono mixture + mix = (mix - ref.mean()) / ref.std() + mix = convert_audio(mix, src_rate, model.samplerate, model.audio_channels) + estimates = apply_model(model, mix[None], + shifts=args.test.shifts, split=args.test.split, + overlap=args.test.overlap)[0] + estimates = estimates * ref.std() + ref.mean() + estimates = estimates.to(eval_device) + + references = th.stack( + [th.from_numpy(track.targets[name].audio).t() for name in model.sources]) + if references.dim() == 2: + references = references[:, None] + references = references.to(eval_device) + references = convert_audio(references, src_rate, + model.samplerate, model.audio_channels) + if args.test.save: + folder = solver.folder / "wav" / track.name + folder.mkdir(exist_ok=True, parents=True) + for name, estimate in zip(model.sources, estimates): + save_audio(estimate.cpu(), folder / (name + ".mp3"), model.samplerate) + + pendings.append((track.name, pool.submit( + eval_track, references, estimates, win=win, hop=hop, compute_sdr=compute_sdr))) + + pendings = LogProgress(logger, pendings, updates=args.misc.num_prints, + name='Eval (BSS)') + tracks = {} + for track_name, pending in pendings: + pending = pending.result() + scores, nsdrs = pending + tracks[track_name] = {} + for idx, target in enumerate(model.sources): + tracks[track_name][target] = {'nsdr': [float(nsdrs[idx])]} + if scores is not None: + (sdr, isr, sir, sar) = scores + for idx, target in enumerate(model.sources): + values = { + "SDR": sdr[idx].tolist(), + "SIR": sir[idx].tolist(), + "ISR": isr[idx].tolist(), + "SAR": sar[idx].tolist() + } + tracks[track_name][target].update(values) + + all_tracks = {} + for src in range(distrib.world_size): + all_tracks.update(distrib.share(tracks, src)) + + result = {} + metric_names = next(iter(all_tracks.values()))[model.sources[0]] + for metric_name in metric_names: + avg = 0 + avg_of_medians = 0 + for source in model.sources: + medians = [ + np.nanmedian(all_tracks[track][source][metric_name]) + for track in all_tracks.keys()] + mean = np.mean(medians) + median = np.median(medians) + result[metric_name.lower() + "_" + source] = mean + result[metric_name.lower() + "_med" + "_" + source] = median + avg += mean / len(model.sources) + avg_of_medians += median / len(model.sources) + result[metric_name.lower()] = avg + result[metric_name.lower() + "_med"] = avg_of_medians + return result diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/grids/__init__.py 
b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/grids/_explorers.py b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/_explorers.py new file mode 100644 index 0000000..f01d84f --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/_explorers.py @@ -0,0 +1,64 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +from dora import Explorer +import treetable as tt + + +class MyExplorer(Explorer): + test_metrics = ['nsdr', 'sdr_med'] + + def get_grid_metrics(self): + """Return the metrics that should be displayed in the tracking table. + """ + return [ + tt.group("train", [ + tt.leaf("epoch"), + tt.leaf("reco", ".3f"), + ], align=">"), + tt.group("valid", [ + tt.leaf("penalty", ".1f"), + tt.leaf("ms", ".1f"), + tt.leaf("reco", ".2%"), + tt.leaf("breco", ".2%"), + tt.leaf("b_nsdr", ".2f"), + # tt.leaf("b_nsdr_drums", ".2f"), + # tt.leaf("b_nsdr_bass", ".2f"), + # tt.leaf("b_nsdr_other", ".2f"), + # tt.leaf("b_nsdr_vocals", ".2f"), + ], align=">"), + tt.group("test", [ + tt.leaf(name, ".2f") + for name in self.test_metrics + ], align=">") + ] + + def process_history(self, history): + train = { + 'epoch': len(history), + } + valid = {} + test = {} + best_v_main = float('inf') + breco = float('inf') + for metrics in history: + train.update(metrics['train']) + valid.update(metrics['valid']) + if 'main' in metrics['valid']: + best_v_main = min(best_v_main, metrics['valid']['main']['loss']) + valid['bmain'] = best_v_main + valid['breco'] = min(breco, metrics['valid']['reco']) + breco = valid['breco'] + if (metrics['valid']['loss'] == metrics['valid']['best'] or + metrics['valid'].get('nsdr') == metrics['valid']['best']): + for k, v in metrics['valid'].items(): + if k.startswith('reco_'): + valid['b_' + k[len('reco_'):]] = v + if k.startswith('nsdr'): + valid[f'b_{k}'] = v + if 'test' in metrics: + test.update(metrics['test']) + metrics = history[-1] + return {"train": train, "valid": valid, "test": test} diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mdx.py b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mdx.py new file mode 100644 index 0000000..b35d8ab --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mdx.py @@ -0,0 +1,33 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +""" +Main training for the Track A MDX models. +""" + +from ._explorers import MyExplorer +from ..train import main + + +TRACK_A = ['0d19c1c6', '7ecf8ec1', 'c511e2ab', '7d865c68'] + + +@MyExplorer +def explorer(launcher): + launcher.slurm_( + gpus=8, + time=3 * 24 * 60, + partition='learnlab') + + # Reproduce results from MDX competition Track A + # This trains the first round of models. Once this is trained, + # you will need to schedule `mdx_refine`. 
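+    # Each Track A signature above names a fine-tuned XP: we resolve the parent
+    # XP it continued from, schedule that, and also schedule two DiffQ-quantized
+    # variants (`quant.diffq`) used for the smaller, compressed model bags.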
+    for sig in TRACK_A:
+        xp = main.get_xp_from_sig(sig)
+        parent = xp.cfg.continue_from
+        xp = main.get_xp_from_sig(parent)
+        launcher(xp.argv)
+        launcher(xp.argv, {'quant.diffq': 1e-4})
+        launcher(xp.argv, {'quant.diffq': 3e-4})
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mdx_extra.py b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mdx_extra.py
new file mode 100644
index 0000000..e32f7cd
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mdx_extra.py
@@ -0,0 +1,36 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Main training for the Track B MDX models.
+"""
+
+from ._explorers import MyExplorer
+from ..train import main
+
+TRACK_B = ['e51eebcc', 'a1d90b5c', '5d2d6c55', 'cfa93e08']
+
+
+@MyExplorer
+def explorer(launcher):
+    launcher.slurm_(
+        gpus=8,
+        time=3 * 24 * 60,
+        partition='learnlab')
+
+    # Reproduce results from MDX competition Track B.
+    # This trains the base models; the DiffQ-quantized variants used in the
+    # final bag are scheduled directly below for the `extra_test` dataset.
+    for sig in TRACK_B:
+        while sig is not None:
+            xp = main.get_xp_from_sig(sig)
+            sig = xp.cfg.continue_from
+
+        for dset in ['extra44', 'extra_test']:
+            sub = launcher.bind(xp.argv, dset=dset)
+            sub()
+            if dset == 'extra_test':
+                sub({'quant.diffq': 1e-4})
+                sub({'quant.diffq': 3e-4})
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mdx_refine.py b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mdx_refine.py
new file mode 100644
index 0000000..496a879
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mdx_refine.py
@@ -0,0 +1,34 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Refinement (second round of training) for the Track A MDX models.
+"""
+
+from ._explorers import MyExplorer
+from .mdx import TRACK_A
+from ..train import main
+
+
+@MyExplorer
+def explorer(launcher):
+    launcher.slurm_(
+        gpus=8,
+        time=3 * 24 * 60,
+        partition='learnlab')
+
+    # Reproduce results from MDX competition Track A
+    # WARNING: all the experiments in the `mdx` grid must have completed.
+    for sig in TRACK_A:
+        xp = main.get_xp_from_sig(sig)
+        launcher(xp.argv)
+        for diffq in [1e-4, 3e-4]:
+            xp_src = main.get_xp_from_sig(xp.cfg.continue_from)
+            q_argv = [f'quant.diffq={diffq}']
+            actual_src = main.get_xp(xp_src.argv + q_argv)
+            actual_src.link.load()
+            assert len(actual_src.link.history) == actual_src.cfg.epochs
+            argv = xp.argv + q_argv + [f'continue_from="{actual_src.sig}"']
+            launcher(argv)
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mmi.py b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mmi.py
new file mode 100644
index 0000000..f51e1b8
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mmi.py
@@ -0,0 +1,69 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
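+"""
+Grid for the `extra_mmi_goodclean` dataset: hdemucs / htdemucs variants and a
+(still experimental) sparse-attention configuration, as bound on the launcher below.
+"""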
+ +from ._explorers import MyExplorer +from dora import Launcher + + +@MyExplorer +def explorer(launcher: Launcher): + launcher.slurm_(gpus=8, time=3 * 24 * 60, partition="devlab,learnlab,learnfair") # 3 days + + sub = launcher.bind_( + { + "dset": "extra_mmi_goodclean", + "test.shifts": 0, + "model": "htdemucs", + "htdemucs.dconv_mode": 3, + "htdemucs.depth": 4, + "htdemucs.t_dropout": 0.02, + "htdemucs.t_layers": 5, + "max_batches": 800, + "ema.epoch": [0.9, 0.95], + "ema.batch": [0.9995, 0.9999], + "dset.segment": 10, + "batch_size": 32, + } + ) + sub({"model": "hdemucs"}) + sub({"model": "hdemucs", "dset": "extra44"}) + sub({"model": "hdemucs", "dset": "musdb44"}) + + sparse = { + 'batch_size': 3 * 8, + 'augment.remix.group_size': 3, + 'htdemucs.t_auto_sparsity': True, + 'htdemucs.t_sparse_self_attn': True, + 'htdemucs.t_sparse_cross_attn': True, + 'htdemucs.t_sparsity': 0.9, + "htdemucs.t_layers": 7 + } + + with launcher.job_array(): + for transf_layers in [5, 7]: + for bottom_channels in [0, 512]: + sub = launcher.bind({ + "htdemucs.t_layers": transf_layers, + "htdemucs.bottom_channels": bottom_channels, + }) + if bottom_channels == 0 and transf_layers == 5: + sub({"augment.remix.proba": 0.0}) + sub({ + "augment.repitch.proba": 0.0, + # when doing repitching, we trim the outut to align on the + # highest change of BPM. When removing repitching, + # we simulate it here to ensure the training context is the same. + # Another second is lost for all experiments due to the random + # shift augmentation. + "dset.segment": 10 * 0.88}) + elif bottom_channels == 512 and transf_layers == 5: + sub(dset="musdb44") + sub(dset="extra44") + # Sparse kernel XP, currently not released as kernels are still experimental. + sub(sparse, {'dset.segment': 15, "htdemucs.t_layers": 7}) + + for duration in [5, 10, 15]: + sub({"dset.segment": duration}) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mmi_ft.py b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mmi_ft.py new file mode 100644 index 0000000..10de9c8 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/mmi_ft.py @@ -0,0 +1,55 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
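+"""
+Per-source fine-tuning grid: each run continues from an already trained
+signature and uses a one-hot `weights` vector so that a single one of the four
+sources dominates the loss.
+"""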
+ +from ._explorers import MyExplorer +from dora import Launcher +from demucs import train + + +def get_sub(launcher, sig): + xp = train.main.get_xp_from_sig(sig) + sub = launcher.bind(xp.argv) + sub() + sub.bind_({ + 'continue_from': sig, + 'continue_best': True}) + return sub + + +@MyExplorer +def explorer(launcher: Launcher): + launcher.slurm_(gpus=4, time=3 * 24 * 60, partition="devlab,learnlab,learnfair") # 3 days + ft = { + 'optim.lr': 1e-4, + 'augment.remix.proba': 0, + 'augment.scale.proba': 0, + 'augment.shift_same': True, + 'htdemucs.t_weight_decay': 0.05, + 'batch_size': 8, + 'optim.clip_grad': 5, + 'optim.optim': 'adamw', + 'epochs': 50, + 'dset.wav2_valid': True, + 'ema.epoch': [], # let's make valid a bit faster + } + with launcher.job_array(): + for sig in ['2899e11a']: + sub = get_sub(launcher, sig) + sub.bind_(ft) + for segment in [15, 18]: + for source in range(4): + w = [0] * 4 + w[source] = 1 + sub({'weights': w, 'dset.segment': segment}) + + for sig in ['955717e8']: + sub = get_sub(launcher, sig) + sub.bind_(ft) + for segment in [10, 15]: + for source in range(4): + w = [0] * 4 + w[source] = 1 + sub({'weights': w, 'dset.segment': segment}) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/grids/repro.py b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/repro.py new file mode 100644 index 0000000..c5eba72 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/repro.py @@ -0,0 +1,50 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +""" +Easier training for reproducibility +""" + +from ._explorers import MyExplorer + + +@MyExplorer +def explorer(launcher): + launcher.slurm_( + gpus=8, + time=3 * 24 * 60, + partition='devlab,learnlab') + + launcher.bind_({'ema.epoch': [0.9, 0.95]}) + launcher.bind_({'ema.batch': [0.9995, 0.9999]}) + launcher.bind_({'epochs': 600}) + + base = {'model': 'demucs', 'demucs.dconv_mode': 0, 'demucs.gelu': False, + 'demucs.lstm_layers': 2} + newt = {'model': 'demucs', 'demucs.normalize': True} + hdem = {'model': 'hdemucs'} + svd = {'svd.penalty': 1e-5, 'svd': 'base2'} + + with launcher.job_array(): + for model in [base, newt, hdem]: + sub = launcher.bind(model) + if model is base: + # Training the v2 Demucs on MusDB HQ + sub(epochs=360) + continue + + # those two will be used in the repro_mdx_a bag of models. + sub(svd) + sub(svd, seed=43) + if model == newt: + # Ablation study + sub() + abl = sub.bind(svd) + abl({'ema.epoch': [], 'ema.batch': []}) + abl({'demucs.dconv_lstm': 10}) + abl({'demucs.dconv_attn': 10}) + abl({'demucs.dconv_attn': 10, 'demucs.dconv_lstm': 10, 'demucs.lstm_layers': 2}) + abl({'demucs.dconv_mode': 0}) + abl({'demucs.gelu': False}) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/grids/repro_ft.py b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/repro_ft.py new file mode 100644 index 0000000..2399fcc --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/grids/repro_ft.py @@ -0,0 +1,46 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
+""" +Fine tuning experiments +""" + +from ._explorers import MyExplorer +from ..train import main + + +@MyExplorer +def explorer(launcher): + launcher.slurm_( + gpus=8, + time=300, + partition='devlab,learnlab') + + # Mus + launcher.slurm_(constraint='volta32gb') + + grid = "repro" + folder = main.dora.dir / "grids" / grid + + for sig in folder.iterdir(): + if not sig.is_symlink(): + continue + xp = main.get_xp_from_sig(sig) + xp.link.load() + if len(xp.link.history) != xp.cfg.epochs: + continue + sub = launcher.bind(xp.argv, [f'continue_from="{xp.sig}"']) + sub.bind_({'ema.epoch': [0.9, 0.95], 'ema.batch': [0.9995, 0.9999]}) + sub.bind_({'test.every': 1, 'test.sdr': True, 'epochs': 4}) + sub.bind_({'dset.segment': 28, 'dset.shift': 2}) + sub.bind_({'batch_size': 32}) + auto = {'dset': 'auto_mus'} + auto.update({'augment.remix.proba': 0, 'augment.scale.proba': 0, + 'augment.shift_same': True}) + sub.bind_(auto) + sub.bind_({'batch_size': 16}) + sub.bind_({'optim.lr': 1e-4}) + sub.bind_({'model_segment': 44}) + sub() diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/hdemucs.py b/AutoCoverTool/ref/music_remover/demucs/demucs/hdemucs.py new file mode 100644 index 0000000..c2e0ae7 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/hdemucs.py @@ -0,0 +1,784 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +""" +This code contains the spectrogram and Hybrid version of Demucs. +""" +from copy import deepcopy +import math +import typing as tp + +from openunmix.filtering import wiener +import torch +from torch import nn +from torch.nn import functional as F + +from .demucs import DConv, rescale_module +from .states import capture_init +from .spec import spectro, ispectro + + +def pad1d(x: torch.Tensor, paddings: tp.Tuple[int, int], mode: str = 'constant', value: float = 0.): + """Tiny wrapper around F.pad, just to allow for reflect padding on small input. + If this is the case, we insert extra 0 padding to the right before the reflection happen.""" + x0 = x + length = x.shape[-1] + padding_left, padding_right = paddings + if mode == 'reflect': + max_pad = max(padding_left, padding_right) + if length <= max_pad: + extra_pad = max_pad - length + 1 + extra_pad_right = min(padding_right, extra_pad) + extra_pad_left = extra_pad - extra_pad_right + paddings = (padding_left - extra_pad_left, padding_right - extra_pad_right) + x = F.pad(x, (extra_pad_left, extra_pad_right)) + out = F.pad(x, paddings, mode, value) + assert out.shape[-1] == length + padding_left + padding_right + if not (out[..., padding_left: padding_left + length] == x0).all(): + raise RuntimeError("pad1d err!") + # assert (out[..., padding_left: padding_left + length] == x0).all() + return out + + +class ScaledEmbedding(nn.Module): + """ + Boost learning rate for embeddings (with `scale`). + Also, can make embeddings continuous with `smooth`. + """ + def __init__(self, num_embeddings: int, embedding_dim: int, + scale: float = 10., smooth=False): + super().__init__() + self.embedding = nn.Embedding(num_embeddings, embedding_dim) + if smooth: + weight = torch.cumsum(self.embedding.weight.data, dim=0) + # when summing gaussian, overscale raises as sqrt(n), so we nornalize by that. 
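+            # (row n of the cumulative sum has variance ~ n, so dividing by
+            # sqrt(n) below keeps every embedding row at unit scale)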
+ weight = weight / torch.arange(1, num_embeddings + 1).to(weight).sqrt()[:, None] + self.embedding.weight.data[:] = weight + self.embedding.weight.data /= scale + self.scale = scale + + @property + def weight(self): + return self.embedding.weight * self.scale + + def forward(self, x): + out = self.embedding(x) * self.scale + return out + + +class HEncLayer(nn.Module): + def __init__(self, chin, chout, kernel_size=8, stride=4, norm_groups=1, empty=False, + freq=True, dconv=True, norm=True, context=0, dconv_kw={}, pad=True, + rewrite=True): + """Encoder layer. This used both by the time and the frequency branch. + + Args: + chin: number of input channels. + chout: number of output channels. + norm_groups: number of groups for group norm. + empty: used to make a layer with just the first conv. this is used + before merging the time and freq. branches. + freq: this is acting on frequencies. + dconv: insert DConv residual branches. + norm: use GroupNorm. + context: context size for the 1x1 conv. + dconv_kw: list of kwargs for the DConv class. + pad: pad the input. Padding is done so that the output size is + always the input size / stride. + rewrite: add 1x1 conv at the end of the layer. + """ + super().__init__() + norm_fn = lambda d: nn.Identity() # noqa + if norm: + norm_fn = lambda d: nn.GroupNorm(norm_groups, d) # noqa + if pad: + pad = kernel_size // 4 + else: + pad = 0 + klass = nn.Conv1d + self.freq = freq + self.kernel_size = kernel_size + self.stride = stride + self.empty = empty + self.norm = norm + self.pad = pad + if freq: + kernel_size = [kernel_size, 1] + stride = [stride, 1] + pad = [pad, 0] + klass = nn.Conv2d + self.conv = klass(chin, chout, kernel_size, stride, pad) + if self.empty: + return + self.norm1 = norm_fn(chout) + self.rewrite = None + if rewrite: + self.rewrite = klass(chout, 2 * chout, 1 + 2 * context, 1, context) + self.norm2 = norm_fn(2 * chout) + + self.dconv = None + if dconv: + self.dconv = DConv(chout, **dconv_kw) + + def forward(self, x, inject=None): + """ + `inject` is used to inject the result from the time branch into the frequency branch, + when both have the same stride. + """ + if not self.freq and x.dim() == 4: + B, C, Fr, T = x.shape + x = x.view(B, -1, T) + + if not self.freq: + le = x.shape[-1] + if not le % self.stride == 0: + x = F.pad(x, (0, self.stride - (le % self.stride))) + y = self.conv(x) + if self.empty: + return y + if inject is not None: + assert inject.shape[-1] == y.shape[-1], (inject.shape, y.shape) + if inject.dim() == 3 and y.dim() == 4: + inject = inject[:, :, None] + y = y + inject + y = F.gelu(self.norm1(y)) + if self.dconv: + if self.freq: + B, C, Fr, T = y.shape + y = y.permute(0, 2, 1, 3).reshape(-1, C, T) + y = self.dconv(y) + if self.freq: + y = y.view(B, Fr, C, T).permute(0, 2, 1, 3) + if self.rewrite: + z = self.norm2(self.rewrite(y)) + z = F.glu(z, dim=1) + else: + z = y + return z + + +class MultiWrap(nn.Module): + """ + Takes one layer and replicate it N times. each replica will act + on a frequency band. All is done so that if the N replica have the same weights, + then this is exactly equivalent to applying the original module on all frequencies. + + This is a bit over-engineered to avoid edge artifacts when splitting + the frequency bands, but it is possible the naive implementation would work as well... + """ + def __init__(self, layer, split_ratios): + """ + Args: + layer: module to clone, must be either HEncLayer or HDecLayer. + split_ratios: list of float indicating which ratio to keep for each band. 
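+                For instance, `split_ratios=[0.5]` creates two replicas of `layer`:
+                one acting on the lower half of the frequency bins and one on the
+                upper half (a trailing ratio of 1 is appended internally).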
+ """ + super().__init__() + self.split_ratios = split_ratios + self.layers = nn.ModuleList() + self.conv = isinstance(layer, HEncLayer) + assert not layer.norm + assert layer.freq + assert layer.pad + if not self.conv: + assert not layer.context_freq + for k in range(len(split_ratios) + 1): + lay = deepcopy(layer) + if self.conv: + lay.conv.padding = (0, 0) + else: + lay.pad = False + for m in lay.modules(): + if hasattr(m, 'reset_parameters'): + m.reset_parameters() + self.layers.append(lay) + + def forward(self, x, skip=None, length=None): + B, C, Fr, T = x.shape + + ratios = list(self.split_ratios) + [1] + start = 0 + outs = [] + for ratio, layer in zip(ratios, self.layers): + if self.conv: + pad = layer.kernel_size // 4 + if ratio == 1: + limit = Fr + frames = -1 + else: + limit = int(round(Fr * ratio)) + le = limit - start + if start == 0: + le += pad + frames = round((le - layer.kernel_size) / layer.stride + 1) + limit = start + (frames - 1) * layer.stride + layer.kernel_size + if start == 0: + limit -= pad + assert limit - start > 0, (limit, start) + assert limit <= Fr, (limit, Fr) + y = x[:, :, start:limit, :] + if start == 0: + y = F.pad(y, (0, 0, pad, 0)) + if ratio == 1: + y = F.pad(y, (0, 0, 0, pad)) + outs.append(layer(y)) + start = limit - layer.kernel_size + layer.stride + else: + if ratio == 1: + limit = Fr + else: + limit = int(round(Fr * ratio)) + last = layer.last + layer.last = True + + y = x[:, :, start:limit] + s = skip[:, :, start:limit] + out, _ = layer(y, s, None) + if outs: + outs[-1][:, :, -layer.stride:] += ( + out[:, :, :layer.stride] - layer.conv_tr.bias.view(1, -1, 1, 1)) + out = out[:, :, layer.stride:] + if ratio == 1: + out = out[:, :, :-layer.stride // 2, :] + if start == 0: + out = out[:, :, layer.stride // 2:, :] + outs.append(out) + layer.last = last + start = limit + out = torch.cat(outs, dim=2) + if not self.conv and not last: + out = F.gelu(out) + if self.conv: + return out + else: + return out, None + + +class HDecLayer(nn.Module): + def __init__(self, chin, chout, last=False, kernel_size=8, stride=4, norm_groups=1, empty=False, + freq=True, dconv=True, norm=True, context=1, dconv_kw={}, pad=True, + context_freq=True, rewrite=True): + """ + Same as HEncLayer but for decoder. See `HEncLayer` for documentation. 
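+        The extra `context_freq` flag controls the 1x1 rewrite convolution: when
+        False, its context window extends only along the time axis, not the
+        frequency axis (required by `MultiWrap`, which splits the frequency bands).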
+        """
+        super().__init__()
+        norm_fn = lambda d: nn.Identity()  # noqa
+        if norm:
+            norm_fn = lambda d: nn.GroupNorm(norm_groups, d)  # noqa
+        if pad:
+            pad = kernel_size // 4
+        else:
+            pad = 0
+        self.pad = pad
+        self.last = last
+        self.freq = freq
+        self.chin = chin
+        self.empty = empty
+        self.stride = stride
+        self.kernel_size = kernel_size
+        self.norm = norm
+        self.context_freq = context_freq
+        klass = nn.Conv1d
+        klass_tr = nn.ConvTranspose1d
+        if freq:
+            kernel_size = [kernel_size, 1]
+            stride = [stride, 1]
+            klass = nn.Conv2d
+            klass_tr = nn.ConvTranspose2d
+        self.conv_tr = klass_tr(chin, chout, kernel_size, stride)
+        self.norm2 = norm_fn(chout)
+        if self.empty:
+            return
+        self.rewrite = None
+        if rewrite:
+            if context_freq:
+                self.rewrite = klass(chin, 2 * chin, 1 + 2 * context, 1, context)
+            else:
+                self.rewrite = klass(chin, 2 * chin, [1, 1 + 2 * context], 1,
+                                     [0, context])
+            self.norm1 = norm_fn(2 * chin)
+
+        self.dconv = None
+        if dconv:
+            self.dconv = DConv(chin, **dconv_kw)
+
+    def forward(self, x, skip, length):
+        if self.freq and x.dim() == 3:
+            B, C, T = x.shape
+            x = x.view(B, self.chin, -1, T)
+
+        if not self.empty:
+            x = x + skip
+
+            if self.rewrite:
+                y = F.glu(self.norm1(self.rewrite(x)), dim=1)
+            else:
+                y = x
+            if self.dconv:
+                if self.freq:
+                    B, C, Fr, T = y.shape
+                    y = y.permute(0, 2, 1, 3).reshape(-1, C, T)
+                y = self.dconv(y)
+                if self.freq:
+                    y = y.view(B, Fr, C, T).permute(0, 2, 1, 3)
+        else:
+            y = x
+            assert skip is None
+        z = self.norm2(self.conv_tr(y))
+        if self.freq:
+            if self.pad:
+                z = z[..., self.pad:-self.pad, :]
+        else:
+            z = z[..., self.pad:self.pad + length]
+            assert z.shape[-1] == length, (z.shape[-1], length)
+        if not self.last:
+            z = F.gelu(z)
+        return z, y
+
+
+class HDemucs(nn.Module):
+    """
+    Spectrogram and hybrid Demucs model.
+    The spectrogram model has the same structure as Demucs, except the first few layers are over the
+    frequency axis, until there is only 1 frequency, and then it moves to time convolutions.
+    Frequency layers can still access information across time steps thanks to the DConv residual.
+
+    Hybrid models have a parallel time branch. At some layer, the time branch has the same stride
+    as the frequency branch and then the two are combined. The opposite happens in the decoder.
+
+    Models can either use naive iSTFT from masking, Wiener filtering ([Uhlich et al. 2017]),
+    or complex-as-channels (CaC) [Choi et al. 2020]. Wiener filtering is based on
+    the Open Unmix implementation [Stoter et al. 2019].
+
+    The loss is always in the temporal domain, obtained by backpropagating through the above
+    output methods and the iSTFT. This makes it easy to define hybrid models. However, it slightly
+    breaks Wiener filtering, as doing more iterations at test time will change the spectrogram
+    contribution without changing the one from the waveform, which leads to worse performance.
+    I tried using the residual option in the OpenUnmix Wiener implementation, but it didn't improve.
+    CaC, on the other hand, provides similar performance and works naturally with
+    hybrid models.
+
+    This model also uses frequency embeddings to improve the efficiency of convolutions
+    over the freq. axis, following [Isik et al. 2020] (https://arxiv.org/pdf/2008.04470.pdf).
+
+    Unlike classic Demucs, there is no resampling here, and normalization is always applied.
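+
+    A minimal usage sketch (the shapes below are an illustration, not part of
+    the original documentation):
+
+        >>> model = HDemucs(sources=["drums", "bass", "other", "vocals"])
+        >>> mix = torch.randn(1, 2, 10 * 44100)  # (batch, audio_channels, time)
+        >>> out = model(mix)                     # (batch, n_sources, audio_channels, time)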
+ """ + @capture_init + def __init__(self, + sources, + # Channels + audio_channels=2, + channels=48, + channels_time=None, + growth=2, + # STFT + nfft=4096, + wiener_iters=0, + end_iters=0, + wiener_residual=False, + cac=True, + # Main structure + depth=6, + rewrite=True, + hybrid=True, + hybrid_old=False, + # Frequency branch + multi_freqs=None, + multi_freqs_depth=2, + freq_emb=0.2, + emb_scale=10, + emb_smooth=True, + # Convolutions + kernel_size=8, + time_stride=2, + stride=4, + context=1, + context_enc=0, + # Normalization + norm_starts=4, + norm_groups=4, + # DConv residual branch + dconv_mode=1, + dconv_depth=2, + dconv_comp=4, + dconv_attn=4, + dconv_lstm=4, + dconv_init=1e-4, + # Weight init + rescale=0.1, + # Metadata + samplerate=44100, + segment=4 * 10): + """ + Args: + sources (list[str]): list of source names. + audio_channels (int): input/output audio channels. + channels (int): initial number of hidden channels. + channels_time: if not None, use a different `channels` value for the time branch. + growth: increase the number of hidden channels by this factor at each layer. + nfft: number of fft bins. Note that changing this require careful computation of + various shape parameters and will not work out of the box for hybrid models. + wiener_iters: when using Wiener filtering, number of iterations at test time. + end_iters: same but at train time. For a hybrid model, must be equal to `wiener_iters`. + wiener_residual: add residual source before wiener filtering. + cac: uses complex as channels, i.e. complex numbers are 2 channels each + in input and output. no further processing is done before ISTFT. + depth (int): number of layers in the encoder and in the decoder. + rewrite (bool): add 1x1 convolution to each layer. + hybrid (bool): make a hybrid time/frequency domain, otherwise frequency only. + hybrid_old: some models trained for MDX had a padding bug. This replicates + this bug to avoid retraining them. + multi_freqs: list of frequency ratios for splitting frequency bands with `MultiWrap`. + multi_freqs_depth: how many layers to wrap with `MultiWrap`. Only the outermost + layers will be wrapped. + freq_emb: add frequency embedding after the first frequency layer if > 0, + the actual value controls the weight of the embedding. + emb_scale: equivalent to scaling the embedding learning rate + emb_smooth: initialize the embedding with a smooth one (with respect to frequencies). + kernel_size: kernel_size for encoder and decoder layers. + stride: stride for encoder and decoder layers. + time_stride: stride for the final time layer, after the merge. + context: context for 1x1 conv in the decoder. + context_enc: context for 1x1 conv in the encoder. + norm_starts: layer at which group norm starts being used. + decoder layers are numbered in reverse order. + norm_groups: number of groups for group norm. + dconv_mode: if 1: dconv in encoder only, 2: decoder only, 3: both. + dconv_depth: depth of residual DConv branch. + dconv_comp: compression of DConv branch. + dconv_attn: adds attention layers in DConv branch starting at this layer. + dconv_lstm: adds a LSTM layer in DConv branch starting at this layer. + dconv_init: initial scale for the DConv branch LayerScale. 
+ rescale: weight recaling trick + + """ + super().__init__() + self.cac = cac + self.wiener_residual = wiener_residual + self.audio_channels = audio_channels + self.sources = sources + self.kernel_size = kernel_size + self.context = context + self.stride = stride + self.depth = depth + self.channels = channels + self.samplerate = samplerate + self.segment = segment + + self.nfft = nfft + self.hop_length = nfft // 4 + self.wiener_iters = wiener_iters + self.end_iters = end_iters + self.freq_emb = None + self.hybrid = hybrid + self.hybrid_old = hybrid_old + if hybrid_old: + assert hybrid, "hybrid_old must come with hybrid=True" + if hybrid: + assert wiener_iters == end_iters + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + if hybrid: + self.tencoder = nn.ModuleList() + self.tdecoder = nn.ModuleList() + + chin = audio_channels + chin_z = chin # number of channels for the freq branch + if self.cac: + chin_z *= 2 + chout = channels_time or channels + chout_z = channels + freqs = nfft // 2 + + for index in range(depth): + lstm = index >= dconv_lstm + attn = index >= dconv_attn + norm = index >= norm_starts + freq = freqs > 1 + stri = stride + ker = kernel_size + if not freq: + assert freqs == 1 + ker = time_stride * 2 + stri = time_stride + + pad = True + last_freq = False + if freq and freqs <= kernel_size: + ker = freqs + pad = False + last_freq = True + + kw = { + 'kernel_size': ker, + 'stride': stri, + 'freq': freq, + 'pad': pad, + 'norm': norm, + 'rewrite': rewrite, + 'norm_groups': norm_groups, + 'dconv_kw': { + 'lstm': lstm, + 'attn': attn, + 'depth': dconv_depth, + 'compress': dconv_comp, + 'init': dconv_init, + 'gelu': True, + } + } + kwt = dict(kw) + kwt['freq'] = 0 + kwt['kernel_size'] = kernel_size + kwt['stride'] = stride + kwt['pad'] = True + kw_dec = dict(kw) + multi = False + if multi_freqs and index < multi_freqs_depth: + multi = True + kw_dec['context_freq'] = False + + if last_freq: + chout_z = max(chout, chout_z) + chout = chout_z + + enc = HEncLayer(chin_z, chout_z, + dconv=dconv_mode & 1, context=context_enc, **kw) + if hybrid and freq: + tenc = HEncLayer(chin, chout, dconv=dconv_mode & 1, context=context_enc, + empty=last_freq, **kwt) + self.tencoder.append(tenc) + + if multi: + enc = MultiWrap(enc, multi_freqs) + self.encoder.append(enc) + if index == 0: + chin = self.audio_channels * len(self.sources) + chin_z = chin + if self.cac: + chin_z *= 2 + dec = HDecLayer(chout_z, chin_z, dconv=dconv_mode & 2, + last=index == 0, context=context, **kw_dec) + if multi: + dec = MultiWrap(dec, multi_freqs) + if hybrid and freq: + tdec = HDecLayer(chout, chin, dconv=dconv_mode & 2, empty=last_freq, + last=index == 0, context=context, **kwt) + self.tdecoder.insert(0, tdec) + self.decoder.insert(0, dec) + + chin = chout + chin_z = chout_z + chout = int(growth * chout) + chout_z = int(growth * chout_z) + if freq: + if freqs <= kernel_size: + freqs = 1 + else: + freqs //= stride + if index == 0 and freq_emb: + self.freq_emb = ScaledEmbedding( + freqs, chin_z, smooth=emb_smooth, scale=emb_scale) + self.freq_emb_scale = freq_emb + + if rescale: + rescale_module(self, reference=rescale) + + def _spec(self, x): + hl = self.hop_length + nfft = self.nfft + x0 = x # noqa + + if self.hybrid: + # We re-pad the signal in order to keep the property + # that the size of the output is exactly the size of the input + # divided by the stride (here hop_length), when divisible. + # This is achieved by padding by 1/4th of the kernel size (here nfft). 
+ # which is not supported by torch.stft. + # Having all convolution operations follow this convention allow to easily + # align the time and frequency branches later on. + assert hl == nfft // 4 + le = int(math.ceil(x.shape[-1] / hl)) + pad = hl // 2 * 3 + if not self.hybrid_old: + x = pad1d(x, (pad, pad + le * hl - x.shape[-1]), mode='reflect') + else: + x = pad1d(x, (pad, pad + le * hl - x.shape[-1])) + + z = spectro(x, nfft, hl)[..., :-1, :] + if self.hybrid: + assert z.shape[-1] == le + 4, (z.shape, x.shape, le) + z = z[..., 2:2+le] + return z + + def _ispec(self, z, length=None, scale=0): + hl = self.hop_length // (4 ** scale) + z = F.pad(z, (0, 0, 0, 1)) + if self.hybrid: + z = F.pad(z, (2, 2)) + pad = hl // 2 * 3 + if not self.hybrid_old: + le = hl * int(math.ceil(length / hl)) + 2 * pad + else: + le = hl * int(math.ceil(length / hl)) + x = ispectro(z, hl, length=le) + if not self.hybrid_old: + x = x[..., pad:pad + length] + else: + x = x[..., :length] + else: + x = ispectro(z, hl, length) + return x + + def _magnitude(self, z): + # return the magnitude of the spectrogram, except when cac is True, + # in which case we just move the complex dimension to the channel one. + if self.cac: + B, C, Fr, T = z.shape + m = torch.view_as_real(z).permute(0, 1, 4, 2, 3) + m = m.reshape(B, C * 2, Fr, T) + else: + m = z.abs() + return m + + def _mask(self, z, m): + # Apply masking given the mixture spectrogram `z` and the estimated mask `m`. + # If `cac` is True, `m` is actually a full spectrogram and `z` is ignored. + niters = self.wiener_iters + if self.cac: + B, S, C, Fr, T = m.shape + out = m.view(B, S, -1, 2, Fr, T).permute(0, 1, 2, 4, 5, 3) + out = torch.view_as_complex(out.contiguous()) + return out + if self.training: + niters = self.end_iters + if niters < 0: + z = z[:, None] + return z / (1e-8 + z.abs()) * m + else: + return self._wiener(m, z, niters) + + def _wiener(self, mag_out, mix_stft, niters): + # apply wiener filtering from OpenUnmix. + init = mix_stft.dtype + wiener_win_len = 300 + residual = self.wiener_residual + + B, S, C, Fq, T = mag_out.shape + mag_out = mag_out.permute(0, 4, 3, 2, 1) + mix_stft = torch.view_as_real(mix_stft.permute(0, 3, 2, 1)) + + outs = [] + for sample in range(B): + pos = 0 + out = [] + for pos in range(0, T, wiener_win_len): + frame = slice(pos, pos + wiener_win_len) + z_out = wiener( + mag_out[sample, frame], mix_stft[sample, frame], niters, + residual=residual) + out.append(z_out.transpose(-1, -2)) + outs.append(torch.cat(out, dim=0)) + out = torch.view_as_complex(torch.stack(outs, 0)) + out = out.permute(0, 4, 3, 2, 1).contiguous() + if residual: + out = out[:, :-1] + assert list(out.shape) == [B, S, C, Fq, T] + return out.to(init) + + def forward(self, mix): + x = mix + length = x.shape[-1] + + z = self._spec(mix) + mag = self._magnitude(z) + x = mag + + B, C, Fq, T = x.shape + + # unlike previous Demucs, we always normalize because it is easier. + mean = x.mean(dim=(1, 2, 3), keepdim=True) + std = x.std(dim=(1, 2, 3), keepdim=True) + x = (x - mean) / (1e-5 + std) + # x will be the freq. branch input. + + if self.hybrid: + # Prepare the time branch input. + xt = mix + meant = xt.mean(dim=(1, 2), keepdim=True) + stdt = xt.std(dim=(1, 2), keepdim=True) + xt = (xt - meant) / (1e-5 + stdt) + + # okay, this is a giant mess I know... + saved = [] # skip connections, freq. + saved_t = [] # skip connections, time. + lengths = [] # saved lengths to properly remove padding, freq branch. + lengths_t = [] # saved lengths for time branch. 
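+        # Walk down the U-Net: the time branch is encoded in parallel until its
+        # stride matches the frequency branch; its final (empty) encoder layer
+        # only applies a first conv, whose output is injected into the freq. branch.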
+ for idx, encode in enumerate(self.encoder): + lengths.append(x.shape[-1]) + inject = None + if self.hybrid and idx < len(self.tencoder): + # we have not yet merged branches. + lengths_t.append(xt.shape[-1]) + tenc = self.tencoder[idx] + xt = tenc(xt) + if not tenc.empty: + # save for skip connection + saved_t.append(xt) + else: + # tenc contains just the first conv., so that now time and freq. + # branches have the same shape and can be merged. + inject = xt + x = encode(x, inject) + if idx == 0 and self.freq_emb is not None: + # add frequency embedding to allow for non equivariant convolutions + # over the frequency axis. + frs = torch.arange(x.shape[-2], device=x.device) + emb = self.freq_emb(frs).t()[None, :, :, None].expand_as(x) + x = x + self.freq_emb_scale * emb + + saved.append(x) + + x = torch.zeros_like(x) + if self.hybrid: + xt = torch.zeros_like(x) + # initialize everything to zero (signal will go through u-net skips). + + for idx, decode in enumerate(self.decoder): + skip = saved.pop(-1) + x, pre = decode(x, skip, lengths.pop(-1)) + # `pre` contains the output just before final transposed convolution, + # which is used when the freq. and time branch separate. + + if self.hybrid: + offset = self.depth - len(self.tdecoder) + if self.hybrid and idx >= offset: + tdec = self.tdecoder[idx - offset] + length_t = lengths_t.pop(-1) + if tdec.empty: + assert pre.shape[2] == 1, pre.shape + pre = pre[:, :, 0] + xt, _ = tdec(pre, None, length_t) + else: + skip = saved_t.pop(-1) + xt, _ = tdec(xt, skip, length_t) + + # Let's make sure we used all stored skip connections. + assert len(saved) == 0 + assert len(lengths_t) == 0 + assert len(saved_t) == 0 + + S = len(self.sources) + x = x.view(B, S, -1, Fq, T) + x = x * std[:, None] + mean[:, None] + + zout = self._mask(z, x) + x = self._ispec(zout, length) + + if self.hybrid: + xt = xt.view(B, S, -1, length) + xt = xt * stdt[:, None] + meant[:, None] + x = xt + x + return x diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/htdemucs.py b/AutoCoverTool/ref/music_remover/demucs/demucs/htdemucs.py new file mode 100644 index 0000000..adc3713 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/htdemucs.py @@ -0,0 +1,648 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# First author is Simon Rouard. +""" +This code contains the spectrogram and Hybrid version of Demucs. +""" +import math + +from openunmix.filtering import wiener +import torch +from torch import nn +from torch.nn import functional as F +from fractions import Fraction +from einops import rearrange + +from .transformer import CrossTransformerEncoder + +from .demucs import rescale_module +from .states import capture_init +from .spec import spectro, ispectro +from .hdemucs import pad1d, ScaledEmbedding, HEncLayer, MultiWrap, HDecLayer + + +class HTDemucs(nn.Module): + """ + Spectrogram and hybrid Demucs model. + The spectrogram model has the same structure as Demucs, except the first few layers are over the + frequency axis, until there is only 1 frequency, and then it moves to time convolutions. + Frequency layers can still access information across time steps thanks to the DConv residual. + + Hybrid model have a parallel time branch. At some layer, the time branch has the same stride + as the frequency branch and then the two are combined. The opposite happens in the decoder. 
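+
+    Compared to HDemucs, a cross-domain Transformer (`CrossTransformerEncoder`)
+    is applied at the bottleneck, letting the time and frequency branches
+    exchange information before decoding.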
+ + Models can either use naive iSTFT from masking, Wiener filtering ([Ulhih et al. 2017]), + or complex as channels (CaC) [Choi et al. 2020]. Wiener filtering is based on + Open Unmix implementation [Stoter et al. 2019]. + + The loss is always on the temporal domain, by backpropagating through the above + output methods and iSTFT. This allows to define hybrid models nicely. However, this breaks + a bit Wiener filtering, as doing more iteration at test time will change the spectrogram + contribution, without changing the one from the waveform, which will lead to worse performance. + I tried using the residual option in OpenUnmix Wiener implementation, but it didn't improve. + CaC on the other hand provides similar performance for hybrid, and works naturally with + hybrid models. + + This model also uses frequency embeddings are used to improve efficiency on convolutions + over the freq. axis, following [Isik et al. 2020] (https://arxiv.org/pdf/2008.04470.pdf). + + Unlike classic Demucs, there is no resampling here, and normalization is always applied. + """ + + @capture_init + def __init__( + self, + sources, + # Channels + audio_channels=2, + channels=48, + channels_time=None, + growth=2, + # STFT + nfft=4096, + wiener_iters=0, + end_iters=0, + wiener_residual=False, + cac=True, + # Main structure + depth=4, + rewrite=True, + # Frequency branch + multi_freqs=None, + multi_freqs_depth=3, + freq_emb=0.2, + emb_scale=10, + emb_smooth=True, + # Convolutions + kernel_size=8, + time_stride=2, + stride=4, + context=1, + context_enc=0, + # Normalization + norm_starts=4, + norm_groups=4, + # DConv residual branch + dconv_mode=1, + dconv_depth=2, + dconv_comp=8, + dconv_init=1e-3, + # Before the Transformer + bottom_channels=0, + # Transformer + t_layers=5, + t_emb="sin", + t_hidden_scale=4.0, + t_heads=8, + t_dropout=0.0, + t_max_positions=10000, + t_norm_in=True, + t_norm_in_group=False, + t_group_norm=False, + t_norm_first=True, + t_norm_out=True, + t_max_period=10000.0, + t_weight_decay=0.0, + t_lr=None, + t_layer_scale=True, + t_gelu=True, + t_weight_pos_embed=1.0, + t_sin_random_shift=0, + t_cape_mean_normalize=True, + t_cape_augment=True, + t_cape_glob_loc_scale=[5000.0, 1.0, 1.4], + t_sparse_self_attn=False, + t_sparse_cross_attn=False, + t_mask_type="diag", + t_mask_random_seed=42, + t_sparse_attn_window=500, + t_global_window=100, + t_sparsity=0.95, + t_auto_sparsity=False, + # ------ Particuliar parameters + t_cross_first=False, + # Weight init + rescale=0.1, + # Metadata + samplerate=44100, + segment=10, + use_train_segment=True, + ): + """ + Args: + sources (list[str]): list of source names. + audio_channels (int): input/output audio channels. + channels (int): initial number of hidden channels. + channels_time: if not None, use a different `channels` value for the time branch. + growth: increase the number of hidden channels by this factor at each layer. + nfft: number of fft bins. Note that changing this require careful computation of + various shape parameters and will not work out of the box for hybrid models. + wiener_iters: when using Wiener filtering, number of iterations at test time. + end_iters: same but at train time. For a hybrid model, must be equal to `wiener_iters`. + wiener_residual: add residual source before wiener filtering. + cac: uses complex as channels, i.e. complex numbers are 2 channels each + in input and output. no further processing is done before ISTFT. + depth (int): number of layers in the encoder and in the decoder. 
+ rewrite (bool): add 1x1 convolution to each layer. + multi_freqs: list of frequency ratios for splitting frequency bands with `MultiWrap`. + multi_freqs_depth: how many layers to wrap with `MultiWrap`. Only the outermost + layers will be wrapped. + freq_emb: add frequency embedding after the first frequency layer if > 0, + the actual value controls the weight of the embedding. + emb_scale: equivalent to scaling the embedding learning rate + emb_smooth: initialize the embedding with a smooth one (with respect to frequencies). + kernel_size: kernel_size for encoder and decoder layers. + stride: stride for encoder and decoder layers. + time_stride: stride for the final time layer, after the merge. + context: context for 1x1 conv in the decoder. + context_enc: context for 1x1 conv in the encoder. + norm_starts: layer at which group norm starts being used. + decoder layers are numbered in reverse order. + norm_groups: number of groups for group norm. + dconv_mode: if 1: dconv in encoder only, 2: decoder only, 3: both. + dconv_depth: depth of residual DConv branch. + dconv_comp: compression of DConv branch. + dconv_attn: adds attention layers in DConv branch starting at this layer. + dconv_lstm: adds a LSTM layer in DConv branch starting at this layer. + dconv_init: initial scale for the DConv branch LayerScale. + bottom_channels: if >0 it adds a linear layer (1x1 Conv) before and after the + transformer in order to change the number of channels + t_layers: number of layers in each branch (waveform and spec) of the transformer + t_emb: "sin", "cape" or "scaled" + t_hidden_scale: the hidden scale of the Feedforward parts of the transformer + for instance if C = 384 (the number of channels in the transformer) and + t_hidden_scale = 4.0 then the intermediate layer of the FFN has dimension + 384 * 4 = 1536 + t_heads: number of heads for the transformer + t_dropout: dropout in the transformer + t_max_positions: max_positions for the "scaled" positional embedding, only + useful if t_emb="scaled" + t_norm_in: (bool) norm before addinf positional embedding and getting into the + transformer layers + t_norm_in_group: (bool) if True while t_norm_in=True, the norm is on all the + timesteps (GroupNorm with group=1) + t_group_norm: (bool) if True, the norms of the Encoder Layers are on all the + timesteps (GroupNorm with group=1) + t_norm_first: (bool) if True the norm is before the attention and before the FFN + t_norm_out: (bool) if True, there is a GroupNorm (group=1) at the end of each layer + t_max_period: (float) denominator in the sinusoidal embedding expression + t_weight_decay: (float) weight decay for the transformer + t_lr: (float) specific learning rate for the transformer + t_layer_scale: (bool) Layer Scale for the transformer + t_gelu: (bool) activations of the transformer are GeLU if True, ReLU else + t_weight_pos_embed: (float) weighting of the positional embedding + t_cape_mean_normalize: (bool) if t_emb="cape", normalisation of positional embeddings + see: https://arxiv.org/abs/2106.03143 + t_cape_augment: (bool) if t_emb="cape", must be True during training and False + during the inference, see: https://arxiv.org/abs/2106.03143 + t_cape_glob_loc_scale: (list of 3 floats) if t_emb="cape", CAPE parameters + see: https://arxiv.org/abs/2106.03143 + t_sparse_self_attn: (bool) if True, the self attentions are sparse + t_sparse_cross_attn: (bool) if True, the cross-attentions are sparse (don't use it + unless you designed really specific masks) + t_mask_type: (str) can be "diag", "jmask", 
"random", "global" or any combination + with '_' between: i.e. "diag_jmask_random" (note that this is permutation + invariant i.e. "diag_jmask_random" is equivalent to "jmask_random_diag") + t_mask_random_seed: (int) if "random" is in t_mask_type, controls the seed + that generated the random part of the mask + t_sparse_attn_window: (int) if "diag" is in t_mask_type, for a query (i), and + a key (j), the mask is True id |i-j|<=t_sparse_attn_window + t_global_window: (int) if "global" is in t_mask_type, mask[:t_global_window, :] + and mask[:, :t_global_window] will be True + t_sparsity: (float) if "random" is in t_mask_type, t_sparsity is the sparsity + level of the random part of the mask. + t_cross_first: (bool) if True cross attention is the first layer of the + transformer (False seems to be better) + rescale: weight rescaling trick + use_train_segment: (bool) if True, the actual size that is used during the + training is used during inference. + """ + super().__init__() + self.cac = cac + self.wiener_residual = wiener_residual + self.audio_channels = audio_channels + self.sources = sources + self.kernel_size = kernel_size + self.context = context + self.stride = stride + self.depth = depth + self.bottom_channels = bottom_channels + self.channels = channels + self.samplerate = samplerate + self.segment = segment + self.use_train_segment = use_train_segment + self.nfft = nfft + self.hop_length = nfft // 4 + self.wiener_iters = wiener_iters + self.end_iters = end_iters + self.freq_emb = None + assert wiener_iters == end_iters + + self.encoder = nn.ModuleList() + self.decoder = nn.ModuleList() + + self.tencoder = nn.ModuleList() + self.tdecoder = nn.ModuleList() + + chin = audio_channels + chin_z = chin # number of channels for the freq branch + if self.cac: + chin_z *= 2 + chout = channels_time or channels + chout_z = channels + freqs = nfft // 2 + + for index in range(depth): + norm = index >= norm_starts + freq = freqs > 1 + stri = stride + ker = kernel_size + if not freq: + assert freqs == 1 + ker = time_stride * 2 + stri = time_stride + + pad = True + last_freq = False + if freq and freqs <= kernel_size: + ker = freqs + pad = False + last_freq = True + + kw = { + "kernel_size": ker, + "stride": stri, + "freq": freq, + "pad": pad, + "norm": norm, + "rewrite": rewrite, + "norm_groups": norm_groups, + "dconv_kw": { + "depth": dconv_depth, + "compress": dconv_comp, + "init": dconv_init, + "gelu": True, + }, + } + kwt = dict(kw) + kwt["freq"] = 0 + kwt["kernel_size"] = kernel_size + kwt["stride"] = stride + kwt["pad"] = True + kw_dec = dict(kw) + multi = False + if multi_freqs and index < multi_freqs_depth: + multi = True + kw_dec["context_freq"] = False + + if last_freq: + chout_z = max(chout, chout_z) + chout = chout_z + + enc = HEncLayer( + chin_z, chout_z, dconv=dconv_mode & 1, context=context_enc, **kw + ) + if freq: + tenc = HEncLayer( + chin, + chout, + dconv=dconv_mode & 1, + context=context_enc, + empty=last_freq, + **kwt + ) + self.tencoder.append(tenc) + + if multi: + enc = MultiWrap(enc, multi_freqs) + self.encoder.append(enc) + if index == 0: + chin = self.audio_channels * len(self.sources) + chin_z = chin + if self.cac: + chin_z *= 2 + dec = HDecLayer( + chout_z, + chin_z, + dconv=dconv_mode & 2, + last=index == 0, + context=context, + **kw_dec + ) + if multi: + dec = MultiWrap(dec, multi_freqs) + if freq: + tdec = HDecLayer( + chout, + chin, + dconv=dconv_mode & 2, + empty=last_freq, + last=index == 0, + context=context, + **kwt + ) + self.tdecoder.insert(0, tdec) + 
self.decoder.insert(0, dec) + + chin = chout + chin_z = chout_z + chout = int(growth * chout) + chout_z = int(growth * chout_z) + if freq: + if freqs <= kernel_size: + freqs = 1 + else: + freqs //= stride + if index == 0 and freq_emb: + self.freq_emb = ScaledEmbedding( + freqs, chin_z, smooth=emb_smooth, scale=emb_scale + ) + self.freq_emb_scale = freq_emb + + if rescale: + rescale_module(self, reference=rescale) + + transformer_channels = channels * growth ** (depth - 1) + if bottom_channels: + self.channel_upsampler = nn.Conv1d(transformer_channels, bottom_channels, 1) + self.channel_downsampler = nn.Conv1d( + bottom_channels, transformer_channels, 1 + ) + self.channel_upsampler_t = nn.Conv1d( + transformer_channels, bottom_channels, 1 + ) + self.channel_downsampler_t = nn.Conv1d( + bottom_channels, transformer_channels, 1 + ) + + transformer_channels = bottom_channels + + if t_layers > 0: + self.crosstransformer = CrossTransformerEncoder( + dim=transformer_channels, + emb=t_emb, + hidden_scale=t_hidden_scale, + num_heads=t_heads, + num_layers=t_layers, + cross_first=t_cross_first, + dropout=t_dropout, + max_positions=t_max_positions, + norm_in=t_norm_in, + norm_in_group=t_norm_in_group, + group_norm=t_group_norm, + norm_first=t_norm_first, + norm_out=t_norm_out, + max_period=t_max_period, + weight_decay=t_weight_decay, + lr=t_lr, + layer_scale=t_layer_scale, + gelu=t_gelu, + sin_random_shift=t_sin_random_shift, + weight_pos_embed=t_weight_pos_embed, + cape_mean_normalize=t_cape_mean_normalize, + cape_augment=t_cape_augment, + cape_glob_loc_scale=t_cape_glob_loc_scale, + sparse_self_attn=t_sparse_self_attn, + sparse_cross_attn=t_sparse_cross_attn, + mask_type=t_mask_type, + mask_random_seed=t_mask_random_seed, + sparse_attn_window=t_sparse_attn_window, + global_window=t_global_window, + sparsity=t_sparsity, + auto_sparsity=t_auto_sparsity, + ) + else: + self.crosstransformer = None + + def _spec(self, x): + hl = self.hop_length + nfft = self.nfft + x0 = x # noqa + + # We re-pad the signal in order to keep the property + # that the size of the output is exactly the size of the input + # divided by the stride (here hop_length), when divisible. + # This is achieved by padding by 1/4th of the kernel size (here nfft). + # which is not supported by torch.stft. + # Having all convolution operations follow this convention allow to easily + # align the time and frequency branches later on. + assert hl == nfft // 4 + le = int(math.ceil(x.shape[-1] / hl)) + pad = hl // 2 * 3 + x = pad1d(x, (pad, pad + le * hl - x.shape[-1]), mode="reflect") + + z = spectro(x, nfft, hl)[..., :-1, :] + assert z.shape[-1] == le + 4, (z.shape, x.shape, le) + z = z[..., 2: 2 + le] + return z + + def _ispec(self, z, length=None, scale=0): + hl = self.hop_length // (4**scale) + z = F.pad(z, (0, 0, 0, 1)) + z = F.pad(z, (2, 2)) + pad = hl // 2 * 3 + le = hl * int(math.ceil(length / hl)) + 2 * pad + x = ispectro(z, hl, length=le) + x = x[..., pad: pad + length] + return x + + def _magnitude(self, z): + # return the magnitude of the spectrogram, except when cac is True, + # in which case we just move the complex dimension to the channel one. + if self.cac: + B, C, Fr, T = z.shape + m = torch.view_as_real(z).permute(0, 1, 4, 2, 3) + m = m.reshape(B, C * 2, Fr, T) + else: + m = z.abs() + return m + + def _mask(self, z, m): + # Apply masking given the mixture spectrogram `z` and the estimated mask `m`. + # If `cac` is True, `m` is actually a full spectrogram and `z` is ignored. 
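+        # With CaC the network predicts the complex spectrogram directly, so the
+        # doubled channel dimension is simply folded back into (real, imag) pairs.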
+ niters = self.wiener_iters + if self.cac: + B, S, C, Fr, T = m.shape + out = m.view(B, S, -1, 2, Fr, T).permute(0, 1, 2, 4, 5, 3) + out = torch.view_as_complex(out.contiguous()) + return out + if self.training: + niters = self.end_iters + if niters < 0: + z = z[:, None] + return z / (1e-8 + z.abs()) * m + else: + return self._wiener(m, z, niters) + + def _wiener(self, mag_out, mix_stft, niters): + # apply wiener filtering from OpenUnmix. + init = mix_stft.dtype + wiener_win_len = 300 + residual = self.wiener_residual + + B, S, C, Fq, T = mag_out.shape + mag_out = mag_out.permute(0, 4, 3, 2, 1) + mix_stft = torch.view_as_real(mix_stft.permute(0, 3, 2, 1)) + + outs = [] + for sample in range(B): + pos = 0 + out = [] + for pos in range(0, T, wiener_win_len): + frame = slice(pos, pos + wiener_win_len) + z_out = wiener( + mag_out[sample, frame], + mix_stft[sample, frame], + niters, + residual=residual, + ) + out.append(z_out.transpose(-1, -2)) + outs.append(torch.cat(out, dim=0)) + out = torch.view_as_complex(torch.stack(outs, 0)) + out = out.permute(0, 4, 3, 2, 1).contiguous() + if residual: + out = out[:, :-1] + assert list(out.shape) == [B, S, C, Fq, T] + return out.to(init) + + def valid_length(self, length: int): + """ + Return a length that is appropriate for evaluation. + In our case, always return the training length, unless + it is smaller than the given length, in which case this + raises an error. + """ + if not self.use_train_segment: + return length + training_length = int(self.segment * self.samplerate) + if training_length < length: + raise ValueError( + f"Given length {length} is longer than " + f"training length {training_length}") + return training_length + + def forward(self, mix): + length = mix.shape[-1] + length_pre_pad = None + if self.use_train_segment: + if self.training: + self.segment = Fraction(mix.shape[-1], self.samplerate) + else: + training_length = int(self.segment * self.samplerate) + if mix.shape[-1] < training_length: + length_pre_pad = mix.shape[-1] + mix = F.pad(mix, (0, training_length - length_pre_pad)) + z = self._spec(mix) + mag = self._magnitude(z) + x = mag + + B, C, Fq, T = x.shape + + # unlike previous Demucs, we always normalize because it is easier. + mean = x.mean(dim=(1, 2, 3), keepdim=True) + std = x.std(dim=(1, 2, 3), keepdim=True) + x = (x - mean) / (1e-5 + std) + # x will be the freq. branch input. + + # Prepare the time branch input. + xt = mix + meant = xt.mean(dim=(1, 2), keepdim=True) + stdt = xt.std(dim=(1, 2), keepdim=True) + xt = (xt - meant) / (1e-5 + stdt) + + # okay, this is a giant mess I know... + saved = [] # skip connections, freq. + saved_t = [] # skip connections, time. + lengths = [] # saved lengths to properly remove padding, freq branch. + lengths_t = [] # saved lengths for time branch. + for idx, encode in enumerate(self.encoder): + lengths.append(x.shape[-1]) + inject = None + if idx < len(self.tencoder): + # we have not yet merged branches. + lengths_t.append(xt.shape[-1]) + tenc = self.tencoder[idx] + xt = tenc(xt) + if not tenc.empty: + # save for skip connection + saved_t.append(xt) + else: + # tenc contains just the first conv., so that now time and freq. + # branches have the same shape and can be merged. + inject = xt + x = encode(x, inject) + if idx == 0 and self.freq_emb is not None: + # add frequency embedding to allow for non equivariant convolutions + # over the frequency axis. 
+ frs = torch.arange(x.shape[-2], device=x.device) + emb = self.freq_emb(frs).t()[None, :, :, None].expand_as(x) + x = x + self.freq_emb_scale * emb + + saved.append(x) + if self.crosstransformer: + if self.bottom_channels: + b, c, f, t = x.shape + x = rearrange(x, "b c f t-> b c (f t)") + x = self.channel_upsampler(x) + x = rearrange(x, "b c (f t)-> b c f t", f=f) + xt = self.channel_upsampler_t(xt) + + x, xt = self.crosstransformer(x, xt) + + if self.bottom_channels: + x = rearrange(x, "b c f t-> b c (f t)") + x = self.channel_downsampler(x) + x = rearrange(x, "b c (f t)-> b c f t", f=f) + xt = self.channel_downsampler_t(xt) + + for idx, decode in enumerate(self.decoder): + skip = saved.pop(-1) + x, pre = decode(x, skip, lengths.pop(-1)) + # `pre` contains the output just before final transposed convolution, + # which is used when the freq. and time branch separate. + + offset = self.depth - len(self.tdecoder) + if idx >= offset: + tdec = self.tdecoder[idx - offset] + length_t = lengths_t.pop(-1) + if tdec.empty: + assert pre.shape[2] == 1, pre.shape + pre = pre[:, :, 0] + xt, _ = tdec(pre, None, length_t) + else: + skip = saved_t.pop(-1) + xt, _ = tdec(xt, skip, length_t) + + # Let's make sure we used all stored skip connections. + assert len(saved) == 0 + assert len(lengths_t) == 0 + assert len(saved_t) == 0 + + S = len(self.sources) + x = x.view(B, S, -1, Fq, T) + x = x * std[:, None] + mean[:, None] + + zout = self._mask(z, x) + if self.use_train_segment: + if self.training: + x = self._ispec(zout, length) + else: + x = self._ispec(zout, training_length) + else: + x = self._ispec(zout, length) + + if self.use_train_segment: + if self.training: + xt = xt.view(B, S, -1, length) + else: + xt = xt.view(B, S, -1, training_length) + else: + xt = xt.view(B, S, -1, length) + xt = xt * stdt[:, None] + meant[:, None] + x = xt + x + if length_pre_pad: + x = x[..., :length_pre_pad] + return x diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/pretrained.py b/AutoCoverTool/ref/music_remover/demucs/demucs/pretrained.py new file mode 100644 index 0000000..bb0ab00 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/pretrained.py @@ -0,0 +1,82 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +"""Loading pretrained models. +""" + +import logging +from pathlib import Path +import typing as tp + +from dora.log import fatal + +from .hdemucs import HDemucs +from .repo import RemoteRepo, LocalRepo, ModelOnlyRepo, BagOnlyRepo, AnyModelRepo, ModelLoadingError # noqa + +logger = logging.getLogger(__name__) +ROOT_URL = "https://dl.fbaipublicfiles.com/demucs/" +REMOTE_ROOT = Path(__file__).parent / 'remote' + +SOURCES = ["drums", "bass", "other", "vocals"] + + +def demucs_unittest(): + model = HDemucs(channels=4, sources=SOURCES) + return model + + +def add_model_flags(parser): + group = parser.add_mutually_exclusive_group(required=False) + group.add_argument("-s", "--sig", help="Locally trained XP signature.") + group.add_argument("-n", "--name", default="mdx_extra_q", + help="Pretrained model name or signature. 
Default is mdx_extra_q.") + parser.add_argument("--repo", type=Path, + help="Folder containing all pre-trained models for use with -n.") + + +def _parse_remote_files(remote_file_list) -> tp.Dict[str, str]: + root: str = '' + models: tp.Dict[str, str] = {} + for line in remote_file_list.read_text().split('\n'): + line = line.strip() + if line.startswith('#'): + continue + elif line.startswith('root:'): + root = line.split(':', 1)[1].strip() + else: + sig = line.split('-', 1)[0] + assert sig not in models + models[sig] = ROOT_URL + root + line + return models + + +def get_model(name: str, + repo: tp.Optional[Path] = None): + """`name` must be a bag of models name or a pretrained signature + from the remote AWS model repo or the specified local repo if `repo` is not None. + """ + if name == 'demucs_unittest': + return demucs_unittest() + model_repo: ModelOnlyRepo + if repo is None: + models = _parse_remote_files(REMOTE_ROOT / 'files.txt') + model_repo = RemoteRepo(models) + bag_repo = BagOnlyRepo(REMOTE_ROOT, model_repo) + else: + if not repo.is_dir(): + fatal(f"{repo} must exist and be a directory.") + model_repo = LocalRepo(repo) + bag_repo = BagOnlyRepo(repo, model_repo) + any_repo = AnyModelRepo(model_repo, bag_repo) + model = any_repo.get_model(name) + model.eval() + return model + + +def get_model_from_args(args): + """ + Load local model package or pre-trained model. + """ + return get_model(name=args.name, repo=args.repo) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/py.typed b/AutoCoverTool/ref/music_remover/demucs/demucs/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/files.txt b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/files.txt new file mode 100644 index 0000000..7cb7c1a --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/files.txt @@ -0,0 +1,30 @@ +# MDX Models +root: mdx_final/ +0d19c1c6-0f06f20e.th +5d2d6c55-db83574e.th +7d865c68-3d5dd56b.th +7ecf8ec1-70f50cc9.th +a1d90b5c-ae9d2452.th +c511e2ab-fe698775.th +cfa93e08-61801ae1.th +e51eebcc-c1b80bdd.th +6b9c2ca1-3fd82607.th +b72baf4e-8778635e.th +42e558d4-196e0e1b.th +305bc58f-18378783.th +14fc6a69-a89dd0ee.th +464b36d7-e5a9386e.th +7fd6ef75-a905dd85.th +83fc094f-4a16d450.th +1ef250f1-592467ce.th +902315c2-b39ce9c9.th +9a6b4851-03af0aa6.th +fa0cb7f9-100d8bf4.th +# Hybrid Transformer models +root: hybrid_transformer/ +955717e8-8726e21a.th +f7e0c4bc-ba3fe64a.th +d12395a8-e57c48e6.th +92cfc3b6-ef3bcb9c.th +04573f0d-f3cf25b2.th +75fc33f5-1941ce65.th \ No newline at end of file diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/hdemucs_mmi.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/hdemucs_mmi.yaml new file mode 100644 index 0000000..0ea0891 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/hdemucs_mmi.yaml @@ -0,0 +1,2 @@ +models: ['75fc33f5'] +segment: 44 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/htdemucs.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/htdemucs.yaml new file mode 100644 index 0000000..0d5f208 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/htdemucs.yaml @@ -0,0 +1 @@ +models: ['955717e8'] diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/htdemucs_ft.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/htdemucs_ft.yaml new file mode 100644 index 0000000..ba5c69c --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/htdemucs_ft.yaml @@ -0,0 +1,7 @@ 
+models: ['f7e0c4bc', 'd12395a8', '92cfc3b6', '04573f0d'] +weights: [ + [1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], +] \ No newline at end of file diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx.yaml new file mode 100644 index 0000000..4e81a50 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx.yaml @@ -0,0 +1,8 @@ +models: ['0d19c1c6', '7ecf8ec1', 'c511e2ab', '7d865c68'] +weights: [ + [1., 1., 0., 0.], + [0., 1., 0., 0.], + [1., 0., 1., 1.], + [1., 0., 1., 1.], +] +segment: 44 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx_extra.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx_extra.yaml new file mode 100644 index 0000000..847bf66 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx_extra.yaml @@ -0,0 +1,2 @@ +models: ['e51eebcc', 'a1d90b5c', '5d2d6c55', 'cfa93e08'] +segment: 44 \ No newline at end of file diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx_extra_q.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx_extra_q.yaml new file mode 100644 index 0000000..87702bc --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx_extra_q.yaml @@ -0,0 +1,2 @@ +models: ['83fc094f', '464b36d7', '14fc6a69', '7fd6ef75'] +segment: 44 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx_q.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx_q.yaml new file mode 100644 index 0000000..827d2c6 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/mdx_q.yaml @@ -0,0 +1,8 @@ +models: ['6b9c2ca1', 'b72baf4e', '42e558d4', '305bc58f'] +weights: [ + [1., 1., 0., 0.], + [0., 1., 0., 0.], + [1., 0., 1., 1.], + [1., 0., 1., 1.], +] +segment: 44 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/repro_mdx_a.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/repro_mdx_a.yaml new file mode 100644 index 0000000..691abc2 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/repro_mdx_a.yaml @@ -0,0 +1,2 @@ +models: ['9a6b4851', '1ef250f1', 'fa0cb7f9', '902315c2'] +segment: 44 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/repro_mdx_a_hybrid_only.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/repro_mdx_a_hybrid_only.yaml new file mode 100644 index 0000000..78eb8e0 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/repro_mdx_a_hybrid_only.yaml @@ -0,0 +1,2 @@ +models: ['fa0cb7f9', '902315c2', 'fa0cb7f9', '902315c2'] +segment: 44 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/remote/repro_mdx_a_time_only.yaml b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/repro_mdx_a_time_only.yaml new file mode 100644 index 0000000..d5d16ea --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/remote/repro_mdx_a_time_only.yaml @@ -0,0 +1,2 @@ +models: ['9a6b4851', '9a6b4851', '1ef250f1', '1ef250f1'] +segment: 44 diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/repitch.py b/AutoCoverTool/ref/music_remover/demucs/demucs/repitch.py new file mode 100644 index 0000000..d23d2bc --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/repitch.py @@ -0,0 +1,86 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. 
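+# Editorial usage sketch (values mirror the defaults below; the external
+# `soundstretch` binary must be installed):
+#
+#   dataset = RepitchedWrapper(dataset, proba=0.2, max_pitch=2, vocals=[3])
+#   streams = dataset[0]  # occasionally pitch/tempo shifted, cropped to a common length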
+"""Utility for on the fly pitch/tempo change for data augmentation.""" + +import random +import subprocess as sp +import tempfile + +import torch +import torchaudio as ta + +from .audio import save_audio + + +class RepitchedWrapper: + """ + Wrap a dataset to apply online change of pitch / tempo. + """ + def __init__(self, dataset, proba=0.2, max_pitch=2, max_tempo=12, + tempo_std=5, vocals=[3], same=True): + self.dataset = dataset + self.proba = proba + self.max_pitch = max_pitch + self.max_tempo = max_tempo + self.tempo_std = tempo_std + self.same = same + self.vocals = vocals + + def __len__(self): + return len(self.dataset) + + def __getitem__(self, index): + streams = self.dataset[index] + in_length = streams.shape[-1] + out_length = int((1 - 0.01 * self.max_tempo) * in_length) + + if random.random() < self.proba: + outs = [] + for idx, stream in enumerate(streams): + if idx == 0 or not self.same: + delta_pitch = random.randint(-self.max_pitch, self.max_pitch) + delta_tempo = random.gauss(0, self.tempo_std) + delta_tempo = min(max(-self.max_tempo, delta_tempo), self.max_tempo) + stream = repitch( + stream, + delta_pitch, + delta_tempo, + voice=idx in self.vocals) + outs.append(stream[:, :out_length]) + streams = torch.stack(outs) + else: + streams = streams[..., :out_length] + return streams + + +def repitch(wav, pitch, tempo, voice=False, quick=False, samplerate=44100): + """ + tempo is a relative delta in percentage, so tempo=10 means tempo at 110%! + pitch is in semi tones. + Requires `soundstretch` to be installed, see + https://www.surina.net/soundtouch/soundstretch.html + """ + infile = tempfile.NamedTemporaryFile(suffix=".wav") + outfile = tempfile.NamedTemporaryFile(suffix=".wav") + save_audio(wav, infile.name, samplerate, clip='clamp') + command = [ + "soundstretch", + infile.name, + outfile.name, + f"-pitch={pitch}", + f"-tempo={tempo:.6f}", + ] + if quick: + command += ["-quick"] + if voice: + command += ["-speech"] + try: + sp.run(command, capture_output=True, check=True) + except sp.CalledProcessError as error: + raise RuntimeError(f"Could not change bpm because {error.stderr.decode('utf-8')}") + wav, sr = ta.load(outfile.name) + assert sr == samplerate + return wav diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/repo.py b/AutoCoverTool/ref/music_remover/demucs/demucs/repo.py new file mode 100644 index 0000000..a7645be --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/repo.py @@ -0,0 +1,148 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +"""Represents a model repository, including pre-trained models and bags of models. +A repo can either be the main remote repository stored in AWS, or a local repository +with your own models. 
+""" + +from hashlib import sha256 +from pathlib import Path +import typing as tp + +import torch +import yaml + +from .apply import BagOfModels, Model +from .states import load_model + + +AnyModel = tp.Union[Model, BagOfModels] + + +class ModelLoadingError(RuntimeError): + pass + + +def check_checksum(path: Path, checksum: str): + sha = sha256() + with open(path, 'rb') as file: + while True: + buf = file.read(2**20) + if not buf: + break + sha.update(buf) + actual_checksum = sha.hexdigest()[:len(checksum)] + if actual_checksum != checksum: + raise ModelLoadingError(f'Invalid checksum for file {path}, ' + f'expected {checksum} but got {actual_checksum}') + + +class ModelOnlyRepo: + """Base class for all model only repos. + """ + def has_model(self, sig: str) -> bool: + raise NotImplementedError() + + def get_model(self, sig: str) -> Model: + raise NotImplementedError() + + +class RemoteRepo(ModelOnlyRepo): + def __init__(self, models: tp.Dict[str, str]): + self._models = models + + def has_model(self, sig: str) -> bool: + return sig in self._models + + def get_model(self, sig: str) -> Model: + try: + url = self._models[sig] + except KeyError: + raise ModelLoadingError(f'Could not find a pre-trained model with signature {sig}.') + pkg = torch.hub.load_state_dict_from_url( + url, map_location='cpu', check_hash=True) # type: ignore + return load_model(pkg) + + +class LocalRepo(ModelOnlyRepo): + def __init__(self, root: Path): + self.root = root + self.scan() + + def scan(self): + self._models = {} + self._checksums = {} + for file in self.root.iterdir(): + if file.suffix == '.th': + if '-' in file.stem: + xp_sig, checksum = file.stem.split('-') + self._checksums[xp_sig] = checksum + else: + xp_sig = file.stem + if xp_sig in self._models: + raise ModelLoadingError( + f'Duplicate pre-trained model exist for signature {xp_sig}. ' + 'Please delete all but one.') + self._models[xp_sig] = file + + def has_model(self, sig: str) -> bool: + return sig in self._models + + def get_model(self, sig: str) -> Model: + try: + file = self._models[sig] + except KeyError: + raise ModelLoadingError(f'Could not find pre-trained model with signature {sig}.') + if sig in self._checksums: + check_checksum(file, self._checksums[sig]) + return load_model(file) + + +class BagOnlyRepo: + """Handles only YAML files containing bag of models, leaving the actual + model loading to some Repo. 
+ """ + def __init__(self, root: Path, model_repo: ModelOnlyRepo): + self.root = root + self.model_repo = model_repo + self.scan() + + def scan(self): + self._bags = {} + for file in self.root.iterdir(): + if file.suffix == '.yaml': + self._bags[file.stem] = file + + def has_model(self, name: str) -> bool: + return name in self._bags + + def get_model(self, name: str) -> BagOfModels: + try: + yaml_file = self._bags[name] + except KeyError: + raise ModelLoadingError(f'{name} is neither a single pre-trained model or ' + 'a bag of models.') + bag = yaml.safe_load(open(yaml_file)) + signatures = bag['models'] + models = [self.model_repo.get_model(sig) for sig in signatures] + weights = bag.get('weights') + segment = bag.get('segment') + return BagOfModels(models, weights, segment) + + +class AnyModelRepo: + def __init__(self, model_repo: ModelOnlyRepo, bag_repo: BagOnlyRepo): + self.model_repo = model_repo + self.bag_repo = bag_repo + + def has_model(self, name_or_sig: str) -> bool: + return self.model_repo.has_model(name_or_sig) or self.bag_repo.has_model(name_or_sig) + + def get_model(self, name_or_sig: str) -> AnyModel: + if self.model_repo.has_model(name_or_sig): + return self.model_repo.get_model(name_or_sig) + else: + return self.bag_repo.get_model(name_or_sig) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/separate.py b/AutoCoverTool/ref/music_remover/demucs/demucs/separate.py new file mode 100644 index 0000000..1560a44 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/separate.py @@ -0,0 +1,217 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +import time +import argparse +import sys +from pathlib import Path +import subprocess + +from dora.log import fatal +import torch as th +import torchaudio as ta + +from .apply import apply_model, BagOfModels +from .audio import AudioFile, convert_audio, save_audio +from .pretrained import get_model_from_args, add_model_flags, ModelLoadingError + + +def load_track(track, audio_channels, samplerate): + errors = {} + wav = None + + try: + wav = AudioFile(track).read( + streams=0, + samplerate=samplerate, + channels=audio_channels) + except FileNotFoundError: + errors['ffmpeg'] = 'FFmpeg is not installed.' + except subprocess.CalledProcessError: + errors['ffmpeg'] = 'FFmpeg could not read the file.' + + if wav is None: + try: + wav, sr = ta.load(str(track)) + except RuntimeError as err: + errors['torchaudio'] = err.args[0] + else: + wav = convert_audio(wav, sr, samplerate, audio_channels) + + if wav is None: + print(f"Could not load file {track}. " + "Maybe it is not a supported file format? ") + for backend, error in errors.items(): + print(f"When trying to load using {backend}, got the following error: {error}") + sys.exit(1) + return wav + + +def main(): + parser = argparse.ArgumentParser("demucs.separate", + description="Separate the sources for the given tracks") + parser.add_argument("tracks", nargs='+', type=Path, default=[], help='Path to tracks') + add_model_flags(parser) + parser.add_argument("-v", "--verbose", action="store_true") + parser.add_argument("-o", + "--out", + type=Path, + default=Path("separated"), + help="Folder where to put extracted tracks. A subfolder " + "with the model name will be created.") + parser.add_argument("--filename", + default="{track}/{stem}.{ext}", + help="Set the name of output file. 
\n" + 'Use "{track}", "{trackext}", "{stem}", "{ext}" to use ' + "variables of track name without extension, track extension, " + "stem name and default output file extension. \n" + 'Default is "{track}/{stem}.{ext}".') + parser.add_argument("-d", + "--device", + default="cuda" if th.cuda.is_available() else "cpu", + help="Device to use, default is cuda if available else cpu") + parser.add_argument("--shifts", + default=1, + type=int, + help="Number of random shifts for equivariant stabilization." + "Increase separation time but improves quality for Demucs. 10 was used " + "in the original paper.") + parser.add_argument("--overlap", + default=0.25, + type=float, + help="Overlap between the splits.") + split_group = parser.add_mutually_exclusive_group() + split_group.add_argument("--no-split", + action="store_false", + dest="split", + default=True, + help="Doesn't split audio in chunks. " + "This can use large amounts of memory.") + split_group.add_argument("--segment", type=int, + help="Set split size of each chunk. " + "This can help save memory of graphic card. ") + parser.add_argument("--two-stems", + dest="stem", metavar="STEM", + help="Only separate audio into {STEM} and no_{STEM}. ") + group = parser.add_mutually_exclusive_group() + group.add_argument("--int24", action="store_true", + help="Save wav output as 24 bits wav.") + group.add_argument("--float32", action="store_true", + help="Save wav output as float32 (2x bigger).") + parser.add_argument("--clip-mode", default="rescale", choices=["rescale", "clamp"], + help="Strategy for avoiding clipping: rescaling entire signal " + "if necessary (rescale) or hard clipping (clamp).") + parser.add_argument("--mp3", action="store_true", + help="Convert the output wavs to mp3.") + parser.add_argument("--mp3-bitrate", + default=320, + type=int, + help="Bitrate of converted mp3.") + parser.add_argument("-j", "--jobs", + default=0, + type=int, + help="Number of jobs. This can increase memory usage but will " + "be much faster when multiple cores are available.") + + args = parser.parse_args() + print(args) + + st = time.time() + try: + model = get_model_from_args(args) + except ModelLoadingError as error: + fatal(error.args[0]) + + print("load data spent time={}".format(time.time() - st)) + st = time.time() + + if args.segment is not None and args.segment < 8: + fatal("Segment must greater than 8. ") + + if '..' in args.filename.replace("\\", "/").split("/"): + fatal('".." must not appear in filename. ') + + if isinstance(model, BagOfModels): + print(f"Selected model is a bag of {len(model.models)} models. " + "You will see that many progress bars per track.") + if args.segment is not None: + for sub in model.models: + sub.segment = args.segment + else: + if args.segment is not None: + model.segment = args.segment + + model.cpu() + model.eval() + + if args.stem is not None and args.stem not in model.sources: + fatal( + 'error: stem "{stem}" is not in selected model. STEM must be one of {sources}.'.format( + stem=args.stem, sources=', '.join(model.sources))) + out = args.out / args.name + out.mkdir(parents=True, exist_ok=True) + print(f"Separated tracks will be stored in {out.resolve()}") + for track in args.tracks: + if not track.exists(): + print( + f"File {track} does not exist. 
+                "please try again after surrounding the entire path with quotes \"\".",
+                file=sys.stderr)
+            continue
+        print(f"Separating track {track}")
+        st = time.time()
+        wav = load_track(track, model.audio_channels, model.samplerate)
+        print("load track spent = {}".format(time.time() - st))
+
+        st = time.time()
+        ref = wav.mean(0)
+        wav = (wav - ref.mean()) / ref.std()
+        # wav[None] adds a batch dimension: [2, xxx] -> [1, 2, xxx]
+        sources = apply_model(model, wav[None], device=args.device, shifts=args.shifts,
+                              split=args.split, overlap=args.overlap, progress=True,
+                              num_workers=args.jobs)[0]
+        sources = sources * ref.std() + ref.mean()
+        print("apply_model spent = {}".format(time.time() - st))
+
+        if args.mp3:
+            ext = "mp3"
+        else:
+            ext = "wav"
+        kwargs = {
+            'samplerate': model.samplerate,
+            'bitrate': args.mp3_bitrate,
+            'clip': args.clip_mode,
+            'as_float': args.float32,
+            'bits_per_sample': 24 if args.int24 else 16,
+        }
+        if args.stem is None:
+            for source, name in zip(sources, model.sources):
+                st = time.time()
+                stem = out / args.filename.format(track=track.name.rsplit(".", 1)[0],
+                                                  trackext=track.name.rsplit(".", 1)[-1],
+                                                  stem=name, ext=ext)
+                stem.parent.mkdir(parents=True, exist_ok=True)
+                save_audio(source, str(stem), **kwargs)
+                print("save_audio spent = {}".format(time.time() - st))
+        else:
+            sources = list(sources)
+            stem = out / args.filename.format(track=track.name.rsplit(".", 1)[0],
+                                              trackext=track.name.rsplit(".", 1)[-1],
+                                              stem=args.stem, ext=ext)
+            stem.parent.mkdir(parents=True, exist_ok=True)
+            save_audio(sources.pop(model.sources.index(args.stem)), str(stem), **kwargs)
+            # Warning: after popping the stem, the selected stem is no longer in the list 'sources'
+            other_stem = th.zeros_like(sources[0])
+            for i in sources:
+                other_stem += i
+            stem = out / args.filename.format(track=track.name.rsplit(".", 1)[0],
+                                              trackext=track.name.rsplit(".", 1)[-1],
+                                              stem="no_"+args.stem, ext=ext)
+            stem.parent.mkdir(parents=True, exist_ok=True)
+            save_audio(other_stem, str(stem), **kwargs)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/solver.py b/AutoCoverTool/ref/music_remover/demucs/demucs/solver.py
new file mode 100644
index 0000000..ced824a
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/solver.py
@@ -0,0 +1,405 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+"""Main training loop."""
+
+import logging
+
+from dora import get_xp
+from dora.utils import write_and_rename
+from dora.log import LogProgress, bold
+import torch
+import torch.nn.functional as F
+
+from . import augment, distrib, states, pretrained
+from .apply import apply_model
+from .ema import ModelEMA
+from .evaluate import evaluate, new_sdr
+from .svd import svd_penalty
+from .utils import pull_metric, EMA
+
+logger = logging.getLogger(__name__)
+
+
+def _summary(metrics):
+    return " | ".join(f"{key.capitalize()}={val}" for key, val in metrics.items())
+
+
+class Solver(object):
+    def __init__(self, loaders, model, optimizer, args):
+        self.args = args
+        self.loaders = loaders
+
+        self.model = model
+        self.optimizer = optimizer
+        self.quantizer = states.get_quantizer(self.model, args.quant, self.optimizer)
+        self.dmodel = distrib.wrap(model)
+        self.device = next(iter(self.model.parameters())).device
+
+        # Exponential moving average of the model, either updated every batch or epoch.
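+        # (Illustrative only: e.g. args.ema.batch = [0.9995] would create a single
+        # per-batch EMA copy; that value is an example, not a preset.)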
+ # The best model from all the EMAs and the original one is kept based on the valid + # loss for the final best model. + self.emas = {'batch': [], 'epoch': []} + for kind in self.emas.keys(): + decays = getattr(args.ema, kind) + device = self.device if kind == 'batch' else 'cpu' + if decays: + for decay in decays: + self.emas[kind].append(ModelEMA(self.model, decay, device=device)) + + # data augment + augments = [augment.Shift(shift=int(args.dset.samplerate * args.dset.shift), + same=args.augment.shift_same)] + if args.augment.flip: + augments += [augment.FlipChannels(), augment.FlipSign()] + for aug in ['scale', 'remix']: + kw = getattr(args.augment, aug) + if kw.proba: + augments.append(getattr(augment, aug.capitalize())(**kw)) + self.augment = torch.nn.Sequential(*augments) + + xp = get_xp() + self.folder = xp.folder + # Checkpoints + self.checkpoint_file = xp.folder / 'checkpoint.th' + self.best_file = xp.folder / 'best.th' + logger.debug("Checkpoint will be saved to %s", self.checkpoint_file.resolve()) + self.best_state = None + self.best_changed = False + + self.link = xp.link + self.history = self.link.history + + self._reset() + + def _serialize(self, epoch): + package = {} + package['state'] = self.model.state_dict() + package['optimizer'] = self.optimizer.state_dict() + package['history'] = self.history + package['best_state'] = self.best_state + package['args'] = self.args + for kind, emas in self.emas.items(): + for k, ema in enumerate(emas): + package[f'ema_{kind}_{k}'] = ema.state_dict() + with write_and_rename(self.checkpoint_file) as tmp: + torch.save(package, tmp) + + save_every = self.args.save_every + if save_every and (epoch + 1) % save_every == 0 and epoch + 1 != self.args.epochs: + with write_and_rename(self.folder / f'checkpoint_{epoch + 1}.th') as tmp: + torch.save(package, tmp) + + if self.best_changed: + # Saving only the latest best model. 
+ with write_and_rename(self.best_file) as tmp: + package = states.serialize_model(self.model, self.args) + package['state'] = self.best_state + torch.save(package, tmp) + self.best_changed = False + + def _reset(self): + """Reset state of the solver, potentially using checkpoint.""" + if self.checkpoint_file.exists(): + logger.info(f'Loading checkpoint model: {self.checkpoint_file}') + package = torch.load(self.checkpoint_file, 'cpu') + self.model.load_state_dict(package['state']) + self.optimizer.load_state_dict(package['optimizer']) + self.history[:] = package['history'] + self.best_state = package['best_state'] + for kind, emas in self.emas.items(): + for k, ema in enumerate(emas): + ema.load_state_dict(package[f'ema_{kind}_{k}']) + elif self.args.continue_pretrained: + model = pretrained.get_model( + name=self.args.continue_pretrained, + repo=self.args.pretrained_repo) + self.model.load_state_dict(model.state_dict()) + elif self.args.continue_from: + name = 'checkpoint.th' + root = self.folder.parent + cf = root / str(self.args.continue_from) / name + logger.info("Loading from %s", cf) + package = torch.load(cf, 'cpu') + self.best_state = package['best_state'] + if self.args.continue_best: + self.model.load_state_dict(package['best_state'], strict=False) + else: + self.model.load_state_dict(package['state'], strict=False) + if self.args.continue_opt: + self.optimizer.load_state_dict(package['optimizer']) + + def _format_train(self, metrics: dict) -> dict: + """Formatting for train/valid metrics.""" + losses = { + 'loss': format(metrics['loss'], ".4f"), + 'reco': format(metrics['reco'], ".4f"), + } + if 'nsdr' in metrics: + losses['nsdr'] = format(metrics['nsdr'], ".3f") + if self.quantizer is not None: + losses['ms'] = format(metrics['ms'], ".2f") + if 'grad' in metrics: + losses['grad'] = format(metrics['grad'], ".4f") + if 'best' in metrics: + losses['best'] = format(metrics['best'], '.4f') + if 'bname' in metrics: + losses['bname'] = metrics['bname'] + if 'penalty' in metrics: + losses['penalty'] = format(metrics['penalty'], ".4f") + if 'hloss' in metrics: + losses['hloss'] = format(metrics['hloss'], ".4f") + return losses + + def _format_test(self, metrics: dict) -> dict: + """Formatting for test metrics.""" + losses = {} + if 'sdr' in metrics: + losses['sdr'] = format(metrics['sdr'], '.3f') + if 'nsdr' in metrics: + losses['nsdr'] = format(metrics['nsdr'], '.3f') + for source in self.model.sources: + key = f'sdr_{source}' + if key in metrics: + losses[key] = format(metrics[key], '.3f') + key = f'nsdr_{source}' + if key in metrics: + losses[key] = format(metrics[key], '.3f') + return losses + + def train(self): + # Optimizing the model + if self.history: + logger.info("Replaying metrics from previous run") + for epoch, metrics in enumerate(self.history): + formatted = self._format_train(metrics['train']) + logger.info( + bold(f'Train Summary | Epoch {epoch + 1} | {_summary(formatted)}')) + formatted = self._format_train(metrics['valid']) + logger.info( + bold(f'Valid Summary | Epoch {epoch + 1} | {_summary(formatted)}')) + if 'test' in metrics: + formatted = self._format_test(metrics['test']) + if formatted: + logger.info(bold(f"Test Summary | Epoch {epoch + 1} | {_summary(formatted)}")) + + epoch = 0 + for epoch in range(len(self.history), self.args.epochs): + # Train one epoch + self.model.train() # Turn on BatchNorm & Dropout + metrics = {} + logger.info('-' * 70) + logger.info("Training...") + metrics['train'] = self._run_one_epoch(epoch) + formatted = 
self._format_train(metrics['train']) + logger.info( + bold(f'Train Summary | Epoch {epoch + 1} | {_summary(formatted)}')) + + # Cross validation + logger.info('-' * 70) + logger.info('Cross validation...') + self.model.eval() # Turn off Batchnorm & Dropout + with torch.no_grad(): + valid = self._run_one_epoch(epoch, train=False) + bvalid = valid + bname = 'main' + state = states.copy_state(self.model.state_dict()) + metrics['valid'] = {} + metrics['valid']['main'] = valid + key = self.args.test.metric + for kind, emas in self.emas.items(): + for k, ema in enumerate(emas): + with ema.swap(): + valid = self._run_one_epoch(epoch, train=False) + name = f'ema_{kind}_{k}' + metrics['valid'][name] = valid + a = valid[key] + b = bvalid[key] + if key.startswith('nsdr'): + a = -a + b = -b + if a < b: + bvalid = valid + state = ema.state + bname = name + metrics['valid'].update(bvalid) + metrics['valid']['bname'] = bname + + valid_loss = metrics['valid'][key] + mets = pull_metric(self.link.history, f'valid.{key}') + [valid_loss] + if key.startswith('nsdr'): + best_loss = max(mets) + else: + best_loss = min(mets) + metrics['valid']['best'] = best_loss + if self.args.svd.penalty > 0: + kw = dict(self.args.svd) + kw.pop('penalty') + with torch.no_grad(): + penalty = svd_penalty(self.model, exact=True, **kw) + metrics['valid']['penalty'] = penalty + + formatted = self._format_train(metrics['valid']) + logger.info( + bold(f'Valid Summary | Epoch {epoch + 1} | {_summary(formatted)}')) + + # Save the best model + if valid_loss == best_loss or self.args.dset.train_valid: + logger.info(bold('New best valid loss %.4f'), valid_loss) + self.best_state = states.copy_state(state) + self.best_changed = True + + # Eval model every `test.every` epoch or on last epoch + should_eval = (epoch + 1) % self.args.test.every == 0 + is_last = epoch == self.args.epochs - 1 + # # Tries to detect divergence in a reliable way and finish job + # # not to waste compute. + # # Commented out as this was super specific to the MDX competition. 
+ # reco = metrics['valid']['main']['reco'] + # div = epoch >= 180 and reco > 0.18 + # div = div or epoch >= 100 and reco > 0.25 + # div = div and self.args.optim.loss == 'l1' + # if div: + # logger.warning("Finishing training early because valid loss is too high.") + # is_last = True + if should_eval or is_last: + # Evaluate on the testset + logger.info('-' * 70) + logger.info('Evaluating on the test set...') + # We switch to the best known model for testing + if self.args.test.best: + state = self.best_state + else: + state = states.copy_state(self.model.state_dict()) + compute_sdr = self.args.test.sdr and is_last + with states.swap_state(self.model, state): + with torch.no_grad(): + metrics['test'] = evaluate(self, compute_sdr=compute_sdr) + formatted = self._format_test(metrics['test']) + logger.info(bold(f"Test Summary | Epoch {epoch + 1} | {_summary(formatted)}")) + self.link.push_metrics(metrics) + + if distrib.rank == 0: + # Save model each epoch + self._serialize(epoch) + logger.debug("Checkpoint saved to %s", self.checkpoint_file.resolve()) + if is_last: + break + + def _run_one_epoch(self, epoch, train=True): + args = self.args + data_loader = self.loaders['train'] if train else self.loaders['valid'] + if distrib.world_size > 1 and train: + data_loader.sampler.set_epoch(epoch) + + label = ["Valid", "Train"][train] + name = label + f" | Epoch {epoch + 1}" + total = len(data_loader) + if args.max_batches: + total = min(total, args.max_batches) + logprog = LogProgress(logger, data_loader, total=total, + updates=self.args.misc.num_prints, name=name) + averager = EMA() + + for idx, sources in enumerate(logprog): + sources = sources.to(self.device) + if train: + sources = self.augment(sources) + mix = sources.sum(dim=1) + else: + mix = sources[:, 0] + sources = sources[:, 1:] + + if not train and self.args.valid_apply: + estimate = apply_model(self.model, mix, split=self.args.test.split, overlap=0) + else: + estimate = self.dmodel(mix) + if train and hasattr(self.model, 'transform_target'): + sources = self.model.transform_target(mix, sources) + assert estimate.shape == sources.shape, (estimate.shape, sources.shape) + dims = tuple(range(2, sources.dim())) + + if args.optim.loss == 'l1': + loss = F.l1_loss(estimate, sources, reduction='none') + loss = loss.mean(dims).mean(0) + reco = loss + elif args.optim.loss == 'mse': + loss = F.mse_loss(estimate, sources, reduction='none') + loss = loss.mean(dims) + reco = loss**0.5 + reco = reco.mean(0) + else: + raise ValueError(f"Invalid loss {self.args.loss}") + weights = torch.tensor(args.weights).to(sources) + loss = (loss * weights).sum() / weights.sum() + + ms = 0 + if self.quantizer is not None: + ms = self.quantizer.model_size() + if args.quant.diffq: + loss += args.quant.diffq * ms + + losses = {} + losses['reco'] = (reco * weights).sum() / weights.sum() + losses['ms'] = ms + + if not train: + nsdrs = new_sdr(sources, estimate.detach()).mean(0) + total = 0 + for source, nsdr, w in zip(self.model.sources, nsdrs, weights): + losses[f'nsdr_{source}'] = nsdr + total += w * nsdr + losses['nsdr'] = total / weights.sum() + + if train and args.svd.penalty > 0: + kw = dict(args.svd) + kw.pop('penalty') + penalty = svd_penalty(self.model, **kw) + losses['penalty'] = penalty + loss += args.svd.penalty * penalty + + losses['loss'] = loss + + for k, source in enumerate(self.model.sources): + losses[f'reco_{source}'] = reco[k] + + # optimize model in training mode + if train: + loss.backward() + grad_norm = 0 + grads = [] + for p in 
self.model.parameters():
+                    if p.grad is not None:
+                        grad_norm += p.grad.data.norm()**2
+                        grads.append(p.grad.data)
+                losses['grad'] = grad_norm ** 0.5
+                if args.optim.clip_grad:
+                    torch.nn.utils.clip_grad_norm_(
+                        self.model.parameters(),
+                        args.optim.clip_grad)
+
+                if self.args.flag == 'uns':
+                    for n, p in self.model.named_parameters():
+                        if p.grad is None:
+                            print('no grad', n)
+                self.optimizer.step()
+                self.optimizer.zero_grad()
+                for ema in self.emas['batch']:
+                    ema.update()
+            losses = averager(losses)
+            logs = self._format_train(losses)
+            logprog.update(**logs)
+            # Just in case, clear some memory
+            del loss, estimate, reco, ms
+            if args.max_batches == idx:
+                break
+            if self.args.debug and train:
+                break
+            if self.args.flag == 'debug':
+                break
+        if train:
+            for ema in self.emas['epoch']:
+                ema.update()
+        return distrib.average(losses, idx + 1)
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/spec.py b/AutoCoverTool/ref/music_remover/demucs/demucs/spec.py
new file mode 100644
index 0000000..3fa9835
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/spec.py
@@ -0,0 +1,41 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+"""Convenience wrapper to perform STFT and iSTFT"""
+
+import torch as th
+
+
+def spectro(x, n_fft=512, hop_length=None, pad=0):
+    *other, length = x.shape
+    x = x.reshape(-1, length)
+    z = th.stft(x,
+                n_fft * (1 + pad),
+                hop_length or n_fft // 4,
+                window=th.hann_window(n_fft).to(x),
+                win_length=n_fft,
+                normalized=True,
+                center=True,
+                return_complex=True,
+                pad_mode='reflect')
+    _, freqs, frame = z.shape
+    return z.view(*other, freqs, frame)
+
+
+def ispectro(z, hop_length=None, length=None, pad=0):
+    *other, freqs, frames = z.shape
+    n_fft = 2 * freqs - 2
+    z = z.view(-1, freqs, frames)
+    win_length = n_fft // (1 + pad)
+    x = th.istft(z,
+                 n_fft,
+                 hop_length,
+                 window=th.hann_window(win_length).to(z.real),
+                 win_length=win_length,
+                 normalized=True,
+                 length=length,
+                 center=True)
+    _, length = x.shape
+    return x.view(*other, length)
diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/states.py b/AutoCoverTool/ref/music_remover/demucs/demucs/states.py
new file mode 100644
index 0000000..71f229a
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/demucs/states.py
@@ -0,0 +1,148 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+"""
+Utilities to save and load models.
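+
+A typical round trip, as a sketch (the path is a placeholder; `training_args` is
+assumed to be the usual OmegaConf config, and `serialize_model` relies on classes
+whose `__init__` is wrapped with `capture_init` below):
+
+    model = load_model('checkpoint.th')            # path or already-loaded dict
+    pkg = serialize_model(model, training_args)
+    save_with_checksum(pkg, Path('out/model.th'))  # writes out/model-<sha8>.th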
+""" +from contextlib import contextmanager + +import functools +import hashlib +import inspect +import io +from pathlib import Path +import warnings + +from omegaconf import OmegaConf +from diffq import DiffQuantizer, UniformQuantizer, restore_quantized_state +import torch + + +def get_quantizer(model, args, optimizer=None): + """Return the quantizer given the XP quantization args.""" + quantizer = None + if args.diffq: + quantizer = DiffQuantizer( + model, min_size=args.min_size, group_size=args.group_size) + if optimizer is not None: + quantizer.setup_optimizer(optimizer) + elif args.qat: + quantizer = UniformQuantizer( + model, bits=args.qat, min_size=args.min_size) + return quantizer + + +def load_model(path_or_package, strict=False): + """Load a model from the given serialized model, either given as a dict (already loaded) + or a path to a file on disk.""" + if isinstance(path_or_package, dict): + package = path_or_package + elif isinstance(path_or_package, (str, Path)): + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + path = path_or_package + package = torch.load(path, 'cpu') + else: + raise ValueError(f"Invalid type for {path_or_package}.") + + klass = package["klass"] + args = package["args"] + kwargs = package["kwargs"] + + if strict: + model = klass(*args, **kwargs) + else: + sig = inspect.signature(klass) + for key in list(kwargs): + if key not in sig.parameters: + warnings.warn("Dropping inexistant parameter " + key) + del kwargs[key] + model = klass(*args, **kwargs) + + state = package["state"] + + set_state(model, state) + return model + + +def get_state(model, quantizer, half=False): + """Get the state from a model, potentially with quantization applied. + If `half` is True, model are stored as half precision, which shouldn't impact performance + but half the state size.""" + if quantizer is None: + dtype = torch.half if half else None + state = {k: p.data.to(device='cpu', dtype=dtype) for k, p in model.state_dict().items()} + else: + state = quantizer.get_quantized_state() + state['__quantized'] = True + return state + + +def set_state(model, state, quantizer=None): + """Set the state on a given model.""" + if state.get('__quantized'): + if quantizer is not None: + quantizer.restore_quantized_state(model, state['quantized']) + else: + restore_quantized_state(model, state) + else: + model.load_state_dict(state) + return state + + +def save_with_checksum(content, path): + """Save the given value on disk, along with a sha256 hash. 
+ Should be used with the output of either `serialize_model` or `get_state`.""" + buf = io.BytesIO() + torch.save(content, buf) + sig = hashlib.sha256(buf.getvalue()).hexdigest()[:8] + + path = path.parent / (path.stem + "-" + sig + path.suffix) + path.write_bytes(buf.getvalue()) + + +def serialize_model(model, training_args, quantizer=None, half=True): + args, kwargs = model._init_args_kwargs + klass = model.__class__ + + state = get_state(model, quantizer, half) + return { + 'klass': klass, + 'args': args, + 'kwargs': kwargs, + 'state': state, + 'training_args': OmegaConf.to_container(training_args, resolve=True), + } + + +def copy_state(state): + return {k: v.cpu().clone() for k, v in state.items()} + + +@contextmanager +def swap_state(model, state): + """ + Context manager that swaps the state of a model, e.g: + + # model is in old state + with swap_state(model, new_state): + # model in new state + # model back to old state + """ + old_state = copy_state(model.state_dict()) + model.load_state_dict(state, strict=False) + try: + yield + finally: + model.load_state_dict(old_state) + + +def capture_init(init): + @functools.wraps(init) + def __init__(self, *args, **kwargs): + self._init_args_kwargs = (args, kwargs) + init(self, *args, **kwargs) + + return __init__ diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/svd.py b/AutoCoverTool/ref/music_remover/demucs/demucs/svd.py new file mode 100644 index 0000000..59ab603 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/svd.py @@ -0,0 +1,83 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +"""Ways to make the model stronger.""" +import random +import torch + + +def power_iteration(m, niters=1, bs=1): + """This is the power method. batch size is used to try multiple starting point in parallel.""" + assert m.dim() == 2 + assert m.shape[0] == m.shape[1] + dim = m.shape[0] + b = torch.randn(dim, bs, device=m.device, dtype=m.dtype) + + for _ in range(niters): + n = m.mm(b) + norm = n.norm(dim=0, keepdim=True) + b = n / (1e-10 + norm) + + return norm.mean() + + +# We need a shared RNG to make sure all the distributed worker will skip the penalty together, +# as otherwise we wouldn't get any speed up. +penalty_rng = random.Random(1234) + + +def svd_penalty(model, min_size=0.1, dim=1, niters=2, powm=False, convtr=True, + proba=1, conv_only=False, exact=False, bs=1): + """ + Penalty on the largest singular value for a layer. + Args: + - model: model to penalize + - min_size: minimum size in MB of a layer to penalize. + - dim: projection dimension for the svd_lowrank. Higher is better but slower. + - niters: number of iterations in the algorithm used by svd_lowrank. + - powm: use power method instead of lowrank SVD, my own experience + is that it is both slower and less stable. + - convtr: when True, differentiate between Conv and Transposed Conv. + this is kept for compatibility with older experiments. + - proba: probability to apply the penalty. + - conv_only: only apply to conv and conv transposed, not LSTM + (might not be reliable for other models than Demucs). + - exact: use exact SVD (slow but useful at validation). + - bs: batch_size for power method. + """ + total = 0 + if penalty_rng.random() > proba: + return 0. 
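+
+    # Descriptive note: when the penalty does fire, the `total / proba` at the
+    # end of this function rescales it so the expected penalty is unchanged.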
+ + for m in model.modules(): + for name, p in m.named_parameters(recurse=False): + if p.numel() / 2**18 < min_size: + continue + if convtr: + if isinstance(m, (torch.nn.ConvTranspose1d, torch.nn.ConvTranspose2d)): + if p.dim() in [3, 4]: + p = p.transpose(0, 1).contiguous() + if p.dim() == 3: + p = p.view(len(p), -1) + elif p.dim() == 4: + p = p.view(len(p), -1) + elif p.dim() == 1: + continue + elif conv_only: + continue + assert p.dim() == 2, (name, p.shape) + if exact: + estimate = torch.svd(p, compute_uv=False)[1].pow(2).max() + elif powm: + a, b = p.shape + if a < b: + n = p.mm(p.t()) + else: + n = p.t().mm(p) + estimate = power_iteration(n, niters, bs) + else: + estimate = torch.svd_lowrank(p, dim, niters)[1][0].pow(2) + total += estimate + return total / proba diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/train.py b/AutoCoverTool/ref/music_remover/demucs/demucs/train.py new file mode 100644 index 0000000..6cc0256 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/train.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +"""Main training script entry point""" + +import logging +import os +from pathlib import Path +import sys + +from dora import hydra_main +import hydra +from hydra.core.global_hydra import GlobalHydra +from omegaconf import OmegaConf +import torch +from torch import nn +from torch.utils.data import ConcatDataset + +from . import distrib +from .wav import get_wav_datasets, get_musdb_wav_datasets +from .demucs import Demucs +from .hdemucs import HDemucs +from .htdemucs import HTDemucs +from .repitch import RepitchedWrapper +from .solver import Solver +from .states import capture_init +from .utils import random_subset + +logger = logging.getLogger(__name__) + + +class TorchHDemucsWrapper(nn.Module): + """Wrapper around torchaudio HDemucs implementation to provide the proper metadata + for model evaluation. 
+ See https://pytorch.org/audio/stable/tutorials/hybrid_demucs_tutorial.html""" + + @capture_init + def __init__(self, **kwargs): + super().__init__() + try: + from torchaudio.models import HDemucs as TorchHDemucs + except ImportError: + raise ImportError("Please upgrade torchaudio for using its implementation of HDemucs") + self.samplerate = kwargs.pop('samplerate') + self.segment = kwargs.pop('segment') + self.sources = kwargs['sources'] + self.torch_hdemucs = TorchHDemucs(**kwargs) + + def forward(self, mix): + return self.torch_hdemucs.forward(mix) + + +def get_model(args): + extra = { + 'sources': list(args.dset.sources), + 'audio_channels': args.dset.channels, + 'samplerate': args.dset.samplerate, + 'segment': args.model_segment or 4 * args.dset.segment, + } + klass = { + 'demucs': Demucs, + 'hdemucs': HDemucs, + 'htdemucs': HTDemucs, + 'torch_hdemucs': TorchHDemucsWrapper, + }[args.model] + kw = OmegaConf.to_container(getattr(args, args.model), resolve=True) + model = klass(**extra, **kw) + return model + + +def get_optimizer(model, args): + seen_params = set() + other_params = [] + groups = [] + for n, module in model.named_modules(): + if hasattr(module, "make_optim_group"): + group = module.make_optim_group() + params = set(group["params"]) + assert params.isdisjoint(seen_params) + seen_params |= set(params) + groups.append(group) + for param in model.parameters(): + if param not in seen_params: + other_params.append(param) + groups.insert(0, {"params": other_params}) + parameters = groups + if args.optim.optim == "adam": + return torch.optim.Adam( + parameters, + lr=args.optim.lr, + betas=(args.optim.momentum, args.optim.beta2), + weight_decay=args.optim.weight_decay, + ) + elif args.optim.optim == "adamw": + return torch.optim.AdamW( + parameters, + lr=args.optim.lr, + betas=(args.optim.momentum, args.optim.beta2), + weight_decay=args.optim.weight_decay, + ) + else: + raise ValueError("Invalid optimizer %s", args.optim.optimizer) + + +def get_datasets(args): + train_set, valid_set = get_musdb_wav_datasets(args.dset) + if args.dset.wav: + extra_train_set, extra_valid_set = get_wav_datasets(args.dset) + if len(args.dset.sources) <= 4: + train_set = ConcatDataset([train_set, extra_train_set]) + valid_set = ConcatDataset([valid_set, extra_valid_set]) + else: + train_set = extra_train_set + valid_set = extra_valid_set + + if args.dset.wav2: + extra_train_set, extra_valid_set = get_wav_datasets(args.dset, "wav2") + weight = args.dset.wav2_weight + if weight is not None: + b = len(train_set) + e = len(extra_train_set) + reps = max(1, round(e / b * (1 / weight - 1))) + else: + reps = 1 + train_set = ConcatDataset([train_set] * reps + [extra_train_set]) + if args.dset.wav2_valid: + if weight is not None: + b = len(valid_set) + n_kept = int(round(weight * b / (1 - weight))) + valid_set = ConcatDataset( + [valid_set, random_subset(extra_valid_set, n_kept)] + ) + else: + valid_set = ConcatDataset([valid_set, extra_valid_set]) + if args.dset.valid_samples is not None: + valid_set = random_subset(valid_set, args.dset.valid_samples) + return train_set, valid_set + + +def get_solver(args, model_only=False): + distrib.init() + + torch.manual_seed(args.seed) + model = get_model(args) + if args.misc.show: + logger.info(model) + mb = sum(p.numel() for p in model.parameters()) * 4 / 2**20 + logger.info('Size: %.1f MB', mb) + if hasattr(model, 'valid_length'): + field = model.valid_length(1) + logger.info('Field: %.1f ms', field / args.dset.samplerate * 1000) + sys.exit(0) + + # torch also 
initialize cuda seed if available + if torch.cuda.is_available(): + model.cuda() + + # optimizer + optimizer = get_optimizer(model, args) + + assert args.batch_size % distrib.world_size == 0 + args.batch_size //= distrib.world_size + + if model_only: + return Solver(None, model, optimizer, args) + + train_set, valid_set = get_datasets(args) + + if args.augment.repitch.proba: + vocals = [] + if 'vocals' in args.dset.sources: + vocals.append(args.dset.sources.index('vocals')) + else: + logger.warning('No vocal source found') + if args.augment.repitch.proba: + train_set = RepitchedWrapper(train_set, vocals=vocals, **args.augment.repitch) + + logger.info("train/valid set size: %d %d", len(train_set), len(valid_set)) + train_loader = distrib.loader( + train_set, batch_size=args.batch_size, shuffle=True, + num_workers=args.misc.num_workers, drop_last=True) + if args.dset.full_cv: + valid_loader = distrib.loader( + valid_set, batch_size=1, shuffle=False, + num_workers=args.misc.num_workers) + else: + valid_loader = distrib.loader( + valid_set, batch_size=args.batch_size, shuffle=False, + num_workers=args.misc.num_workers, drop_last=True) + loaders = {"train": train_loader, "valid": valid_loader} + + # Construct Solver + return Solver(loaders, model, optimizer, args) + + +def get_solver_from_sig(sig, model_only=False): + inst = GlobalHydra.instance() + hyd = None + if inst.is_initialized(): + hyd = inst.hydra + inst.clear() + xp = main.get_xp_from_sig(sig) + if hyd is not None: + inst.clear() + inst.initialize(hyd) + + with xp.enter(stack=True): + return get_solver(xp.cfg, model_only) + + +@hydra_main(config_path="../conf", config_name="config") +def main(args): + global __file__ + __file__ = hydra.utils.to_absolute_path(__file__) + for attr in ["musdb", "wav", "metadata"]: + val = getattr(args.dset, attr) + if val is not None: + setattr(args.dset, attr, hydra.utils.to_absolute_path(val)) + + os.environ["OMP_NUM_THREADS"] = "1" + os.environ["MKL_NUM_THREADS"] = "1" + + if args.misc.verbose: + logger.setLevel(logging.DEBUG) + + logger.info("For logs, checkpoints and samples check %s", os.getcwd()) + logger.debug(args) + from dora import get_xp + logger.debug(get_xp().cfg) + + solver = get_solver(args) + solver.train() + + +if '_DORA_TEST_PATH' in os.environ: + main.dora.dir = Path(os.environ['_DORA_TEST_PATH']) + + +if __name__ == "__main__": + main() diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/transformer.py b/AutoCoverTool/ref/music_remover/demucs/demucs/transformer.py new file mode 100644 index 0000000..56a465b --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/transformer.py @@ -0,0 +1,839 @@ +# Copyright (c) 2019-present, Meta, Inc. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# First author is Simon Rouard. 
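+# Editorial shape note: create_sin_embedding below returns embeddings in TBC
+# layout, e.g. create_sin_embedding(length=100, dim=512) has shape (100, 1, 512);
+# they are meant to be added to activations (weighted by `weight_pos_embed` in
+# CrossTransformerEncoder).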
+ +import random +import typing as tp + +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +import math +from einops import rearrange + + +def create_sin_embedding( + length: int, dim: int, shift: int = 0, device="cpu", max_period=10000 +): + # We aim for TBC format + assert dim % 2 == 0 + pos = shift + torch.arange(length, device=device).view(-1, 1, 1) + half_dim = dim // 2 + adim = torch.arange(dim // 2, device=device).view(1, 1, -1) + phase = pos / (max_period ** (adim / (half_dim - 1))) + return torch.cat( + [ + torch.cos(phase), + torch.sin(phase), + ], + dim=-1, + ) + + +def create_2d_sin_embedding(d_model, height, width, device="cpu", max_period=10000): + """ + :param d_model: dimension of the model + :param height: height of the positions + :param width: width of the positions + :return: d_model*height*width position matrix + """ + if d_model % 4 != 0: + raise ValueError( + "Cannot use sin/cos positional encoding with " + "odd dimension (got dim={:d})".format(d_model) + ) + pe = torch.zeros(d_model, height, width) + # Each dimension use half of d_model + d_model = int(d_model / 2) + div_term = torch.exp( + torch.arange(0.0, d_model, 2) * -(math.log(max_period) / d_model) + ) + pos_w = torch.arange(0.0, width).unsqueeze(1) + pos_h = torch.arange(0.0, height).unsqueeze(1) + pe[0:d_model:2, :, :] = ( + torch.sin(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1) + ) + pe[1:d_model:2, :, :] = ( + torch.cos(pos_w * div_term).transpose(0, 1).unsqueeze(1).repeat(1, height, 1) + ) + pe[d_model::2, :, :] = ( + torch.sin(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width) + ) + pe[d_model + 1:: 2, :, :] = ( + torch.cos(pos_h * div_term).transpose(0, 1).unsqueeze(2).repeat(1, 1, width) + ) + + return pe[None, :].to(device) + + +def create_sin_embedding_cape( + length: int, + dim: int, + batch_size: int, + mean_normalize: bool, + augment: bool, # True during training + max_global_shift: float = 0.0, # delta max + max_local_shift: float = 0.0, # epsilon max + max_scale: float = 1.0, + device: str = "cpu", + max_period: float = 10000.0, +): + # We aim for TBC format + assert dim % 2 == 0 + pos = 1.0 * torch.arange(length).view(-1, 1, 1) # (length, 1, 1) + pos = pos.repeat(1, batch_size, 1) # (length, batch_size, 1) + if mean_normalize: + pos -= torch.nanmean(pos, dim=0, keepdim=True) + + if augment: + delta = np.random.uniform( + -max_global_shift, +max_global_shift, size=[1, batch_size, 1] + ) + delta_local = np.random.uniform( + -max_local_shift, +max_local_shift, size=[length, batch_size, 1] + ) + log_lambdas = np.random.uniform( + -np.log(max_scale), +np.log(max_scale), size=[1, batch_size, 1] + ) + pos = (pos + delta + delta_local) * np.exp(log_lambdas) + + pos = pos.to(device) + + half_dim = dim // 2 + adim = torch.arange(dim // 2, device=device).view(1, 1, -1) + phase = pos / (max_period ** (adim / (half_dim - 1))) + return torch.cat( + [ + torch.cos(phase), + torch.sin(phase), + ], + dim=-1, + ).float() + + +def get_causal_mask(length): + pos = torch.arange(length) + return pos > pos[:, None] + + +def get_elementary_mask( + T1, + T2, + mask_type, + sparse_attn_window, + global_window, + mask_random_seed, + sparsity, + device, +): + """ + When the input of the Decoder has length T1 and the output T2 + The mask matrix has shape (T2, T1) + """ + assert mask_type in ["diag", "jmask", "random", "global"] + + if mask_type == "global": + mask = torch.zeros(T2, T1, dtype=torch.bool) + mask[:, :global_window] = True + 
line_window = int(global_window * T2 / T1) + mask[:line_window, :] = True + + if mask_type == "diag": + + mask = torch.zeros(T2, T1, dtype=torch.bool) + rows = torch.arange(T2)[:, None] + cols = ( + (T1 / T2 * rows + torch.arange(-sparse_attn_window, sparse_attn_window + 1)) + .long() + .clamp(0, T1 - 1) + ) + mask.scatter_(1, cols, torch.ones(1, dtype=torch.bool).expand_as(cols)) + + elif mask_type == "jmask": + mask = torch.zeros(T2 + 2, T1 + 2, dtype=torch.bool) + rows = torch.arange(T2 + 2)[:, None] + t = torch.arange(0, int((2 * T1) ** 0.5 + 1)) + t = (t * (t + 1) / 2).int() + t = torch.cat([-t.flip(0)[:-1], t]) + cols = (T1 / T2 * rows + t).long().clamp(0, T1 + 1) + mask.scatter_(1, cols, torch.ones(1, dtype=torch.bool).expand_as(cols)) + mask = mask[1:-1, 1:-1] + + elif mask_type == "random": + gene = torch.Generator(device=device) + gene.manual_seed(mask_random_seed) + mask = ( + torch.rand(T1 * T2, generator=gene, device=device).reshape(T2, T1) + > sparsity + ) + + mask = mask.to(device) + return mask + + +def get_mask( + T1, + T2, + mask_type, + sparse_attn_window, + global_window, + mask_random_seed, + sparsity, + device, +): + """ + Return a SparseCSRTensor mask that is a combination of elementary masks + mask_type can be a combination of multiple masks: for instance "diag_jmask_random" + """ + from xformers.sparse import SparseCSRTensor + # create a list + mask_types = mask_type.split("_") + + all_masks = [ + get_elementary_mask( + T1, + T2, + mask, + sparse_attn_window, + global_window, + mask_random_seed, + sparsity, + device, + ) + for mask in mask_types + ] + + final_mask = torch.stack(all_masks).sum(axis=0) > 0 + + return SparseCSRTensor.from_dense(final_mask[None]) + + +class ScaledEmbedding(nn.Module): + def __init__( + self, + num_embeddings: int, + embedding_dim: int, + scale: float = 1.0, + boost: float = 3.0, + ): + super().__init__() + self.embedding = nn.Embedding(num_embeddings, embedding_dim) + self.embedding.weight.data *= scale / boost + self.boost = boost + + @property + def weight(self): + return self.embedding.weight * self.boost + + def forward(self, x): + return self.embedding(x) * self.boost + + +class LayerScale(nn.Module): + """Layer scale from [Touvron et al 2021] (https://arxiv.org/pdf/2103.17239.pdf). + This rescales diagonaly residual outputs close to 0 initially, then learnt. 
+ """ + + def __init__(self, channels: int, init: float = 0, channel_last=False): + """ + channel_last = False corresponds to (B, C, T) tensors + channel_last = True corresponds to (T, B, C) tensors + """ + super().__init__() + self.channel_last = channel_last + self.scale = nn.Parameter(torch.zeros(channels, requires_grad=True)) + self.scale.data[:] = init + + def forward(self, x): + if self.channel_last: + return self.scale * x + else: + return self.scale[:, None] * x + + +class MyGroupNorm(nn.GroupNorm): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def forward(self, x): + """ + x: (B, T, C) + if num_groups=1: Normalisation on all T and C together for each B + """ + x = x.transpose(1, 2) + return super().forward(x).transpose(1, 2) + + +class MyTransformerEncoderLayer(nn.TransformerEncoderLayer): + def __init__( + self, + d_model, + nhead, + dim_feedforward=2048, + dropout=0.1, + activation=F.relu, + group_norm=0, + norm_first=False, + norm_out=False, + layer_norm_eps=1e-5, + layer_scale=False, + init_values=1e-4, + device=None, + dtype=None, + sparse=False, + mask_type="diag", + mask_random_seed=42, + sparse_attn_window=500, + global_window=50, + auto_sparsity=False, + sparsity=0.95, + batch_first=False, + ): + factory_kwargs = {"device": device, "dtype": dtype} + super().__init__( + d_model=d_model, + nhead=nhead, + dim_feedforward=dim_feedforward, + dropout=dropout, + activation=activation, + layer_norm_eps=layer_norm_eps, + batch_first=batch_first, + norm_first=norm_first, + device=device, + dtype=dtype, + ) + self.sparse = sparse + self.auto_sparsity = auto_sparsity + if sparse: + if not auto_sparsity: + self.mask_type = mask_type + self.sparse_attn_window = sparse_attn_window + self.global_window = global_window + self.sparsity = sparsity + if group_norm: + self.norm1 = MyGroupNorm(int(group_norm), d_model, eps=layer_norm_eps, **factory_kwargs) + self.norm2 = MyGroupNorm(int(group_norm), d_model, eps=layer_norm_eps, **factory_kwargs) + + self.norm_out = None + if self.norm_first & norm_out: + self.norm_out = MyGroupNorm(num_groups=int(norm_out), num_channels=d_model) + self.gamma_1 = ( + LayerScale(d_model, init_values, True) if layer_scale else nn.Identity() + ) + self.gamma_2 = ( + LayerScale(d_model, init_values, True) if layer_scale else nn.Identity() + ) + + if sparse: + self.self_attn = MultiheadAttention( + d_model, nhead, dropout=dropout, batch_first=batch_first, + auto_sparsity=sparsity if auto_sparsity else 0, + ) + self.__setattr__("src_mask", torch.zeros(1, 1)) + self.mask_random_seed = mask_random_seed + + def forward(self, src, src_mask=None, src_key_padding_mask=None): + """ + if batch_first = False, src shape is (T, B, C) + the case where batch_first=True is not covered + """ + device = src.device + x = src + T, B, C = x.shape + if self.sparse and not self.auto_sparsity: + assert src_mask is None + src_mask = self.src_mask + if src_mask.shape[-1] != T: + src_mask = get_mask( + T, + T, + self.mask_type, + self.sparse_attn_window, + self.global_window, + self.mask_random_seed, + self.sparsity, + device, + ) + self.__setattr__("src_mask", src_mask) + + if self.norm_first: + x = x + self.gamma_1( + self._sa_block(self.norm1(x), src_mask, src_key_padding_mask) + ) + x = x + self.gamma_2(self._ff_block(self.norm2(x))) + + if self.norm_out: + x = self.norm_out(x) + else: + x = self.norm1( + x + self.gamma_1(self._sa_block(x, src_mask, src_key_padding_mask)) + ) + x = self.norm2(x + self.gamma_2(self._ff_block(x))) + + return x + + +class 
CrossTransformerEncoderLayer(nn.Module): + def __init__( + self, + d_model: int, + nhead: int, + dim_feedforward: int = 2048, + dropout: float = 0.1, + activation=F.relu, + layer_norm_eps: float = 1e-5, + layer_scale: bool = False, + init_values: float = 1e-4, + norm_first: bool = False, + group_norm: bool = False, + norm_out: bool = False, + sparse=False, + mask_type="diag", + mask_random_seed=42, + sparse_attn_window=500, + global_window=50, + sparsity=0.95, + auto_sparsity=None, + device=None, + dtype=None, + batch_first=False, + ): + factory_kwargs = {"device": device, "dtype": dtype} + super().__init__() + + self.sparse = sparse + self.auto_sparsity = auto_sparsity + if sparse: + if not auto_sparsity: + self.mask_type = mask_type + self.sparse_attn_window = sparse_attn_window + self.global_window = global_window + self.sparsity = sparsity + + self.cross_attn: nn.Module + self.cross_attn = nn.MultiheadAttention( + d_model, nhead, dropout=dropout, batch_first=batch_first) + # Implementation of Feedforward model + self.linear1 = nn.Linear(d_model, dim_feedforward, **factory_kwargs) + self.dropout = nn.Dropout(dropout) + self.linear2 = nn.Linear(dim_feedforward, d_model, **factory_kwargs) + + self.norm_first = norm_first + self.norm1: nn.Module + self.norm2: nn.Module + self.norm3: nn.Module + if group_norm: + self.norm1 = MyGroupNorm(int(group_norm), d_model, eps=layer_norm_eps, **factory_kwargs) + self.norm2 = MyGroupNorm(int(group_norm), d_model, eps=layer_norm_eps, **factory_kwargs) + self.norm3 = MyGroupNorm(int(group_norm), d_model, eps=layer_norm_eps, **factory_kwargs) + else: + self.norm1 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs) + self.norm2 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs) + self.norm3 = nn.LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs) + + self.norm_out = None + if self.norm_first & norm_out: + self.norm_out = MyGroupNorm(num_groups=int(norm_out), num_channels=d_model) + + self.gamma_1 = ( + LayerScale(d_model, init_values, True) if layer_scale else nn.Identity() + ) + self.gamma_2 = ( + LayerScale(d_model, init_values, True) if layer_scale else nn.Identity() + ) + + self.dropout1 = nn.Dropout(dropout) + self.dropout2 = nn.Dropout(dropout) + + # Legacy string support for activation function. 
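+        # (e.g. activation="relu" or "gelu" is mapped to the matching
+        # torch.nn.functional callable by _get_activation_fn below;
+        # any other name raises a RuntimeError.)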
+ if isinstance(activation, str): + self.activation = self._get_activation_fn(activation) + else: + self.activation = activation + + if sparse: + self.cross_attn = MultiheadAttention( + d_model, nhead, dropout=dropout, batch_first=batch_first, + auto_sparsity=sparsity if auto_sparsity else 0) + if not auto_sparsity: + self.__setattr__("mask", torch.zeros(1, 1)) + self.mask_random_seed = mask_random_seed + + def forward(self, q, k, mask=None): + """ + Args: + q: tensor of shape (T, B, C) + k: tensor of shape (S, B, C) + mask: tensor of shape (T, S) + + """ + device = q.device + T, B, C = q.shape + S, B, C = k.shape + if self.sparse and not self.auto_sparsity: + assert mask is None + mask = self.mask + if mask.shape[-1] != S or mask.shape[-2] != T: + mask = get_mask( + S, + T, + self.mask_type, + self.sparse_attn_window, + self.global_window, + self.mask_random_seed, + self.sparsity, + device, + ) + self.__setattr__("mask", mask) + + if self.norm_first: + x = q + self.gamma_1(self._ca_block(self.norm1(q), self.norm2(k), mask)) + x = x + self.gamma_2(self._ff_block(self.norm3(x))) + if self.norm_out: + x = self.norm_out(x) + else: + x = self.norm1(q + self.gamma_1(self._ca_block(q, k, mask))) + x = self.norm2(x + self.gamma_2(self._ff_block(x))) + + return x + + # self-attention block + def _ca_block(self, q, k, attn_mask=None): + x = self.cross_attn(q, k, k, attn_mask=attn_mask, need_weights=False)[0] + return self.dropout1(x) + + # feed forward block + def _ff_block(self, x): + x = self.linear2(self.dropout(self.activation(self.linear1(x)))) + return self.dropout2(x) + + def _get_activation_fn(self, activation): + if activation == "relu": + return F.relu + elif activation == "gelu": + return F.gelu + + raise RuntimeError("activation should be relu/gelu, not {}".format(activation)) + + +# ----------------- MULTI-BLOCKS MODELS: ----------------------- + + +class CrossTransformerEncoder(nn.Module): + def __init__( + self, + dim: int, + emb: str = "sin", + hidden_scale: float = 4.0, + num_heads: int = 8, + num_layers: int = 6, + cross_first: bool = False, + dropout: float = 0.0, + max_positions: int = 1000, + norm_in: bool = True, + norm_in_group: bool = False, + group_norm: int = False, + norm_first: bool = False, + norm_out: bool = False, + max_period: float = 10000.0, + weight_decay: float = 0.0, + lr: tp.Optional[float] = None, + layer_scale: bool = False, + gelu: bool = True, + sin_random_shift: int = 0, + weight_pos_embed: float = 1.0, + cape_mean_normalize: bool = True, + cape_augment: bool = True, + cape_glob_loc_scale: list = [5000.0, 1.0, 1.4], + sparse_self_attn: bool = False, + sparse_cross_attn: bool = False, + mask_type: str = "diag", + mask_random_seed: int = 42, + sparse_attn_window: int = 500, + global_window: int = 50, + auto_sparsity: bool = False, + sparsity: float = 0.95, + ): + super().__init__() + """ + """ + assert dim % num_heads == 0 + + hidden_dim = int(dim * hidden_scale) + + self.num_layers = num_layers + # classic parity = 1 means that if idx%2 == 1 there is a + # classical encoder else there is a cross encoder + self.classic_parity = 1 if cross_first else 0 + self.emb = emb + self.max_period = max_period + self.weight_decay = weight_decay + self.weight_pos_embed = weight_pos_embed + self.sin_random_shift = sin_random_shift + if emb == "cape": + self.cape_mean_normalize = cape_mean_normalize + self.cape_augment = cape_augment + self.cape_glob_loc_scale = cape_glob_loc_scale + if emb == "scaled": + self.position_embeddings = ScaledEmbedding(max_positions, dim, 
scale=0.2) + + self.lr = lr + + activation: tp.Any = F.gelu if gelu else F.relu + + self.norm_in: nn.Module + self.norm_in_t: nn.Module + if norm_in: + self.norm_in = nn.LayerNorm(dim) + self.norm_in_t = nn.LayerNorm(dim) + elif norm_in_group: + self.norm_in = MyGroupNorm(int(norm_in_group), dim) + self.norm_in_t = MyGroupNorm(int(norm_in_group), dim) + else: + self.norm_in = nn.Identity() + self.norm_in_t = nn.Identity() + + # spectrogram layers + self.layers = nn.ModuleList() + # temporal layers + self.layers_t = nn.ModuleList() + + kwargs_common = { + "d_model": dim, + "nhead": num_heads, + "dim_feedforward": hidden_dim, + "dropout": dropout, + "activation": activation, + "group_norm": group_norm, + "norm_first": norm_first, + "norm_out": norm_out, + "layer_scale": layer_scale, + "mask_type": mask_type, + "mask_random_seed": mask_random_seed, + "sparse_attn_window": sparse_attn_window, + "global_window": global_window, + "sparsity": sparsity, + "auto_sparsity": auto_sparsity, + "batch_first": True, + } + + kwargs_classic_encoder = dict(kwargs_common) + kwargs_classic_encoder.update({ + "sparse": sparse_self_attn, + }) + kwargs_cross_encoder = dict(kwargs_common) + kwargs_cross_encoder.update({ + "sparse": sparse_cross_attn, + }) + + for idx in range(num_layers): + if idx % 2 == self.classic_parity: + + self.layers.append(MyTransformerEncoderLayer(**kwargs_classic_encoder)) + self.layers_t.append( + MyTransformerEncoderLayer(**kwargs_classic_encoder) + ) + + else: + self.layers.append(CrossTransformerEncoderLayer(**kwargs_cross_encoder)) + + self.layers_t.append( + CrossTransformerEncoderLayer(**kwargs_cross_encoder) + ) + + def forward(self, x, xt): + B, C, Fr, T1 = x.shape + pos_emb_2d = create_2d_sin_embedding( + C, Fr, T1, x.device, self.max_period + ) # (1, C, Fr, T1) + pos_emb_2d = rearrange(pos_emb_2d, "b c fr t1 -> b (t1 fr) c") + x = rearrange(x, "b c fr t1 -> b (t1 fr) c") + x = self.norm_in(x) + x = x + self.weight_pos_embed * pos_emb_2d + + B, C, T2 = xt.shape + xt = rearrange(xt, "b c t2 -> b t2 c") # now T2, B, C + pos_emb = self._get_pos_embedding(T2, B, C, x.device) + pos_emb = rearrange(pos_emb, "t2 b c -> b t2 c") + xt = self.norm_in_t(xt) + xt = xt + self.weight_pos_embed * pos_emb + + for idx in range(self.num_layers): + if idx % 2 == self.classic_parity: + x = self.layers[idx](x) + xt = self.layers_t[idx](xt) + else: + old_x = x + x = self.layers[idx](x, xt) + xt = self.layers_t[idx](xt, old_x) + + x = rearrange(x, "b (t1 fr) c -> b c fr t1", t1=T1) + xt = rearrange(xt, "b t2 c -> b c t2") + return x, xt + + def _get_pos_embedding(self, T, B, C, device): + if self.emb == "sin": + shift = random.randrange(self.sin_random_shift + 1) + pos_emb = create_sin_embedding( + T, C, shift=shift, device=device, max_period=self.max_period + ) + elif self.emb == "cape": + if self.training: + pos_emb = create_sin_embedding_cape( + T, + C, + B, + device=device, + max_period=self.max_period, + mean_normalize=self.cape_mean_normalize, + augment=self.cape_augment, + max_global_shift=self.cape_glob_loc_scale[0], + max_local_shift=self.cape_glob_loc_scale[1], + max_scale=self.cape_glob_loc_scale[2], + ) + else: + pos_emb = create_sin_embedding_cape( + T, + C, + B, + device=device, + max_period=self.max_period, + mean_normalize=self.cape_mean_normalize, + augment=False, + ) + + elif self.emb == "scaled": + pos = torch.arange(T, device=device) + pos_emb = self.position_embeddings(pos)[:, None] + + return pos_emb + + def make_optim_group(self): + group = {"params": list(self.parameters()), 
"weight_decay": self.weight_decay} + if self.lr is not None: + group["lr"] = self.lr + return group + + +# Attention Modules + + +class MultiheadAttention(nn.Module): + def __init__( + self, + embed_dim, + num_heads, + dropout=0.0, + bias=True, + add_bias_kv=False, + add_zero_attn=False, + kdim=None, + vdim=None, + batch_first=False, + auto_sparsity=None, + ): + super().__init__() + assert auto_sparsity is not None, "sanity check" + self.num_heads = num_heads + self.q = torch.nn.Linear(embed_dim, embed_dim, bias=bias) + self.k = torch.nn.Linear(embed_dim, embed_dim, bias=bias) + self.v = torch.nn.Linear(embed_dim, embed_dim, bias=bias) + self.attn_drop = torch.nn.Dropout(dropout) + self.proj = torch.nn.Linear(embed_dim, embed_dim, bias) + self.proj_drop = torch.nn.Dropout(dropout) + self.batch_first = batch_first + self.auto_sparsity = auto_sparsity + + def forward( + self, + query, + key, + value, + key_padding_mask=None, + need_weights=True, + attn_mask=None, + average_attn_weights=True, + ): + + if not self.batch_first: # N, B, C + query = query.permute(1, 0, 2) # B, N_q, C + key = key.permute(1, 0, 2) # B, N_k, C + value = value.permute(1, 0, 2) # B, N_k, C + B, N_q, C = query.shape + B, N_k, C = key.shape + + q = ( + self.q(query) + .reshape(B, N_q, self.num_heads, C // self.num_heads) + .permute(0, 2, 1, 3) + ) + q = q.flatten(0, 1) + k = ( + self.k(key) + .reshape(B, N_k, self.num_heads, C // self.num_heads) + .permute(0, 2, 1, 3) + ) + k = k.flatten(0, 1) + v = ( + self.v(value) + .reshape(B, N_k, self.num_heads, C // self.num_heads) + .permute(0, 2, 1, 3) + ) + v = v.flatten(0, 1) + + if self.auto_sparsity: + assert attn_mask is None + x = dynamic_sparse_attention(q, k, v, sparsity=self.auto_sparsity) + else: + x = scaled_dot_product_attention(q, k, v, attn_mask, dropout=self.attn_drop) + x = x.reshape(B, self.num_heads, N_q, C // self.num_heads) + + x = x.transpose(1, 2).reshape(B, N_q, C) + x = self.proj(x) + x = self.proj_drop(x) + if not self.batch_first: + x = x.permute(1, 0, 2) + return x, None + + +def scaled_query_key_softmax(q, k, att_mask): + from xformers.ops import masked_matmul + q = q / (k.size(-1)) ** 0.5 + att = masked_matmul(q, k.transpose(-2, -1), att_mask) + att = torch.nn.functional.softmax(att, -1) + return att + + +def scaled_dot_product_attention(q, k, v, att_mask, dropout): + att = scaled_query_key_softmax(q, k, att_mask=att_mask) + att = dropout(att) + y = att @ v + return y + + +def _compute_buckets(x, R): + qq = torch.einsum('btf,bfhi->bhti', x, R) + qq = torch.cat([qq, -qq], dim=-1) + buckets = qq.argmax(dim=-1) + + return buckets.permute(0, 2, 1).byte().contiguous() + + +def dynamic_sparse_attention(query, key, value, sparsity, infer_sparsity=True, attn_bias=None): + # assert False, "The code for the custom sparse kernel is not ready for release yet." 
+ from xformers.ops import find_locations, sparse_memory_efficient_attention + n_hashes = 32 + proj_size = 4 + query, key, value = [x.contiguous() for x in [query, key, value]] + with torch.no_grad(): + R = torch.randn(1, query.shape[-1], n_hashes, proj_size // 2, device=query.device) + bucket_query = _compute_buckets(query, R) + bucket_key = _compute_buckets(key, R) + row_offsets, column_indices = find_locations( + bucket_query, bucket_key, sparsity, infer_sparsity) + return sparse_memory_efficient_attention( + query, key, value, row_offsets, column_indices, attn_bias) diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/utils.py b/AutoCoverTool/ref/music_remover/demucs/demucs/utils.py new file mode 100755 index 0000000..38ef120 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/utils.py @@ -0,0 +1,141 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +from collections import defaultdict +from contextlib import contextmanager +import math +import os +import tempfile +import typing as tp + +import torch +from torch.nn import functional as F +from torch.utils.data import Subset + + +def unfold(a, kernel_size, stride): + """Given input of size [*OT, T], output Tensor of size [*OT, F, K] + with K the kernel size, by extracting frames with the given stride. + + This will pad the input so that `F = ceil(T / K)`. + + see https://github.com/pytorch/pytorch/issues/60466 + """ + *shape, length = a.shape + n_frames = math.ceil(length / stride) + tgt_length = (n_frames - 1) * stride + kernel_size + a = F.pad(a, (0, tgt_length - length)) + strides = list(a.stride()) + assert strides[-1] == 1, 'data should be contiguous' + strides = strides[:-1] + [stride, 1] + return a.as_strided([*shape, n_frames, kernel_size], strides) + + +def center_trim(tensor: torch.Tensor, reference: tp.Union[torch.Tensor, int]): + """ + Center trim `tensor` with respect to `reference`, along the last dimension. + `reference` can also be a number, representing the length to trim to. + If the size difference != 0 mod 2, the extra sample is removed on the right side. + """ + ref_size: int + if isinstance(reference, torch.Tensor): + ref_size = reference.size(-1) + else: + ref_size = reference + delta = tensor.size(-1) - ref_size + if delta < 0: + raise ValueError("tensor must be larger than reference. " f"Delta is {delta}.") + if delta: + tensor = tensor[..., delta // 2:-(delta - delta // 2)] + return tensor + + +def pull_metric(history: tp.List[dict], name: str): + out = [] + for metrics in history: + metric = metrics + for part in name.split("."): + metric = metric[part] + out.append(metric) + return out + + +def EMA(beta: float = 1): + """ + Exponential Moving Average callback. + Returns a single function that can be called to repeatidly update the EMA + with a dict of metrics. The callback will return + the new averaged dict of metrics. + + Note that for `beta=1`, this is just plain averaging. 
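+
+    Illustrative usage (example added for clarity, not from upstream):
+
+        ema = EMA(beta=0.9)
+        ema({"loss": 1.0})  # -> {"loss": 1.0}
+        ema({"loss": 0.0})  # -> {"loss": ~0.47}, a weighted running average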
+ """ + fix: tp.Dict[str, float] = defaultdict(float) + total: tp.Dict[str, float] = defaultdict(float) + + def _update(metrics: dict, weight: float = 1) -> dict: + nonlocal total, fix + for key, value in metrics.items(): + total[key] = total[key] * beta + weight * float(value) + fix[key] = fix[key] * beta + weight + return {key: tot / fix[key] for key, tot in total.items()} + return _update + + +def sizeof_fmt(num: float, suffix: str = 'B'): + """ + Given `num` bytes, return human readable size. + Taken from https://stackoverflow.com/a/1094933 + """ + for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: + if abs(num) < 1024.0: + return "%3.1f%s%s" % (num, unit, suffix) + num /= 1024.0 + return "%.1f%s%s" % (num, 'Yi', suffix) + + +@contextmanager +def temp_filenames(count: int, delete=True): + names = [] + try: + for _ in range(count): + names.append(tempfile.NamedTemporaryFile(delete=False).name) + yield names + finally: + if delete: + for name in names: + os.unlink(name) + + +def random_subset(dataset, max_samples: int, seed: int = 42): + if max_samples >= len(dataset): + return dataset + + generator = torch.Generator().manual_seed(seed) + perm = torch.randperm(len(dataset), generator=generator) + return Subset(dataset, perm[:max_samples].tolist()) + + +class DummyPoolExecutor: + class DummyResult: + def __init__(self, func, *args, **kwargs): + self.func = func + self.args = args + self.kwargs = kwargs + + def result(self): + return self.func(*self.args, **self.kwargs) + + def __init__(self, workers=0): + pass + + def submit(self, func, *args, **kwargs): + return DummyPoolExecutor.DummyResult(func, *args, **kwargs) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, exc_tb): + return diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/wav.py b/AutoCoverTool/ref/music_remover/demucs/demucs/wav.py new file mode 100644 index 0000000..a0e2dd4 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/wav.py @@ -0,0 +1,243 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +"""Loading wav based datasets, including MusdbHQ.""" + +from collections import OrderedDict +import hashlib +import math +import json +import os +from pathlib import Path +import tqdm + +import musdb +import julius +import torch as th +from torch import distributed +import torchaudio as ta +from torch.nn import functional as F + +from .audio import convert_audio_channels +from . 
import distrib + +MIXTURE = "mixture" +EXT = ".wav" + + +def _track_metadata(track, sources, normalize=True, ext=EXT): + track_length = None + track_samplerate = None + mean = 0 + std = 1 + for source in sources + [MIXTURE]: + file = track / f"{source}{ext}" + try: + info = ta.info(str(file)) + except RuntimeError: + print(file) + raise + length = info.num_frames + if track_length is None: + track_length = length + track_samplerate = info.sample_rate + elif track_length != length: + raise ValueError( + f"Invalid length for file {file}: " + f"expecting {track_length} but got {length}.") + elif info.sample_rate != track_samplerate: + raise ValueError( + f"Invalid sample rate for file {file}: " + f"expecting {track_samplerate} but got {info.sample_rate}.") + if source == MIXTURE and normalize: + try: + wav, _ = ta.load(str(file)) + except RuntimeError: + print(file) + raise + wav = wav.mean(0) + mean = wav.mean().item() + std = wav.std().item() + + return {"length": length, "mean": mean, "std": std, "samplerate": track_samplerate} + + +def build_metadata(path, sources, normalize=True, ext=EXT): + """ + Build the metadata for `Wavset`. + + Args: + path (str or Path): path to dataset. + sources (list[str]): list of sources to look for. + normalize (bool): if True, loads full track and store normalization + values based on the mixture file. + ext (str): extension of audio files (default is .wav). + """ + + meta = {} + path = Path(path) + pendings = [] + from concurrent.futures import ThreadPoolExecutor + with ThreadPoolExecutor(8) as pool: + for root, folders, files in os.walk(path, followlinks=True): + root = Path(root) + if root.name.startswith('.') or folders or root == path: + continue + name = str(root.relative_to(path)) + pendings.append((name, pool.submit(_track_metadata, root, sources, normalize, ext))) + # meta[name] = _track_metadata(root, sources, normalize, ext) + for name, pending in tqdm.tqdm(pendings, ncols=120): + meta[name] = pending.result() + return meta + + +class Wavset: + def __init__( + self, + root, metadata, sources, + segment=None, shift=None, normalize=True, + samplerate=44100, channels=2, ext=EXT): + """ + Waveset (or mp3 set for that matter). Can be used to train + with arbitrary sources. Each track should be one folder inside of `path`. + The folder should contain files named `{source}.{ext}`. + + Args: + root (Path or str): root folder for the dataset. + metadata (dict): output from `build_metadata`. + sources (list[str]): list of source names. + segment (None or float): segment length in seconds. If `None`, returns entire tracks. + shift (None or float): stride in seconds bewteen samples. + normalize (bool): normalizes input audio, **based on the metadata content**, + i.e. the entire track is normalized, not individual extracts. + samplerate (int): target sample rate. if the file sample rate + is different, it will be resampled on the fly. + channels (int): target nb of channels. if different, will be + changed onthe fly. + ext (str): extension for audio files (default is .wav). + + samplerate and channels are converted on the fly. 
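+
+        Illustrative usage (example added for clarity, not from upstream):
+
+            sources = ["drums", "bass", "other", "vocals"]
+            meta = build_metadata(root, sources)
+            dset = Wavset(root, meta, sources, segment=10, shift=1)
+            example = dset[0]  # tensor of shape (len(sources), channels, time)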
+ """ + self.root = Path(root) + self.metadata = OrderedDict(metadata) + self.segment = segment + self.shift = shift or segment + self.normalize = normalize + self.sources = sources + self.channels = channels + self.samplerate = samplerate + self.ext = ext + self.num_examples = [] + for name, meta in self.metadata.items(): + track_duration = meta['length'] / meta['samplerate'] + if segment is None or track_duration < segment: + examples = 1 + else: + examples = int(math.ceil((track_duration - self.segment) / self.shift) + 1) + self.num_examples.append(examples) + + def __len__(self): + return sum(self.num_examples) + + def get_file(self, name, source): + return self.root / name / f"{source}{self.ext}" + + def __getitem__(self, index): + for name, examples in zip(self.metadata, self.num_examples): + if index >= examples: + index -= examples + continue + meta = self.metadata[name] + num_frames = -1 + offset = 0 + if self.segment is not None: + offset = int(meta['samplerate'] * self.shift * index) + num_frames = int(math.ceil(meta['samplerate'] * self.segment)) + wavs = [] + for source in self.sources: + file = self.get_file(name, source) + wav, _ = ta.load(str(file), frame_offset=offset, num_frames=num_frames) + wav = convert_audio_channels(wav, self.channels) + wavs.append(wav) + + example = th.stack(wavs) + example = julius.resample_frac(example, meta['samplerate'], self.samplerate) + if self.normalize: + example = (example - meta['mean']) / meta['std'] + if self.segment: + length = int(self.segment * self.samplerate) + example = example[..., :length] + example = F.pad(example, (0, length - example.shape[-1])) + return example + + +def get_wav_datasets(args, name='wav'): + """Extract the wav datasets from the XP arguments.""" + path = getattr(args, name) + sig = hashlib.sha1(str(path).encode()).hexdigest()[:8] + metadata_file = Path(args.metadata) / ('wav_' + sig + ".json") + train_path = Path(path) / "train" + valid_path = Path(path) / "valid" + if not metadata_file.is_file() and distrib.rank == 0: + metadata_file.parent.mkdir(exist_ok=True, parents=True) + train = build_metadata(train_path, args.sources) + valid = build_metadata(valid_path, args.sources) + json.dump([train, valid], open(metadata_file, "w")) + if distrib.world_size > 1: + distributed.barrier() + train, valid = json.load(open(metadata_file)) + if args.full_cv: + kw_cv = {} + else: + kw_cv = {'segment': args.segment, 'shift': args.shift} + train_set = Wavset(train_path, train, args.sources, + segment=args.segment, shift=args.shift, + samplerate=args.samplerate, channels=args.channels, + normalize=args.normalize) + valid_set = Wavset(valid_path, valid, [MIXTURE] + list(args.sources), + samplerate=args.samplerate, channels=args.channels, + normalize=args.normalize, **kw_cv) + return train_set, valid_set + + +def _get_musdb_valid(): + # Return musdb valid set. 
+ import yaml + setup_path = Path(musdb.__path__[0]) / 'configs' / 'mus.yaml' + setup = yaml.safe_load(open(setup_path, 'r')) + return setup['validation_tracks'] + + +def get_musdb_wav_datasets(args): + """Extract the musdb dataset from the XP arguments.""" + sig = hashlib.sha1(str(args.musdb).encode()).hexdigest()[:8] + metadata_file = Path(args.metadata) / ('musdb_' + sig + ".json") + root = Path(args.musdb) / "train" + if not metadata_file.is_file() and distrib.rank == 0: + metadata_file.parent.mkdir(exist_ok=True, parents=True) + metadata = build_metadata(root, args.sources) + json.dump(metadata, open(metadata_file, "w")) + if distrib.world_size > 1: + distributed.barrier() + metadata = json.load(open(metadata_file)) + + valid_tracks = _get_musdb_valid() + if args.train_valid: + metadata_train = metadata + else: + metadata_train = {name: meta for name, meta in metadata.items() if name not in valid_tracks} + metadata_valid = {name: meta for name, meta in metadata.items() if name in valid_tracks} + if args.full_cv: + kw_cv = {} + else: + kw_cv = {'segment': args.segment, 'shift': args.shift} + train_set = Wavset(root, metadata_train, args.sources, + segment=args.segment, shift=args.shift, + samplerate=args.samplerate, channels=args.channels, + normalize=args.normalize) + valid_set = Wavset(root, metadata_valid, [MIXTURE] + list(args.sources), + samplerate=args.samplerate, channels=args.channels, + normalize=args.normalize, **kw_cv) + return train_set, valid_set diff --git a/AutoCoverTool/ref/music_remover/demucs/demucs/wdemucs.py b/AutoCoverTool/ref/music_remover/demucs/demucs/wdemucs.py new file mode 100644 index 0000000..60ec8d0 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/demucs/wdemucs.py @@ -0,0 +1,9 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. +# For compat +from .hdemucs import HDemucs + +WDemucs = HDemucs diff --git a/AutoCoverTool/ref/music_remover/demucs/docs/linux.md b/AutoCoverTool/ref/music_remover/demucs/docs/linux.md new file mode 100644 index 0000000..482ab5d --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/docs/linux.md @@ -0,0 +1,28 @@ +# Linux support for Demucs + +If your distribution has at least Python 3.7, and you just wish to separate +tracks with Demucs, not train it, you can just run + +```bash +pip3 install --user -U demucs +# Then anytime you want to use demucs, just do +python3 -m demucs -d cpu PATH_TO_AUDIO_FILE_1 +# If you have added the user specific pip bin/ folder to your path, you can also do +demucs -d cpu PATH_TO_AUDIO_FILE_1 +``` + +If Python is too old, or you want to be able to train, I recommend [installing Miniconda][miniconda], with Python 3.7 or more. + +```bash +conda activate +pip3 install -U demucs +# Then anytime you want to use demucs, first do conda activate, then +demucs -d cpu PATH_TO_AUDIO_FILE_1 +``` + +Of course, you can also use a specific env for Demucs. + +**Important, torchaudio 0.12 update:** Torchaudio no longer supports decoding mp3s without ffmpeg installed. You must have ffmpeg installed, either through Anaconda (`conda install ffmpeg -c conda-forge`) or as a distribution package (e.g. `sudo apt-get install ffmpeg`). 
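+
+If you are unsure whether ffmpeg is visible from your environment, running `ffmpeg -version` from the same shell is a quick sanity check (an illustrative tip, not from the original docs).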
+
+
+[miniconda]: https://docs.conda.io/en/latest/miniconda.html#linux-installers
diff --git a/AutoCoverTool/ref/music_remover/demucs/docs/mac.md b/AutoCoverTool/ref/music_remover/demucs/docs/mac.md
new file mode 100644
index 0000000..6e6c3d0
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/docs/mac.md
@@ -0,0 +1,28 @@
+# Mac OS X support for Demucs
+
+If you have a sufficiently recent version of OS X, you can just run
+
+```bash
+python3 -m pip install --user -U demucs
+# Then anytime you want to use demucs, just do
+python3 -m demucs -d cpu PATH_TO_AUDIO_FILE_1
+# If you have added the user specific pip bin/ folder to your path, you can also do
+demucs -d cpu PATH_TO_AUDIO_FILE_1
+```
+
+If you do not already have Anaconda installed or much experience with the terminal on Mac OS X, here are some detailed instructions:
+
+1. Download [Anaconda 3.8 (or more recent) 64-bit for macOS][anaconda].
+2. Open [Anaconda Prompt in macOS][prompt].
+3. Follow these commands:
+```bash
+conda activate
+pip3 install -U demucs
+# Then anytime you want to use demucs, first do conda activate, then
+demucs -d cpu PATH_TO_AUDIO_FILE_1
+```
+
+**Important, torchaudio 0.12 update:** Torchaudio no longer supports decoding mp3s without ffmpeg installed. You must have ffmpeg installed, either through Anaconda (`conda install ffmpeg -c conda-forge`) or with Homebrew for instance (`brew install ffmpeg`).
+
+[anaconda]: https://www.anaconda.com/distribution/#download-section
+[prompt]: https://docs.anaconda.com/anaconda/user-guide/getting-started/#open-nav-mac
diff --git a/AutoCoverTool/ref/music_remover/demucs/docs/mdx.md b/AutoCoverTool/ref/music_remover/demucs/docs/mdx.md
new file mode 100644
index 0000000..2a20f9c
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/docs/mdx.md
@@ -0,0 +1,73 @@
+# Music DemiXing challenge (MDX)
+
+If you want to use Demucs for the [MDX challenge](https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021),
+please follow the instructions hereafter.
+
+## Installing Demucs
+
+Follow the instructions from the [main README](https://github.com/facebookresearch/demucs#requirements)
+in order to set up Demucs using Anaconda. You will need the full setup for training, including soundstretch.
+
+## Getting MusDB-HQ
+
+Download [MusDB-HQ](https://zenodo.org/record/3338373) to some folder and unzip it.
+
+## Training Demucs
+
+Train Demucs (you might need to change the batch size depending on the number of GPUs available).
+It seems 48 channels are enough to get the best performance on MusDB-HQ, and training will be faster
+and less memory demanding. In any case, the 64 channel version times out on the challenge.
+```bash
+./run.py --channels=48 --batch_size 64 --musdb=PATH_TO_MUSDB --is_wav [EXTRA_FLAGS]
+```
+
+### Post training
+
+Once the training is completed, a new model file will be exported in `models/`.
+
+You can look at the SDR on the MusDB dataset using `python result_table.py`.
+
+
+### Evaluate and export a model before training is over
+
+If you want to export a model before training is complete, use the following command:
+```bash
+python -m demucs [ALL EXACT TRAINING FLAGS] --save_model
+```
+You can also pass the `--half` flag, in order to save weights in half precision. This will halve the model size and won't impact SDR.
+
+Once this is done, you can partially evaluate a model with
+```bash
+./run.py --test NAME_OF_MODEL.th --musdb=PATH_TO_MUSDB --is_wav
+```
+
+**Note:** `NAME_OF_MODEL.th` is given relative to the models folder (given by `--models`, defaults to `models/`), so don't include the folder in the name.
+
+
+### Training smaller models
+
+If you want to quickly test an idea, I would recommend training a 16 kHz model and checking whether things work there, before training the full 44.1 kHz model. You can train one of those with
+```bash
+./run.py --channels=32 --samplerate 16000 --samples 160000 --data_stride 16000 --depth=5 --batch_size 64 --repitch=0 --musdb=PATH_TO_MUSDB --is_wav [EXTRA_FLAGS]
+```
+(repitch must be turned off, because things will break at 16 kHz).
+
+## Submitting your model
+
+1. Git clone [the Music Demixing Challenge - Starter Kit - Demucs Edition](https://github.com/adefossez/music-demixing-challenge-starter-kit).
+2. Inside the starter kit, create a `models/` folder and copy over the trained model from the Demucs repo (renaming it, for instance, to `my_model.th`).
+3. Inside the `test_demuc.py` file, change the function `prediction_setup`: comment out the loading
+of the pre-trained model, and uncomment the code to load your own model.
+4. Edit the file `aicrowd.json` with your username.
+5. Install [git-lfs](https://git-lfs.github.com/). Then run
+
+```bash
+git lfs install
+git add models/
+git add -u .
+git commit -m "My Demucs submission"
+```
+6. Follow the [submission instructions](https://github.com/AIcrowd/music-demixing-challenge-starter-kit/blob/master/docs/SUBMISSION.md).
+
+Best of luck 🤞
diff --git a/AutoCoverTool/ref/music_remover/demucs/docs/release.md b/AutoCoverTool/ref/music_remover/demucs/docs/release.md
new file mode 100644
index 0000000..b5343ca
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/docs/release.md
@@ -0,0 +1,80 @@
+# Release notes for Demucs
+
+
+## V4.0.0a, TBC
+
+Added the Hybrid Transformer Demucs model.
+
+Added support for the [Torchaudio implementation of HDemucs](https://pytorch.org/audio/main/tutorials/hybrid_demucs_tutorial.html), thanks @skim0514.
+
+## V3.0.6, 16th of November 2022
+
+Option to customize the output path of stems (@CarlGao4).
+
+Fixed a bug in pad1d that occasionally led to failures.
+
+## V3.0.5, 17th of August 2022
+
+Added a `--segment` flag to customize the segment length and use less memory (thanks @CarlGao4).
+
+Fixed a reflect padding bug on small inputs.
+
+Compatible with PyTorch 1.12.
+
+## V3.0.4, 24th of February 2022
+
+Added an option to split into two stems (i.e. vocals vs. non-vocals), thanks to @CarlGao4.
+
+Added `--float32`, `--int24` and `--clip-mode` options to customize how output stems are saved.
+
+## V3.0.3, 2nd of December 2021
+
+Fixed a bug in the weights used for different sources. Thanks @keunwoochoi for the report and fix.
+
+Drastically improved memory usage on GPU for long files. Thanks a lot @famzah for providing this.
+
+Added multithreaded evaluation on CPU (`-j` option).
+
+(v3.0.2 had a bug with the CPU pool and is skipped.)
+
+## V3.0.1, 12th of November 2021
+
+Release of Demucs v3, featuring hybrid domain separation and much more.
+This drops support for Conv-Tasnet and training on the non-HQ MusDB dataset.
+There is no version 3.0.0 because I messed up.
+
+## V2.0.2, 26th of May 2021
+
+- Fix in Tasnet (PR #178)
+- Use ffmpeg in priority when available instead of torchaudio, to avoid a small shift in MP3 data.
+- Other minor fixes.
+
+## v2.0.1, 11th of May 2021
+
+MusDB HQ support added. Custom wav dataset support added.
+Minor changes: there was an issue with padding when reading mp3s with torchaudio; to limit it,
+Demucs now uses ffmpeg in priority and falls back to torchaudio.
+Replaced the pre-trained demucs model with one trained on a more recent codebase.
+
+## v2.0.0, 28th of April 2021
+
+This is a big release, with a lot of breaking changes. You will likely
+need to install Demucs from scratch.
+
+
+
+- Demucs now supports on the fly resampling by a factor of 2.
+This improves SDR by almost 0.3 points.
+- Random scaling of each source added (from Uhlich et al. 2017).
+- Random pitch and tempo augmentation added, from [Cohen-Hadria et al. 2019].
+- With extra augmentation, the best performing Demucs model now has only 64 channels
+instead of 100, so model size goes from 2.4GB to 1GB. Also SDR is up from 5.6 to 6.3 when trained only on MusDB.
+- Quantized model using [DiffQ](https://github.com/facebookresearch/diffq) has been added. Model size is 150MB, with no loss in quality as far as I, or the metrics,
+can tell.
+- Pretrained models are now using the TorchHub interface.
+- Overlap mode for separation, to limit inconsistencies at
+ frame boundaries, with a linear transition over the overlap. Overlap is currently
+ at 25%. Note that this is only done for separation, not training, because
+ I added it quite late to the code. For Conv-TasNet this can improve
+ SDR quite a bit (+0.3 points, to 6.0).
+- PyPI hosting, for separation, not training!
diff --git a/AutoCoverTool/ref/music_remover/demucs/docs/training.md b/AutoCoverTool/ref/music_remover/demucs/docs/training.md
new file mode 100644
index 0000000..87e73ca
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/docs/training.md
@@ -0,0 +1,290 @@
+# Training (Hybrid) Demucs
+
+## Install all the dependencies
+
+You should install all the dependencies either with Anaconda (using the env file `environment-cuda.yml`)
+or with `pip`, using `requirements.txt`.
+
+## Datasets
+
+### MusDB HQ
+
+Note that we do not support MusDB non-HQ training anymore.
+Get the [Musdb HQ](https://zenodo.org/record/3338373) dataset, and update the path to it in two places:
+- The `dset.musdb` key inside `conf/config.yaml`.
+- The variable `MUSDB_PATH` inside `tools/automix.py`.
+
+### Create the fine tuning datasets
+
+**This is only for the MDX 2021 competition models**
+
+I use fine tuning on a dataset crafted by remixing songs in a musically plausible way.
+The automix script will make sure that BPM, first beat and pitches are aligned.
+In the file `tools/automix.py`, edit `OUTPATH` to suit your setup, as well as the `MUSDB_PATH`
+to point to your copy of MusDB HQ. Then run
+
+```bash
+export NUMBA_NUM_THREADS=1; python3 -m tools.automix
+```
+
+**Important:** the script will show many errors; those are normal. They just indicate when two stems
+ do not match due to BPM or musical scale differences.
+
+Finally, edit the file `conf/dset/auto_mus.yaml` and set `dset.wav` to the value of `OUTPATH`.
+
+If you have a custom dataset, you can also uncomment the lines `dset2 = ...` and
+`dset3 = ...` to add your custom wav data and the test set of MusDB for Track B models.
+You can then replace the paths in `conf/dset/auto_extra.yaml`, `conf/dset/auto_extra_test.yaml`
+and `conf/dset/aetl.yaml` (this last one uses 10 mixes instead of 6 for each song).
+
+### Dataset metadata cache
+
+Datasets are scanned the first time they are used to determine the files and their durations.
+If you change a dataset and need a rescan, just delete the `metadata` folder.
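+
+If you ever need to rebuild this cache by hand, here is a minimal sketch (added for illustration; the paths and output filename are placeholders) using the `build_metadata` helper from `demucs/wav.py`:
+
+```python
+import json
+from pathlib import Path
+
+from demucs.wav import build_metadata
+
+sources = ["drums", "bass", "other", "vocals"]
+meta = build_metadata(Path("/path/to/musdbhq/train"), sources)
+# Assumes the metadata/ folder already exists.
+json.dump(meta, open("metadata/manual_rescan.json", "w"))
+```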
+
+## A short intro to Dora
+
+I use [Dora][dora] to manage all of the experiments (XPs). You should have a look at the Dora README
+to learn about the tool. Here is a quick summary of what to know:
+
+- An XP is a unique set of hyper-parameters with a given signature. The signature is a hash of
+  those hyper-parameters. I will always refer to an XP by its signature, e.g. `9357e12e`.
+  We will see later that you can retrieve the hyper-params and re-run it in a single command.
+- In fact, the hash is defined as a delta between the base config and the one obtained with
+  the config overrides you passed from the command line.
+  **This means you must never change the `conf/**.yaml` files directly**,
+  except for editing things like paths. Changing the default values in the config files means
+  the XP signature won't reflect that change, and wrong checkpoints might be reused.
+  I know, this is annoying, but the reason is that otherwise, any change to the config file would
+  mean that all XPs run so far would see their signature change.
+
+### Dora commands
+
+Run `tar xvf outputs.tar.gz`. This will initialize the Dora XP repository, so that Dora knows
+which hyper-params match a signature like `9357e12e`. Once you have done that, you should be able
+to run the following:
+
+```bash
+dora info -f 81de367c # this will show the hyper-parameters used by a specific XP.
+                      # Be careful: some overrides might be present twice, and the right-most one
+                      # will give you the right value for it.
+dora run -d -f 81de367c # run an XP with the hyper-parameters from XP 81de367c.
+                        # `-d` is for distributed, it will use all available GPUs.
+dora run -d -f 81de367c hdemucs.channels=32 # start from the config of XP 81de367c but change some hyper-params.
+                                            # This will give you a new XP with a new signature (here 3fe9c332).
+```
+
+An XP runs from a specific folder based on its signature, by default under the `outputs/` folder.
+You can safely interrupt a training and resume it; it will reuse any existing checkpoint, as it will
+reuse the same folder.
+If you made some change to the code and need to ignore a previous checkpoint, you can use `dora run --clear [RUN ARGS]`.
+
+If you have a Slurm cluster, you can also use the `dora grid` command, e.g. `dora grid mdx`.
+Please refer to the [Dora documentation][dora] for more information.
+
+## Hyper parameters
+
+Have a look at [conf/config.yaml](../conf/config.yaml) for a list of all the hyper-parameters you can override.
+If you are not familiar with [Hydra](https://github.com/facebookresearch/hydra), go check out their page
+to get familiar with how to provide overrides for your trainings.
+
+
+## Model architecture
+
+A number of architectures are supported. You can select one with `model=NAME`, and have a look
+in [conf/config.yaml](../conf/config.yaml) for each architecture's specific hyperparams.
+Those specific params will always be prefixed with the architecture name when passing the override
+from the command line or in grid files. Here is the list of models:
+
+- demucs: original time-only Demucs.
+- hdemucs: Hybrid Demucs (v3).
+- torch_hdemucs: same as Hybrid Demucs, but using the [torchaudio official implementation](https://pytorch.org/audio/stable/tutorials/hybrid_demucs_tutorial.html).
+- htdemucs: Hybrid Transformer Demucs (v4).
+
+### Storing config in files
+
+As mentioned earlier, you should never change the base config files. However, you can use Hydra config groups
+in order to store variants you often use.
+If you want to create a new variant combining multiple hyper-params,
+copy the file `conf/variant/example.yaml` to `conf/variant/my_variant.yaml`, and then you can use it with
+
+```bash
+dora train -d variant=my_variant
+```
+
+Once you have started training models with this file, you should no longer edit it.
+
+
+## Fine tuning
+
+Once a first model is trained, you can fine tune it with other settings (e.g. the automix dataset) with
+
+```bash
+dora run -d -f 81de367c continue_from=81de367c dset=auto_mus variant=finetune
+```
+
+Note that you need both `-f 81de367c` and `continue_from=81de367c`. The first one indicates
+that the hyper-params of `81de367c` should be used as a starting point for the config.
+The second indicates that the weights from `81de367c` should be used as a starting point for the solver.
+
+
+## Model evaluation
+
+Your model will be evaluated automatically with the new SDR definition from MDX every 20 epochs.
+Old style SDR (which is quite slow) will only be computed at the end of training.
+
+## Model Export
+
+
+In order to use your models with other commands (such as the `demucs` command for separation) you must
+export them. For that, run
+
+```bash
+python3 -m tools.export 9357e12e [OTHER SIGS ...] # replace with the appropriate signatures.
+```
+
+The models will be stored under `release_models/`. You can use them with the `demucs` separation command with the following flags:
+```bash
+demucs --repo ./release_models -n 9357e12e my_track.mp3
+```
+
+### Bag of models
+
+If you want to combine multiple models, potentially with different weights for each source, you can copy
+`demucs/remote/mdx.yaml` to `./release_models/my_bag.yaml`. You can then edit the list of models (all models used should have been exported first) and the weights per source and model (a list of lists, the outer list over models, the inner list over sources). You can then use your bag of models as
+
+```bash
+demucs --repo ./release_models -n my_bag my_track.mp3
+```
+
+## Evaluating pre-trained models
+
+You can evaluate any pre-trained model or bag of models using the following command:
+```bash
+python3 -m tools.test_pretrained -n NAME_OF_MODEL [EXTRA ARGS]
+```
+where `NAME_OF_MODEL` is either the name of the bag (e.g. `mdx`, `repro_mdx_a`),
+or a single Dora signature of one of the models in the bag. You can pass `EXTRA ARGS` to customize
+the test options, like the number of random shifts (e.g. `test.shifts=2`). This will compute the old-style
+SDR and can take quite a bit of time.
+
+For custom models that were trained locally, you will need to indicate that you wish
+to use the local model repository, with the `--repo ./release_models` flag, e.g.,
+```bash
+python3 -m tools.test_pretrained --repo ./release_models -n my_bag
+```
+
+
+## API to retrieve the model
+
+You can retrieve officially released models in Python using the following API:
+```python
+from demucs import pretrained
+from demucs.apply import apply_model
+bag = pretrained.get_model('htdemucs') # for a bag of models or a named model
+                                       # (which is just a bag with 1 model).
+model = pretrained.get_model('955717e8') # using the signature for single models.
+
+bag.models # list of individual models
+stems = apply_model(model, mix) # apply the model to the given mix.
+```
+
+## Model Zoo
+
+### Hybrid Transformer Demucs
+
+The configurations for the Hybrid Transformer models are available in:
+
+```shell
+dora grid mmi --dry_run --init
+dora grid mmi_ft --dry_run --init # fine tuned on each source.
+```
+
+We release in particular `955717e8`, a Hybrid Transformer Demucs using 5 layers, 512 channels and a 10 second training segment length. We also release its fine tuned version, with one model
+for each source: `f7e0c4bc`, `d12395a8`, `92cfc3b6`, `04573f0d` (drums, bass, other, vocals).
+The model `955717e8` is also named `htdemucs`, while the bag of models is provided
+as `htdemucs_ft`.
+
+We also release `75fc33f5`, a regular Hybrid Demucs trained on the same dataset,
+available as `hdemucs_mmi`.
+
+
+
+### Models from the MDX Competition 2021
+
+
+Here is a short description of the models used for the MDX submission, either Track A (MusDB HQ only)
+or Track B (extra training data allowed). Training happens in two stages, with the second stage
+being the fine tuning on the automix-generated dataset.
+All the fine tuned models are available on our AWS repository
+(you can retrieve them with `demucs.pretrained.get_model(SIG)`). The bags of models are available
+by doing `demucs.pretrained.get_model(NAME)` with `NAME` being either `mdx` (for Track A) or `mdx_extra`
+(for Track B).
+
+#### Track A
+
+The 4 models are:
+
+- `0d19c1c6`: fine-tuned on the automix dataset from `9357e12e`
+- `7ecf8ec1`: fine-tuned on the automix dataset from `e312f349`
+- `c511e2ab`: fine-tuned on the automix dataset from `81de367c`
+- `7d865c68`: fine-tuned on the automix dataset from `80a68df8`
+
+The 4 initial models (before fine tuning) are:
+
+- `9357e12e`: 64ch time domain only improved Demucs, with new residual branches, group norm,
+  and singular value penalty.
+- `e312f349`: 64ch time domain only improved Demucs, with new residual branches, group norm,
+  and singular value penalty, trained with a loss that focuses only on drums and bass.
+- `81de367c`: 48ch hybrid model, with residual branches, group norm,
+  singular value penalty and amplitude spectrogram.
+- `80a68df8`: same as b5559babb but using CaC and a different
+  random seed, as well as different weights per frequency band in the outermost layers.
+
+The hybrid models are combined with equal weights for all sources except for the bass.
+`0d19c1c6` (time domain) is used for both drums and bass. `7ecf8ec1` is used only for the bass.
+
+You can see all the hyper-parameters at once (one common line for all shared hyper-params, then only
+the hyper-parameters that differ), along with the DiffQ variants that are used for the `mdx_q` models, with:
+```
+dora grid mdx --dry_run --init
+dora grid mdx_q --dry_run --init
+```
+
+#### Track B
+
+- `e51eebcc`
+- `a1d90b5c`
+- `5d2d6c55`
+- `cfa93e08`
+
+All the models are 48ch hybrid demucs with different random seeds. Two of them
+use CaC, and two use amplitude spectrograms with masking.
+All the models are combined with equal weights for all sources.
+
+Things are a bit messy for Track B: there was a lot of fine tuning
+over different datasets. I won't describe the entire genealogy of models here,
+but all the information can be accessed with the `dora info -f SIG` command.
+
+Similarly, you can run the following (the output will contain a few extra lines, for training without the MusDB test set, and extra DiffQ XPs):
+```
+dora grid mdx_extra --dry_run --init
+```
+
+### Reproducibility and Ablation
+
+I updated the paper to report numbers with a more homogeneous setup than the one used for the competition.
+On MusDB HQ, I still need to use a combination of time only and hybrid models to achieve the best performance.
+The experiments are provided in the grids [repro.py](../demucs/grids/repro.py) and
+[repro_ft.py](../demucs/grids/repro_ft.py) for the fine tuning on the realistic mix datasets.
+
+The new bag of models reaches an SDR of 7.64 (vs. 7.68 for the original track A model). It uses
+2 time only models trained with residual branches, local attention and the SVD penalty,
+along with 2 hybrid models, with the same features, and using the CaC representation.
+We average the performance of all the models with the same weight over all sources, unlike
+what was done for the original track A model. We trained for 600 epochs, against 360 before.
+
+The new bag of models is available as part of the pretrained models as `repro_mdx_a`.
+The time only bag is named `repro_mdx_a_time_only`, and the hybrid only one `repro_mdx_a_hybrid_only`.
+Check out the paper for more information on the training.
+
+[dora]: https://github.com/facebookresearch/dora
diff --git a/AutoCoverTool/ref/music_remover/demucs/docs/windows.md b/AutoCoverTool/ref/music_remover/demucs/docs/windows.md
new file mode 100644
index 0000000..36ec05d
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/docs/windows.md
@@ -0,0 +1,58 @@
+# Windows support for Demucs
+
+## Installation and usage
+
+Parts of the code are untested on Windows (in particular, training a new model). If you don't have much experience with Anaconda, Python or the shell, here are more detailed instructions. Note that **Demucs is not supported on 32-bit systems** (as PyTorch is not available there).
+
+- First install Anaconda with **Python 3.7** or more recent, which you can find [here][install].
+- Start the [Anaconda prompt][prompt].
+
+Then, all commands that follow must be run from this prompt.
+
+### If you want to use your GPU
+
+If you have a graphics card made by Nvidia with more than 6 GiB of memory, you can separate tracks with GPU acceleration. To achieve this, you must install PyTorch with CUDA. If PyTorch was already installed (you already installed Demucs for instance), first run `python.exe -m pip uninstall torch torchaudio`.
+Then visit the [PyTorch Home Page](https://pytorch.org/get-started/locally/) and follow the guide on it to install with CUDA support.
+
+### Installation
+
+Start the Anaconda prompt, and run the following:
+```
+conda install -c conda-forge ffmpeg
+python.exe -m pip install -U demucs PySoundFile
+```
+
+### Upgrade
+
+To upgrade Demucs, simply run `python.exe -m pip install -U demucs`, from the Anaconda prompt.
+
+### Usage
+
+Then to use Demucs, just start the **Anaconda prompt** and run:
+```
+demucs -d cpu "PATH_TO_AUDIO_FILE_1" ["PATH_TO_AUDIO_FILE_2" ...]
+```
+The `"` around the filename are required if the path contains spaces.
+The separated files will be under `C:\Users\YOUR_USERNAME\demucs\separated\demucs\`.
+
+
+### Separating an entire folder
+
+You can use the following command to separate an entire folder of mp3s for instance (replace the extension `.mp3` if need be for other file types):
+```
+cd FOLDER
+for %i in (*.mp3) do (demucs -d cpu "%i")
+```
+
+
+## Potential errors
+
+If you have an error saying that `mkl_intel_thread.dll` cannot be found, you can try to first run
+`conda install -c defaults intel-openmp -f`. Then try again to run the `demucs` command. If it still doesn't work, you can try to run first `set CONDA_DLL_SEARCH_MODIFICATION_ENABLE=1`, then again the `demucs` command and hopefully it will work 🙏.
+
+**If you get a permission error**, please try starting the Anaconda Prompt as administrator.
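+
+When debugging GPU issues, it can also help to confirm that PyTorch actually sees your graphics card, e.g. with `python.exe -c "import torch; print(torch.cuda.is_available())"` (an illustrative check, not from the original docs).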
+
+
+[install]: https://www.anaconda.com/distribution/#windows
+[prompt]: https://docs.anaconda.com/anaconda/user-guide/getting-started/#open-prompt-win
diff --git a/AutoCoverTool/ref/music_remover/demucs/environment-cpu.yml b/AutoCoverTool/ref/music_remover/demucs/environment-cpu.yml
new file mode 100644
index 0000000..f921d56
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/environment-cpu.yml
@@ -0,0 +1,28 @@
+name: demucs
+
+channels:
+  - pytorch
+  - conda-forge
+
+dependencies:
+  - python>=3.7,<3.10
+  - ffmpeg>=4.2
+  - pytorch>=1.8.1
+  - torchaudio>=0.8
+  - tqdm>=4.36
+  - pip
+  - pip:
+    - diffq>=0.2
+    - dora-search
+    - einops
+    - hydra-colorlog>=1.1
+    - hydra-core>=1.1
+    - julius>=0.2.3
+    - lameenc>=1.2
+    - openunmix
+    - musdb>=0.4.0
+    - museval>=0.4.0
+    - soundfile
+    - submitit
+    - treetable>=0.2.3
+
diff --git a/AutoCoverTool/ref/music_remover/demucs/environment-cuda.yml b/AutoCoverTool/ref/music_remover/demucs/environment-cuda.yml
new file mode 100644
index 0000000..abbd258
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/environment-cuda.yml
@@ -0,0 +1,28 @@
+name: demucs
+
+channels:
+  - pytorch
+  - conda-forge
+
+dependencies:
+  - python>=3.7,<3.10
+  - ffmpeg>=4.2
+  - pytorch>=1.8.1
+  - torchaudio>=0.8
+  - cudatoolkit>=10
+  - tqdm>=4.36
+  - pip
+  - pip:
+    - diffq>=0.2
+    - dora-search
+    - einops
+    - hydra-colorlog>=1.1
+    - hydra-core>=1.1
+    - julius>=0.2.3
+    - lameenc>=1.2
+    - openunmix
+    - musdb>=0.4.0
+    - museval>=0.4.0
+    - soundfile
+    - submitit
+    - treetable>=0.2.3
diff --git a/AutoCoverTool/ref/music_remover/demucs/hubconf.py b/AutoCoverTool/ref/music_remover/demucs/hubconf.py
new file mode 100644
index 0000000..4e64777
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/hubconf.py
@@ -0,0 +1,11 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+dependencies = ['dora-search', 'diffq', 'julius', 'lameenc', 'openunmix', 'pyyaml',
+                'torch', 'torchaudio', 'tqdm']
+
+from demucs.pretrained import get_model
+
diff --git a/AutoCoverTool/ref/music_remover/demucs/mypy.ini b/AutoCoverTool/ref/music_remover/demucs/mypy.ini
new file mode 100644
index 0000000..c4e17f1
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/mypy.ini
@@ -0,0 +1,5 @@
+[mypy]
+
+[mypy-treetable,torchaudio.*,diffq,yaml,tqdm,lameenc,musdb,museval,openunmix.*,einops,xformers.*]
+ignore_missing_imports = True
+
diff --git a/AutoCoverTool/ref/music_remover/demucs/outputs.tar.gz b/AutoCoverTool/ref/music_remover/demucs/outputs.tar.gz
new file mode 100644
index 0000000..51933ac
Binary files /dev/null and b/AutoCoverTool/ref/music_remover/demucs/outputs.tar.gz differ
diff --git a/AutoCoverTool/ref/music_remover/demucs/readme.txt b/AutoCoverTool/ref/music_remover/demucs/readme.txt
new file mode 100644
index 0000000..7a33a51
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/readme.txt
@@ -0,0 +1,21 @@
+Music separation tool
+
+Overview:
+    demucs source repository: https://github.com/facebookresearch/demucs
+    This code was pulled directly from the repository above, using the main branch as of Mon Nov 21.
+
+Main changes:
+    Added the separate_interface.py wrapper, which takes an audio file and generates the accompaniment file.
+
+Environment setup:
+    1. Basic GPU environment setup:
+       https://phabricator.ushow.media/w/%E9%9F%B3%E8%A7%86%E9%A2%91%E7%BB%84%E6%96%87%E6%A1%A3/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85/gpu%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85/
+    2. Install the demucs environment:
+       conda create -n demucs python==3.7
+    3. Install the standard_audio binary:
+       the source lives at av_cv_research/tools/music_remover/standard_audio;
+       after building and installing, place it at /opt/soft/bin/standard_audio
+
+Quick start (server side):
+    python3 separate_interface.py xishuashua.mp4 xishuashua.m4a
+    xishuashua.mp4 is 3 min 36 s long; the average runtime over 3 runs is 27.33 s
diff --git a/AutoCoverTool/ref/music_remover/demucs/requirements.txt b/AutoCoverTool/ref/music_remover/demucs/requirements.txt
new file mode 100644
index 0000000..c263020
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/requirements.txt
@@ -0,0 +1,19 @@
+# please make sure you already have a pytorch install that is cuda enabled!
+dora-search
+diffq>=0.2.1
+einops
+flake8
+hydra-colorlog>=1.1
+hydra-core>=1.1
+julius>=0.2.3
+lameenc>=1.2
+museval
+mypy
+openunmix
+pyyaml
+submitit
+torch>=1.8.1
+torchaudio>=0.8
+tqdm
+treetable
+soundfile>=0.10.3;sys_platform=="win32"
diff --git a/AutoCoverTool/ref/music_remover/demucs/requirements_minimal.txt b/AutoCoverTool/ref/music_remover/demucs/requirements_minimal.txt
new file mode 100644
index 0000000..f1ccb05
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/requirements_minimal.txt
@@ -0,0 +1,11 @@
+# please make sure you already have a pytorch install that is cuda enabled!
+dora-search
+diffq>=0.2.1
+einops
+julius>=0.2.3
+lameenc>=1.2
+openunmix
+pyyaml
+torch>=1.8.1
+torchaudio>=0.8
+tqdm
diff --git a/AutoCoverTool/ref/music_remover/demucs/setup.cfg b/AutoCoverTool/ref/music_remover/demucs/setup.cfg
new file mode 100644
index 0000000..d54d56a
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/setup.cfg
@@ -0,0 +1,8 @@
+[pep8]
+max-line-length = 100
+
+[flake8]
+max-line-length = 100
+
+[yapf]
+column_limit = 100
diff --git a/AutoCoverTool/ref/music_remover/demucs/setup.py b/AutoCoverTool/ref/music_remover/demucs/setup.py
new file mode 100644
index 0000000..dc3de9d
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/setup.py
@@ -0,0 +1,75 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+# author: adefossez
+# Inspired by https://github.com/kennethreitz/setup.py
+
+from pathlib import Path
+
+from setuptools import setup
+
+
+NAME = 'demucs'
+DESCRIPTION = 'Music source separation in the waveform domain.'
+
+URL = 'https://github.com/facebookresearch/demucs'
+EMAIL = 'defossez@fb.com'
+AUTHOR = 'Alexandre Défossez'
+REQUIRES_PYTHON = '>=3.7.0'
+
+HERE = Path(__file__).parent
+
+# Get version without explicitly loading the module.
+for line in open('demucs/__init__.py'): + line = line.strip() + if '__version__' in line: + context = {} + exec(line, context) + VERSION = context['__version__'] + + +def load_requirements(name): + required = [i.strip() for i in open(HERE / name)] + required = [i for i in required if not i.startswith('#')] + return required + + +REQUIRED = load_requirements('requirements_minimal.txt') +ALL_REQUIRED = load_requirements('requirements.txt') + +try: + with open(HERE / "README.md", encoding='utf-8') as f: + long_description = '\n' + f.read() +except FileNotFoundError: + long_description = DESCRIPTION + +setup( + name=NAME, + version=VERSION, + description=DESCRIPTION, + long_description=long_description, + long_description_content_type='text/markdown', + author=AUTHOR, + author_email=EMAIL, + python_requires=REQUIRES_PYTHON, + url=URL, + packages=['demucs'], + extras_require={ + 'dev': ALL_REQUIRED, + }, + install_requires=REQUIRED, + include_package_data=True, + entry_points={ + 'console_scripts': ['demucs=demucs.separate:main'], + }, + license='MIT License', + classifiers=[ + # Trove classifiers + # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers + 'License :: OSI Approved :: MIT License', + 'Topic :: Multimedia :: Sound/Audio', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + ], +) diff --git a/AutoCoverTool/ref/music_remover/demucs/test.mp3 b/AutoCoverTool/ref/music_remover/demucs/test.mp3 new file mode 100644 index 0000000..668604d Binary files /dev/null and b/AutoCoverTool/ref/music_remover/demucs/test.mp3 differ diff --git a/AutoCoverTool/ref/music_remover/demucs/tools/__init__.py b/AutoCoverTool/ref/music_remover/demucs/tools/__init__.py new file mode 100644 index 0000000..a7b735c --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/tools/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. diff --git a/AutoCoverTool/ref/music_remover/demucs/tools/automix.py b/AutoCoverTool/ref/music_remover/demucs/tools/automix.py new file mode 100644 index 0000000..91400b1 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/tools/automix.py @@ -0,0 +1,343 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +""" +This script creates realistic mixes with stems from different songs. +In particular, it will align BPM, sync up the first beat and perform pitch +shift to maximize pitches overlap. +In order to limit artifacts, only parts that can be mixed with less than 15% +tempo shift, and 3 semitones of pitch shift are mixed together. 
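+As a concrete illustration of these limits (MAX_TEMPO = 0.15 and MAX_PITCH = 3,
+defined below): for a 100 BPM reference stem, a candidate qualifies only if one
+of its octave scalings (x1/4 ... x4) lands between roughly 87 and 118 BPM, and
+only if the best chroma alignment needs at most 3 semitones of shift.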
+""" +from collections import namedtuple +from concurrent.futures import ProcessPoolExecutor +import hashlib +from pathlib import Path +import random +import shutil +import tqdm +import pickle + +from librosa.beat import beat_track +from librosa.feature import chroma_cqt +import numpy as np +import torch +from torch.nn import functional as F + +from dora.utils import try_load +from demucs.audio import save_audio +from demucs.repitch import repitch +from demucs.pretrained import SOURCES +from demucs.wav import build_metadata, Wavset, _get_musdb_valid + + +MUSDB_PATH = '/checkpoint/defossez/datasets/musdbhq' +EXTRA_WAV_PATH = "/checkpoint/defossez/datasets/allstems_44" +# WARNING: OUTPATH will be completely erased. +OUTPATH = Path.home() / 'tmp/demucs_mdx/automix_musdb/' +CACHE = Path.home() / 'tmp/automix_cache' # cache BPM and pitch information. +CHANNELS = 2 +SR = 44100 +MAX_PITCH = 3 # maximum allowable pitch shift in semi tones +MAX_TEMPO = 0.15 # maximum allowable tempo shift + + +Spec = namedtuple("Spec", "tempo onsets kr track index") + + +def rms(wav, window=10000): + """efficient rms computed for each time step over a given window.""" + half = window // 2 + window = 2 * half + 1 + wav = F.pad(wav, (half, half)) + tot = wav.pow(2).cumsum(dim=-1) + return ((tot[..., window - 1:] - tot[..., :-window + 1]) / window).sqrt() + + +def analyse_track(dset, index): + """analyse track, extract bpm and distribution of notes from the bass line.""" + track = dset[index] + mix = track.sum(0).mean(0) + ref = mix.std() + + starts = (abs(mix) >= 1e-2 * ref).float().argmax().item() + track = track[..., starts:] + + cache = CACHE / dset.sig + cache.mkdir(exist_ok=True, parents=True) + + cache_file = cache / f"{index}.pkl" + cached = None + if cache_file.exists(): + cached = try_load(cache_file) + if cached is not None: + tempo, events, hist_kr = cached + + if cached is None: + drums = track[0].mean(0) + if drums.std() > 1e-2 * ref: + tempo, events = beat_track(drums.numpy(), units='time', sr=SR) + else: + print("failed drums", drums.std(), ref) + return None, track + + bass = track[1].mean(0) + r = rms(bass) + peak = r.max() + mask = r >= 0.05 * peak + bass = bass[mask] + if bass.std() > 1e-2 * ref: + kr = torch.from_numpy(chroma_cqt(bass.numpy(), sr=SR)) + hist_kr = (kr.max(dim=0, keepdim=True)[0] == kr).float().mean(1) + else: + print("failed bass", bass.std(), ref) + return None, track + + pickle.dump([tempo, events, hist_kr], open(cache_file, 'wb')) + spec = Spec(tempo, events, hist_kr, track, index) + return spec, None + + +def best_pitch_shift(kr_a, kr_b): + """find the best pitch shift between two chroma distributions.""" + deltas = [] + for p in range(12): + deltas.append((kr_a - kr_b).abs().mean()) + kr_b = kr_b.roll(1, 0) + + ps = np.argmin(deltas) + if ps > 6: + ps = ps - 12 + return ps + + +def align_stems(stems): + """Align the first beats of the stems. + This is a naive implementation. A grid with a time definition 10ms is defined and + each beat onset is represented as a gaussian over this grid. + Then, we try each possible time shift to make two grids align the best. + We repeat for all sources. 
+ """ + sources = len(stems) + width = 5e-3 # grid of 10ms + limit = 5 + std = 2 + x = torch.arange(-limit, limit + 1, 1).float() + gauss = torch.exp(-x**2 / (2 * std**2)) + + grids = [] + for wav, onsets in stems: + le = wav.shape[-1] + dur = le / SR + grid = torch.zeros(int(le / width / SR)) + for onset in onsets: + pos = int(onset / width) + if onset >= dur - 1: + continue + if onset < 1: + continue + grid[pos - limit:pos + limit + 1] += gauss + grids.append(grid) + + shifts = [0] + for s in range(1, sources): + max_shift = int(4 / width) + dots = [] + for shift in range(-max_shift, max_shift): + other = grids[s] + ref = grids[0] + if shift >= 0: + other = other[shift:] + else: + ref = ref[shift:] + le = min(len(other), len(ref)) + dots.append((ref[:le].dot(other[:le]), int(shift * width * SR))) + + _, shift = max(dots) + shifts.append(-shift) + + outs = [] + new_zero = min(shifts) + for (wav, _), shift in zip(stems, shifts): + offset = shift - new_zero + wav = F.pad(wav, (offset, 0)) + outs.append(wav) + + le = min(x.shape[-1] for x in outs) + + outs = [w[..., :le] for w in outs] + return torch.stack(outs) + + +def find_candidate(spec_ref, catalog, pitch_match=True): + """Given reference track, this finds a track in the catalog that + is a potential match (pitch and tempo delta must be within the allowable limits). + """ + candidates = list(catalog) + random.shuffle(candidates) + + for spec in candidates: + ok = False + for scale in [1/4, 1/2, 1, 2, 4]: + tempo = spec.tempo * scale + delta_tempo = spec_ref.tempo / tempo - 1 + if abs(delta_tempo) < MAX_TEMPO: + ok = True + break + if not ok: + print(delta_tempo, spec_ref.tempo, spec.tempo, "FAILED TEMPO") + # too much of a tempo difference + continue + spec = spec._replace(tempo=tempo) + + ps = 0 + if pitch_match: + ps = best_pitch_shift(spec_ref.kr, spec.kr) + if abs(ps) > MAX_PITCH: + print("Failed pitch", ps) + # too much pitch difference + continue + return spec, delta_tempo, ps + + +def get_part(spec, source, dt, dp): + """Apply given delta of tempo and delta of pitch to a stem.""" + wav = spec.track[source] + if dt or dp: + wav = repitch(wav, dp, dt * 100, samplerate=SR, voice=source == 3) + spec = spec._replace(onsets=spec.onsets / (1 + dt)) + return wav, spec + + +def build_track(ref_index, catalog): + """Given the reference track index and a catalog of track, builds + a completely new track. One of the source at random from the ref track will + be kept and other sources will be drawn from the catalog. 
+ """ + order = list(range(len(SOURCES))) + random.shuffle(order) + + stems = [None] * len(order) + indexes = [None] * len(order) + origs = [None] * len(order) + dps = [None] * len(order) + dts = [None] * len(order) + + first = order[0] + spec_ref = catalog[ref_index] + stems[first] = (spec_ref.track[first], spec_ref.onsets) + indexes[first] = ref_index + origs[first] = spec_ref.track[first] + dps[first] = 0 + dts[first] = 0 + + pitch_match = order != 0 + + for src in order[1:]: + spec, dt, dp = find_candidate(spec_ref, catalog, pitch_match=pitch_match) + if not pitch_match: + spec_ref = spec_ref._replace(kr=spec.kr) + pitch_match = True + dps[src] = dp + dts[src] = dt + wav, spec = get_part(spec, src, dt, dp) + stems[src] = (wav, spec.onsets) + indexes[src] = spec.index + origs.append(spec.track[src]) + print("FINAL CHOICES", ref_index, indexes, dps, dts) + stems = align_stems(stems) + return stems, origs + + +def get_musdb_dataset(part='train'): + root = Path(MUSDB_PATH) / part + ext = '.wav' + metadata = build_metadata(root, SOURCES, ext=ext, normalize=False) + valid_tracks = _get_musdb_valid() + metadata_train = {name: meta for name, meta in metadata.items() if name not in valid_tracks} + train_set = Wavset( + root, metadata_train, SOURCES, samplerate=SR, channels=CHANNELS, + normalize=False, ext=ext) + sig = hashlib.sha1(str(root).encode()).hexdigest()[:8] + train_set.sig = sig + return train_set + + +def get_wav_dataset(): + root = Path(EXTRA_WAV_PATH) + ext = '.wav' + metadata = _build_metadata(root, SOURCES, ext=ext, normalize=False) + train_set = Wavset( + root, metadata, SOURCES, samplerate=SR, channels=CHANNELS, + normalize=False, ext=ext) + sig = hashlib.sha1(str(root).encode()).hexdigest()[:8] + train_set.sig = sig + return train_set + + +def main(): + random.seed(4321) + if OUTPATH.exists(): + shutil.rmtree(OUTPATH) + OUTPATH.mkdir(exist_ok=True, parents=True) + (OUTPATH / 'train').mkdir(exist_ok=True, parents=True) + (OUTPATH / 'valid').mkdir(exist_ok=True, parents=True) + out = OUTPATH / 'train' + + dset = get_musdb_dataset() + # dset2 = get_wav_dataset() + # dset3 = get_musdb_dataset('test') + dset2 = None + dset3 = None + pendings = [] + copies = 6 + copies_rej = 2 + + with ProcessPoolExecutor(20) as pool: + for index in range(len(dset)): + pendings.append(pool.submit(analyse_track, dset, index)) + + if dset2: + for index in range(len(dset2)): + pendings.append(pool.submit(analyse_track, dset2, index)) + if dset3: + for index in range(len(dset3)): + pendings.append(pool.submit(analyse_track, dset3, index)) + + catalog = [] + rej = 0 + for pending in tqdm.tqdm(pendings, ncols=120): + spec, track = pending.result() + if spec is not None: + catalog.append(spec) + else: + mix = track.sum(0) + for copy in range(copies_rej): + folder = out / f'rej_{rej}_{copy}' + folder.mkdir() + save_audio(mix, folder / "mixture.wav", SR) + for stem, source in zip(track, SOURCES): + save_audio(stem, folder / f"{source}.wav", SR, clip='clamp') + rej += 1 + + for copy in range(copies): + for index in range(len(catalog)): + track, origs = build_track(index, catalog) + mix = track.sum(0) + mx = mix.abs().max() + scale = max(1, 1.01 * mx) + mix = mix / scale + track = track / scale + folder = out / f'{copy}_{index}' + folder.mkdir() + save_audio(mix, folder / "mixture.wav", SR) + for stem, source, orig in zip(track, SOURCES, origs): + save_audio(stem, folder / f"{source}.wav", SR, clip='clamp') + # save_audio(stem.std() * orig / (1e-6 + orig.std()), folder / f"{source}_orig.wav", + # SR, 
clip='clamp') + + +if __name__ == '__main__': + main() diff --git a/AutoCoverTool/ref/music_remover/demucs/tools/bench.py b/AutoCoverTool/ref/music_remover/demucs/tools/bench.py new file mode 100644 index 0000000..edf6ceb --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/tools/bench.py @@ -0,0 +1,78 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +""" +benchmarking script, useful to check for OOM, reasonable train time, +and for the MDX competion, estimate if we will match the time limit.""" +from contextlib import contextmanager +import logging +import sys +import time +import torch + +from demucs.train import get_solver, main +from demucs.apply import apply_model + +logging.basicConfig(level=logging.INFO, stream=sys.stderr) + + +class Result: + pass + + +@contextmanager +def bench(): + import gc + gc.collect() + torch.cuda.reset_max_memory_allocated() + torch.cuda.empty_cache() + result = Result() + # before = torch.cuda.memory_allocated() + before = 0 + begin = time.time() + try: + yield result + finally: + torch.cuda.synchronize() + mem = (torch.cuda.max_memory_allocated() - before) / 2 ** 20 + tim = time.time() - begin + result.mem = mem + result.tim = tim + + +xp = main.get_xp_from_sig(sys.argv[1]) +xp = main.get_xp(xp.argv + sys.argv[2:]) +with xp.enter(): + solver = get_solver(xp.cfg) + if getattr(solver.model, 'use_train_segment', False): + batch = solver.augment(next(iter(solver.loaders['train']))) + solver.model.segment = Fraction(batch.shape[-1], solver.model.samplerate) + train_segment = solver.model.segment + solver.model.eval() + model = solver.model + model.cuda() + x = torch.randn(2, xp.cfg.dset.channels, int(10 * model.samplerate), device='cuda') + with bench() as res: + y = model(x) + y.sum().backward() + del y + for p in model.parameters(): + p.grad = None + print(f"FB: {res.mem:.1f} MB, {res.tim * 1000:.1f} ms") + + x = torch.randn(1, xp.cfg.dset.channels, int(model.segment * model.samplerate), device='cuda') + with bench() as res: + with torch.no_grad(): + y = model(x) + del y + print(f"FV: {res.mem:.1f} MB, {res.tim * 1000:.1f} ms") + + model.cpu() + torch.set_num_threads(1) + test = torch.randn(1, xp.cfg.dset.channels, model.samplerate * 40) + b = time.time() + apply_model(model, test, split=True, shifts=1) + print("CPU 40 sec:", time.time() - b) diff --git a/AutoCoverTool/ref/music_remover/demucs/tools/convert.py b/AutoCoverTool/ref/music_remover/demucs/tools/convert.py new file mode 100644 index 0000000..6c23222 --- /dev/null +++ b/AutoCoverTool/ref/music_remover/demucs/tools/convert.py @@ -0,0 +1,152 @@ +# Copyright (c) Meta, Inc. and its affiliates. +# All rights reserved. +# +# This source code is licensed under the license found in the +# LICENSE file in the root directory of this source tree. + +# Script to convert option names and model args from the dev branch to +# the cleanup release one. There should be no reaso to use that anymore. 
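+#
+# Illustrative example of the rewriting (argv values hypothetical): an old
+# dev-branch argv such as
+#   ['demucs.norm=gn', 'power.penalty=0']
+# would be turned by transform() into
+#   ['svd.penalty=0']
+# since 'demucs.norm=gn' is dropped via TO_REMOVE and the 'power' prefix is
+# renamed to 'svd' via TO_REPLACE below.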
+ +import argparse +import io +import json +from pathlib import Path +import subprocess as sp + +import torch + +from demucs import train, pretrained, states + +DEV_REPO = Path.home() / 'tmp/release_demucs_mdx' + + +TO_REMOVE = [ + 'demucs.dconv_kw.gelu=True', + 'demucs.dconv_kw.nfreqs=0', + 'demucs.dconv_kw.nfreqs=0', + 'demucs.dconv_kw.version=4', + 'demucs.norm=gn', + 'wdemucs.nice=True', + 'wdemucs.good=True', + 'wdemucs.freq_emb=-0.2', + 'special=True', + 'special=False', +] + +TO_REPLACE = [ + ('power', 'svd'), + ('wdemucs', 'hdemucs'), + ('hdemucs.hybrid=True', 'hdemucs.hybrid_old=True'), + ('hdemucs.hybrid=2', 'hdemucs.hybrid=True'), +] + +TO_INJECT = [ + ('model=hdemucs', ['hdemucs.cac=False']), + ('model=hdemucs', ['hdemucs.norm_starts=999']), +] + + +def get_original_argv(sig): + return json.load(open(Path(DEV_REPO) / f'outputs/xps/{sig}/.argv.json')) + + +def transform(argv, mappings, verbose=False): + for rm in TO_REMOVE: + while rm in argv: + argv.remove(rm) + + for old, new in TO_REPLACE: + argv[:] = [a.replace(old, new) for a in argv] + + for condition, args in TO_INJECT: + if condition in argv: + argv[:] = args + argv + + for idx, arg in enumerate(argv): + if 'continue_from=' in arg: + dep_sig = arg.split('=')[1] + if dep_sig.startswith('"'): + dep_sig = eval(dep_sig) + if verbose: + print("Need to recursively convert dependency XP", dep_sig) + new_sig = convert(dep_sig, mappings, verbose).sig + argv[idx] = f'continue_from="{new_sig}"' + + +def convert(sig, mappings, verbose=False): + argv = get_original_argv(sig) + if verbose: + print("Original argv", argv) + transform(argv, mappings, verbose) + if verbose: + print("New argv", argv) + xp = train.main.get_xp(argv) + train.main.init_xp(xp) + if verbose: + print("Mapping", sig, "->", xp.sig) + mappings[sig] = xp.sig + return xp + + +def _eval_old(old_sig, x): + script = ( + 'from demucs import pretrained; import torch; import sys; import io; ' + 'buf = io.BytesIO(sys.stdin.buffer.read()); ' + 'x = torch.load(buf); m = pretrained.load_pretrained_model(' + f'"{old_sig}"); torch.save(m(x), sys.stdout.buffer)') + + buf = io.BytesIO() + torch.save(x, buf) + proc = sp.run( + ['python3', '-c', script], input=buf.getvalue(), capture_output=True, cwd=DEV_REPO) + if proc.returncode != 0: + print("Error", proc.stderr.decode()) + assert False + + buf = io.BytesIO(proc.stdout) + return torch.load(buf) + + +def compare(old_sig, model): + test = torch.randn(1, 2, 44100 * 10) + old_out = _eval_old(old_sig, test) + out = model(test) + + delta = 20 * torch.log10((out - old_out).norm() / out.norm()).item() + return delta + + +def main(): + torch.manual_seed(1234) + parser = argparse.ArgumentParser('convert') + parser.add_argument('sigs', nargs='*') + parser.add_argument('-o', '--output', type=Path, default=Path('release_models')) + parser.add_argument('-d', '--dump', action='store_true') + parser.add_argument('-c', '--compare', action='store_true') + parser.add_argument('-v', '--verbose', action='store_true') + args = parser.parse_args() + + args.output.mkdir(exist_ok=True, parents=True) + mappings = {} + for sig in args.sigs: + xp = convert(sig, mappings, args.verbose) + if args.dump or args.compare: + old_pkg = pretrained._load_package(sig, old=True) + model = train.get_model(xp.cfg) + model.load_state_dict(old_pkg['state']) + if args.dump: + pkg = states.serialize_model(model, xp.cfg) + states.save_with_checksum(pkg, args.output / f'{xp.sig}.th') + if args.compare: + delta = compare(sig, model) + print("Delta for", sig, xp.sig, delta) + + 
mappings[sig] = xp.sig
+
+    print("FINAL MAPPINGS")
+    for old, new in mappings.items():
+        print(old, " ", new)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/AutoCoverTool/ref/music_remover/demucs/tools/export.py b/AutoCoverTool/ref/music_remover/demucs/tools/export.py
new file mode 100644
index 0000000..7dfce10
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/tools/export.py
@@ -0,0 +1,71 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Export a trained model from the full checkpoint (with optimizer etc.) to
+a final checkpoint, with only the model itself. The model is always stored as
+half float to gain space, and because this has zero impact on the final loss.
+When DiffQ was used for training, the model will actually be quantized and bitpacked."""
+from argparse import ArgumentParser
+from fractions import Fraction
+import logging
+from pathlib import Path
+import sys
+import torch
+
+from demucs import train
+from demucs.states import serialize_model, save_with_checksum
+
+
+logger = logging.getLogger(__name__)
+
+
+def main():
+    logging.basicConfig(level=logging.INFO, stream=sys.stderr)
+
+    parser = ArgumentParser("tools.export", description="Export trained models from XP sigs.")
+    parser.add_argument('signatures', nargs='*', help='XP signatures.')
+    parser.add_argument('-o', '--out', type=Path, default=Path("release_models"),
+                        help="Path where to store release models (default release_models)")
+    parser.add_argument('-s', '--sign', action='store_true',
+                        help='Add sha256 prefix checksum to the filename.')
+
+    args = parser.parse_args()
+    args.out.mkdir(exist_ok=True, parents=True)
+
+    for sig in args.signatures:
+        xp = train.main.get_xp_from_sig(sig)
+        name = train.main.get_name(xp)
+        logger.info('Handling %s/%s', sig, name)
+
+        out_path = args.out / (sig + ".th")
+
+        solver = train.get_solver_from_sig(sig)
+        if len(solver.history) < solver.args.epochs:
+            logger.warning(
+                'Model %s has fewer epochs than expected (%d / %d)',
+                sig, len(solver.history), solver.args.epochs)
+
+        solver.model.load_state_dict(solver.best_state)
+        pkg = serialize_model(solver.model, solver.args, solver.quantizer, half=True)
+        if getattr(solver.model, 'use_train_segment', False):
+            batch = solver.augment(next(iter(solver.loaders['train'])))
+            pkg['kwargs']['segment'] = Fraction(batch.shape[-1], solver.model.samplerate)
+            print("Override", pkg['kwargs']['segment'])
+        valid, test = None, None
+        for m in solver.history:
+            if 'valid' in m:
+                valid = m['valid']
+            if 'test' in m:
+                test = m['test']
+        pkg['metrics'] = (valid, test)
+        if args.sign:
+            save_with_checksum(pkg, out_path)
+        else:
+            torch.save(pkg, out_path)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/AutoCoverTool/ref/music_remover/demucs/tools/test_pretrained.py b/AutoCoverTool/ref/music_remover/demucs/tools/test_pretrained.py
new file mode 100644
index 0000000..3f4648d
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/demucs/tools/test_pretrained.py
@@ -0,0 +1,43 @@
+# Copyright (c) Meta, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+# Script to evaluate pretrained models.
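+# A hypothetical invocation (model name and override are illustrative; the
+# exact model flags come from pretrained.add_model_flags() below):
+#   python3 -m tools.test_pretrained -n mdx_extra test.shifts=2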
+
+from argparse import ArgumentParser
+import logging
+import sys
+
+import torch
+
+from demucs import train, pretrained, evaluate
+
+
+def main():
+    torch.set_num_threads(1)
+    logging.basicConfig(stream=sys.stderr, level=logging.INFO)
+    parser = ArgumentParser("tools.test_pretrained",
+                            description="Evaluate pre-trained models or bags of models "
+                                        "on MusDB.")
+    pretrained.add_model_flags(parser)
+    parser.add_argument('overrides', nargs='*',
+                        help='Extra overrides, e.g. test.shifts=2.')
+    args = parser.parse_args()
+
+    xp = train.main.get_xp(args.overrides)
+    with xp.enter():
+        solver = train.get_solver(xp.cfg)
+
+        model = pretrained.get_model_from_args(args)
+        solver.model = model.to(solver.device)
+        solver.model.eval()
+
+        with torch.no_grad():
+            results = evaluate.evaluate(solver, xp.cfg.test.sdr)
+        print(results)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/AutoCoverTool/ref/music_remover/demucs/xishuashua.mp4 b/AutoCoverTool/ref/music_remover/demucs/xishuashua.mp4
new file mode 100644
index 0000000..a30c7ba
Binary files /dev/null and b/AutoCoverTool/ref/music_remover/demucs/xishuashua.mp4 differ
diff --git a/AutoCoverTool/ref/music_remover/readme.txt b/AutoCoverTool/ref/music_remover/readme.txt
new file mode 100644
index 0000000..7a33a51
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/readme.txt
@@ -0,0 +1,21 @@
+Music separation tool
+
+Overview:
+    demucs source: https://github.com/facebookresearch/demucs
+    This code was pulled directly from the repository above, at the main-branch snapshot of Mon Nov 21.
+
+Main changes:
+    Added the separate_interface.py wrapper, which takes an audio file and generates the accompaniment file.
+
+Environment setup:
+    1. Base GPU environment:
+        https://phabricator.ushow.media/w/%E9%9F%B3%E8%A7%86%E9%A2%91%E7%BB%84%E6%96%87%E6%A1%A3/%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85/gpu%E7%8E%AF%E5%A2%83%E5%AE%89%E8%A3%85/
+    2. Install the demucs environment
+        conda create -n demucs python==3.7
+    3. Install the standard_audio binary
+        The code lives at: av_cv_research/tools/music_remover/standard_audio
+        After building, place the binary at /opt/soft/bin/standard_audio
+
+Quick start (server side):
+    python3 separate_interface.py xishuashua.mp4 xishuashua.m4a
+    xishuashua.mp4 is 3 min 36 s long; the average over 3 runs is 27.33 s
diff --git a/AutoCoverTool/ref/music_remover/separate_interface.py b/AutoCoverTool/ref/music_remover/separate_interface.py
new file mode 100644
index 0000000..ea3bb73
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/separate_interface.py
@@ -0,0 +1,103 @@
+"""
+External interface for separation.
+"""
+
+import os
+import sys
+import time
+import shutil
+import logging
+
+from demucs.pretrained import get_model
+from demucs.separate import *
+
+# Third-party binaries
+gs_standard_audio_exe = "/opt/soft/bin/standard_audio"
+gs_ffmpeg_exe = "ffmpeg"
+
+# Global configuration
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+# Error codes
+ERR_CODE_SUCCESS = 0
+ERR_CODE_INPUT_FILE_NOT_EXISTS = 1
+
+
+class SeparateInterface:
+    """
+    External interface of the separator; produces the vocal and accompaniment files.
+    """
+
+    def __init__(self):
+        sp_start = time.time()
+        # After evaluation, this model version gives the best quality at a reasonable speed.
+        # Compared against mdx_extra_q and htdemucs_ft:
+        # mdx_extra_q runs as fast as mdx_extra, but preserves backing harmonies less well.
+        # htdemucs_ft takes 1.6x as long as mdx_extra; on some songs it removes vocals more
+        # thoroughly, but it does not keep harmonies as intact as mdx_extra | see the opening
+        # harmony section of xishuashua for details.
+        self.model = get_model('mdx_extra')
+        self.tmp_dir = os.path.join("/tmp/separate")
+        if not os.path.exists(self.tmp_dir):
+            os.makedirs(self.tmp_dir)
+        logging.info("SeparateInterface: load model spent = {}".format(time.time() - sp_start))
+
+    def process_logic(self, cid, cache_dir, in_file, vocal_out_file, acc_out_file, dev='cuda'):
+        model = self.model
+        sp_start = time.time()
+        wav = load_track(in_file, model.audio_channels, model.samplerate)
+        logging.info("--------load_track:cid={},sp={}".format(cid, time.time() - sp_start))
+
+        # Model inference
+        sp_start = time.time()
+        ref = wav.mean(0)
+        wav = (wav - ref.mean()) / ref.std()
+        # wav[None] -> add one dimension: [2, xxx] -> [1, 2, xxx]
+        sources = apply_model(model, wav[None], device=dev, shifts=1, split=True, overlap=0.25, progress=True,
+                              num_workers=0)[0]
+        sources = sources * ref.std() + ref.mean()
+        logging.info("--------apply_model:cid={},sp={}".format(cid, time.time() - sp_start))
+
+        # Save the vocals; sum the remaining stems into the accompaniment
+        sources = list(sources)
+        vocals = sources.pop(model.sources.index("vocals"))
+        if vocals is not None:
+            save_audio(vocals, vocal_out_file, samplerate=model.samplerate)
+        other_stem = th.zeros_like(sources[0])
+        for sc in sources:
+            other_stem += sc
+        if acc_out_file is not None:
+            save_audio(other_stem, acc_out_file, samplerate=model.samplerate)
+
+        if vocal_out_file is not None:
+            if not os.path.exists(vocal_out_file):
+                return False
+        if acc_out_file is not None:
+            if not os.path.exists(acc_out_file):
+                return False
+        return True
+
+    def process(self, cid, in_file, vocal_out_file, acc_out_file, dev='cuda'):
+        if not os.path.exists(in_file):
+            return ERR_CODE_INPUT_FILE_NOT_EXISTS
+
+        st_time = time.time()
+        logging.info("--------process:cid={},{},{},{}".format(cid, in_file, vocal_out_file, acc_out_file))
+        cache_dir = os.path.join(self.tmp_dir, str(cid))
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+        os.makedirs(cache_dir)
+        # Core processing logic
+        ret = self.process_logic(cid, cache_dir, in_file, vocal_out_file, acc_out_file, dev)
+        shutil.rmtree(cache_dir)
+        logging.info(
+            "--------finish:cid={},{},{},{}|{}|{}|sp={}\n\n".format(cid, in_file, vocal_out_file, acc_out_file,
+                                                                    dev, ret, time.time() - st_time))
+        return ret
+
+#
+# if __name__ == '__main__':
+#     si = SeparateInterface()
+#     in_f = sys.argv[1]
+#     out_f = sys.argv[2]
+#     dev = sys.argv[3]  # cuda or cpu
+#     for i in range(0, 3):
+#         si.process(str(1), in_f, out_f, dev)
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/CMakeLists.txt b/AutoCoverTool/ref/music_remover/standard_audio/CMakeLists.txt
new file mode 100644
index 0000000..5afa477
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/CMakeLists.txt
@@ -0,0 +1,16 @@
+cmake_minimum_required(VERSION 2.8)
+project(standard_audio)
+
+set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)
+set(CMAKE_CXX_STANDARD 11)
+
+
+include_directories(ref/alimter/inc)
+include_directories(ref/waves/inc)
+include_directories(ref/ebur128/inc)
+
+AUX_SOURCE_DIRECTORY(ref/alimter/src DIR_ALIMTER_SRCS)
+AUX_SOURCE_DIRECTORY(ref/waves/src DIR_WAVES_SRCS)
+AUX_SOURCE_DIRECTORY(ref/ebur128/src DIR_EBUR128_SRCS)
+
+add_executable(standard_audio main.cpp ${DIR_ALIMTER_SRCS} ${DIR_EBUR128_SRCS} ${DIR_WAVES_SRCS})
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/main.cpp b/AutoCoverTool/ref/music_remover/standard_audio/main.cpp
new file mode 100644
index 0000000..b448e0d
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/main.cpp
@@ -0,0 +1,126 @@
+//
+// Created by yangjianli on 2022/12/5.
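+// Reads a WAV file, measures its EBU R128 integrated loudness, applies the
+// gain needed to reach the -14.57 dB baseline (DEFAULT_BASELINE_DB), and runs
+// a limiter to avoid clipping before writing the output.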
+//
+#include "WaveFile.h"
+#include "alimiter.h"
+#include "ebur128.h"
+
+#include <sys/time.h>
+#include <cmath>
+#include <cstdio>
+
+#define PROC_LEN 1024
+#define DEFAULT_BASELINE_DB (float)-14.57f
+
+// Inputs: channels, sample rate, source data, data length.
+// Outputs: integrated loudness (dB) and the gain factor needed to reach the target loudness.
+int ebur128_whole(int channel, int sample_rate, short *data, const int len, double &gated_loudness, double &gain)
+{
+    ebur128_state *st = NULL;
+    st = ebur128_init(channel, sample_rate, EBUR128_MODE_I);
+    if (NULL == st)
+    {
+        return -1;
+    }
+    int pos = 0;
+    int temp_length = 0;
+    int ret;
+
+    while (pos < len)
+    {
+        temp_length = PROC_LEN;
+        if (len - pos < PROC_LEN)
+        {
+            temp_length = len - pos;
+        }
+        ret = ebur128_add_frames_short(st, data + pos, temp_length / channel);
+        if (ret != 0)
+        {
+            return -2;
+        }
+        pos += temp_length;
+    }
+
+    gated_loudness = -1;
+    ebur128_loudness_global(st, &gated_loudness);
+    float db = (DEFAULT_BASELINE_DB - gated_loudness) / 20.f;
+    gain = pow(10, db);
+    printf("gated_loudness = %f db = %f gain = %f\n", gated_loudness, db, gain);
+    ebur128_destroy(&st);
+    return 0;
+}
+
+
+int main(int argc, char *argv[])
+{
+    struct timeval start;
+    struct timeval end;
+
+    if (argc < 3)
+    {
+        printf("example: ./exe in_wav out_wav!\n");
+        return -1;
+    }
+
+    gettimeofday(&start, NULL);
+
+    CWaveFile wave_file = CWaveFile(argv[1], false);
+    if (!wave_file.GetStatus())
+    {
+        printf("%s is not wav\n", argv[1]);
+        return -1;
+    }
+    int32_t sample_rate = wave_file.GetSampleRate();
+    int32_t channel = wave_file.GetChannels();
+    int32_t len = wave_file.GetTotalFrames() * channel;
+    short *buffer = new short[len];
+    wave_file.ReadFrameAsS16(buffer, wave_file.GetTotalFrames());
+
+    gettimeofday(&end, NULL);
+    printf("load_data|spend_time=%f\n", (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0);
+
+    // Measure the loudness and the gain factor we expect to apply
+    double gated_loudness = -1;
+    double gain = -1;
+    int err = ebur128_whole(channel, sample_rate, buffer, len, gated_loudness, gain);
+    if (err != 0 || gain == -1)
+    {
+        printf("%s ebur128 err=%d\n", argv[1], err);
+        return -1;
+    }
+    gettimeofday(&end, NULL);
+    printf("ebur128_whole|spend_time=%f\n", (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0);
+
+
+    // Apply the gain, with a limiter to prevent clipping
+    CWaveFile out_file = CWaveFile(argv[2], true);
+    out_file.SetSampleFormat(SF_IEEE_FLOAT);
+    out_file.SetSampleRate(sample_rate);
+    out_file.SetChannels(channel);
+    out_file.SetupDone();
+
+    SUPERSOUND::Alimiter alimiter;
+    alimiter.SetParam(sample_rate, channel);
+    int step = 1024;
+    float* tmp_buffer = new float[step];
+    for(int i = 0; i < len; i+=step)
+    {
+        int cur_step = step;
+        if (i + cur_step > len)
+        {
+            cur_step = len - i;
+        }
+
+        for(int j = 0; j < cur_step; j++)
+        {
+            tmp_buffer[j] = float(buffer[i+j] / 32768.0 * gain);
+        }
+        alimiter.Filter(tmp_buffer, tmp_buffer, cur_step);
+        out_file.WriteFrame(tmp_buffer, cur_step / channel);
+    }
+
+    delete[] tmp_buffer;
+    delete[] buffer;
+    gettimeofday(&end, NULL);
+    printf("finish|spend_time=%f\n", (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0);
+    return 0;
+}
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/alimter/CMakeLists.txt b/AutoCoverTool/ref/music_remover/standard_audio/ref/alimter/CMakeLists.txt
new file mode 100644
index 0000000..9748c4d
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/alimter/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_ALIMTER_SRCS)
+add_library(alimiter ${DIR_ALIMTER_SRCS})
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/alimter/inc/alimiter.h b/AutoCoverTool/ref/music_remover/standard_audio/ref/alimter/inc/alimiter.h
new file mode 100755
index 0000000..8022d39
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/alimter/inc/alimiter.h
@@ -0,0 +1,99 @@
+
+/***************************************************************************
+* email : yijiangyang@tencent.com *
+***************************************************************************/
+
+//+ ----------------------------------------------------+
+//+                    _oo0oo_                          +
+//+                   o8888888o                         +
+//+                   88" . "88                         +
+//+                   (| -_- |)                         +
+//+                   0\  =  /0                         +
+//+                 ___/`---'\___                       +
+//+               .' \\|     |// '.                     +
+//+              / \\|||  :  |||// \                    +
+//+             / _||||| -:- |||||- \                   +
+//+            |   | \\\  -  /// |   |                  +
+//+            | \_|  ''\---/''  |_/ |                  +
+//+            \  .-\__  '-'  ___/-. /                  +
+//+          ___'. .'  /--.--\  `. .'___                +
+//+       ."" '<  `.___\_<|>_/___.' >' "".              +
+//+      | | :  `- \`.;`\ _ /`;.`/ - ` : | |            +
+//+      \  \ `_.   \_ __\ /__ _/   .-` /  /            +
+//+  =====`-.____`.___ \_____/___.-`___.-'=====         +
+//+                    `=---='                          +
+//+                                                     +
+//+                                                     +
+//+  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        +
+//+                                                     +
+//+           Buddha bless: no bugs, ever               +
+//+ ----------------------------------------------------+
+
+// Implements the limiter from FFmpeg; it is friendly to the spectrum, but compresses rather aggressively.
+
+#ifndef __ALIMITER_H__
+#define __ALIMITER_H__
+
+#include <stdint.h>
+#define ERROR_SUPERSOUND_SUCCESS 0
+#define ERROR_SUPERSOUND_PARAM -1
+#define ERROR_SUPERSOUND_MEMORY -2
+typedef struct AudioLimiterContext
+{
+    float limit;
+    float attack;
+    float release;
+    float att;
+    float level_in;
+    float level_out;
+    int32_t auto_release;
+    int32_t auto_level;
+    float asc;
+    int32_t asc_c;
+    int32_t asc_pos;
+    float asc_coeff;
+
+    float *buffer;
+    int32_t buffer_size;
+    int32_t buffer_max_size;
+    int32_t pos;
+    int32_t *nextpos;
+    float *nextdelta;
+
+    float delta;
+    int32_t nextiter;
+    int32_t nextlen;
+    int32_t asc_changed;
+}AudioLimiterContext;
+
+namespace SUPERSOUND
+{
+
+
+class Alimiter
+{
+public:
+    Alimiter();
+    ~Alimiter();
+
+public:
+    void Flush();
+    int32_t GetLatecy();
+    int32_t SetParam(int32_t fs, int32_t channels);
+    void Filter(float * input, float * output, int32_t num);
+
+private:
+    void Uninit();
+    int32_t config_input();
+    float get_rdelta(AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc);
+
+private:
+    AudioLimiterContext m_alimiterCtx;
+    int m_nChannels;
+    int m_nFs;
+};
+
+
+}
+
+#endif /* __ALIMITER_H__ */
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/alimter/src/alimiter.cpp b/AutoCoverTool/ref/music_remover/standard_audio/ref/alimter/src/alimiter.cpp
new file mode 100755
index 0000000..abbd622
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/alimter/src/alimiter.cpp
@@ -0,0 +1,306 @@
+
+#include "alimiter.h"
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+#include <new>
+
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ?
(a) : (b)) +#define MIDDLE(x, y, z) ((x)<(y)?((y)<(z)?(y):(x)<(z)?(z):(x)):((y)>(z)?(y):(x)>(z)?(z):(x))) +#define SAFE_DELETE_PTR(ptr) \ +{ \ + if(ptr) \ + { \ + delete [] ptr; \ + ptr = NULL; \ + } \ +} + +namespace SUPERSOUND +{ + + +Alimiter::Alimiter() +{ + memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx)); + + m_nChannels = 0; + m_nFs = 0; + + Flush(); +} + +Alimiter::~Alimiter() +{ + Uninit(); +} + +void Alimiter::Flush() +{ + float * buffer = m_alimiterCtx.buffer; + float * nextdelta = m_alimiterCtx.nextdelta; + int32_t * nextpos = m_alimiterCtx.nextpos; + int32_t buffer_max_size = m_alimiterCtx.buffer_max_size; + int32_t buffer_size = m_alimiterCtx.buffer_size; + + if(buffer) + memset(buffer, 0, sizeof(float) * buffer_max_size); + if(nextdelta) + memset(nextdelta, 0, sizeof(float) * buffer_max_size); + if(nextpos) + memset(nextpos, -1, sizeof(float) * buffer_max_size); + + memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx)); + + m_alimiterCtx.level_in = 1; + m_alimiterCtx.level_out = 32000 / 32768.0; + m_alimiterCtx.limit = 1; + m_alimiterCtx.attack = 5; + m_alimiterCtx.release = 50; + m_alimiterCtx.auto_release = 0; + m_alimiterCtx.asc_coeff = 0.5; + m_alimiterCtx.auto_level = 1; + + m_alimiterCtx.attack /= 1000; + m_alimiterCtx.release /= 1000; + m_alimiterCtx.att = 1; + m_alimiterCtx.asc_pos = -1; + m_alimiterCtx.asc_coeff = pow(0.5f, m_alimiterCtx.asc_coeff - 0.5f) * 2 * -1; + + m_alimiterCtx.buffer = buffer; + m_alimiterCtx.nextdelta = nextdelta; + m_alimiterCtx.nextpos = nextpos; + m_alimiterCtx.buffer_max_size = buffer_max_size; + m_alimiterCtx.buffer_size = buffer_size; +} + +int32_t Alimiter::GetLatecy() +{ + return m_alimiterCtx.buffer_size / m_nChannels; +} + +int32_t Alimiter::SetParam( int32_t fs, int32_t channels ) +{ + if((fs == m_nFs) && (channels == m_nChannels)) + return ERROR_SUPERSOUND_SUCCESS; + + m_nChannels = channels; + m_nFs = fs; + + return config_input(); +} + +void Alimiter::Filter( float * input, float * output, int32_t num ) +{ + num = num / m_nChannels; + int channels = m_nChannels; + int buffer_size = m_alimiterCtx.buffer_size; + float * buffer = m_alimiterCtx.buffer; + float release = m_alimiterCtx.release; + float limit = m_alimiterCtx.limit; + float * nextdelta = m_alimiterCtx.nextdelta; + float level = m_alimiterCtx.auto_level ? 1 / limit : 1; + float level_out = m_alimiterCtx.level_out; + float level_in = m_alimiterCtx.level_in; + int *nextpos = m_alimiterCtx.nextpos; + + float * buf; + float * dst; + float * src; + int n, c, i; + AudioLimiterContext * s = &m_alimiterCtx; + + dst = output; + src = input; + + for (n = 0; n < num; n++) { + float peak = 0; + + for (c = 0; c < channels; c++) { + float sample = src[c] * level_in; + + buffer[s->pos + c] = sample; + peak = MAX(peak, fabs(sample)); + } + + if (s->auto_release && peak > limit) { + s->asc += peak; + s->asc_c++; + } + + if (peak > limit) { + float patt = MIN(limit / peak, 1); + float rdelta = get_rdelta(s, release, m_nFs, + peak, limit, patt, 0); + float delta = (limit / peak - s->att) / buffer_size * channels; + int found = 0; + + if (delta < s->delta) { + s->delta = delta; + nextpos[0] = s->pos; + nextpos[1] = -1; + nextdelta[0] = rdelta; + s->nextlen = 1; + s->nextiter= 0; + } else { + for (i = s->nextiter; i < s->nextiter + s->nextlen; i++) { + int j = i % buffer_size; + float ppeak, pdelta; + + ppeak = fabs(buffer[nextpos[j]]) > fabs(buffer[nextpos[j] + 1]) ? 
+ fabs(buffer[nextpos[j]]) : fabs(buffer[nextpos[j] + 1]); + pdelta = (limit / peak - limit / ppeak) / (((buffer_size - nextpos[j] + s->pos) % buffer_size) / channels); + if (pdelta < nextdelta[j]) { + nextdelta[j] = pdelta; + found = 1; + break; + } + } + if (found) { + s->nextlen = i - s->nextiter + 1; + nextpos[(s->nextiter + s->nextlen) % buffer_size] = s->pos; + nextdelta[(s->nextiter + s->nextlen) % buffer_size] = rdelta; + nextpos[(s->nextiter + s->nextlen + 1) % buffer_size] = -1; + s->nextlen++; + } + } + } + + buf = &s->buffer[(s->pos + channels) % buffer_size]; + peak = 0; + for (c = 0; c < channels; c++) { + float sample = buf[c]; + + peak = MAX(peak, fabs(sample)); + } + + if (s->pos == s->asc_pos && !s->asc_changed) + s->asc_pos = -1; + + if (s->auto_release && s->asc_pos == -1 && peak > limit) { + s->asc -= peak; + s->asc_c--; + } + + s->att += s->delta; + + for (c = 0; c < channels; c++) + dst[c] = buf[c] * s->att; + + if ((s->pos + channels) % buffer_size == nextpos[s->nextiter]) { + if (s->auto_release) { + s->delta = get_rdelta(s, release, m_nFs, + peak, limit, s->att, 1); + if (s->nextlen > 1) { + int pnextpos = nextpos[(s->nextiter + 1) % buffer_size]; + float ppeak = fabs(buffer[pnextpos]) > fabs(buffer[pnextpos + 1]) ? + fabs(buffer[pnextpos]) : + fabs(buffer[pnextpos + 1]); + float pdelta = (limit / ppeak - s->att) / + (((buffer_size + pnextpos - + ((s->pos + channels) % buffer_size)) % + buffer_size) / channels); + if (pdelta < s->delta) + s->delta = pdelta; + } + } else { + s->delta = nextdelta[s->nextiter]; + s->att = limit / peak; + } + + s->nextlen -= 1; + nextpos[s->nextiter] = -1; + s->nextiter = (s->nextiter + 1) % buffer_size; + } + + if (s->att > 1.) { + s->att = 1.; + s->delta = 0.; + s->nextiter = 0; + s->nextlen = 0; + nextpos[0] = -1; + } + + if (s->att <= 0.) { + s->att = 0.000001f; + s->delta = (1 - s->att) / (m_nFs * release); + } + + if (s->att != 1 && (1 - s->att) < 0.000001f) + s->att = 1; + + if (s->delta != 0 && fabs(s->delta) < 0.000001f) + s->delta = 0; + + for (c = 0; c < channels; c++) + dst[c] = MIDDLE(dst[c], -limit, limit) * level * level_out; + + s->pos = (s->pos + channels) % buffer_size; + src += channels; + dst += channels; + } +} + +void Alimiter::Uninit() +{ + SAFE_DELETE_PTR(m_alimiterCtx.buffer); + SAFE_DELETE_PTR(m_alimiterCtx.nextdelta); + SAFE_DELETE_PTR(m_alimiterCtx.nextpos); +} + +int32_t Alimiter::config_input() +{ + int obuffer_size = int(m_nFs * m_nChannels * 100 / 1000. 
+ m_nChannels);
+    if(obuffer_size < m_nChannels)
+        return ERROR_SUPERSOUND_PARAM;
+
+    if(obuffer_size > m_alimiterCtx.buffer_max_size)
+    {
+        SAFE_DELETE_PTR(m_alimiterCtx.buffer);
+        m_alimiterCtx.buffer = new(std::nothrow) float[obuffer_size];
+        if(m_alimiterCtx.buffer == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        memset(m_alimiterCtx.buffer, 0, sizeof(float) * obuffer_size);
+
+        SAFE_DELETE_PTR(m_alimiterCtx.nextdelta);
+        m_alimiterCtx.nextdelta = new(std::nothrow) float[obuffer_size];
+        if(m_alimiterCtx.nextdelta == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        memset(m_alimiterCtx.nextdelta, 0, sizeof(float) * obuffer_size);
+
+        SAFE_DELETE_PTR(m_alimiterCtx.nextpos);
+        m_alimiterCtx.nextpos = new(std::nothrow) int32_t[obuffer_size];
+        if(m_alimiterCtx.nextpos == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        memset(m_alimiterCtx.nextpos, -1, obuffer_size*sizeof(int32_t));
+
+        m_alimiterCtx.buffer_max_size = obuffer_size;
+    }
+
+    m_alimiterCtx.buffer_size = int(m_nFs * m_alimiterCtx.attack * m_nChannels);
+    m_alimiterCtx.buffer_size -= m_alimiterCtx.buffer_size % m_nChannels;
+
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+float Alimiter::get_rdelta( AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc )
+{
+    float rdelta = (1 - patt) / (sample_rate * release);
+
+    if (asc && s->auto_release && s->asc_c > 0) {
+        float a_att = limit / (s->asc_coeff * s->asc) * (float)s->asc_c;
+
+        if (a_att > patt) {
+            float delta = MAX((a_att - patt) / (sample_rate * release), rdelta / 10);
+
+            if (delta < rdelta)
+                rdelta = delta;
+        }
+    }
+
+    return rdelta;
+}
+
+
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/ebur128/CMakeLists.txt b/AutoCoverTool/ref/music_remover/standard_audio/ref/ebur128/CMakeLists.txt
new file mode 100644
index 0000000..18a5a86
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/ebur128/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_EBUR128_SRCS)
+add_library(ebur128 ${DIR_EBUR128_SRCS})
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/ebur128/inc/ebur128.h b/AutoCoverTool/ref/music_remover/standard_audio/ref/ebur128/inc/ebur128.h
new file mode 100755
index 0000000..faa66c6
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/ebur128/inc/ebur128.h
@@ -0,0 +1,425 @@
+/* See COPYING file for copyright and license details. */
+
+#ifndef EBUR128_H_
+#define EBUR128_H_
+
+/** \file ebur128.h
+ *  \brief libebur128 - a library for loudness measurement according to
+ *         the EBU R128 standard.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define EBUR128_VERSION_MAJOR 1
+#define EBUR128_VERSION_MINOR 2
+#define EBUR128_VERSION_PATCH 4
+
+#include <stddef.h> /* for size_t */
+
+/** \enum channel
+ *  Use these values when setting the channel map with ebur128_set_channel().
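+ *  (For example, ebur128_set_channel(st, 2, EBUR128_CENTER) marks the third
+ *  channel, index 2, as the center channel.)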
+ *  See definitions in ITU R-REC-BS 1770-4
+ */
+enum channel {
+  EBUR128_UNUSED = 0,     /**< unused channel (for example LFE channel) */
+  EBUR128_LEFT   = 1,
+  EBUR128_Mp030  = 1,     /**< itu M+030 */
+  EBUR128_RIGHT  = 2,
+  EBUR128_Mm030  = 2,     /**< itu M-030 */
+  EBUR128_CENTER = 3,
+  EBUR128_Mp000  = 3,     /**< itu M+000 */
+  EBUR128_LEFT_SURROUND  = 4,
+  EBUR128_Mp110  = 4,     /**< itu M+110 */
+  EBUR128_RIGHT_SURROUND = 5,
+  EBUR128_Mm110  = 5,     /**< itu M-110 */
+  EBUR128_DUAL_MONO,      /**< a channel that is counted twice */
+  EBUR128_MpSC,           /**< itu M+SC */
+  EBUR128_MmSC,           /**< itu M-SC */
+  EBUR128_Mp060,          /**< itu M+060 */
+  EBUR128_Mm060,          /**< itu M-060 */
+  EBUR128_Mp090,          /**< itu M+090 */
+  EBUR128_Mm090,          /**< itu M-090 */
+  EBUR128_Mp135,          /**< itu M+135 */
+  EBUR128_Mm135,          /**< itu M-135 */
+  EBUR128_Mp180,          /**< itu M+180 */
+  EBUR128_Up000,          /**< itu U+000 */
+  EBUR128_Up030,          /**< itu U+030 */
+  EBUR128_Um030,          /**< itu U-030 */
+  EBUR128_Up045,          /**< itu U+045 */
+  EBUR128_Um045,          /**< itu U-045 */
+  EBUR128_Up090,          /**< itu U+090 */
+  EBUR128_Um090,          /**< itu U-090 */
+  EBUR128_Up110,          /**< itu U+110 */
+  EBUR128_Um110,          /**< itu U-110 */
+  EBUR128_Up135,          /**< itu U+135 */
+  EBUR128_Um135,          /**< itu U-135 */
+  EBUR128_Up180,          /**< itu U+180 */
+  EBUR128_Tp000,          /**< itu T+000 */
+  EBUR128_Bp000,          /**< itu B+000 */
+  EBUR128_Bp045,          /**< itu B+045 */
+  EBUR128_Bm045           /**< itu B-045 */
+};
+
+/** \enum error
+ *  Error return values.
+ */
+enum error {
+  EBUR128_SUCCESS = 0,
+  EBUR128_ERROR_NOMEM,
+  EBUR128_ERROR_INVALID_MODE,
+  EBUR128_ERROR_INVALID_CHANNEL_INDEX,
+  EBUR128_ERROR_NO_CHANGE
+};
+
+/** \enum mode
+ *  Use these values in ebur128_init (or'ed). Try to use the lowest possible
+ *  modes that suit your needs, as performance will be better.
+ */
+enum mode {
+  /** can call ebur128_loudness_momentary */
+  EBUR128_MODE_M           = (1 << 0),
+  /** can call ebur128_loudness_shortterm */
+  EBUR128_MODE_S           = (1 << 1) | EBUR128_MODE_M,
+  /** can call ebur128_loudness_global_* and ebur128_relative_threshold */
+  EBUR128_MODE_I           = (1 << 2) | EBUR128_MODE_M,
+  /** can call ebur128_loudness_range */
+  EBUR128_MODE_LRA         = (1 << 3) | EBUR128_MODE_S,
+  /** can call ebur128_sample_peak */
+  EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | EBUR128_MODE_M,
+  /** can call ebur128_true_peak */
+  EBUR128_MODE_TRUE_PEAK   = (1 << 5) | EBUR128_MODE_M
+                                      | EBUR128_MODE_SAMPLE_PEAK,
+  /** uses histogram algorithm to calculate loudness */
+  EBUR128_MODE_HISTOGRAM   = (1 << 6)
+};
+
+/** forward declaration of ebur128_state_internal */
+struct ebur128_state_internal;
+
+/** \brief Contains information about the state of a loudness measurement.
+ *
+ *  You should not need to modify this struct directly.
+ */
+typedef struct {
+  int mode;                           /**< The current mode. */
+  unsigned int channels;              /**< The number of channels. */
+  unsigned long samplerate;           /**< The sample rate. */
+  struct ebur128_state_internal* d;   /**< Internal state. */
+} ebur128_state;
+
+/** \brief Get library version number. Do not pass null pointers here.
+ *
+ *  @param major major version number of library
+ *  @param minor minor version number of library
+ *  @param patch patch version number of library
+ */
+void ebur128_get_version(int* major, int* minor, int* patch);
+
+/** \brief Initialize library state.
+ *
+ *  @param channels the number of channels.
+ *  @param samplerate the sample rate.
+ *  @param mode see the mode enum for possible values.
+ *  @return an initialized library state, or NULL on error.
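+ *
+ *  Example (stereo at 44.1 kHz with integrated-loudness mode, as used by the
+ *  standard_audio tool in this repo):
+ *    ebur128_state* st = ebur128_init(2, 44100, EBUR128_MODE_I);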
+ */ +ebur128_state* ebur128_init(unsigned int channels, + unsigned long samplerate, + int mode); + +/** \brief Destroy library state. + * + * @param st pointer to a library state. + */ +void ebur128_destroy(ebur128_state** st); + +/** \brief Set channel type. + * + * The default is: + * - 0 -> EBUR128_LEFT + * - 1 -> EBUR128_RIGHT + * - 2 -> EBUR128_CENTER + * - 3 -> EBUR128_UNUSED + * - 4 -> EBUR128_LEFT_SURROUND + * - 5 -> EBUR128_RIGHT_SURROUND + * + * @param st library state. + * @param channel_number zero based channel index. + * @param value channel type from the "channel" enum. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_set_channel(ebur128_state* st, + unsigned int channel_number, + int value); + +/** \brief Change library parameters. + * + * Note that the channel map will be reset when setting a different number of + * channels. The current unfinished block will be lost. + * + * @param st library state. + * @param channels new number of channels. + * @param samplerate new sample rate. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be + * invalid and must be destroyed. + * - EBUR128_ERROR_NO_CHANGE if channels and sample rate were not changed. + */ +int ebur128_change_parameters(ebur128_state* st, + unsigned int channels, + unsigned long samplerate); + +/** \brief Set the maximum window duration. + * + * Set the maximum duration that will be used for ebur128_window_loudness(). + * Note that this destroys the current content of the audio buffer. + * + * @param st library state. + * @param window duration of the window in ms. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be + * invalid and must be destroyed. + * - EBUR128_ERROR_NO_CHANGE if window duration not changed. + */ +int ebur128_set_max_window(ebur128_state* st, unsigned long window); + +/** \brief Set the maximum history. + * + * Set the maximum history that will be stored for loudness integration. + * More history provides more accurate results, but requires more resources. + * + * Applies to ebur128_loudness_range() and ebur128_loudness_global() when + * EBUR128_MODE_HISTOGRAM is not set. + * + * Default is ULONG_MAX (at least ~50 days). + * Minimum is 3000ms for EBUR128_MODE_LRA and 400ms for EBUR128_MODE_M. + * + * @param st library state. + * @param history duration of history in ms. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NO_CHANGE if history not changed. + */ +int ebur128_set_max_history(ebur128_state* st, unsigned long history); + +/** \brief Add frames to be processed. + * + * @param st library state. + * @param src array of source frames. Channels must be interleaved. + * @param frames number of frames. Not number of samples! + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. + */ +int ebur128_add_frames_short(ebur128_state* st, + const short* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_int(ebur128_state* st, + const int* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_float(ebur128_state* st, + const float* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_double(ebur128_state* st, + const double* src, + size_t frames); + +/** \brief Get global integrated loudness in LUFS. 
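+ *
+ *  (Typical call sequence, as in ebur128_whole() in this repo's
+ *  standard_audio/main.cpp: ebur128_init(), ebur128_add_frames_short() in a
+ *  loop, then ebur128_loudness_global() and ebur128_destroy().)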
+ * + * @param st library state. + * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set. + */ +int ebur128_loudness_global(ebur128_state* st, double* out); +/** \brief Get global integrated loudness in LUFS across multiple instances. + * + * @param sts array of library states. + * @param size length of sts + * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set. + */ +int ebur128_loudness_global_multiple(ebur128_state** sts, + size_t size, + double* out); + +/** \brief Get momentary loudness (last 400ms) in LUFS. + * + * @param st library state. + * @param out momentary loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + */ +int ebur128_loudness_momentary(ebur128_state* st, double* out); +/** \brief Get short-term loudness (last 3s) in LUFS. + * + * @param st library state. + * @param out short-term loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_S" has not been set. + */ +int ebur128_loudness_shortterm(ebur128_state* st, double* out); + +/** \brief Get loudness of the specified window in LUFS. + * + * window must not be larger than the current window set in st. + * The current window can be changed by calling ebur128_set_max_window(). + * + * @param st library state. + * @param window window in ms to calculate loudness. + * @param out loudness in LUFS. -HUGE_VAL if result is negative infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if window larger than current window in st. + */ +int ebur128_loudness_window(ebur128_state* st, + unsigned long window, + double* out); + +/** \brief Get loudness range (LRA) of programme in LU. + * + * Calculates loudness range according to EBU 3342. + * + * @param st library state. + * @param out loudness range (LRA) in LU. Will not be changed in case of + * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be + * returned in this case. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM in case of memory allocation error. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set. + */ +int ebur128_loudness_range(ebur128_state* st, double* out); +/** \brief Get loudness range (LRA) in LU across multiple instances. + * + * Calculates loudness range according to EBU 3342. + * + * @param sts array of library states. + * @param size length of sts + * @param out loudness range (LRA) in LU. Will not be changed in case of + * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be + * returned in this case. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM in case of memory allocation error. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set. + */ +int ebur128_loudness_range_multiple(ebur128_state** sts, + size_t size, + double* out); + +/** \brief Get maximum sample peak from all frames that have been processed. 
+ * + * The equation to convert to dBFS is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum sample peak in float format (1.0 is 0 dBFS) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum sample peak from the last call to add_frames(). + * + * The equation to convert to dBFS is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum sample peak in float format (1.0 is 0 dBFS) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_prev_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum true peak from all frames that have been processed. + * + * Uses an implementation defined algorithm to calculate the true peak. Do not + * try to compare resulting values across different versions of the library, + * as the algorithm may change. + * + * The current implementation uses a custom polyphase FIR interpolator to + * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for + * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz. + * + * The equation to convert to dBTP is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum true peak in float format (1.0 is 0 dBTP) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum true peak from the last call to add_frames(). + * + * Uses an implementation defined algorithm to calculate the true peak. Do not + * try to compare resulting values across different versions of the library, + * as the algorithm may change. + * + * The current implementation uses a custom polyphase FIR interpolator to + * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for + * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz. + * + * The equation to convert to dBTP is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum true peak in float format (1.0 is 0 dBTP) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_prev_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get relative threshold in LUFS. + * + * @param st library state + * @param out relative threshold in LUFS. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not + * been set. 
+ */
+int ebur128_relative_threshold(ebur128_state* st, double* out);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* EBUR128_H_ */
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/ebur128/src/ebur128.c b/AutoCoverTool/ref/music_remover/standard_audio/ref/ebur128/src/ebur128.c
new file mode 100755
index 0000000..6c10f1e
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/ebur128/src/ebur128.c
@@ -0,0 +1,1333 @@
+/* See COPYING file for copyright and license details. */
+
+#include "ebur128.h"
+
+#include <float.h>
+#include <limits.h>
+#include <math.h> /* You may have to define _USE_MATH_DEFINES if you use MSVC */
+#include <stdio.h>
+#include <stdlib.h>
+
+/* This can be replaced by any BSD-like queue implementation. */
+#include <sys/queue.h>
+
+#define CHECK_ERROR(condition, errorcode, goto_point) \
+  if ((condition)) {                                  \
+    errcode = (errorcode);                            \
+    goto goto_point;                                  \
+  }
+
+STAILQ_HEAD(ebur128_double_queue, ebur128_dq_entry);
+struct ebur128_dq_entry {
+  double z;
+  STAILQ_ENTRY(ebur128_dq_entry) entries;
+};
+
+#define ALMOST_ZERO 0.000001
+
+typedef struct {         /* Data structure for polyphase FIR interpolator */
+  unsigned int factor;   /* Interpolation factor of the interpolator */
+  unsigned int taps;     /* Taps (prefer odd to increase zero coeffs) */
+  unsigned int channels; /* Number of channels */
+  unsigned int delay;    /* Size of delay buffer */
+  struct {
+    unsigned int count;  /* Number of coefficients in this subfilter */
+    unsigned int* index; /* Delay index of corresponding filter coeff */
+    double* coeff;       /* List of subfilter coefficients */
+  }* filter;             /* List of subfilters (one for each factor) */
+  float** z;             /* List of delay buffers (one for each channel) */
+  unsigned int zi;       /* Current delay buffer index */
+} interpolator;
+
+struct ebur128_state_internal {
+  /** Filtered audio data (used as ring buffer). */
+  double* audio_data;
+  /** Size of audio_data array. */
+  size_t audio_data_frames;
+  /** Current index for audio_data. */
+  size_t audio_data_index;
+  /** How many frames are needed for a gating block. Will correspond to 400ms
+   *  of audio at initialization, and 100ms after the first block (75% overlap
+   *  as specified in the 2011 revision of BS1770). */
+  unsigned long needed_frames;
+  /** The channel map. Has as many elements as there are channels. */
+  int* channel_map;
+  /** How many samples fit in 100ms (rounded). */
+  unsigned long samples_in_100ms;
+  /** BS.1770 filter coefficients (numerator). */
+  double b[5];
+  /** BS.1770 filter coefficients (denominator). */
+  double a[5];
+  /** BS.1770 filter state. */
+  double v[5][5];
+  /** Linked list of block energies. */
+  struct ebur128_double_queue block_list;
+  unsigned long block_list_max;
+  unsigned long block_list_size;
+  /** Linked list of 3s-block energies, used to calculate LRA. */
+  struct ebur128_double_queue short_term_block_list;
+  unsigned long st_block_list_max;
+  unsigned long st_block_list_size;
+  int use_histogram;
+  unsigned long *block_energy_histogram;
+  unsigned long *short_term_block_energy_histogram;
+  /** Keeps track of when a new short term block is needed. */
+  size_t short_term_frame_counter;
+  /** Maximum sample peak, one per channel */
+  double* sample_peak;
+  double* prev_sample_peak;
+  /** Maximum true peak, one per channel */
+  double* true_peak;
+  double* prev_true_peak;
+  interpolator* interp;
+  float* resampler_buffer_input;
+  size_t resampler_buffer_input_frames;
+  float* resampler_buffer_output;
+  size_t resampler_buffer_output_frames;
+  /** The maximum window duration in ms.
*/ + unsigned long window; + unsigned long history; +}; + +static double relative_gate = -10.0; + +/* Those will be calculated when initializing the library */ +static double relative_gate_factor; +static double minus_twenty_decibels; +static double histogram_energies[1000]; +static double histogram_energy_boundaries[1001]; + +static interpolator* interp_create(unsigned int taps, unsigned int factor, unsigned int channels) { + interpolator* interp = calloc(1, sizeof(interpolator)); + unsigned int j = 0; + + interp->taps = taps; + interp->factor = factor; + interp->channels = channels; + interp->delay = (interp->taps + interp->factor - 1) / interp->factor; + + /* Initialize the filter memory + * One subfilter per interpolation factor. */ + interp->filter = calloc(interp->factor, sizeof(*interp->filter)); + for (j = 0; j < interp->factor; j++) { + interp->filter[j].index = calloc(interp->delay, sizeof(unsigned int)); + interp->filter[j].coeff = calloc(interp->delay, sizeof(double)); + } + /* One delay buffer per channel. */ + interp->z = calloc(interp->channels, sizeof(float*)); + for (j = 0; j < interp->channels; j++) { + interp->z[j] = calloc( interp->delay, sizeof(float) ); + } + + /* Calculate the filter coefficients */ + for (j = 0; j < interp->taps; j++) { + /* Calculate sinc */ + double m = (double)j - (double)(interp->taps - 1) / 2.0; + double c = 1.0; + if (fabs(m) > ALMOST_ZERO) { + c = sin(m * M_PI / interp->factor) / (m * M_PI / interp->factor); + } + /* Apply Hanning window */ + c *= 0.5 * (1 - cos(2 * M_PI * j / (interp->taps - 1))); + + if (fabs(c) > ALMOST_ZERO) { /* Ignore any zero coeffs. */ + /* Put the coefficient into the correct subfilter */ + unsigned int f = j % interp->factor; + unsigned int t = interp->filter[f].count++; + interp->filter[f].coeff[t] = c; + interp->filter[f].index[t] = j / interp->factor; + } + } + return interp; +} + +static void interp_destroy(interpolator* interp) { + unsigned int j = 0; + if (!interp) { + return; + } + for (j = 0; j < interp->factor; j++) { + free(interp->filter[j].index); + free(interp->filter[j].coeff); + } + free(interp->filter); + for (j = 0; j < interp->channels; j++) { + free(interp->z[j]); + } + free(interp->z); + free(interp); +} + +static size_t interp_process(interpolator* interp, size_t frames, float* in, float* out) { + size_t frame = 0; + unsigned int chan = 0; + unsigned int f = 0; + unsigned int t = 0; + unsigned int out_stride = interp->channels * interp->factor; + float* outp = 0; + double acc = 0; + double c = 0; + + for (frame = 0; frame < frames; frame++) { + for (chan = 0; chan < interp->channels; chan++) { + /* Add sample to delay buffer */ + interp->z[chan][interp->zi] = *in++; + /* Apply coefficients */ + outp = out + chan; + for (f = 0; f < interp->factor; f++) { + acc = 0.0; + for (t = 0; t < interp->filter[f].count; t++) { + int i = (int)interp->zi - (int)interp->filter[f].index[t]; + if (i < 0) { + i += interp->delay; + } + c = interp->filter[f].coeff[t]; + acc += interp->z[chan][i] * c; + } + *outp = (float)acc; + outp += interp->channels; + } + } + out += out_stride; + interp->zi++; + if (interp->zi == interp->delay) { + interp->zi = 0; + } + } + + return frames * interp->factor; +} + +static void ebur128_init_filter(ebur128_state* st) { + int i, j; + + double f0 = 1681.974450955533; + double G = 3.999843853973347; + double Q = 0.7071752369554196; + + double K = tan(M_PI * f0 / (double) st->samplerate); + double Vh = pow(10.0, G / 20.0); + double Vb = pow(Vh, 0.4996667741545416); + + double pb[3] = 
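+  /* What follows (a reading of the surrounding code, not of the BS.1770
+   * text itself): the K-weighting pre-filter is designed as two biquads, a
+   * high shelf (pb/pa) and a high-pass (rb/ra), both mapped to the actual
+   * sample rate through the bilinear-transform variable
+   * K = tan(pi * f0 / fs).  The two stages are then convolved into one
+   * 5-coefficient direct-form filter, b = pb conv rb and a = pa conv ra
+   * (polynomial multiplication), which is what gets stored in st->d->b
+   * and st->d->a. */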
{0.0, 0.0, 0.0}; + double pa[3] = {1.0, 0.0, 0.0}; + double rb[3] = {1.0, -2.0, 1.0}; + double ra[3] = {1.0, 0.0, 0.0}; + + double a0 = 1.0 + K / Q + K * K ; + pb[0] = (Vh + Vb * K / Q + K * K) / a0; + pb[1] = 2.0 * (K * K - Vh) / a0; + pb[2] = (Vh - Vb * K / Q + K * K) / a0; + pa[1] = 2.0 * (K * K - 1.0) / a0; + pa[2] = (1.0 - K / Q + K * K) / a0; + + /* fprintf(stderr, "%.14f %.14f %.14f %.14f %.14f\n", + b1[0], b1[1], b1[2], a1[1], a1[2]); */ + + f0 = 38.13547087602444; + Q = 0.5003270373238773; + K = tan(M_PI * f0 / (double) st->samplerate); + + ra[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K); + ra[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K); + + /* fprintf(stderr, "%.14f %.14f\n", a2[1], a2[2]); */ + + st->d->b[0] = pb[0] * rb[0]; + st->d->b[1] = pb[0] * rb[1] + pb[1] * rb[0]; + st->d->b[2] = pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0]; + st->d->b[3] = pb[1] * rb[2] + pb[2] * rb[1]; + st->d->b[4] = pb[2] * rb[2]; + + st->d->a[0] = pa[0] * ra[0]; + st->d->a[1] = pa[0] * ra[1] + pa[1] * ra[0]; + st->d->a[2] = pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0]; + st->d->a[3] = pa[1] * ra[2] + pa[2] * ra[1]; + st->d->a[4] = pa[2] * ra[2]; + + for (i = 0; i < 5; ++i) { + for (j = 0; j < 5; ++j) { + st->d->v[i][j] = 0.0; + } + } +} + +static int ebur128_init_channel_map(ebur128_state* st) { + size_t i; + st->d->channel_map = (int*) malloc(st->channels * sizeof(int)); + if (!st->d->channel_map) { + return EBUR128_ERROR_NOMEM; + } + if (st->channels == 4) { + st->d->channel_map[0] = EBUR128_LEFT; + st->d->channel_map[1] = EBUR128_RIGHT; + st->d->channel_map[2] = EBUR128_LEFT_SURROUND; + st->d->channel_map[3] = EBUR128_RIGHT_SURROUND; + } else if (st->channels == 5) { + st->d->channel_map[0] = EBUR128_LEFT; + st->d->channel_map[1] = EBUR128_RIGHT; + st->d->channel_map[2] = EBUR128_CENTER; + st->d->channel_map[3] = EBUR128_LEFT_SURROUND; + st->d->channel_map[4] = EBUR128_RIGHT_SURROUND; + } else { + for (i = 0; i < st->channels; ++i) { + switch (i) { + case 0: st->d->channel_map[i] = EBUR128_LEFT; break; + case 1: st->d->channel_map[i] = EBUR128_RIGHT; break; + case 2: st->d->channel_map[i] = EBUR128_CENTER; break; + case 3: st->d->channel_map[i] = EBUR128_UNUSED; break; + case 4: st->d->channel_map[i] = EBUR128_LEFT_SURROUND; break; + case 5: st->d->channel_map[i] = EBUR128_RIGHT_SURROUND; break; + default: st->d->channel_map[i] = EBUR128_UNUSED; break; + } + } + } + return EBUR128_SUCCESS; +} + +static int ebur128_init_resampler(ebur128_state* st) { + int errcode = EBUR128_SUCCESS; + + if (st->samplerate < 96000) { + st->d->interp = interp_create(49, 4, st->channels); + CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit) + } else if (st->samplerate < 192000) { + st->d->interp = interp_create(49, 2, st->channels); + CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit) + } else { + st->d->resampler_buffer_input = NULL; + st->d->resampler_buffer_output = NULL; + st->d->interp = NULL; + goto exit; + } + + st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4; + st->d->resampler_buffer_input = malloc(st->d->resampler_buffer_input_frames * + st->channels * + sizeof(float)); + CHECK_ERROR(!st->d->resampler_buffer_input, EBUR128_ERROR_NOMEM, free_interp) + + st->d->resampler_buffer_output_frames = + st->d->resampler_buffer_input_frames * + st->d->interp->factor; + st->d->resampler_buffer_output = malloc + (st->d->resampler_buffer_output_frames * + st->channels * + sizeof(float)); + CHECK_ERROR(!st->d->resampler_buffer_output, EBUR128_ERROR_NOMEM, free_input) + + return 
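+  /* Note on the interpolator factors chosen above: a 49-tap polyphase FIR
+   * oversamples 4x below 96 kHz and 2x below 192 kHz, so the true-peak
+   * scan always evaluates the signal on a grid of at least ~176 kHz
+   * (e.g. 44100 Hz * 4 = 176400 Hz); at 192 kHz and above the signal is
+   * used as-is and no resampler buffers are allocated. */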
errcode; + +free_interp: + interp_destroy(st->d->interp); + st->d->interp = NULL; +free_input: + free(st->d->resampler_buffer_input); + st->d->resampler_buffer_input = NULL; +exit: + return errcode; +} + +static void ebur128_destroy_resampler(ebur128_state* st) { + free(st->d->resampler_buffer_input); + st->d->resampler_buffer_input = NULL; + free(st->d->resampler_buffer_output); + st->d->resampler_buffer_output = NULL; + interp_destroy(st->d->interp); + st->d->interp = NULL; +} + +void ebur128_get_version(int* major, int* minor, int* patch) { + *major = EBUR128_VERSION_MAJOR; + *minor = EBUR128_VERSION_MINOR; + *patch = EBUR128_VERSION_PATCH; +} + +ebur128_state* ebur128_init(unsigned int channels, + unsigned long samplerate, + int mode) { + int result; + int errcode; + ebur128_state* st; + unsigned int i; + size_t j; + + if (channels == 0 || samplerate < 5) { + return NULL; + } + + st = (ebur128_state*) malloc(sizeof(ebur128_state)); + CHECK_ERROR(!st, 0, exit) + st->d = (struct ebur128_state_internal*) + malloc(sizeof(struct ebur128_state_internal)); + CHECK_ERROR(!st->d, 0, free_state) + st->channels = channels; + errcode = ebur128_init_channel_map(st); + CHECK_ERROR(errcode, 0, free_internal) + + st->d->sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map) + st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_sample_peak, 0, free_sample_peak) + st->d->true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->true_peak, 0, free_prev_sample_peak) + st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_true_peak, 0, free_true_peak) + for (i = 0; i < channels; ++i) { + st->d->sample_peak[i] = 0.0; + st->d->prev_sample_peak[i] = 0.0; + st->d->true_peak[i] = 0.0; + st->d->prev_true_peak[i] = 0.0; + } + + st->d->use_histogram = mode & EBUR128_MODE_HISTOGRAM ? 
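+  /* Histogram mode (selected here) trades the unbounded block lists for
+   * two fixed arrays of 1000 energy bins, each 0.1 LU wide, so memory use
+   * stays constant for arbitrarily long streams at the cost of quantizing
+   * block loudness to the bin width. */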
1 : 0; + st->d->history = ULONG_MAX; + st->samplerate = samplerate; + st->d->samples_in_100ms = (st->samplerate + 5) / 10; + st->mode = mode; + if ((mode & EBUR128_MODE_S) == EBUR128_MODE_S) { + st->d->window = 3000; + } else if ((mode & EBUR128_MODE_M) == EBUR128_MODE_M) { + st->d->window = 400; + } else { + goto free_prev_true_peak; + } + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, 0, free_true_peak) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + ebur128_init_filter(st); + + if (st->d->use_histogram) { + st->d->block_energy_histogram = malloc(1000 * sizeof(unsigned long)); + CHECK_ERROR(!st->d->block_energy_histogram, 0, free_audio_data) + for (i = 0; i < 1000; ++i) { + st->d->block_energy_histogram[i] = 0; + } + } else { + st->d->block_energy_histogram = NULL; + } + if (st->d->use_histogram) { + st->d->short_term_block_energy_histogram = malloc(1000 * sizeof(unsigned long)); + CHECK_ERROR(!st->d->short_term_block_energy_histogram, 0, free_block_energy_histogram) + for (i = 0; i < 1000; ++i) { + st->d->short_term_block_energy_histogram[i] = 0; + } + } else { + st->d->short_term_block_energy_histogram = NULL; + } + STAILQ_INIT(&st->d->block_list); + st->d->block_list_size = 0; + st->d->block_list_max = st->d->history / 100; + STAILQ_INIT(&st->d->short_term_block_list); + st->d->st_block_list_size = 0; + st->d->st_block_list_max = st->d->history / 3000; + st->d->short_term_frame_counter = 0; + + result = ebur128_init_resampler(st); + CHECK_ERROR(result, 0, free_short_term_block_energy_histogram) + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + + /* initialize static constants */ + relative_gate_factor = pow(10.0, relative_gate / 10.0); + minus_twenty_decibels = pow(10.0, -20.0 / 10.0); + histogram_energy_boundaries[0] = pow(10.0, (-70.0 + 0.691) / 10.0); + if (st->d->use_histogram) { + for (i = 0; i < 1000; ++i) { + histogram_energies[i] = pow(10.0, ((double) i / 10.0 - 69.95 + 0.691) / 10.0); + } + for (i = 1; i < 1001; ++i) { + histogram_energy_boundaries[i] = pow(10.0, ((double) i / 10.0 - 70.0 + 0.691) / 10.0); + } + } + + return st; + +free_short_term_block_energy_histogram: + free(st->d->short_term_block_energy_histogram); +free_block_energy_histogram: + free(st->d->block_energy_histogram); +free_audio_data: + free(st->d->audio_data); +free_prev_true_peak: + free(st->d->prev_true_peak); +free_true_peak: + free(st->d->true_peak); +free_prev_sample_peak: + free(st->d->prev_sample_peak); +free_sample_peak: + free(st->d->sample_peak); +free_channel_map: + free(st->d->channel_map); +free_internal: + free(st->d); +free_state: + free(st); +exit: + return NULL; +} + +void ebur128_destroy(ebur128_state** st) { + struct ebur128_dq_entry* entry; + free((*st)->d->block_energy_histogram); + free((*st)->d->short_term_block_energy_histogram); + free((*st)->d->audio_data); + free((*st)->d->channel_map); + free((*st)->d->sample_peak); + free((*st)->d->prev_sample_peak); + free((*st)->d->true_peak); + 
free((*st)->d->prev_true_peak); + while (!STAILQ_EMPTY(&(*st)->d->block_list)) { + entry = STAILQ_FIRST(&(*st)->d->block_list); + STAILQ_REMOVE_HEAD(&(*st)->d->block_list, entries); + free(entry); + } + while (!STAILQ_EMPTY(&(*st)->d->short_term_block_list)) { + entry = STAILQ_FIRST(&(*st)->d->short_term_block_list); + STAILQ_REMOVE_HEAD(&(*st)->d->short_term_block_list, entries); + free(entry); + } + ebur128_destroy_resampler(*st); + free((*st)->d); + free(*st); + *st = NULL; +} + +static void ebur128_check_true_peak(ebur128_state* st, size_t frames) { + size_t c, i, frames_out; + + frames_out = interp_process(st->d->interp, frames, + st->d->resampler_buffer_input, + st->d->resampler_buffer_output); + + for (i = 0; i < frames_out; ++i) { + for (c = 0; c < st->channels; ++c) { + float val = st->d->resampler_buffer_output[i * st->channels + c]; + + if (val > st->d->prev_true_peak[c]) { + st->d->prev_true_peak[c] = val; + } else if (-val > st->d->prev_true_peak[c]) { + st->d->prev_true_peak[c] = -val; + } + } + } +} + +#ifdef __SSE2_MATH__ +#include +#define TURN_ON_FTZ \ + unsigned int mxcsr = _mm_getcsr(); \ + _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON); +#define TURN_OFF_FTZ _mm_setcsr(mxcsr); +#define FLUSH_MANUALLY +#else +#warning "manual FTZ is being used, please enable SSE2 (-msse2 -mfpmath=sse)" +#define TURN_ON_FTZ +#define TURN_OFF_FTZ +#define FLUSH_MANUALLY \ + st->d->v[ci][4] = fabs(st->d->v[ci][4]) < DBL_MIN ? 0.0 : st->d->v[ci][4]; \ + st->d->v[ci][3] = fabs(st->d->v[ci][3]) < DBL_MIN ? 0.0 : st->d->v[ci][3]; \ + st->d->v[ci][2] = fabs(st->d->v[ci][2]) < DBL_MIN ? 0.0 : st->d->v[ci][2]; \ + st->d->v[ci][1] = fabs(st->d->v[ci][1]) < DBL_MIN ? 0.0 : st->d->v[ci][1]; +#endif + +#define EBUR128_FILTER(type, min_scale, max_scale) \ +static void ebur128_filter_##type(ebur128_state* st, const type* src, \ + size_t frames) { \ + static double scaling_factor = \ + -((double) (min_scale)) > (double) (max_scale) ? 
\ + -((double) (min_scale)) : (double) (max_scale); \ + double* audio_data = st->d->audio_data + st->d->audio_data_index; \ + size_t i, c; \ + \ + TURN_ON_FTZ \ + \ + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK) { \ + for (c = 0; c < st->channels; ++c) { \ + double max = 0.0; \ + for (i = 0; i < frames; ++i) { \ + if (src[i * st->channels + c] > max) { \ + max = src[i * st->channels + c]; \ + } else if (-src[i * st->channels + c] > max) { \ + max = -1.0 * src[i * st->channels + c]; \ + } \ + } \ + max /= scaling_factor; \ + if (max > st->d->prev_sample_peak[c]) st->d->prev_sample_peak[c] = max; \ + } \ + } \ + if ((st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK && \ + st->d->interp) { \ + for (c = 0; c < st->channels; ++c) { \ + for (i = 0; i < frames; ++i) { \ + st->d->resampler_buffer_input[i * st->channels + c] = \ + (float) (src[i * st->channels + c] / scaling_factor); \ + } \ + } \ + ebur128_check_true_peak(st, frames); \ + } \ + for (c = 0; c < st->channels; ++c) { \ + int ci = st->d->channel_map[c] - 1; \ + if (ci < 0) continue; \ + else if (ci == EBUR128_DUAL_MONO - 1) ci = 0; /*dual mono */ \ + for (i = 0; i < frames; ++i) { \ + st->d->v[ci][0] = (double) (src[i * st->channels + c] / scaling_factor) \ + - st->d->a[1] * st->d->v[ci][1] \ + - st->d->a[2] * st->d->v[ci][2] \ + - st->d->a[3] * st->d->v[ci][3] \ + - st->d->a[4] * st->d->v[ci][4]; \ + audio_data[i * st->channels + c] = \ + st->d->b[0] * st->d->v[ci][0] \ + + st->d->b[1] * st->d->v[ci][1] \ + + st->d->b[2] * st->d->v[ci][2] \ + + st->d->b[3] * st->d->v[ci][3] \ + + st->d->b[4] * st->d->v[ci][4]; \ + st->d->v[ci][4] = st->d->v[ci][3]; \ + st->d->v[ci][3] = st->d->v[ci][2]; \ + st->d->v[ci][2] = st->d->v[ci][1]; \ + st->d->v[ci][1] = st->d->v[ci][0]; \ + } \ + FLUSH_MANUALLY \ + } \ + TURN_OFF_FTZ \ +} +EBUR128_FILTER(short, SHRT_MIN, SHRT_MAX) +EBUR128_FILTER(int, INT_MIN, INT_MAX) +EBUR128_FILTER(float, -1.0f, 1.0f) +EBUR128_FILTER(double, -1.0, 1.0) + +static double ebur128_energy_to_loudness(double energy) { + return 10 * (log(energy) / log(10.0)) - 0.691; +} + +static size_t find_histogram_index(double energy) { + size_t index_min = 0; + size_t index_max = 1000; + size_t index_mid; + + do { + index_mid = (index_min + index_max) / 2; + if (energy >= histogram_energy_boundaries[index_mid]) { + index_min = index_mid; + } else { + index_max = index_mid; + } + } while (index_max - index_min != 1); + + return index_min; +} + +static int ebur128_calc_gating_block(ebur128_state* st, size_t frames_per_block, + double* optional_output) { + size_t i, c; + double sum = 0.0; + double channel_sum; + for (c = 0; c < st->channels; ++c) { + if (st->d->channel_map[c] == EBUR128_UNUSED) { + continue; + } + channel_sum = 0.0; + if (st->d->audio_data_index < frames_per_block * st->channels) { + for (i = 0; i < st->d->audio_data_index / st->channels; ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + for (i = st->d->audio_data_frames - + (frames_per_block - + st->d->audio_data_index / st->channels); + i < st->d->audio_data_frames; ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + } else { + for (i = st->d->audio_data_index / st->channels - frames_per_block; + i < st->d->audio_data_index / st->channels; + ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + } + if (st->d->channel_map[c] == EBUR128_Mp110 || + 
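+      /* Channel weighting as in the BS.1770 summation: the surround
+       * channels matched below get a gain of 1.41 (about +1.5 dB) before
+       * the energies are summed, and a channel tagged dual mono is
+       * counted twice. */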
st->d->channel_map[c] == EBUR128_Mm110 || + st->d->channel_map[c] == EBUR128_Mp060 || + st->d->channel_map[c] == EBUR128_Mm060 || + st->d->channel_map[c] == EBUR128_Mp090 || + st->d->channel_map[c] == EBUR128_Mm090) { + channel_sum *= 1.41; + } else if (st->d->channel_map[c] == EBUR128_DUAL_MONO) { + channel_sum *= 2.0; + } + sum += channel_sum; + } + sum /= (double) frames_per_block; + if (optional_output) { + *optional_output = sum; + return EBUR128_SUCCESS; + } else if (sum >= histogram_energy_boundaries[0]) { + if (st->d->use_histogram) { + ++st->d->block_energy_histogram[find_histogram_index(sum)]; + } else { + struct ebur128_dq_entry* block; + if (st->d->block_list_size == st->d->block_list_max) { + block = STAILQ_FIRST(&st->d->block_list); + STAILQ_REMOVE_HEAD(&st->d->block_list, entries); + } else { + block = (struct ebur128_dq_entry*) malloc(sizeof(struct ebur128_dq_entry)); + if (!block) { + return EBUR128_ERROR_NOMEM; + } + st->d->block_list_size++; + } + block->z = sum; + STAILQ_INSERT_TAIL(&st->d->block_list, block, entries); + } + return EBUR128_SUCCESS; + } else { + return EBUR128_SUCCESS; + } +} + +int ebur128_set_channel(ebur128_state* st, + unsigned int channel_number, + int value) { + if (channel_number >= st->channels) { + return 1; + } + if (value == EBUR128_DUAL_MONO && + (st->channels != 1 || channel_number != 0)) { + fprintf(stderr, "EBUR128_DUAL_MONO only works with mono files!\n"); + return 1; + } + st->d->channel_map[channel_number] = value; + return 0; +} + +int ebur128_change_parameters(ebur128_state* st, + unsigned int channels, + unsigned long samplerate) { + int errcode = EBUR128_SUCCESS; + size_t j; + + if (channels == 0 || samplerate < 5) { + return EBUR128_ERROR_NOMEM; + } + + if (channels == st->channels && + samplerate == st->samplerate) { + return EBUR128_ERROR_NO_CHANGE; + } + + free(st->d->audio_data); + st->d->audio_data = NULL; + + if (channels != st->channels) { + unsigned int i; + + free(st->d->channel_map); st->d->channel_map = NULL; + free(st->d->sample_peak); st->d->sample_peak = NULL; + free(st->d->prev_sample_peak); st->d->prev_sample_peak = NULL; + free(st->d->true_peak); st->d->true_peak = NULL; + free(st->d->prev_true_peak); st->d->prev_true_peak = NULL; + st->channels = channels; + + errcode = ebur128_init_channel_map(st); + CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit) + + st->d->sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->sample_peak, EBUR128_ERROR_NOMEM, exit) + st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_sample_peak, EBUR128_ERROR_NOMEM, exit) + st->d->true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->true_peak, EBUR128_ERROR_NOMEM, exit) + st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_true_peak, EBUR128_ERROR_NOMEM, exit) + for (i = 0; i < channels; ++i) { + st->d->sample_peak[i] = 0.0; + st->d->prev_sample_peak[i] = 0.0; + st->d->true_peak[i] = 0.0; + st->d->prev_true_peak[i] = 0.0; + } + } + if (samplerate != st->samplerate) { + st->samplerate = samplerate; + st->d->samples_in_100ms = (st->samplerate + 5) / 10; + ebur128_init_filter(st); + } + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + 
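+  /* Worked example of the rounding above, assuming fs = 44056 Hz and a
+   * 400 ms window: samples_in_100ms = (44056 + 5) / 10 = 4406, and
+   * 44056 * 400 / 1000 = 17622 frames, which is not a multiple of 4406,
+   * so the buffer is rounded up to 4 * 4406 = 17624 frames. */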
st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + ebur128_destroy_resampler(st); + errcode = ebur128_init_resampler(st); + CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit) + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + /* reset short term frame counter */ + st->d->short_term_frame_counter = 0; + +exit: + return errcode; +} + +int ebur128_set_max_window(ebur128_state* st, unsigned long window) +{ + int errcode = EBUR128_SUCCESS; + size_t j; + + if ((st->mode & EBUR128_MODE_S) == EBUR128_MODE_S && window < 3000) { + window = 3000; + } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && window < 400) { + window = 400; + } + if (window == st->d->window) { + return EBUR128_ERROR_NO_CHANGE; + } + + st->d->window = window; + free(st->d->audio_data); + st->d->audio_data = NULL; + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + /* reset short term frame counter */ + st->d->short_term_frame_counter = 0; + +exit: + return errcode; +} + +int ebur128_set_max_history(ebur128_state* st, unsigned long history) +{ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA && history < 3000) { + history = 3000; + } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && history < 400) { + history = 400; + } + if (history == st->d->history) { + return EBUR128_ERROR_NO_CHANGE; + } + st->d->history = history; + st->d->block_list_max = st->d->history / 100; + st->d->st_block_list_max = st->d->history / 3000; + while (st->d->block_list_size > st->d->block_list_max) { + struct ebur128_dq_entry* block = STAILQ_FIRST(&st->d->block_list); + STAILQ_REMOVE_HEAD(&st->d->block_list, entries); + free(block); + st->d->block_list_size--; + } + while (st->d->st_block_list_size > st->d->st_block_list_max) { + struct ebur128_dq_entry* block = STAILQ_FIRST(&st->d->short_term_block_list); + STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); + free(block); + st->d->st_block_list_size--; + } + return EBUR128_SUCCESS; +} + +static int ebur128_energy_shortterm(ebur128_state* st, double* out); +#define EBUR128_ADD_FRAMES(type) \ +int ebur128_add_frames_##type(ebur128_state* st, \ + const type* src, size_t frames) { \ + size_t src_index = 0; \ + unsigned int c = 0; \ + for (c = 0; c < st->channels; c++) { \ + st->d->prev_sample_peak[c] = 0.0; \ + st->d->prev_true_peak[c] = 0.0; \ + } \ + while (frames > 0) { \ + if (frames >= st->d->needed_frames) { \ + ebur128_filter_##type(st, src + src_index, st->d->needed_frames); \ + src_index += st->d->needed_frames * st->channels; 
\ + frames -= st->d->needed_frames; \ + st->d->audio_data_index += st->d->needed_frames * st->channels; \ + /* calculate the new gating block */ \ + if ((st->mode & EBUR128_MODE_I) == EBUR128_MODE_I) { \ + if (ebur128_calc_gating_block(st, st->d->samples_in_100ms * 4, NULL)) {\ + return EBUR128_ERROR_NOMEM; \ + } \ + } \ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \ + st->d->short_term_frame_counter += st->d->needed_frames; \ + if (st->d->short_term_frame_counter == st->d->samples_in_100ms * 30) { \ + struct ebur128_dq_entry* block; \ + double st_energy; \ + if (ebur128_energy_shortterm(st, &st_energy) == EBUR128_SUCCESS && \ + st_energy >= histogram_energy_boundaries[0]) { \ + if (st->d->use_histogram) { \ + ++st->d->short_term_block_energy_histogram[ \ + find_histogram_index(st_energy)];\ + } else { \ + if (st->d->st_block_list_size == st->d->st_block_list_max) { \ + block = STAILQ_FIRST(&st->d->short_term_block_list); \ + STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); \ + } else { \ + block = (struct ebur128_dq_entry*) \ + malloc(sizeof(struct ebur128_dq_entry)); \ + if (!block) return EBUR128_ERROR_NOMEM; \ + st->d->st_block_list_size++; \ + } \ + block->z = st_energy; \ + STAILQ_INSERT_TAIL(&st->d->short_term_block_list, \ + block, entries); \ + } \ + } \ + st->d->short_term_frame_counter = st->d->samples_in_100ms * 20; \ + } \ + } \ + /* 100ms are needed for all blocks besides the first one */ \ + st->d->needed_frames = st->d->samples_in_100ms; \ + /* reset audio_data_index when buffer full */ \ + if (st->d->audio_data_index == st->d->audio_data_frames * st->channels) {\ + st->d->audio_data_index = 0; \ + } \ + } else { \ + ebur128_filter_##type(st, src + src_index, frames); \ + st->d->audio_data_index += frames * st->channels; \ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \ + st->d->short_term_frame_counter += frames; \ + } \ + st->d->needed_frames -= frames; \ + frames = 0; \ + } \ + } \ + for (c = 0; c < st->channels; c++) { \ + if (st->d->prev_sample_peak[c] > st->d->sample_peak[c]) { \ + st->d->sample_peak[c] = st->d->prev_sample_peak[c]; \ + } \ + if (st->d->prev_true_peak[c] > st->d->true_peak[c]) { \ + st->d->true_peak[c] = st->d->prev_true_peak[c]; \ + } \ + } \ + return EBUR128_SUCCESS; \ +} +EBUR128_ADD_FRAMES(short) +EBUR128_ADD_FRAMES(int) +EBUR128_ADD_FRAMES(float) +EBUR128_ADD_FRAMES(double) + +static int ebur128_calc_relative_threshold(ebur128_state* st, + size_t* above_thresh_counter, + double* relative_threshold) { + struct ebur128_dq_entry* it; + size_t i; + + if (st->d->use_histogram) { + for (i = 0; i < 1000; ++i) { + *relative_threshold += st->d->block_energy_histogram[i] * + histogram_energies[i]; + *above_thresh_counter += st->d->block_energy_histogram[i]; + } + } else { + STAILQ_FOREACH(it, &st->d->block_list, entries) { + ++*above_thresh_counter; + *relative_threshold += it->z; + } + } + + return EBUR128_SUCCESS; +} + +static int ebur128_gated_loudness(ebur128_state** sts, size_t size, + double* out) { + struct ebur128_dq_entry* it; + double gated_loudness = 0.0; + double relative_threshold = 0.0; + size_t above_thresh_counter = 0; + size_t i, j, start_index; + + for (i = 0; i < size; i++) { + if (sts[i] && (sts[i]->mode & EBUR128_MODE_I) != EBUR128_MODE_I) { + return EBUR128_ERROR_INVALID_MODE; + } + } + + for (i = 0; i < size; i++) { + if (!sts[i]) { + continue; + } + ebur128_calc_relative_threshold(sts[i], &above_thresh_counter, &relative_threshold); + } + if (!above_thresh_counter) { + *out = -HUGE_VAL; + return 
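+  /* The rest of this function is the second stage of the BS.1770 gating:
+   * the mean energy of all blocks that passed the absolute gate (-70 LUFS)
+   * defines a relative threshold 10 LU lower
+   * (relative_gate_factor = 10^(-10/10) = 0.1); integrated loudness is
+   * then the mean of the blocks above that threshold, converted with
+   * loudness = -0.691 + 10 * log10(energy). */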
EBUR128_SUCCESS; + } + + relative_threshold /= (double)above_thresh_counter; + relative_threshold *= relative_gate_factor; + + above_thresh_counter = 0; + if (relative_threshold < histogram_energy_boundaries[0]) { + start_index = 0; + } else { + start_index = find_histogram_index(relative_threshold); + if (relative_threshold > histogram_energies[start_index]) { + ++start_index; + } + } + for (i = 0; i < size; i++) { + if (!sts[i]) { + continue; + } + if (sts[i]->d->use_histogram) { + for (j = start_index; j < 1000; ++j) { + gated_loudness += sts[i]->d->block_energy_histogram[j] * + histogram_energies[j]; + above_thresh_counter += sts[i]->d->block_energy_histogram[j]; + } + } else { + STAILQ_FOREACH(it, &sts[i]->d->block_list, entries) { + if (it->z >= relative_threshold) { + ++above_thresh_counter; + gated_loudness += it->z; + } + } + } + } + if (!above_thresh_counter) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + gated_loudness /= (double) above_thresh_counter; + *out = ebur128_energy_to_loudness(gated_loudness); + return EBUR128_SUCCESS; +} + +int ebur128_relative_threshold(ebur128_state* st, double* out) { + double relative_threshold = 0.0; + size_t above_thresh_counter = 0; + + if ((st->mode & EBUR128_MODE_I) != EBUR128_MODE_I) { + return EBUR128_ERROR_INVALID_MODE; + } + + ebur128_calc_relative_threshold(st, &above_thresh_counter, &relative_threshold); + + if (!above_thresh_counter) { + *out = -70.0; + return EBUR128_SUCCESS; + } + + relative_threshold /= (double)above_thresh_counter; + relative_threshold *= relative_gate_factor; + + *out = ebur128_energy_to_loudness(relative_threshold); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_global(ebur128_state* st, double* out) { + return ebur128_gated_loudness(&st, 1, out); +} + +int ebur128_loudness_global_multiple(ebur128_state** sts, size_t size, + double* out) { + return ebur128_gated_loudness(sts, size, out); +} + +static int ebur128_energy_in_interval(ebur128_state* st, + size_t interval_frames, + double* out) { + if (interval_frames > st->d->audio_data_frames) { + return EBUR128_ERROR_INVALID_MODE; + } + ebur128_calc_gating_block(st, interval_frames, out); + return EBUR128_SUCCESS; +} + +static int ebur128_energy_shortterm(ebur128_state* st, double* out) { + return ebur128_energy_in_interval(st, st->d->samples_in_100ms * 30, out); +} + +int ebur128_loudness_momentary(ebur128_state* st, double* out) { + double energy; + int error = ebur128_energy_in_interval(st, st->d->samples_in_100ms * 4, + &energy); + if (error) { + return error; + } else if (energy <= 0.0) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + *out = ebur128_energy_to_loudness(energy); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_shortterm(ebur128_state* st, double* out) { + double energy; + int error = ebur128_energy_shortterm(st, &energy); + if (error) { + return error; + } else if (energy <= 0.0) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + *out = ebur128_energy_to_loudness(energy); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_window(ebur128_state* st, + unsigned long window, + double* out) { + double energy; + size_t interval_frames = st->samplerate * window / 1000; + int error = ebur128_energy_in_interval(st, interval_frames, &energy); + if (error) { + return error; + } else if (energy <= 0.0) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + *out = ebur128_energy_to_loudness(energy); + return EBUR128_SUCCESS; +} + +static int ebur128_double_cmp(const void *p1, const void *p2) { + const double* d1 = (const double*) 
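+  /* Ascending comparator for qsort().  The LRA code below sorts the 3 s
+   * short-term block energies, drops everything below the absolute gate
+   * and below -20 dB relative to the gated mean (minus_twenty_decibels =
+   * 10^(-20/10) = 0.01), and reports the spread between the 10th and 95th
+   * percentiles in LU, following EBU Tech 3342. */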
p1; + const double* d2 = (const double*) p2; + return (*d1 > *d2) - (*d1 < *d2); +} + +/* EBU - TECH 3342 */ +int ebur128_loudness_range_multiple(ebur128_state** sts, size_t size, + double* out) { + size_t i, j; + struct ebur128_dq_entry* it; + double* stl_vector; + size_t stl_size; + double* stl_relgated; + size_t stl_relgated_size; + double stl_power, stl_integrated; + /* High and low percentile energy */ + double h_en, l_en; + int use_histogram = 0; + + for (i = 0; i < size; ++i) { + if (sts[i]) { + if ((sts[i]->mode & EBUR128_MODE_LRA) != EBUR128_MODE_LRA) { + return EBUR128_ERROR_INVALID_MODE; + } + if (i == 0 && sts[i]->mode & EBUR128_MODE_HISTOGRAM) { + use_histogram = 1; + } else if (use_histogram != !!(sts[i]->mode & EBUR128_MODE_HISTOGRAM)) { + return EBUR128_ERROR_INVALID_MODE; + } + } + } + + if (use_histogram) { + unsigned long hist[1000] = { 0 }; + size_t percentile_low, percentile_high; + size_t index; + + stl_size = 0; + stl_power = 0.0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + for (j = 0; j < 1000; ++j) { + hist[j] += sts[i]->d->short_term_block_energy_histogram[j]; + stl_size += sts[i]->d->short_term_block_energy_histogram[j]; + stl_power += sts[i]->d->short_term_block_energy_histogram[j] + * histogram_energies[j]; + } + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + + stl_power /= stl_size; + stl_integrated = minus_twenty_decibels * stl_power; + + if (stl_integrated < histogram_energy_boundaries[0]) { + index = 0; + } else { + index = find_histogram_index(stl_integrated); + if (stl_integrated > histogram_energies[index]) { + ++index; + } + } + stl_size = 0; + for (j = index; j < 1000; ++j) { + stl_size += hist[j]; + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + + percentile_low = (size_t) ((stl_size - 1) * 0.1 + 0.5); + percentile_high = (size_t) ((stl_size - 1) * 0.95 + 0.5); + + stl_size = 0; + j = index; + while (stl_size <= percentile_low) { + stl_size += hist[j++]; + } + l_en = histogram_energies[j - 1]; + while (stl_size <= percentile_high) { + stl_size += hist[j++]; + } + h_en = histogram_energies[j - 1]; + *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en); + return EBUR128_SUCCESS; + + } else { + stl_size = 0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) { + ++stl_size; + } + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + stl_vector = (double*) malloc(stl_size * sizeof(double)); + if (!stl_vector) { + return EBUR128_ERROR_NOMEM; + } + + j = 0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) { + stl_vector[j] = it->z; + ++j; + } + } + qsort(stl_vector, stl_size, sizeof(double), ebur128_double_cmp); + stl_power = 0.0; + for (i = 0; i < stl_size; ++i) { + stl_power += stl_vector[i]; + } + stl_power /= (double) stl_size; + stl_integrated = minus_twenty_decibels * stl_power; + + stl_relgated = stl_vector; + stl_relgated_size = stl_size; + while (stl_relgated_size > 0 && *stl_relgated < stl_integrated) { + ++stl_relgated; + --stl_relgated_size; + } + + if (stl_relgated_size) { + h_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.95 + 0.5)]; + l_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.1 + 0.5)]; + free(stl_vector); + *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en); + return EBUR128_SUCCESS; + } else { + free(stl_vector); + *out = 0.0; + return 
EBUR128_SUCCESS; + } + } +} + +int ebur128_loudness_range(ebur128_state* st, double* out) { + return ebur128_loudness_range_multiple(&st, 1, out); +} + +int ebur128_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) != EBUR128_MODE_SAMPLE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_prev_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) != EBUR128_MODE_SAMPLE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->prev_sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->true_peak[channel_number] > st->d->sample_peak[channel_number] + ? st->d->true_peak[channel_number] + : st->d->sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_prev_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->prev_true_peak[channel_number] + > st->d->prev_sample_peak[channel_number] + ? 
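+  /* As in ebur128_true_peak() above, the per-call sample peak acts as a
+   * floor for the reported true peak.  Converting the linear result to
+   * dBTP is 20 * log10(out); e.g. out = 0.5 reads as roughly -6.02 dBTP. */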
st->d->prev_true_peak[channel_number]
+             : st->d->prev_sample_peak[channel_number];
+  return EBUR128_SUCCESS;
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/CMakeLists.txt b/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/CMakeLists.txt
new file mode 100644
index 0000000..3045b00
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_WAVES_SRCS)
+add_library(waves ${DIR_WAVES_SRCS})
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/inc/ExtraMono.h b/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/inc/ExtraMono.h
new file mode 100755
index 0000000..280fab0
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/inc/ExtraMono.h
@@ -0,0 +1,230 @@
+
+#include <stdio.h>
+#include <string.h>
+#include <string>
+
+#define SIZE_LONG 4
+#define SIZE_SHORT 2
+
+#define SIZE_FLAG 4
+#define FMT_TAG 0x0001
+
+#define BITS_PER_BYTE 8
+
+#ifndef AFS_CMPL_MAX_WAV
+#define AFS_CMPL_MAX_WAV 15360000 // 16 minutes of audio (960 * 16000)
+#endif
+
+//+---------------------------------------------------------------------------+
+//+ Read one little-endian 32-bit value from the file
+//+---------------------------------------------------------------------------+
+unsigned long fa_read_u32(FILE* fp)
+{
+    unsigned long cx;
+    unsigned char temp[SIZE_LONG];
+
+    fread(temp, sizeof(unsigned char), SIZE_LONG, fp);
+    cx = (unsigned long)temp[0];
+    cx |= (unsigned long)temp[1] << 8;
+    cx |= (unsigned long)temp[2] << 16;
+    cx |= (unsigned long)temp[3] << 24;
+    return cx;
+}
+
+//+---------------------------------------------------------------------------+
+//+ Read one little-endian 16-bit value from the file
+//+---------------------------------------------------------------------------+
+unsigned short fa_read_u16(FILE *fp)
+{
+    unsigned short cx;
+    unsigned char temp[SIZE_SHORT];
+
+    fread(temp, sizeof(unsigned char), SIZE_SHORT, fp);
+    cx = temp[0] | (temp[1] * 256);
+    return cx;
+}
+
+int GetWaveHeadLen(const char* pszFile, unsigned short &channels, int &nPos, int &nLength)
+{
+    //+---------------------------------------------------------------------------+
+    //+ Read the WAVE header info
+    //+---------------------------------------------------------------------------+
+    unsigned char temp[SIZE_FLAG];
+    unsigned short bits_per_sample;
+    unsigned long x_size;
+    unsigned long n_skip;
+
+    unsigned short format;
+    //unsigned short channels;
+    unsigned long sample_rate;
+    unsigned short block_align;
+    unsigned long data_size;
+    int nCnt = 0;
+
+    /* Read the common header fields */
+    FILE* pWavFile = fopen(pszFile, "rb");
+    if ( pWavFile == NULL )
+    {
+        printf("Input file can not be opened!\n");
+        return -1;
+    }
+
+    fseek(pWavFile, 0, SEEK_END);
+    nLength = ftell(pWavFile);
+    fseek(pWavFile, 0, SEEK_SET);
+
+    // Check that the resource tag is "RIFF"
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "RIFF", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Resource flag is not RIFF!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+    nCnt += SIZE_LONG;
+
+    // Check that the file tag is "WAVE"
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "WAVE", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "File flag is not WAVE\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    // Check that the format tag is "fmt "
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "fmt ", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Format flag is not FMT!\n");
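+        /* For orientation (standard RIFF layout, not specific to this
+         * file): bytes 0-3 are "RIFF", 4-7 the chunk size, 8-11 "WAVE",
+         * followed by a "fmt " sub-chunk (tag, size, wFormatTag,
+         * nChannels, nSamplesPerSec, nAvgBytesPerSec, nBlockAlign,
+         * wBitsPerSample) and eventually a "data" sub-chunk holding the
+         * sample bytes; that is the sequence this parser walks. */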
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    x_size = fa_read_u32(pWavFile);
+    nCnt += SIZE_LONG;
+
+    // Check that the encoding format is 0x0001 (PCM)
+    format = fa_read_u16(pWavFile);
+    nCnt += SIZE_SHORT;
+    if ( format != FMT_TAG )
+    {
+        fprintf(stderr, "Encoding format is not 0x0001!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+
+    // Read the channel count and sample rate
+    channels = fa_read_u16(pWavFile);
+    sample_rate = fa_read_u32(pWavFile);
+
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+
+    // Read the block align and bits per sample
+    block_align = fa_read_u16(pWavFile);
+    bits_per_sample = fa_read_u16(pWavFile);
+
+    /* Skip any extra bytes of the fmt chunk */
+    x_size -= (4*SIZE_SHORT + 2*SIZE_LONG);
+    if ( x_size != 0 )
+    {
+        fseek(pWavFile, x_size, SEEK_CUR);
+    }
+
+    // Read the data size
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    while ( memcmp(temp, "data", SIZE_FLAG) != 0 )
+    {
+        n_skip = fa_read_u32(pWavFile);
+        fseek(pWavFile, n_skip, SEEK_CUR);
+
+        fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    }
+
+    data_size = fa_read_u32(pWavFile);
+    fclose(pWavFile);
+
+    //+---------------------------------------------------------------------------+
+    //+ Return the length of the WAVE header
+    //+---------------------------------------------------------------------------+
+    nPos = nCnt;
+    int nHeadLength = nLength - data_size;
+    return nHeadLength;
+}
+
+bool ExtraMono(const std::string &sInput, const std::string &sOutput)
+{
+    FILE *pFile = fopen(sInput.c_str(), "rb");
+    if ( NULL == pFile )
+    {
+        printf("Fopen Error %s", sInput.c_str());
+        return false;
+    }
+
+    FILE *pFile2 = fopen(sOutput.c_str(), "wb");
+    if ( NULL == pFile2 )
+    {
+        printf("Fopen2 Error %s", sOutput.c_str());
+        fclose(pFile);
+        return false;
+    }
+
+    short *pBuf = new short[AFS_CMPL_MAX_WAV];
+    int nLen = 0;
+
+    nLen = fread(pBuf, sizeof(short), AFS_CMPL_MAX_WAV, pFile);
+    if ( nLen <= 0 )
+    {
+        perror("Fread Error!");
+        delete [] pBuf;
+        fclose(pFile);
+        fclose(pFile2);
+        return false;
+    }
+
+    unsigned short channels = 0;
+    int nPos;
+    int nLength;
+    int nHeadByte = GetWaveHeadLen(sInput.c_str(), channels, nPos, nLength);
+    int nHeadShort = nHeadByte / 2;
+
+    if (channels == 1)
+    {
+        // Mono input: copy the samples after the header unchanged
+        fwrite(pBuf + nHeadShort, sizeof(short), nLen - nHeadShort, pFile2);
+    }
+    else
+    {
+        // Stereo input: set the channel count in the copied header to 1
+        // and keep only every second sample (the left channel)
+        short *pBuf2 = new short[AFS_CMPL_MAX_WAV];
+        memcpy(pBuf2, pBuf, nHeadShort * sizeof(short));
+        pBuf2[nPos] = 1;
+
+        unsigned char tmp[2];
+        memcpy(tmp, &pBuf2[nPos], 2);
+
+        pBuf2[nPos] = static_cast<short>(tmp[0] | tmp[1] * 256);
+
+        short *pWav = pBuf + nHeadShort;
+        nLen -= nHeadShort;
+
+        int halfnlen = nLen / 2;
+        for (int i = 0; i < halfnlen; i++)
+        {
+            pBuf2[nHeadShort + i] = *(pWav + i * 2);
+        }
+        // Write the header plus the mono samples actually produced
+        fwrite(pBuf2, sizeof(short), halfnlen + nHeadShort, pFile2);
+
+        delete [] pBuf2;
+        pBuf2 = NULL;
+    }
+
+    delete [] pBuf;
+    pBuf = NULL;
+
+    fclose(pFile);
+    fclose(pFile2);
+    return true;
+}
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/inc/WaveFile.h b/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/inc/WaveFile.h
new file mode 100755
index 0000000..8b57806
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/inc/WaveFile.h
@@ -0,0 +1,74 @@
+#ifndef WAVE_FILE_H
+#define WAVE_FILE_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+
+typedef enum SAMPLE_FORMAT
+{
+    SF_U8 = 8,
+    SF_S16 = 16,
+    SF_S24 = 24,
+    SF_S32 = 32,
+    SF_IEEE_FLOAT = 0x100 + 32,
+    SF_IEEE_DOUBLE = 0x100 + 64,
+    SF_MAX,
+} SAMPLE_FORMAT;
+
+/* Main processing object **/
+class CWaveFile
+{
+public:
+    /* Construct with a file name and whether to read or write **/
+    CWaveFile(const char* Filename, bool Write);
+    virtual ~CWaveFile();
+
+public:
+    int GetChannels();
+    int GetSampleRate();
+    double GetDuration(); // in seconds
+    uint32_t GetChannelMask();
+    void SetChannels(int Channels);
+    void SetSampleRate(int SampleRate);
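+    /* A usage sketch for this class (illustrative only; the file name
+     * "in.wav" and the stereo frame buffer are assumptions, not part of
+     * the API):
+     *
+     *   CWaveFile wav("in.wav", false);   // false = open for reading
+     *   if (wav.GetStatus()) {
+     *       short frame[2];
+     *       while (wav.ReadFrameAsS16(frame, 1)) {
+     *           // process one interleaved frame here
+     *       }
+     *   }
+     */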
+    void SetSampleFormat(SAMPLE_FORMAT Format);
+    void SetChannelMask(uint32_t Mask);
+    void Stat();
+    void SetupDone();
+    bool ReadFrameAsS16(short* FrameSamples, int Frames = 1);
+    bool ReadFrameAsDouble(double* FrameSamples, int Frames = 1);
+    bool ReadFrameAsfloat(float* FrameSamples, int Frames = 1);
+    void WriteRaw(void* Raw, int Size);
+    void WriteFrame(uint8_t* FrameSamples, int Frames = 1);
+    void WriteFrame(short* FrameSamples, int Frames = 1);
+    void WriteFrame(int32_t* FrameSamples, int Frames = 1);
+    void WriteFrameS24(int32_t* FrameSamples, int Frames = 1);
+    void WriteFrame(double* FrameSamples, int Frames = 1);
+    void WriteFrame(float* FrameSamples, int Frames = 1);
+    void Seek(int FramePos, int Where = SEEK_SET);
+    bool GetStatus();
+    SAMPLE_FORMAT GetFormat();
+    int GetTotalFrames();
+    int GetFramesRead();
+
+
+protected:
+    FILE* File;
+    int Channels;               /* Number of channels **/
+    int SampleRate;             /* Sample rate **/
+    SAMPLE_FORMAT Format;       /* Sample format (precision) **/
+    int SampleSize;             // Measured in Bits
+    unsigned int FrameStartPos; /* Start position of the audio data **/
+    unsigned long TotalFrames;  /* Total frame count; for 16-bit mono one short is one frame **/
+    unsigned long FramesRead;
+    double Duration;            /* Duration in seconds **/
+
+    bool ReadOnly;              /* Reading or writing **/
+
+    uint32_t ChannelMask;
+
+    bool m_bOK;                 /* Whether the file was opened successfully **/
+};
+
+
+#endif
\ No newline at end of file
diff --git a/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/src/WaveFile.cpp b/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/src/WaveFile.cpp
new file mode 100755
index 0000000..1a47272
--- /dev/null
+++ b/AutoCoverTool/ref/music_remover/standard_audio/ref/waves/src/WaveFile.cpp
@@ -0,0 +1,818 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#if WIN32
+#else
+#include <unistd.h>
+#endif
+
+#include "WaveFile.h"
+
+#define SPEAKER_FRONT_LEFT 0x1
+#define SPEAKER_FRONT_RIGHT 0x2
+#define SPEAKER_FRONT_CENTER 0x4
+#define SPEAKER_LOW_FREQUENCY 0x8
+#define SPEAKER_BACK_LEFT 0x10
+#define SPEAKER_BACK_RIGHT 0x20
+#define SPEAKER_FRONT_LEFT_OF_CENTER 0x40
+#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80
+#define SPEAKER_BACK_CENTER 0x100
+#define SPEAKER_SIDE_LEFT 0x200
+#define SPEAKER_SIDE_RIGHT 0x400
+#define SPEAKER_TOP_CENTER 0x800
+#define SPEAKER_TOP_FRONT_LEFT 0x1000
+#define SPEAKER_TOP_FRONT_CENTER 0x2000
+#define SPEAKER_TOP_FRONT_RIGHT 0x4000
+#define SPEAKER_TOP_BACK_LEFT 0x8000
+#define SPEAKER_TOP_BACK_CENTER 0x10000
+#define SPEAKER_TOP_BACK_RIGHT 0x20000
+#define SPEAKER_RESERVED 0x80000000
+
+
+#define SPEAKER_REAR_CENTER_SURROUND SPEAKER_BACK_CENTER
+
+#define DCA_MONO 0
+#define DCA_CHANNEL 1
+#define DCA_STEREO 2
+#define DCA_STEREO_SUMDIFF 3
+#define DCA_STEREO_TOTAL 4
+#define DCA_3F 5
+#define DCA_2F1R 6
+#define DCA_3F1R 7
+#define DCA_2F2R 8
+#define DCA_3F2R 9
+#define DCA_4F2R 10
+
+#define DCA_DOLBY 101 /* FIXME */
+
+#define DCA_CHANNEL_MAX DCA_3F2R /* We don't handle anything above that */
+#define DCA_CHANNEL_BITS 6
+#define DCA_CHANNEL_MASK 0x3F
+
+#define DCA_LFE 0x80
+#define DCA_ADJUST_LEVEL 0x100
+
+#define WAVE_FORMAT_PCM 0x0001
+#define WAVE_FORMAT_IEEE_FLOAT 0x0003
+#define WAVE_FORMAT_EXTENSIBLE 0xFFFE
+
+static uint8_t wav_header[] = {
+    'R', 'I', 'F', 'F', 0xfc, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+    'f', 'm', 't', ' ', 16, 0, 0, 0,
+    WAVE_FORMAT_PCM, WAVE_FORMAT_PCM >> 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0,
+    'd', 'a', 't', 'a', 0xd8, 0xff, 0xff, 0xff
+};
+
+static uint8_t wavmulti_header[] = {
+    'R', 'I', 'F', 'F', 0xf0, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+    'f', 'm', 't', ' ', 40, 0, 0, 0,
+    (uint8_t)(WAVE_FORMAT_EXTENSIBLE & 0xFF), WAVE_FORMAT_EXTENSIBLE >> 8,
+    0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 22, 0,
+    0, 0, 0, 0, 0, 0,
+    WAVE_FORMAT_IEEE_FLOAT, WAVE_FORMAT_IEEE_FLOAT >> 8,
+    0, 0, 0, 0, 0x10, 0x00, 0x80, 0, 0, 0xaa, 0, 0x38, 0x9b, 0x71,
+    'd', 'a', 't', 'a', 0xb4, 0xff, 0xff, 0xff
+};
+
+static void store4 (uint8_t * buf, int value)
+{
+    buf[0] = value;
+    buf[1] = value >> 8;
+    buf[2] = value >> 16;
+    buf[3] = value >> 24;
+}
+
+static void store2 (uint8_t * buf, int value)
+{
+    buf[0] = value;
+    buf[1] = value >> 8;
+}
+
+
+static uint32_t find_chunk(FILE * file, const uint8_t chunk_id[4])
+{
+    uint8_t buffer[8];
+    while (1) {
+        size_t chunksize;
+        size_t s = fread(buffer, 1, 8, file);
+        if (s < 8)
+            return 0;
+        chunksize = (uint32_t)buffer[4] | ((uint32_t)buffer[5] << 8) |
+            ((uint32_t)buffer[6] << 16) | ((uint32_t)buffer[7] << 24);
+        if (!memcmp(buffer, chunk_id, 4))
+            return chunksize;
+        fseek(file, chunksize, SEEK_CUR);
+    }
+}
+
+
+CWaveFile::CWaveFile(const char* Filename, bool Write)
+    : Duration(0), ReadOnly(false), m_bOK(false)
+{
+    Channels = 0;
+
+    /* Open the file **/
+    File = fopen(Filename, Write ? "wb" : "rb");
+    if ( !File )
+        return;
+
+    /* Set the initial parameters for writing **/
+    if ( Write )
+    {
+        SampleRate = 44100;
+        Channels = 2;
+        Format = SF_S16;
+        SampleSize = 16;
+        ChannelMask = 0;
+        m_bOK = true;
+        return;
+    }
+
+    ReadOnly = true;
+
+    size_t s;
+    uint8_t buffer[8];
+    uint8_t *fmt = NULL;
+    uint32_t v;
+    uint32_t avg_bps;
+    uint32_t block_align;
+    unsigned short FormatType;
+    unsigned short SampleType;
+
+    static const uint8_t riff[4] = { 'R', 'I', 'F', 'F' };
+    static const uint8_t wave[4] = { 'W', 'A', 'V', 'E' };
+    static const uint8_t fmt_[4] = { 'f', 'm', 't', ' ' };
+    static const uint8_t data[4] = { 'd', 'a', 't', 'a' };
+
+    /* The first four bytes must be "RIFF" **/
+    s = fread(buffer, 1, 8, File);
+    if (s < 8)
+        goto err2;
+
+    if (memcmp(buffer, riff, 4))
+        goto err2;
+
+    /* Bytes 8~12 must be "WAVE" **/
+    /* TODO: check size (in buffer[4..8]) */
+    s = fread(buffer, 1, 4, File);
+    if (s < 4)
+        goto err2;
+
+    if (memcmp(buffer, wave, 4))
+        goto err2;
+
+    s = find_chunk(File, fmt_);
+    if ( s != 16 && s != 18 && s != 40 )
+        goto err2;
+
+    fmt = (uint8_t*)malloc(s);
+    if (!fmt)
+        goto err2;
+
+    if (fread(fmt, 1, s, File) != s)
+        goto err3;
+
+    /* wFormatTag */
+    v = (uint32_t)fmt[0] | ((uint32_t)fmt[1] << 8);
+    if (v != WAVE_FORMAT_PCM && v != WAVE_FORMAT_IEEE_FLOAT && v != WAVE_FORMAT_EXTENSIBLE)
+        goto err3;
+
+    FormatType = v;
+
+    if (s == 40 && 0xfffe == v)
+    {
+        // fmt begins at 0x14 of the wave file
+        v = *(unsigned short*)&fmt[0x2C - 0x14];
+    }
+
+    SampleType = v;
+
+    /* wChannels */
+    v = (uint32_t)fmt[2] | ((uint32_t)fmt[3] << 8);
+
+    Channels = v;
+
+    if (v < 1 || v > 32)
+        goto err3;
+
+    /* dwSamplesPerSec */
+    SampleRate = (uint32_t)fmt[4] | ((uint32_t)fmt[5] << 8) |
+        ((uint32_t)fmt[6] << 16) | ((uint32_t)fmt[7] << 24);
+
+    /* dwAvgBytesPerSec */
+    avg_bps = (uint32_t)fmt[8] | ((uint32_t)fmt[9] << 8) |
+        ((uint32_t)fmt[10] << 16) | ((uint32_t)fmt[11] << 24);
+
+    /* wBlockAlign */
+    block_align = (uint32_t)fmt[12] | ((uint32_t)fmt[13] << 8);
+
+    /* wBitsPerSample */
+    SampleSize = (uint32_t)fmt[14] | ((uint32_t)fmt[15] << 8);
+    if (SampleSize != 8 && SampleSize != 16 && SampleSize != 32 && SampleSize != 24 && SampleSize != 64)
+        goto err3;
+
+    switch (SampleSize)
+    {
+    case 8:
+        Format = SF_U8;
+        break;
+    case 16:
+        Format = SF_S16;
+        break;
+    case 24:
+        Format = SF_S24;
+        break;
+    case 32:
+    {
+        if (SampleType == WAVE_FORMAT_IEEE_FLOAT)
+            Format = SF_IEEE_FLOAT;
+        else
+            Format = SF_S32;
+
+    }
+        break;
+    case 64:
+        if (SampleType != WAVE_FORMAT_IEEE_FLOAT)
+            goto err3;
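+        /* Concrete numbers for the header arithmetic used here: 16-bit
+         * stereo at 44100 Hz gives block_align = 2 * (16 / 8) = 4 bytes
+         * per frame and avg_bps = 4 * 44100 = 176400 bytes/s, which is
+         * exactly what the two consistency checks below enforce. */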
Format = SF_IEEE_DOUBLE; + break; + } + + + // Handle 24-bit samples individually +#if 0 + if (SampleSize == 24 && Channels <= 2) + { + int ba24 = Channels * (SampleSize / 8); // Align to 4x + + ba24 = (ba24 + 3) / 4 * 4; + + if (block_align != ba24) + goto err3; + } + else +#endif + { + if (block_align != Channels * (SampleSize / 8)) + goto err3; + } + + if (avg_bps != block_align * SampleRate) + goto err3; + + v = find_chunk(File, data); + + if (v == 0 || v % block_align != 0) + goto err3; + + TotalFrames = v / block_align; + + FramesRead = 0; + + if (FormatType == WAVE_FORMAT_EXTENSIBLE) + { + ChannelMask = *(unsigned int*)(&fmt[0x14]); + } + else + { + ChannelMask = 0; + } + + FrameStartPos = ftell(File); + + free(fmt); + m_bOK = true; + return; + +err3: + free(fmt); +err2: + fclose(File); + + File = NULL; +} + +bool CWaveFile::GetStatus() +{ + return m_bOK; +} + +SAMPLE_FORMAT CWaveFile::GetFormat() +{ + return Format; +} + +int CWaveFile::GetTotalFrames() +{ + return TotalFrames; +} + +int CWaveFile::GetFramesRead() +{ + return FramesRead; +} + +CWaveFile::~CWaveFile() +{ + if (File != NULL) + { + if (!ReadOnly) + { + unsigned int Size = ftell(File) - FrameStartPos;// 44; + + fseek(File, FrameStartPos - 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + + Size += FrameStartPos - 8; + + fseek(File, 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + } + + fclose(File); + } +} + +int CWaveFile::GetSampleRate() +{ + return SampleRate; +} + +void CWaveFile::SetSampleRate(int SampleRate) +{ + this->SampleRate = SampleRate; +} + +void CWaveFile::SetupDone() +{ + unsigned char Header[68]; + + fseek(File, 0, SEEK_SET); + + SampleSize = Format & 0xFF; + + if (ChannelMask) + { + memcpy(Header, wavmulti_header, sizeof(wavmulti_header)); + + if (Format < SF_IEEE_FLOAT) + { + // store2(Header + 20, WAVE_FORMAT_PCM); + store2(Header + 44, WAVE_FORMAT_PCM); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + store2(Header + 38, SampleSize / 8 * 8); + store4(Header + 40, ChannelMask); + + fwrite(Header, sizeof(wavmulti_header), 1, File); + } + else + { + memcpy(Header, wav_header, sizeof(wav_header)); + + if (Format >= SF_IEEE_FLOAT) + { + store2(Header + 20, WAVE_FORMAT_IEEE_FLOAT); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + fwrite(Header, sizeof(wav_header), 1, File); + } + + + FrameStartPos = ftell(File); +} + + +void CWaveFile::Seek(int FramePos, int Where) +{ + // Ignoring Where + + fseek(File, FrameStartPos + FramePos * Channels* (SampleSize / 8), Where); + + FramesRead = FramePos; + +} + +int CWaveFile::GetChannels() +{ + return Channels; +} + +void CWaveFile::SetChannels(int Channels) +{ + this->Channels = Channels; +} + +void CWaveFile::SetSampleFormat(SAMPLE_FORMAT Format) +{ + this->Format = Format; +} + +uint32_t CWaveFile::GetChannelMask() +{ + return ChannelMask; +} + +void CWaveFile::SetChannelMask(uint32_t Mask) +{ + ChannelMask = Mask; +} + +bool CWaveFile::ReadFrameAsS16(short* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short 
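+                /* Conversion math used by these readers: U8 becomes S16
+                 * as (x - 128) << 8, S24 keeps its top 16 bits via
+                 * DirectSample >> 8 (0x7FFFFF -> 0x7FFF), S32 via >> 16,
+                 * and float/double samples are scaled by 32768. */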
DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) << 8; + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + return Frames == fread(FrameSamples, sizeof(FrameSamples[0])*Channels, Frames, File); + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 8); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 16); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + double DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + } + return false; +} + +bool CWaveFile::ReadFrameAsfloat(float* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 
<< 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +bool CWaveFile::ReadFrameAsDouble(double* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample ))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +void CWaveFile::WriteRaw(void* Raw, int Size) +{ + fwrite(Raw, Size, 1, File); +} + + +void CWaveFile::WriteFrame(uint8_t* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrame(short* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrame(int32_t* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrameS24(int32_t* FrameSamples, int Frames) +{ + for (int c = 0; c 
< Channels * Frames; c++)   /* iterate over all Frames * Channels samples, writing the low three bytes of each 32-bit sample (little-endian S24 packing) */
+    {
+        fwrite(&FrameSamples[c], 3, 1, File);
+    }
+}
+
+void CWaveFile::WriteFrame(double* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+void CWaveFile::WriteFrame(float* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+
+double CWaveFile::GetDuration()
+{
+    return Duration;
+}
diff --git a/AutoCoverTool/ref/pic_au_tools/main.py b/AutoCoverTool/ref/pic_au_tools/main.py
new file mode 100644
index 0000000..6db2a4d
--- /dev/null
+++ b/AutoCoverTool/ref/pic_au_tools/main.py
@@ -0,0 +1,16 @@
+# This is a sample Python script.
+
+# Press Shift+F10 to execute it or replace it with your code.
+# Double-press Shift to search everywhere for classes, files, tool windows, actions, and settings.
+
+
+def print_hi(name):
+    # Use a breakpoint in the code line below to debug your script.
+    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.
+
+
+# Press the green button in the gutter to run the script.
+if __name__ == '__main__':
+    print_hi('PyCharm')
+
+# See https://www.jetbrains.com/help/pycharm/ for PyCharm help
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/click_apply.png b/AutoCoverTool/ref/pic_au_tools/picture/click_apply.png
new file mode 100644
index 0000000..4a4692e
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/click_apply.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/click_library.png b/AutoCoverTool/ref/pic_au_tools/picture/click_library.png
new file mode 100644
index 0000000..709519f
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/click_library.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/click_sure.jpg b/AutoCoverTool/ref/pic_au_tools/picture/click_sure.jpg
new file mode 100644
index 0000000..8f3382f
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/click_sure.jpg differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/close_all_files.png b/AutoCoverTool/ref/pic_au_tools/picture/close_all_files.png
new file mode 100644
index 0000000..6215929
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/close_all_files.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/close_time.png b/AutoCoverTool/ref/pic_au_tools/picture/close_time.png
new file mode 100644
index 0000000..3ce8362
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/close_time.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/create_vocal.png b/AutoCoverTool/ref/pic_au_tools/picture/create_vocal.png
new file mode 100644
index 0000000..c2ba81e
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/create_vocal.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/end_time.png b/AutoCoverTool/ref/pic_au_tools/picture/end_time.png
new file mode 100644
index 0000000..7128e39
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/end_time.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/get_library.png b/AutoCoverTool/ref/pic_au_tools/picture/get_library.png
new file mode 100644
index 0000000..44649dd
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/get_library.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/modify_display.png b/AutoCoverTool/ref/pic_au_tools/picture/modify_display.png
new file mode 100644
index 0000000..2cfa19b
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/modify_display.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/modify_gain.png b/AutoCoverTool/ref/pic_au_tools/picture/modify_gain.png
new file mode 100644
index 0000000..39b0ca2
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/modify_gain.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/open.png b/AutoCoverTool/ref/pic_au_tools/picture/open.png
new file mode 100644
index 0000000..f3b4683
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/open.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/open_library.png.png b/AutoCoverTool/ref/pic_au_tools/picture/open_library.png.png
new file mode 100644
index 0000000..a1c4242
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/open_library.png.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/picture_tools.py b/AutoCoverTool/ref/pic_au_tools/picture/picture_tools.py
new file mode 100644
index 0000000..d6136aa
--- /dev/null
+++ b/AutoCoverTool/ref/pic_au_tools/picture/picture_tools.py
@@ -0,0 +1,27 @@
+import time
+import pyautogui
+
+
+# Locate a UI element on screen by matching a template image; returns its center point
+def get_pic(pic_name):
+    pyautogui.screenshot('screenshot.png')
+    # pyautogui.screenshot('img12.png', region=(0, 0, 1110, 776))
+    part = pyautogui.locateOnScreen(pic_name, confidence=0.85)
+    point = pyautogui.center(part)
+    print(point)
+    return point
+
+# im = pyautogui.screenshot()
+# print((type(im), im.getdata(100, 200)))
+#
+# a = get_pic("11.png")
+# print(a)
+# pyautogui.click(a)
+# #
+# click_file = get_pic("select_files.png")
+# pyautogui.rightClick(click_file)
+#
+# # click_clear = mouse.click(button="left", coords=(128, 115))
+# time.sleep(2)
+# click_clear = get_pic("img_11.png")
+# pyautogui.click(click_clear)
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/save_as_ok.png b/AutoCoverTool/ref/pic_au_tools/picture/save_as_ok.png
new file mode 100644
index 0000000..389634a
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/save_as_ok.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/save_library.png b/AutoCoverTool/ref/pic_au_tools/picture/save_library.png
new file mode 100644
index 0000000..cef8d20
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/save_library.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/save_vocal_file.png b/AutoCoverTool/ref/pic_au_tools/picture/save_vocal_file.png
new file mode 100644
index 0000000..92c3e16
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/save_vocal_file.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/screenshot.png b/AutoCoverTool/ref/pic_au_tools/picture/screenshot.png
new file mode 100644
index 0000000..445bd26
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/screenshot.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/search_name.png b/AutoCoverTool/ref/pic_au_tools/picture/search_name.png
new file mode 100644
index 0000000..d3870f8
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/search_name.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/select_files.png b/AutoCoverTool/ref/pic_au_tools/picture/select_files.png
new file mode 100644
index 0000000..97e7d65
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/select_files.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/select_library.png b/AutoCoverTool/ref/pic_au_tools/picture/select_library.png
new file mode 100644
index 0000000..1d87bcc
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/select_library.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/picture/start_time.png b/AutoCoverTool/ref/pic_au_tools/picture/start_time.png
new file mode 100644
index 0000000..cd0f6de
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/picture/start_time.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/read_all_files.py b/AutoCoverTool/ref/pic_au_tools/read_all_files.py
new file mode 100644
index 0000000..e452a2e
--- /dev/null
+++ b/AutoCoverTool/ref/pic_au_tools/read_all_files.py
@@ -0,0 +1,65 @@
+import os
+
+
+def find_file(search_path, include_str=None, filter_strs=None):
+    if filter_strs is None:
+        filter_strs = []
+
+    files = []
+    # List every entry under the search path
+    names = os.listdir(search_path)
+    for name in names:
+        path = os.path.abspath(os.path.join(search_path, name))
+        if os.path.isfile(path):
+            # Skip files whose names do not contain the required substring
+            if include_str is not None and include_str not in name:
+                continue
+
+            # If this loop finishes without a break, the name contains none of the filter strings
+            for filter_str in filter_strs:
+                if filter_str in name:
+                    break
+            else:
+                files.append(path)
+        else:
+            files += find_file(path, include_str=include_str, filter_strs=filter_strs)
+    return files
+
+
+# List all song folders, skipping .meta entries
+def find_dos():
+    a = os.listdir("docs/Songs")
+    # Filter with a comprehension; removing items while iterating over the list would skip entries
+    a = [i for i in a if not i.endswith(".meta")]
+    return a
+
+
+if __name__ == '__main__':
+    # Get all files
+    # f = find_file("./txt")
+    # print(f)
+
+    # # Get the files whose names contain a given string
+    # f = find_file("/Users/lbj/Desktop/Beat", include_str=".txt")
+    # print(f)
+    #
+
+    # # Get the files whose names do not contain the given strings
+    f = find_file(r"../me_3_w4\me_3_w4", include_str="dv", filter_strs=[".pkf"])
+    print(f)
+    a = f[0].split('\\')
+    # Drop the file just used from the list
+    f.remove(f[0])
+    print(f)
+    # The file name itself
+    print(a[-1])
+    a.remove(a[-1])
+    address = '\\'.join(a)
+    # The directory containing the file
+    print(address)
+
+    #
+    # # Get the files that contain one string and none of certain others
+    # f = find_file(r"test", include_str="wav", filter_strs=[".meta", "__init__"])
+    # print(f)
diff --git a/AutoCoverTool/ref/pic_au_tools/screenshot.png b/AutoCoverTool/ref/pic_au_tools/screenshot.png
new file mode 100644
index 0000000..e56127b
Binary files /dev/null and b/AutoCoverTool/ref/pic_au_tools/screenshot.png differ
diff --git a/AutoCoverTool/ref/pic_au_tools/star_app.py b/AutoCoverTool/ref/pic_au_tools/star_app.py
new file mode 100644
index 0000000..47c701f
--- /dev/null
+++ b/AutoCoverTool/ref/pic_au_tools/star_app.py
@@ -0,0 +1,392 @@
+import pyautogui
+import pywinauto
+from pywinauto import application, mouse
+import time
+from pywinauto.keyboard import send_keys
+from read_all_files import find_file
+from picture.picture_tools import get_pic
+
+# song id -> start offset in seconds used when selecting the vocal segment; -1.0 appears to mean no usable offset
+gs_rate_dict = {
+    611752105020256284: 38.340000,
+    611752105020286433: 33.730000,
+    611752105020286443: 11.370000,
+    611752105020286446: 31.920000,
+    611752105020290639: 62.400000,
+    611752105020290695: 20.550000,
+    611752105020315328: 43.860000,
+    611752105020315368: 25.740000,
+    611752105020336950: 47.170000,
+    611752105020343687: 30.000000,
+    611752105020343699: 19.210000,
+    611752105020351134: 43.300000,
+    611752105020357112: 27.560000,
+    611752105020378620: 58.470000,
+    611752105020387015: 11.220000,
+    611752105020394121: 30.660000,
+    611752105020394297: 112.720000,
+    611752105020411654: 32.330000,
+    611752105020417688: 50.610000,
+    611752105020548211: 42.730000,
+    611752105020563523: -1.000000,
+    611752105021285282: -1.000000,
+    611752105021332759: 11.370000,
+    611752105022446809: 7.600000,
+    611752105022647082: 3.340000,
+    611752105022667231: 43.260000,
+    611752105022735101: 3.680000,
+    611752105022736204: 17.650000,
+    611752105022745595: 29.430000,
+    611752105022770952: 10.820000,
+    611752105022842004: 25.400000,
+    611752105022842477: 10.020000,
+    611752105023434557: 28.160000,
+    611752105023532439: 11.850000,
+    611752105023623965: 3.710000,
+    611752105024250202: 43.550000,
+    611752105024628047: 22.520000,
611752105024676794: 63.380000, + 611752105024678976: 14.350000, + 611752105024679221: 0.290000, + 611752105024953316: 10.350000, + 611752105025104181: 28.890000, + 611752105026189342: 1.380000, + 611752105026523547: 27.950000, + 611752105026707760: 0.000000, + 611752105026771723: 32.100000, + 611752105026946178: 16.860000, + 611752105027047993: 20.380000, + 611752105027188746: 23.900000, + 611752105027189453: 13.640000, + 611752105027302268: 21.030000, + 611752105027557408: 13.050000, + 611752105028650636: 31.310000, + 611752105028683824: 15.920000, + 611752105029990849: 56.810000, + 611752105029993297: 2.490000, + 611752105030077711: 28.960000, + 611752105030104548: 15.830000, + 611752105030419624: 0.790000, + 611752105030419633: 8.080000, + 611752105030419688: 5.850000, + 611752105030433779: 1.080000, + 611752105015523266: 16.160000, + 611752105017233541: 43.910000, + 611752105030414513: 16.330000, + 611752105030414549: 29.890000, + 611752105030414557: 16.210000, + 611752105030414588: 18.900000, + 611752105030414597: 16.540000, + 611752105030414613: 2.210000, + 611752105030414615: 0.320000, + 611752105030414619: 26.420000, + 611752105030414633: 17.250000, + 611752105030414638: 8.300000, + 611752105030414689: 2.600000, + 611752105030414702: 0.550000, + 611752105030414742: 28.310000, + 611752105030414763: 1.040000, + 611752105030414773: 9.100000, + 611752105030414777: 34.650000, + 611752105030414779: 22.240000, + 611752105030414784: 0.580000, + 611752105030414890: 23.920000, + 611752105030414915: 6.020000, + 611752105030414925: 2.250000, + 611752105030414929: 8.110000, + 611752105030414935: 4.750000, + 611752105030414943: 1.600000, + 611752105030414957: 18.950000, + 611752105030414962: 17.460000, + 611752105030414976: 0.690000, + 611752105030414993: 30.170000, + 611752105030414995: 8.410000, + 611752105030415003: 28.620000, + 611752105030415014: 6.500000, + 611752105030415018: 2.070000, + 611752105030415032: 5.830000, + 611752105030415056: 0.440000, + 611752105030415067: 8.300000, + 611752105030415071: 22.970000, + 611752105030415074: 0.000000, + 611752105030415083: 5.010000, + 611752105030415087: 7.950000, + 611752105030415100: 1.180000, + 611752105030415103: 35.110000 +} + + +# 程序打开 +def open_app(): + # 此处填写AU_APP的绝对路径 + app = application.Application(backend='uia').start( + r"C:\Program Files\Adobe\Adobe Audition CC 2017\Adobe Audition CC.exe") + time.sleep(4) + # 设置窗口最大化74 + click_sure = get_pic("./picture/click_sure.jpeg") + pyautogui.click(click_sure) + time.sleep(2) + app_top_window = app.top_window() + app_top_window.maximize() + + +dlg = pywinauto.Desktop() + + +# 获取所有文件 +def get_all_file(): + # files = find_file(r"test", filter_strs=[".mp3", ".pkf"]) + files = find_file(r"me_3_w4_zy", include_str="dv", filter_strs=[".pkf"]) + count_file = len(files) + return files, count_file + + +# 拿到文件地址 +def get_file(): + # all_files = get_all_file()[0] + # print(all_files) + # 切割获取 + add_file = file_name[0].split('\\') + # print(add_file) + # 列表中删除已用的文件信息 + file_name.remove(file_name[0]) + # wav文件获取 + wav_file = add_file[-1] + # print(wav_file) + add_file.remove(add_file[-1]) + address = '\\'.join(add_file) + # print(address) + # address = r"D:\project\tools\au_tools\test\611752105027601574" + + vocal = r"vocal.mp3" + return address, wav_file, vocal + + +# 打开导入文件位置空间弹窗 +def open_file(file, file_address, st_tm): + send_keys("^o") + time.sleep(2) + # 切换控件窗口 + dlg_open_file = dlg[".*Open File.*"] + # 打开的文件内容输入 + send_keys(file) + time.sleep(1) + # 获取desk实例化= + start_time_inst = None + if file == 
get_file_name[2]: + print("这是vocal") + # dlg_open_file.window(found_index=0, class_name=".*Edit.*").click() + dlg_open_file.window(found_index=0, title_re=".*地址.*").click() + send_keys(file_address) + time.sleep(1) + dlg_open_file.window(found_index=0, title_re=".*地址区段工具栏.*").click() + # dlg_open_file.window(found_index=0, title_re=".*打开.*").click() + time.sleep(1) + # open_click = get_pic("./picture/open.png") + open_click = get_pic("./picture/open.jpeg") + # print(open_click) + # mouse.click(button="left", coords=(775,519)) + pyautogui.click(open_click) + wait_dlg("Adobe Audition CC") + time.sleep(1) + start_time_inst = get_pic("./picture/start_time.jpeg") + revise_time("0:{}".format(round(st_tm, 3)), "0:{}".format(round(st_tm + 60, 3))) + time.sleep(2) + start_vocal_vts3() + else: + # 文件打开 + print("这里是wav") + # dlg_open_file.window(found_index=0, title_re=".*打开.*").click() + open_click = get_pic("./picture/open.jpeg") + pyautogui.click(open_click) + # mouse.click(button="left", coords=(775, 519)) + wait_dlg("Adobe Audition CC") + time.sleep(1) + start_time_inst = get_pic("./picture/start_time.jpeg") + star_wav_vst3(file) + pyautogui.click(start_time_inst) + send_keys("0:0.000") + close_time = get_pic("./picture/close_time.jpeg") + pyautogui.click(close_time) + time.sleep(1) + + +def revise_time(st_tm, ed_tm): + print("reverse_time={}".format(st_tm)) + # 修改开始时间 + time.sleep(1) + # time_start = mouse.click(button="left", coords=(588, 981)) + # start_time = get_pic("./picture/start_time.png") + start_time = get_pic("./picture/start_time.jpeg") + pyautogui.click(start_time) + send_keys(st_tm) + time.sleep(1) + # time_end = mouse.click(button="left", coords=(648, 981)) + # end_time = get_pic("./picture/end_time.png") + # end_time = get_pic("./picture/end_time.jpeg") + # pyautogui.click(end_time) + # send_keys(ed_tm) + # 关闭选中时间部分 + # close_time = mouse.click(button="left", coords=(800, 981)) + close_time = get_pic("./picture/close_time.jpeg") + pyautogui.click(close_time) + + +# 通过快捷键执行vocal的vts3 +def start_vocal_vts3(): + time.sleep(2) + # send_keys("^%z") + send_keys("^'") + wait_dlg("Effect - Chameleon2") + + # 恢复状态 + # restruct_vocal = get_pic("./picture/restruct.jpeg") + # pyautogui.click(restruct_vocal) + # time.sleep(1) + # pyautogui.click(restruct_vocal) + + # 点击创建按钮 + # 打开弹窗 + time.sleep(1) + # mouse.click(button="left", coords=(766, 522)) + # 创建文件 + create_vocal = get_pic("./picture/create_vocal.jpeg") + pyautogui.click(create_vocal) + # create_file = mouse.click(button="left", coords=(951, 186)) + time.sleep(2) + send_keys("{SPACE}") + time.sleep(10) + # 保存文件 + # save_file = mouse.click(button="left", coords=(844, 501)) + # save_vocal = get_pic("./picture/save_library.png") + save_vocal = get_pic("./picture/save_library.jpeg") + time.sleep(1) + pyautogui.click(save_vocal) + # save_file = mouse.click(button="left", coords=(844, 501)) + time.sleep(2) + # 保存内容 + send_keys(f"vocal{get_file_name[1]}") + time.sleep(1) + # save_click = mouse.click(button="left", coords=(757, 592)) + save_vocal_file = get_pic("./picture/save_vocal_file.jpeg") + pyautogui.click(save_vocal_file) + time.sleep(1) + send_keys('%{F4}') + + # 关闭弹窗 + # dlg_close_window = dlg["Effect - Chameleon2"] + # dlg_close_window.window(found_index=0, title_re=".*关闭.*") + # wait_dlg("Effect - Chameleon2") + + +# 执行wav的vst3 +def star_wav_vst3(file): + time.sleep(2) + send_keys("^'") + wait_dlg("Effect - Chameleon2") + # open_library = mouse.click(button="left", coords=(1010, 188)) + + # 恢复状态 + # restruct_vocal = 
get_pic("./picture/restruct.jpeg") + # pyautogui.click(restruct_vocal) + # time.sleep(1) + # pyautogui.click(restruct_vocal) + + open_library = get_pic("./picture/click_library.jpeg") + pyautogui.click(open_library) + wait_dlg("ChameleonLibrary - ") + mouse.click(button="left", coords=(387, 572)) + # search_name = get_pic("./picture/search_name.jpeg") + # pyautogui.click(search_name) + send_keys(f"vocal{get_file_name[1]}") + time.sleep(2) + # 选择搜索到的文件 + # wait_dlg("ChameleonLibrary - ") + # select_profile = mouse.double_click(button="left", coords=(978, 492)) + mouse.click(button="left", coords=(722, 297)) + # move_library = get_pic("./picture/get_library.jpeg") + # pyautogui.moveTo(move_library) + time.sleep(1) + select_library = get_pic("./picture/select_library.jpeg") + pyautogui.doubleClick(select_library) + # 关闭弹窗 + time.sleep(1) + # 关闭弹窗 + send_keys('%{F4}') + + # close_profile = mouse.double_click(button="left", coords=(1675, 297)) + # 修改显示 + wait_dlg("Effect - Chameleon2") + # modify_chameleon_display = mouse.double_click(button="left", coords=(1326, 124)) + # modify_chameleon_display = get_pic("./picture/modify_display.png") + # pyautogui.doubleClick(modify_chameleon_display) + # 修改_gain + # time.sleep(2) + mouse.click(button="left", coords=(1200, 720)) + # modify_gain = get_pic("./picture/modify_gain.jpeg") + # modify_gain.y += 50 + time.sleep(1) + send_keys("15") + + apply_file = get_pic("./picture/click_apply.jpeg") + pyautogui.click(apply_file) + time.sleep(3) + wait_dlg("Adobe Audition CC") + + time.sleep(5) + save_as(file) + clear_file() + + +# 保存 +def save_as(file): + send_keys("^+s") + time.sleep(2) + send_keys(str(file).replace("_dv_", "_dev_").replace(".mp3", "")) + # save_ok = mouse.click(button="left", coords=(1014, 647)) + save_ok = get_pic("./picture/save_as_ok.jpeg") + pyautogui.click(save_ok) + time.sleep(3) + + +# 清空内容显示 +def clear_file(): + time.sleep(2) + send_keys("^]") + + +# 等到待窗口启用 +def wait_dlg(win_name): + try: + new_dlg = dlg[win_name] + new_dlg.wait(wait_for="ready", timeout=20, retry_interval=1) + # print("等待通过,当前新建连接窗口处于可见状态") + except EnvironmentError: + print("请检查弹窗未打开") + + +# star_test +def start_test(): + import os + global get_file_name + for i in range(file_count): + get_file_name = get_file() + dst_file = os.path.join("C:/Users/xianfeng.li/Desktop/jianli/test", + str(get_file_name[1]).replace("_dv_", "_dev_")) + print(dst_file) + if os.path.exists(dst_file): + continue + print(f"本次检测内容包含{get_file_name}") + st_tm = gs_rate_dict[int(str(get_file_name[1]).split("_")[0])] + open_file(get_file_name[2], get_file_name[0], st_tm) + time.sleep(3) + print(f"结束文件{get_file_name[1]}的{get_file_name[2]}") + open_file(get_file_name[1], get_file_name[0], 0) + time.sleep(3) + print(f"结束文件{get_file_name[1]}") + + +if __name__ == '__main__': + open_app() + file_count = get_all_file()[1] + file_name = get_all_file()[0] + start_test() diff --git a/AutoCoverTool/ref/so_vits_svc/Eng_docs.md b/AutoCoverTool/ref/so_vits_svc/Eng_docs.md new file mode 100644 index 0000000..78f6db8 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/Eng_docs.md @@ -0,0 +1,83 @@ +# SoftVC VITS Singing Voice Conversion +## Updates +> According to incomplete statistics, it seems that training with multiple speakers may lead to **worsened leaking of voice timbre**. It is not recommended to train models with more than 5 speakers. The current suggestion is to try to train models with only a single speaker if you want to achieve a voice timbre that is more similar to the target. 
+> Fixed the issue with unwanted staccato, improving audio quality by a decent amount.\
+> The 2.0 version has been moved to the 2.0 branch.\
+> Version 3.0 uses the code structure of FreeVC, which isn't compatible with older versions.\
+> Compared to [DiffSVC](https://github.com/prophesier/diff-svc), DiffSVC performs much better when the training data is of extremely high quality, but this repository may perform better on lower-quality datasets. Inference is also much faster here than with DiffSVC.

+## Model Overview
+A singing voice conversion (SVC) model: the SoftVC encoder extracts speech features from the input audio, and these are fed into VITS together with the F0, replacing the original text input, to achieve voice conversion. Additionally, the vocoder is changed to [NSF HiFiGAN](https://github.com/openvpi/DiffSinger/tree/refactor/modules/nsf_hifigan) to fix the issue with unwanted staccato.
+## Notice
++ The current branch is the 32 kHz version: it requires less VRAM during inference, inference is faster, and its datasets take up less disk space, so the 32 kHz branch is recommended.
++ If you want to train 48 kHz variant models, switch to the [main branch](https://github.com/innnky/so-vits-svc/tree/main).
+## Colab notebook script for dataset creation and training
+[colab training notebook](https://colab.research.google.com/drive/1rCUOOVG7-XQlVZuWRAj5IpGrMM8t07pE?usp=sharing)
+
+## Required models
++ soft vc hubert: [hubert-soft-0d54a1f4.pt](https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt)
+  + Place under `hubert`.
++ Pretrained models [G_0.pth](https://huggingface.co/innnky/sovits_pretrained/resolve/main/G_0.pth) and [D_0.pth](https://huggingface.co/innnky/sovits_pretrained/resolve/main/D_0.pth)
+  + Place under `logs/32k`.
+  + The pretrained base models are required: in testing, training from scratch sometimes fails to converge, and starting from a pretrained model also greatly speeds up training.
+  + The pretrained model covers 云灏, 即霜, 辉宇·星AI, 派蒙, and 绫地宁宁, spanning the common ranges of both male and female voices, so it can be considered a fairly universal base model.
+  + The pretrained model excludes the `optimizer` and `speaker_embedding` weights, so it can only be used to initialize training and cannot be used for inference.
+```shell
+# For simple downloading.
+# hubert
+wget -P hubert/ https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt
+# G&D pretrained models
+wget -P logs/32k/ https://huggingface.co/innnky/sovits_pretrained/resolve/main/G_0.pth
+wget -P logs/32k/ https://huggingface.co/innnky/sovits_pretrained/resolve/main/D_0.pth
+
+```
+
+
+## Dataset preparation
+All that is required is to put the data under the `dataset_raw` folder in the structure shown below.
+```shell
+dataset_raw
+├───speaker0
+│   ├───xxx1-xxx1.wav
+│   ├───...
+│   └───Lxx-0xx8.wav
+└───speaker1
+    ├───xx2-0xxx2.wav
+    ├───...
+    └───xxx7-xxx007.wav
+```
+
+## Data pre-processing
+1. Resample to 32 kHz
+
+```shell
+python resample.py
+```
+2. Automatically split the data into training, validation, and test sets, and generate the configuration file.
+```shell
+python preprocess_flist_config.py
+# Notice.
+# The n_speakers value in the config is set automatically from the number of speakers in the dataset.
+# To reserve space for speakers added to the dataset later, n_speakers is written as twice the actual number.
+# For example, a dataset with 3 speakers is written with n_speakers = 6.
+# If you want even more room for adding data, you can edit the n_speakers value in the config after running this step.
+# It cannot be changed once training has started.
+```
+3. Generate hubert and F0 features:
+```shell
+python preprocess_hubert_f0.py
+```
+After running the step above, the `dataset` folder contains all the pre-processed data; the `dataset_raw` folder can then be deleted.
+
+## Training
+```shell
+python train.py -c configs/config.json -m 32k
+```
+
+## Inferencing
+
+Use [inference_main.py](inference_main.py):
++ Edit `model_path` to point to your newest checkpoint.
++ Place the input audio under the `raw` folder.
++ Set `clean_names` to the names of the audio files to convert.
++ Use `trans` to set the pitch shift amount (in semitones).
++ Set `spk_list` to the target speaker name.
+(An illustrative sketch of these settings appears after the LICENSE file below.)
diff --git a/AutoCoverTool/ref/so_vits_svc/LICENSE b/AutoCoverTool/ref/so_vits_svc/LICENSE
new file mode 100644
index 0000000..c7202d4
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2021 Jingyi Li
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
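As referenced in the inferencing section of Eng_docs.md above, here is a minimal, illustrative sketch of the variables edited at the top of `inference_main.py`. All values are placeholders; the checkpoint step, song name, and speaker label are assumptions, not taken from this repository:

```python
# Illustrative placeholder values -- edit to match your own training run.
model_path = "logs/32k/G_10000.pth"   # newest checkpoint written by train.py
config_path = "configs/config.json"   # config generated during preprocessing
clean_names = ["my_song"]             # audio under raw/ to convert
trans = [0]                           # pitch shift in semitones, one per input
spk_list = ["speaker0"]               # target speaker name from the dataset
```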
diff --git a/AutoCoverTool/ref/so_vits_svc/README.md b/AutoCoverTool/ref/so_vits_svc/README.md new file mode 100644 index 0000000..5905096 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/README.md @@ -0,0 +1,123 @@ +# SoftVC VITS Singing Voice Conversion +## English docs +[英语资料](Eng_docs.md) + + +## Update +> 据不完全统计,多说话人似乎会导致**音色泄漏加重**,不建议训练超过5人的模型,目前的建议是如果想炼出来更像目标音色,**尽可能炼单说话人的**\ +> 断音问题已解决,音质提升了不少\ +> 2.0版本已经移至 sovits_2.0分支\ +> 3.0版本使用FreeVC的代码结构,与旧版本不通用\ +> 与[DiffSVC](https://github.com/prophesier/diff-svc) 相比,在训练数据质量非常高时diffsvc有着更好的表现,对于质量差一些的数据集,本仓库可能会有更好的表现,此外,本仓库推理速度上比diffsvc快很多 + + +## 模型简介 +歌声音色转换模型,通过SoftVC内容编码器提取源音频语音特征,与F0同时输入VITS替换原本的文本输入达到歌声转换的效果。同时,更换声码器为 [NSF HiFiGAN](https://github.com/openvpi/DiffSinger/tree/refactor/modules/nsf_hifigan) 解决断音问题 + + +## 注意 ++ 当前分支是32khz版本的分支,32khz模型推理更快,显存占用大幅减小,数据集所占硬盘空间也大幅降低,推荐训练该版本模型 ++ 如果要训练48khz的模型请切换到[main分支](https://github.com/innnky/so-vits-svc/tree/main) + + +## 预先下载的模型文件 ++ soft vc hubert:[hubert-soft-0d54a1f4.pt](https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt) + + 放在hubert目录下 ++ 预训练底模文件 [G_0.pth](https://huggingface.co/innnky/sovits_pretrained/resolve/main/G_0.pth) 与 [D_0.pth](https://huggingface.co/innnky/sovits_pretrained/resolve/main/D_0.pth) + + 放在logs/32k 目录下 + + 预训练底模为必选项,因为据测试从零开始训练有概率不收敛,同时底模也能加快训练速度 + + 预训练底模训练数据集包含云灏 即霜 辉宇·星AI 派蒙 绫地宁宁,覆盖男女生常见音域,可以认为是相对通用的底模 + + 底模删除了optimizer speaker_embedding 等无关权重, 只可以用于初始化训练,无法用于推理 + + 该底模和48khz底模通用 +```shell +# 一键下载 +# hubert +wget -P hubert/ https://github.com/bshall/hubert/releases/download/v0.1/hubert-soft-0d54a1f4.pt +# G与D预训练模型 +wget -P logs/32k/ https://huggingface.co/innnky/sovits_pretrained/resolve/main/G_0.pth +wget -P logs/32k/ https://huggingface.co/innnky/sovits_pretrained/resolve/main/D_0.pth + +``` + + +## colab一键数据集制作、训练脚本 +[一键colab](https://colab.research.google.com/drive/1_-gh9i-wCPNlRZw6pYF-9UufetcVrGBX?usp=sharing) + + +## 数据集准备 +仅需要以以下文件结构将数据集放入dataset_raw目录即可 +```shell +dataset_raw +├───speaker0 +│ ├───xxx1-xxx1.wav +│ ├───... +│ └───Lxx-0xx8.wav +└───speaker1 + ├───xx2-0xxx2.wav + ├───... + └───xxx7-xxx007.wav +``` + + +## 数据预处理 +1. 重采样至 32khz + +```shell +python resample.py + ``` +2. 自动划分训练集 验证集 测试集 以及自动生成配置文件 +```shell +python preprocess_flist_config.py +# 注意 +# 自动生成的配置文件中,说话人数量n_speakers会自动按照数据集中的人数而定 +# 为了给之后添加说话人留下一定空间,n_speakers自动设置为 当前数据集人数乘2 +# 如果想多留一些空位可以在此步骤后 自行修改生成的config.json中n_speakers数量 +# 一旦模型开始训练后此项不可再更改 +``` +3. 生成hubert与f0 +```shell +python preprocess_hubert_f0.py +``` +执行完以上步骤后 dataset 目录便是预处理完成的数据,可以删除dataset_raw文件夹了 + + +## 训练 +```shell +python train.py -c configs/config.json -m 32k +``` + + +## 推理 + +使用 [inference_main.py](inference_main.py) ++ 更改model_path为你自己训练的最新模型记录点 ++ 将待转换的音频放在raw文件夹下 ++ clean_names 写待转换的音频名称 ++ trans 填写变调半音数量 ++ spk_list 填写合成的说话人名称 + + +## Onnx导出 +### 重要的事情说三遍:导出Onnx时,请重新克隆整个仓库!!!导出Onnx时,请重新克隆整个仓库!!!导出Onnx时,请重新克隆整个仓库!!! 
+使用 [onnx_export.py](onnx_export.py) ++ 新建文件夹:checkpoints 并打开 ++ 在checkpoints文件夹中新建一个文件夹作为项目文件夹,文件夹名为你的项目名称 ++ 将你的模型更名为model.pth,配置文件更名为config.json,并放置到刚才创建的文件夹下 ++ 将 [onnx_export.py](onnx_export.py) 中path = "NyaruTaffy" 的 "NyaruTaffy" 修改为你的项目名称 ++ 运行 [onnx_export.py](onnx_export.py) ++ 等待执行完毕,在你的项目文件夹下会生成一个model.onnx,即为导出的模型 ++ 注意:若想导出48K模型,请按照以下步骤修改文件,或者直接使用48K.py + + 请打开[model_onnx.py](model_onnx.py),将其中最后一个class的hps中32000改为48000 + + 请打开[nvSTFT](/vdecoder/hifigan/nvSTFT.py),将其中所有32000改为48000 + ### Onnx模型支持的UI + + [MoeSS](https://github.com/NaruseMioShirakana/MoeSS) ++ 我去除了所有的训练用函数和一切复杂的转置,一行都没有保留,因为我认为只有去除了这些东西,才知道你用的是Onnx + +## Gradio(WebUI) +使用 [sovits_gradio.py](sovits_gradio.py) ++ 新建文件夹:checkpoints 并打开 ++ 在checkpoints文件夹中新建一个文件夹作为项目文件夹,文件夹名为你的项目名称 ++ 将你的模型更名为model.pth,配置文件更名为config.json,并放置到刚才创建的文件夹下 ++ 运行 [sovits_gradio.py](sovits_gradio.py) + + diff --git a/AutoCoverTool/ref/so_vits_svc/add_speaker.py b/AutoCoverTool/ref/so_vits_svc/add_speaker.py new file mode 100644 index 0000000..e224f07 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/add_speaker.py @@ -0,0 +1,62 @@ +import os +import argparse +from tqdm import tqdm +from random import shuffle +import json + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--train_list", type=str, default="./filelists/train.txt", help="path to train list") + parser.add_argument("--val_list", type=str, default="./filelists/val.txt", help="path to val list") + parser.add_argument("--test_list", type=str, default="./filelists/test.txt", help="path to test list") + parser.add_argument("--source_dir", type=str, default="./dataset/32k", help="path to source dir") + args = parser.parse_args() + + previous_config = json.load(open("configs/config.json", "rb")) + + train = [] + val = [] + test = [] + idx = 0 + spk_dict = previous_config["spk"] + spk_id = max([i for i in spk_dict.values()]) + 1 + for speaker in tqdm(os.listdir(args.source_dir)): + if speaker not in spk_dict.keys(): + spk_dict[speaker] = spk_id + spk_id += 1 + wavs = [os.path.join(args.source_dir, speaker, i)for i in os.listdir(os.path.join(args.source_dir, speaker))] + wavs = [i for i in wavs if i.endswith("wav")] + shuffle(wavs) + train += wavs[2:-10] + val += wavs[:2] + test += wavs[-10:] + + assert previous_config["model"]["n_speakers"] > len(spk_dict.keys()) + shuffle(train) + shuffle(val) + shuffle(test) + + print("Writing", args.train_list) + with open(args.train_list, "w") as f: + for fname in tqdm(train): + wavpath = fname + f.write(wavpath + "\n") + + print("Writing", args.val_list) + with open(args.val_list, "w") as f: + for fname in tqdm(val): + wavpath = fname + f.write(wavpath + "\n") + + print("Writing", args.test_list) + with open(args.test_list, "w") as f: + for fname in tqdm(test): + wavpath = fname + f.write(wavpath + "\n") + + previous_config["spk"] = spk_dict + + print("Writing configs/config.json") + with open("configs/config.json", "w") as f: + json.dump(previous_config, f, indent=2) diff --git a/AutoCoverTool/ref/so_vits_svc/attentions.py b/AutoCoverTool/ref/so_vits_svc/attentions.py new file mode 100644 index 0000000..4e0b0c1 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/attentions.py @@ -0,0 +1,303 @@ +import copy +import math +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F + +import commons +import modules +from modules import LayerNorm + + +class Encoder(nn.Module): + def __init__(self, hidden_channels, filter_channels, n_heads, n_layers, kernel_size=1, p_dropout=0., 
window_size=4, **kwargs): + super().__init__() + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.window_size = window_size + + self.drop = nn.Dropout(p_dropout) + self.attn_layers = nn.ModuleList() + self.norm_layers_1 = nn.ModuleList() + self.ffn_layers = nn.ModuleList() + self.norm_layers_2 = nn.ModuleList() + for i in range(self.n_layers): + self.attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, window_size=window_size)) + self.norm_layers_1.append(LayerNorm(hidden_channels)) + self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout)) + self.norm_layers_2.append(LayerNorm(hidden_channels)) + + def forward(self, x, x_mask): + attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1) + x = x * x_mask + for i in range(self.n_layers): + y = self.attn_layers[i](x, x, attn_mask) + y = self.drop(y) + x = self.norm_layers_1[i](x + y) + + y = self.ffn_layers[i](x, x_mask) + y = self.drop(y) + x = self.norm_layers_2[i](x + y) + x = x * x_mask + return x + + +class Decoder(nn.Module): + def __init__(self, hidden_channels, filter_channels, n_heads, n_layers, kernel_size=1, p_dropout=0., proximal_bias=False, proximal_init=True, **kwargs): + super().__init__() + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.proximal_bias = proximal_bias + self.proximal_init = proximal_init + + self.drop = nn.Dropout(p_dropout) + self.self_attn_layers = nn.ModuleList() + self.norm_layers_0 = nn.ModuleList() + self.encdec_attn_layers = nn.ModuleList() + self.norm_layers_1 = nn.ModuleList() + self.ffn_layers = nn.ModuleList() + self.norm_layers_2 = nn.ModuleList() + for i in range(self.n_layers): + self.self_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout, proximal_bias=proximal_bias, proximal_init=proximal_init)) + self.norm_layers_0.append(LayerNorm(hidden_channels)) + self.encdec_attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout)) + self.norm_layers_1.append(LayerNorm(hidden_channels)) + self.ffn_layers.append(FFN(hidden_channels, hidden_channels, filter_channels, kernel_size, p_dropout=p_dropout, causal=True)) + self.norm_layers_2.append(LayerNorm(hidden_channels)) + + def forward(self, x, x_mask, h, h_mask): + """ + x: decoder input + h: encoder output + """ + self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to(device=x.device, dtype=x.dtype) + encdec_attn_mask = h_mask.unsqueeze(2) * x_mask.unsqueeze(-1) + x = x * x_mask + for i in range(self.n_layers): + y = self.self_attn_layers[i](x, x, self_attn_mask) + y = self.drop(y) + x = self.norm_layers_0[i](x + y) + + y = self.encdec_attn_layers[i](x, h, encdec_attn_mask) + y = self.drop(y) + x = self.norm_layers_1[i](x + y) + + y = self.ffn_layers[i](x, x_mask) + y = self.drop(y) + x = self.norm_layers_2[i](x + y) + x = x * x_mask + return x + + +class MultiHeadAttention(nn.Module): + def __init__(self, channels, out_channels, n_heads, p_dropout=0., window_size=None, heads_share=True, block_length=None, proximal_bias=False, proximal_init=False): + super().__init__() + assert channels % n_heads == 0 + + self.channels = channels + self.out_channels = 
out_channels + self.n_heads = n_heads + self.p_dropout = p_dropout + self.window_size = window_size + self.heads_share = heads_share + self.block_length = block_length + self.proximal_bias = proximal_bias + self.proximal_init = proximal_init + self.attn = None + + self.k_channels = channels // n_heads + self.conv_q = nn.Conv1d(channels, channels, 1) + self.conv_k = nn.Conv1d(channels, channels, 1) + self.conv_v = nn.Conv1d(channels, channels, 1) + self.conv_o = nn.Conv1d(channels, out_channels, 1) + self.drop = nn.Dropout(p_dropout) + + if window_size is not None: + n_heads_rel = 1 if heads_share else n_heads + rel_stddev = self.k_channels**-0.5 + self.emb_rel_k = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev) + self.emb_rel_v = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev) + + nn.init.xavier_uniform_(self.conv_q.weight) + nn.init.xavier_uniform_(self.conv_k.weight) + nn.init.xavier_uniform_(self.conv_v.weight) + if proximal_init: + with torch.no_grad(): + self.conv_k.weight.copy_(self.conv_q.weight) + self.conv_k.bias.copy_(self.conv_q.bias) + + def forward(self, x, c, attn_mask=None): + q = self.conv_q(x) + k = self.conv_k(c) + v = self.conv_v(c) + + x, self.attn = self.attention(q, k, v, mask=attn_mask) + + x = self.conv_o(x) + return x + + def attention(self, query, key, value, mask=None): + # reshape [b, d, t] -> [b, n_h, t, d_k] + b, d, t_s, t_t = (*key.size(), query.size(2)) + query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3) + key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) + value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) + + scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1)) + if self.window_size is not None: + assert t_s == t_t, "Relative attention is only available for self-attention." + key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s) + rel_logits = self._matmul_with_relative_keys(query /math.sqrt(self.k_channels), key_relative_embeddings) + scores_local = self._relative_position_to_absolute_position(rel_logits) + scores = scores + scores_local + if self.proximal_bias: + assert t_s == t_t, "Proximal bias is only available for self-attention." + scores = scores + self._attention_bias_proximal(t_s).to(device=scores.device, dtype=scores.dtype) + if mask is not None: + scores = scores.masked_fill(mask == 0, -1e4) + if self.block_length is not None: + assert t_s == t_t, "Local attention is only available for self-attention." 
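+            # Local attention: build a banded mask that keeps only positions within block_length of the diagonal, and mask out everything else before the softmax.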
+ block_mask = torch.ones_like(scores).triu(-self.block_length).tril(self.block_length) + scores = scores.masked_fill(block_mask == 0, -1e4) + p_attn = F.softmax(scores, dim=-1) # [b, n_h, t_t, t_s] + p_attn = self.drop(p_attn) + output = torch.matmul(p_attn, value) + if self.window_size is not None: + relative_weights = self._absolute_position_to_relative_position(p_attn) + value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s) + output = output + self._matmul_with_relative_values(relative_weights, value_relative_embeddings) + output = output.transpose(2, 3).contiguous().view(b, d, t_t) # [b, n_h, t_t, d_k] -> [b, d, t_t] + return output, p_attn + + def _matmul_with_relative_values(self, x, y): + """ + x: [b, h, l, m] + y: [h or 1, m, d] + ret: [b, h, l, d] + """ + ret = torch.matmul(x, y.unsqueeze(0)) + return ret + + def _matmul_with_relative_keys(self, x, y): + """ + x: [b, h, l, d] + y: [h or 1, m, d] + ret: [b, h, l, m] + """ + ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1)) + return ret + + def _get_relative_embeddings(self, relative_embeddings, length): + max_relative_position = 2 * self.window_size + 1 + # Pad first before slice to avoid using cond ops. + pad_length = max(length - (self.window_size + 1), 0) + slice_start_position = max((self.window_size + 1) - length, 0) + slice_end_position = slice_start_position + 2 * length - 1 + if pad_length > 0: + padded_relative_embeddings = F.pad( + relative_embeddings, + commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]])) + else: + padded_relative_embeddings = relative_embeddings + used_relative_embeddings = padded_relative_embeddings[:,slice_start_position:slice_end_position] + return used_relative_embeddings + + def _relative_position_to_absolute_position(self, x): + """ + x: [b, h, l, 2*l-1] + ret: [b, h, l, l] + """ + batch, heads, length, _ = x.size() + # Concat columns of pad to shift from relative to absolute indexing. + x = F.pad(x, commons.convert_pad_shape([[0,0],[0,0],[0,0],[0,1]])) + + # Concat extra elements so to add up to shape (len+1, 2*len-1). + x_flat = x.view([batch, heads, length * 2 * length]) + x_flat = F.pad(x_flat, commons.convert_pad_shape([[0,0],[0,0],[0,length-1]])) + + # Reshape and slice out the padded elements. + x_final = x_flat.view([batch, heads, length+1, 2*length-1])[:, :, :length, length-1:] + return x_final + + def _absolute_position_to_relative_position(self, x): + """ + x: [b, h, l, l] + ret: [b, h, l, 2*l-1] + """ + batch, heads, length, _ = x.size() + # padd along column + x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length-1]])) + x_flat = x.view([batch, heads, length**2 + length*(length -1)]) + # add 0's in the beginning that will skew the elements after reshape + x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]])) + x_final = x_flat.view([batch, heads, length, 2*length])[:,:,:,1:] + return x_final + + def _attention_bias_proximal(self, length): + """Bias for self-attention to encourage attention to close positions. + Args: + length: an integer scalar. 
+ Returns: + a Tensor with shape [1, 1, length, length] + """ + r = torch.arange(length, dtype=torch.float32) + diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1) + return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0) + + +class FFN(nn.Module): + def __init__(self, in_channels, out_channels, filter_channels, kernel_size, p_dropout=0., activation=None, causal=False): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.filter_channels = filter_channels + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.activation = activation + self.causal = causal + + if causal: + self.padding = self._causal_padding + else: + self.padding = self._same_padding + + self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size) + self.conv_2 = nn.Conv1d(filter_channels, out_channels, kernel_size) + self.drop = nn.Dropout(p_dropout) + + def forward(self, x, x_mask): + x = self.conv_1(self.padding(x * x_mask)) + if self.activation == "gelu": + x = x * torch.sigmoid(1.702 * x) + else: + x = torch.relu(x) + x = self.drop(x) + x = self.conv_2(self.padding(x * x_mask)) + return x * x_mask + + def _causal_padding(self, x): + if self.kernel_size == 1: + return x + pad_l = self.kernel_size - 1 + pad_r = 0 + padding = [[0, 0], [0, 0], [pad_l, pad_r]] + x = F.pad(x, commons.convert_pad_shape(padding)) + return x + + def _same_padding(self, x): + if self.kernel_size == 1: + return x + pad_l = (self.kernel_size - 1) // 2 + pad_r = self.kernel_size // 2 + padding = [[0, 0], [0, 0], [pad_l, pad_r]] + x = F.pad(x, commons.convert_pad_shape(padding)) + return x diff --git a/AutoCoverTool/ref/so_vits_svc/commons.py b/AutoCoverTool/ref/so_vits_svc/commons.py new file mode 100644 index 0000000..0748880 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/commons.py @@ -0,0 +1,188 @@ +import math +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F + +def slice_pitch_segments(x, ids_str, segment_size=4): + ret = torch.zeros_like(x[:, :segment_size]) + for i in range(x.size(0)): + idx_str = ids_str[i] + idx_end = idx_str + segment_size + ret[i] = x[i, idx_str:idx_end] + return ret + +def rand_slice_segments_with_pitch(x, pitch, x_lengths=None, segment_size=4): + b, d, t = x.size() + if x_lengths is None: + x_lengths = t + ids_str_max = x_lengths - segment_size + 1 + ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long) + ret = slice_segments(x, ids_str, segment_size) + ret_pitch = slice_pitch_segments(pitch, ids_str, segment_size) + return ret, ret_pitch, ids_str + +def init_weights(m, mean=0.0, std=0.01): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(mean, std) + + +def get_padding(kernel_size, dilation=1): + return int((kernel_size*dilation - dilation)/2) + + +def convert_pad_shape(pad_shape): + l = pad_shape[::-1] + pad_shape = [item for sublist in l for item in sublist] + return pad_shape + + +def intersperse(lst, item): + result = [item] * (len(lst) * 2 + 1) + result[1::2] = lst + return result + + +def kl_divergence(m_p, logs_p, m_q, logs_q): + """KL(P||Q)""" + kl = (logs_q - logs_p) - 0.5 + kl += 0.5 * (torch.exp(2. * logs_p) + ((m_p - m_q)**2)) * torch.exp(-2. 
* logs_q) + return kl + + +def rand_gumbel(shape): + """Sample from the Gumbel distribution, protect from overflows.""" + uniform_samples = torch.rand(shape) * 0.99998 + 0.00001 + return -torch.log(-torch.log(uniform_samples)) + + +def rand_gumbel_like(x): + g = rand_gumbel(x.size()).to(dtype=x.dtype, device=x.device) + return g + + +def slice_segments(x, ids_str, segment_size=4): + ret = torch.zeros_like(x[:, :, :segment_size]) + for i in range(x.size(0)): + idx_str = ids_str[i] + idx_end = idx_str + segment_size + ret[i] = x[i, :, idx_str:idx_end] + return ret + + +def rand_slice_segments(x, x_lengths=None, segment_size=4): + b, d, t = x.size() + if x_lengths is None: + x_lengths = t + ids_str_max = x_lengths - segment_size + 1 + ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long) + ret = slice_segments(x, ids_str, segment_size) + return ret, ids_str + + +def rand_spec_segments(x, x_lengths=None, segment_size=4): + b, d, t = x.size() + if x_lengths is None: + x_lengths = t + ids_str_max = x_lengths - segment_size + ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long) + ret = slice_segments(x, ids_str, segment_size) + return ret, ids_str + + +def get_timing_signal_1d( + length, channels, min_timescale=1.0, max_timescale=1.0e4): + position = torch.arange(length, dtype=torch.float) + num_timescales = channels // 2 + log_timescale_increment = ( + math.log(float(max_timescale) / float(min_timescale)) / + (num_timescales - 1)) + inv_timescales = min_timescale * torch.exp( + torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment) + scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1) + signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0) + signal = F.pad(signal, [0, 0, 0, channels % 2]) + signal = signal.view(1, channels, length) + return signal + + +def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): + b, channels, length = x.size() + signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) + return x + signal.to(dtype=x.dtype, device=x.device) + + +def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis=1): + b, channels, length = x.size() + signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) + return torch.cat([x, signal.to(dtype=x.dtype, device=x.device)], axis) + + +def subsequent_mask(length): + mask = torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0) + return mask + + +@torch.jit.script +def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): + n_channels_int = n_channels[0] + in_act = input_a + input_b + t_act = torch.tanh(in_act[:, :n_channels_int, :]) + s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) + acts = t_act * s_act + return acts + + +def convert_pad_shape(pad_shape): + l = pad_shape[::-1] + pad_shape = [item for sublist in l for item in sublist] + return pad_shape + + +def shift_1d(x): + x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1] + return x + + +def sequence_mask(length, max_length=None): + if max_length is None: + max_length = length.max() + x = torch.arange(max_length, dtype=length.dtype, device=length.device) + return x.unsqueeze(0) < length.unsqueeze(1) + + +def generate_path(duration, mask): + """ + duration: [b, 1, t_x] + mask: [b, 1, t_y, t_x] + """ + device = duration.device + + b, _, t_y, t_x = mask.shape + cum_duration = torch.cumsum(duration, -1) + + cum_duration_flat = cum_duration.view(b * t_x) + path = 
sequence_mask(cum_duration_flat, t_y).to(mask.dtype) + path = path.view(b, t_x, t_y) + path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1] + path = path.unsqueeze(1).transpose(2,3) * mask + return path + + +def clip_grad_value_(parameters, clip_value, norm_type=2): + if isinstance(parameters, torch.Tensor): + parameters = [parameters] + parameters = list(filter(lambda p: p.grad is not None, parameters)) + norm_type = float(norm_type) + if clip_value is not None: + clip_value = float(clip_value) + + total_norm = 0 + for p in parameters: + param_norm = p.grad.data.norm(norm_type) + total_norm += param_norm.item() ** norm_type + if clip_value is not None: + p.grad.data.clamp_(min=-clip_value, max=clip_value) + total_norm = total_norm ** (1. / norm_type) + return total_norm diff --git a/AutoCoverTool/ref/so_vits_svc/configs/config.json b/AutoCoverTool/ref/so_vits_svc/configs/config.json new file mode 100644 index 0000000..13a503c --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/configs/config.json @@ -0,0 +1 @@ +请使用生成的config文件 \ No newline at end of file diff --git a/AutoCoverTool/ref/so_vits_svc/data_utils.py b/AutoCoverTool/ref/so_vits_svc/data_utils.py new file mode 100644 index 0000000..1f5d4e2 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/data_utils.py @@ -0,0 +1,154 @@ +import time +import os +import random +import numpy as np +import torch +import torch.utils.data + +import commons +from mel_processing import spectrogram_torch, spec_to_mel_torch +from utils import load_wav_to_torch, load_filepaths_and_text, transform + +# import h5py + + +"""Multi speaker version""" + + +class TextAudioSpeakerLoader(torch.utils.data.Dataset): + """ + 1) loads audio, speaker_id, text pairs + 2) normalizes text and converts them to sequences of integers + 3) computes spectrograms from audio files. 
+ """ + + def __init__(self, audiopaths, hparams): + self.audiopaths = load_filepaths_and_text(audiopaths) + self.max_wav_value = hparams.data.max_wav_value + self.sampling_rate = hparams.data.sampling_rate + self.filter_length = hparams.data.filter_length + self.hop_length = hparams.data.hop_length + self.win_length = hparams.data.win_length + self.sampling_rate = hparams.data.sampling_rate + self.use_sr = hparams.train.use_sr + self.spec_len = hparams.train.max_speclen + self.spk_map = hparams.spk + + random.seed(1234) + random.shuffle(self.audiopaths) + + def get_audio(self, filename): + filename = filename.replace("\\", "/") + audio, sampling_rate = load_wav_to_torch(filename) + if sampling_rate != self.sampling_rate: + raise ValueError("{} SR doesn't match target {} SR".format( + sampling_rate, self.sampling_rate)) + audio_norm = audio / self.max_wav_value + audio_norm = audio_norm.unsqueeze(0) + spec_filename = filename.replace(".wav", ".spec.pt") + if os.path.exists(spec_filename): + spec = torch.load(spec_filename) + else: + spec = spectrogram_torch(audio_norm, self.filter_length, + self.sampling_rate, self.hop_length, self.win_length, + center=False) + spec = torch.squeeze(spec, 0) + torch.save(spec, spec_filename) + + spk = filename.split("/")[-2] + spk = torch.LongTensor([self.spk_map[spk]]) + + c = torch.load(filename + ".soft.pt").squeeze(0) + c = torch.repeat_interleave(c, repeats=2, dim=1) + + f0 = np.load(filename + ".f0.npy") + f0 = torch.FloatTensor(f0) + lmin = min(c.size(-1), spec.size(-1), f0.shape[0]) + assert abs(c.size(-1) - spec.size(-1)) < 4, (c.size(-1), spec.size(-1), f0.shape, filename) + assert abs(lmin - spec.size(-1)) < 4, (c.size(-1), spec.size(-1), f0.shape) + assert abs(lmin - c.size(-1)) < 4, (c.size(-1), spec.size(-1), f0.shape) + spec, c, f0 = spec[:, :lmin], c[:, :lmin], f0[:lmin] + audio_norm = audio_norm[:, :lmin * self.hop_length] + _spec, _c, _audio_norm, _f0 = spec, c, audio_norm, f0 + while spec.size(-1) < self.spec_len: + spec = torch.cat((spec, _spec), -1) + c = torch.cat((c, _c), -1) + f0 = torch.cat((f0, _f0), -1) + audio_norm = torch.cat((audio_norm, _audio_norm), -1) + start = random.randint(0, spec.size(-1) - self.spec_len) + end = start + self.spec_len + spec = spec[:, start:end] + c = c[:, start:end] + f0 = f0[start:end] + audio_norm = audio_norm[:, start * self.hop_length:end * self.hop_length] + + return c, f0, spec, audio_norm, spk + + def __getitem__(self, index): + return self.get_audio(self.audiopaths[index][0]) + + def __len__(self): + return len(self.audiopaths) + + +class EvalDataLoader(torch.utils.data.Dataset): + """ + 1) loads audio, speaker_id, text pairs + 2) normalizes text and converts them to sequences of integers + 3) computes spectrograms from audio files. 
+ """ + + def __init__(self, audiopaths, hparams): + self.audiopaths = load_filepaths_and_text(audiopaths) + self.max_wav_value = hparams.data.max_wav_value + self.sampling_rate = hparams.data.sampling_rate + self.filter_length = hparams.data.filter_length + self.hop_length = hparams.data.hop_length + self.win_length = hparams.data.win_length + self.sampling_rate = hparams.data.sampling_rate + self.use_sr = hparams.train.use_sr + self.audiopaths = self.audiopaths[:5] + self.spk_map = hparams.spk + + + def get_audio(self, filename): + filename = filename.replace("\\", "/") + audio, sampling_rate = load_wav_to_torch(filename) + if sampling_rate != self.sampling_rate: + raise ValueError("{} SR doesn't match target {} SR".format( + sampling_rate, self.sampling_rate)) + audio_norm = audio / self.max_wav_value + audio_norm = audio_norm.unsqueeze(0) + spec_filename = filename.replace(".wav", ".spec.pt") + if os.path.exists(spec_filename): + spec = torch.load(spec_filename) + else: + spec = spectrogram_torch(audio_norm, self.filter_length, + self.sampling_rate, self.hop_length, self.win_length, + center=False) + spec = torch.squeeze(spec, 0) + torch.save(spec, spec_filename) + + spk = filename.split("/")[-2] + spk = torch.LongTensor([self.spk_map[spk]]) + + c = torch.load(filename + ".soft.pt").squeeze(0) + + c = torch.repeat_interleave(c, repeats=2, dim=1) + + f0 = np.load(filename + ".f0.npy") + f0 = torch.FloatTensor(f0) + lmin = min(c.size(-1), spec.size(-1), f0.shape[0]) + assert abs(c.size(-1) - spec.size(-1)) < 4, (c.size(-1), spec.size(-1), f0.shape) + assert abs(f0.shape[0] - spec.shape[-1]) < 4, (c.size(-1), spec.size(-1), f0.shape) + spec, c, f0 = spec[:, :lmin], c[:, :lmin], f0[:lmin] + audio_norm = audio_norm[:, :lmin * self.hop_length] + + return c, f0, spec, audio_norm, spk + + def __getitem__(self, index): + return self.get_audio(self.audiopaths[index][0]) + + def __len__(self): + return len(self.audiopaths) + diff --git a/AutoCoverTool/ref/so_vits_svc/filelists/test.txt b/AutoCoverTool/ref/so_vits_svc/filelists/test.txt new file mode 100644 index 0000000..5246bc4 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/filelists/test.txt @@ -0,0 +1,7 @@ +./dataset/32k/yunhao/001829.wav +./dataset/32k/yunhao/001827.wav +./dataset/32k/jishuang/000104.wav +./dataset/32k/nen/kne110_005.wav +./dataset/32k/nen/kne110_004.wav +./dataset/32k/jishuang/000223.wav +./dataset/32k/yunhao/001828.wav diff --git a/AutoCoverTool/ref/so_vits_svc/filelists/train.txt b/AutoCoverTool/ref/so_vits_svc/filelists/train.txt new file mode 100644 index 0000000..e69de29 diff --git a/AutoCoverTool/ref/so_vits_svc/filelists/val.txt b/AutoCoverTool/ref/so_vits_svc/filelists/val.txt new file mode 100644 index 0000000..40d9d47 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/filelists/val.txt @@ -0,0 +1,6 @@ +./dataset/32k/nen/kne110_005.wav +./dataset/32k/yunhao/001827.wav +./dataset/32k/jishuang/000104.wav +./dataset/32k/jishuang/000223.wav +./dataset/32k/nen/kne110_004.wav +./dataset/32k/yunhao/001828.wav diff --git a/AutoCoverTool/ref/so_vits_svc/flask_api.py b/AutoCoverTool/ref/so_vits_svc/flask_api.py new file mode 100644 index 0000000..8cc236a --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/flask_api.py @@ -0,0 +1,56 @@ +import io +import logging + +import soundfile +import torch +import torchaudio +from flask import Flask, request, send_file +from flask_cors import CORS + +from inference.infer_tool import Svc, RealTimeVC + +app = Flask(__name__) + +CORS(app) + 
+logging.getLogger('numba').setLevel(logging.WARNING)
+
+
+@app.route("/voiceChangeModel", methods=["POST"])
+def voice_change_model():
+    request_form = request.form
+    wave_file = request.files.get("sample", None)
+    # pitch-shift amount in semitones
+    f_pitch_change = float(request_form.get("fPitchChange", 0))
+    # sample rate expected by the DAW
+    daw_sample = int(float(request_form.get("sampleRate", 0)))
+    speaker_id = int(float(request_form.get("sSpeakId", 0)))
+    # read the wav file out of the http request
+    input_wav_path = io.BytesIO(wave_file.read())
+
+    # model inference
+    if raw_infer:
+        out_audio, out_sr = svc_model.infer(speaker_id, f_pitch_change, input_wav_path)
+        tar_audio = torchaudio.functional.resample(out_audio, svc_model.target_sample, daw_sample)
+    else:
+        out_audio = svc.process(svc_model, speaker_id, f_pitch_change, input_wav_path)
+        tar_audio = torchaudio.functional.resample(torch.from_numpy(out_audio), svc_model.target_sample, daw_sample)
+    # return the audio
+    out_wav_path = io.BytesIO()
+    soundfile.write(out_wav_path, tar_audio.cpu().numpy(), daw_sample, format="wav")
+    out_wav_path.seek(0)
+    return send_file(out_wav_path, download_name="temp.wav", as_attachment=True)
+
+
+if __name__ == '__main__':
+    # True: synthesize each slice directly; False: cross-fade between slices.
+    # Setting the VST plugin slice length to 0.3-0.5s lowers latency, but direct
+    # slicing can pop at the seams, while cross-fading slightly overlaps the audio.
+    # Pick whichever trade-off is acceptable, or raise the VST max slice length to
+    # 1s and keep this set to True for higher latency but more stable quality.
+    raw_infer = True
+    # each model corresponds to exactly one config file
+    model_name = "logs/32k/G_174000-Copy1.pth"
+    config_name = "configs/config.json"
+    svc_model = Svc(model_name, config_name)
+    svc = RealTimeVC()
+    # this port matches the VST plugin; changing it is not recommended
+    app.run(port=6842, host="0.0.0.0", debug=False, threaded=False)
diff --git a/AutoCoverTool/ref/so_vits_svc/hubert/__init__.py b/AutoCoverTool/ref/so_vits_svc/hubert/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/AutoCoverTool/ref/so_vits_svc/hubert/hubert_model.py b/AutoCoverTool/ref/so_vits_svc/hubert/hubert_model.py
new file mode 100644
index 0000000..7fb642d
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/hubert/hubert_model.py
@@ -0,0 +1,222 @@
+import copy
+import random
+from typing import Optional, Tuple
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as t_func
+from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present
+
+
+class Hubert(nn.Module):
+    def __init__(self, num_label_embeddings: int = 100, mask: bool = True):
+        super().__init__()
+        self._mask = mask
+        self.feature_extractor = FeatureExtractor()
+        self.feature_projection = FeatureProjection()
+        self.positional_embedding = PositionalConvEmbedding()
+        self.norm = nn.LayerNorm(768)
+        self.dropout = nn.Dropout(0.1)
+        self.encoder = TransformerEncoder(
+            nn.TransformerEncoderLayer(
+                768, 12, 3072, activation="gelu", batch_first=True
+            ),
+            12,
+        )
+        self.proj = nn.Linear(768, 256)
+
+        self.masked_spec_embed = nn.Parameter(torch.FloatTensor(768).uniform_())
+        self.label_embedding = nn.Embedding(num_label_embeddings, 256)
+
+    def mask(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
+        mask = None
+        if self.training and self._mask:
+            mask = _compute_mask((x.size(0), x.size(1)), 0.8, 10, x.device, 2)
+            x[mask] = self.masked_spec_embed.to(x.dtype)
+        return x, mask
+
+    def encode(
+        self, x: torch.Tensor, layer: Optional[int] = None
+    ) -> Tuple[torch.Tensor, torch.Tensor]:
+        x = self.feature_extractor(x)
+        x = self.feature_projection(x.transpose(1, 2))
+        x, mask = self.mask(x)
+        x = x + self.positional_embedding(x)
+        x = self.dropout(self.norm(x))
+        x = self.encoder(x, output_layer=layer)
+        return x, mask
+
+    def logits(self, x: torch.Tensor) -> torch.Tensor:
+        logits = torch.cosine_similarity(
x.unsqueeze(2), + self.label_embedding.weight.unsqueeze(0).unsqueeze(0), + dim=-1, + ) + return logits / 0.1 + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + x, mask = self.encode(x) + x = self.proj(x) + logits = self.logits(x) + return logits, mask + + +class HubertSoft(Hubert): + def __init__(self): + super().__init__() + + @torch.inference_mode() + def units(self, wav: torch.Tensor) -> torch.Tensor: + wav = t_func.pad(wav, ((400 - 320) // 2, (400 - 320) // 2)) + x, _ = self.encode(wav) + return self.proj(x) + + +class FeatureExtractor(nn.Module): + def __init__(self): + super().__init__() + self.conv0 = nn.Conv1d(1, 512, 10, 5, bias=False) + self.norm0 = nn.GroupNorm(512, 512) + self.conv1 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv2 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv3 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv4 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv5 = nn.Conv1d(512, 512, 2, 2, bias=False) + self.conv6 = nn.Conv1d(512, 512, 2, 2, bias=False) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = t_func.gelu(self.norm0(self.conv0(x))) + x = t_func.gelu(self.conv1(x)) + x = t_func.gelu(self.conv2(x)) + x = t_func.gelu(self.conv3(x)) + x = t_func.gelu(self.conv4(x)) + x = t_func.gelu(self.conv5(x)) + x = t_func.gelu(self.conv6(x)) + return x + + +class FeatureProjection(nn.Module): + def __init__(self): + super().__init__() + self.norm = nn.LayerNorm(512) + self.projection = nn.Linear(512, 768) + self.dropout = nn.Dropout(0.1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.norm(x) + x = self.projection(x) + x = self.dropout(x) + return x + + +class PositionalConvEmbedding(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv1d( + 768, + 768, + kernel_size=128, + padding=128 // 2, + groups=16, + ) + self.conv = nn.utils.weight_norm(self.conv, name="weight", dim=2) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.conv(x.transpose(1, 2)) + x = t_func.gelu(x[:, :, :-1]) + return x.transpose(1, 2) + + +class TransformerEncoder(nn.Module): + def __init__( + self, encoder_layer: nn.TransformerEncoderLayer, num_layers: int + ) -> None: + super(TransformerEncoder, self).__init__() + self.layers = nn.ModuleList( + [copy.deepcopy(encoder_layer) for _ in range(num_layers)] + ) + self.num_layers = num_layers + + def forward( + self, + src: torch.Tensor, + mask: torch.Tensor = None, + src_key_padding_mask: torch.Tensor = None, + output_layer: Optional[int] = None, + ) -> torch.Tensor: + output = src + for layer in self.layers[:output_layer]: + output = layer( + output, src_mask=mask, src_key_padding_mask=src_key_padding_mask + ) + return output + + +def _compute_mask( + shape: Tuple[int, int], + mask_prob: float, + mask_length: int, + device: torch.device, + min_masks: int = 0, +) -> torch.Tensor: + batch_size, sequence_length = shape + + if mask_length < 1: + raise ValueError("`mask_length` has to be bigger than 0.") + + if mask_length > sequence_length: + raise ValueError( + f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length} and `sequence_length`: {sequence_length}`" + ) + + # compute number of masked spans in batch + num_masked_spans = int(mask_prob * sequence_length / mask_length + random.random()) + num_masked_spans = max(num_masked_spans, min_masks) + + # make sure num masked indices <= sequence_length + if num_masked_spans * mask_length > sequence_length: + num_masked_spans = sequence_length // mask_length + + 
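# Worked example (illustrative only): with mask_prob=0.8, mask_length=10 and
+    # sequence_length=400, num_masked_spans = int(0.8 * 400 / 10 + rand) = 32, so up
+    # to 32 * 10 = 320 of the 400 frames get masked (spans may overlap); the clamp
+    # above keeps the spans from running past the end of the sequence.
+    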
# SpecAugment mask to fill + mask = torch.zeros((batch_size, sequence_length), device=device, dtype=torch.bool) + + # uniform distribution to sample from, make sure that offset samples are < sequence_length + uniform_dist = torch.ones( + (batch_size, sequence_length - (mask_length - 1)), device=device + ) + + # get random indices to mask + mask_indices = torch.multinomial(uniform_dist, num_masked_spans) + + # expand masked indices to masked spans + mask_indices = ( + mask_indices.unsqueeze(dim=-1) + .expand((batch_size, num_masked_spans, mask_length)) + .reshape(batch_size, num_masked_spans * mask_length) + ) + offsets = ( + torch.arange(mask_length, device=device)[None, None, :] + .expand((batch_size, num_masked_spans, mask_length)) + .reshape(batch_size, num_masked_spans * mask_length) + ) + mask_idxs = mask_indices + offsets + + # scatter indices to mask + mask = mask.scatter(1, mask_idxs, True) + + return mask + + +def hubert_soft( + path: str, +) -> HubertSoft: + r"""HuBERT-Soft from `"A Comparison of Discrete and Soft Speech Units for Improved Voice Conversion"`. + Args: + path (str): path of a pretrained model + """ + hubert = HubertSoft() + checkpoint = torch.load(path) + consume_prefix_in_state_dict_if_present(checkpoint, "module.") + hubert.load_state_dict(checkpoint) + hubert.eval() + return hubert diff --git a/AutoCoverTool/ref/so_vits_svc/hubert/hubert_model_onnx.py b/AutoCoverTool/ref/so_vits_svc/hubert/hubert_model_onnx.py new file mode 100644 index 0000000..d18f3c2 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/hubert/hubert_model_onnx.py @@ -0,0 +1,217 @@ +import copy +import random +from typing import Optional, Tuple + +import torch +import torch.nn as nn +import torch.nn.functional as t_func +from torch.nn.modules.utils import consume_prefix_in_state_dict_if_present + + +class Hubert(nn.Module): + def __init__(self, num_label_embeddings: int = 100, mask: bool = True): + super().__init__() + self._mask = mask + self.feature_extractor = FeatureExtractor() + self.feature_projection = FeatureProjection() + self.positional_embedding = PositionalConvEmbedding() + self.norm = nn.LayerNorm(768) + self.dropout = nn.Dropout(0.1) + self.encoder = TransformerEncoder( + nn.TransformerEncoderLayer( + 768, 12, 3072, activation="gelu", batch_first=True + ), + 12, + ) + self.proj = nn.Linear(768, 256) + + self.masked_spec_embed = nn.Parameter(torch.FloatTensor(768).uniform_()) + self.label_embedding = nn.Embedding(num_label_embeddings, 256) + + def mask(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + mask = None + if self.training and self._mask: + mask = _compute_mask((x.size(0), x.size(1)), 0.8, 10, x.device, 2) + x[mask] = self.masked_spec_embed.to(x.dtype) + return x, mask + + def encode( + self, x: torch.Tensor, layer: Optional[int] = None + ) -> Tuple[torch.Tensor, torch.Tensor]: + x = self.feature_extractor(x) + x = self.feature_projection(x.transpose(1, 2)) + x, mask = self.mask(x) + x = x + self.positional_embedding(x) + x = self.dropout(self.norm(x)) + x = self.encoder(x, output_layer=layer) + return x, mask + + def logits(self, x: torch.Tensor) -> torch.Tensor: + logits = torch.cosine_similarity( + x.unsqueeze(2), + self.label_embedding.weight.unsqueeze(0).unsqueeze(0), + dim=-1, + ) + return logits / 0.1 + + +class HubertSoft(Hubert): + def __init__(self): + super().__init__() + + def units(self, wav: torch.Tensor) -> torch.Tensor: + wav = t_func.pad(wav, ((400 - 320) // 2, (400 - 320) // 2)) + x, _ = self.encode(wav) + return self.proj(x) + + def 
forward(self, x): + return self.units(x) + +class FeatureExtractor(nn.Module): + def __init__(self): + super().__init__() + self.conv0 = nn.Conv1d(1, 512, 10, 5, bias=False) + self.norm0 = nn.GroupNorm(512, 512) + self.conv1 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv2 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv3 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv4 = nn.Conv1d(512, 512, 3, 2, bias=False) + self.conv5 = nn.Conv1d(512, 512, 2, 2, bias=False) + self.conv6 = nn.Conv1d(512, 512, 2, 2, bias=False) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = t_func.gelu(self.norm0(self.conv0(x))) + x = t_func.gelu(self.conv1(x)) + x = t_func.gelu(self.conv2(x)) + x = t_func.gelu(self.conv3(x)) + x = t_func.gelu(self.conv4(x)) + x = t_func.gelu(self.conv5(x)) + x = t_func.gelu(self.conv6(x)) + return x + + +class FeatureProjection(nn.Module): + def __init__(self): + super().__init__() + self.norm = nn.LayerNorm(512) + self.projection = nn.Linear(512, 768) + self.dropout = nn.Dropout(0.1) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.norm(x) + x = self.projection(x) + x = self.dropout(x) + return x + + +class PositionalConvEmbedding(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv1d( + 768, + 768, + kernel_size=128, + padding=128 // 2, + groups=16, + ) + self.conv = nn.utils.weight_norm(self.conv, name="weight", dim=2) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = self.conv(x.transpose(1, 2)) + x = t_func.gelu(x[:, :, :-1]) + return x.transpose(1, 2) + + +class TransformerEncoder(nn.Module): + def __init__( + self, encoder_layer: nn.TransformerEncoderLayer, num_layers: int + ) -> None: + super(TransformerEncoder, self).__init__() + self.layers = nn.ModuleList( + [copy.deepcopy(encoder_layer) for _ in range(num_layers)] + ) + self.num_layers = num_layers + + def forward( + self, + src: torch.Tensor, + mask: torch.Tensor = None, + src_key_padding_mask: torch.Tensor = None, + output_layer: Optional[int] = None, + ) -> torch.Tensor: + output = src + for layer in self.layers[:output_layer]: + output = layer( + output, src_mask=mask, src_key_padding_mask=src_key_padding_mask + ) + return output + + +def _compute_mask( + shape: Tuple[int, int], + mask_prob: float, + mask_length: int, + device: torch.device, + min_masks: int = 0, +) -> torch.Tensor: + batch_size, sequence_length = shape + + if mask_length < 1: + raise ValueError("`mask_length` has to be bigger than 0.") + + if mask_length > sequence_length: + raise ValueError( + f"`mask_length` has to be smaller than `sequence_length`, but got `mask_length`: {mask_length} and `sequence_length`: {sequence_length}`" + ) + + # compute number of masked spans in batch + num_masked_spans = int(mask_prob * sequence_length / mask_length + random.random()) + num_masked_spans = max(num_masked_spans, min_masks) + + # make sure num masked indices <= sequence_length + if num_masked_spans * mask_length > sequence_length: + num_masked_spans = sequence_length // mask_length + + # SpecAugment mask to fill + mask = torch.zeros((batch_size, sequence_length), device=device, dtype=torch.bool) + + # uniform distribution to sample from, make sure that offset samples are < sequence_length + uniform_dist = torch.ones( + (batch_size, sequence_length - (mask_length - 1)), device=device + ) + + # get random indices to mask + mask_indices = torch.multinomial(uniform_dist, num_masked_spans) + + # expand masked indices to masked spans + mask_indices = ( + mask_indices.unsqueeze(dim=-1) + 
.expand((batch_size, num_masked_spans, mask_length))
+        .reshape(batch_size, num_masked_spans * mask_length)
+    )
+    offsets = (
+        torch.arange(mask_length, device=device)[None, None, :]
+        .expand((batch_size, num_masked_spans, mask_length))
+        .reshape(batch_size, num_masked_spans * mask_length)
+    )
+    mask_idxs = mask_indices + offsets
+
+    # scatter indices to mask
+    mask = mask.scatter(1, mask_idxs, True)
+
+    return mask
+
+
+def hubert_soft(
+    path: str,
+) -> HubertSoft:
+    r"""HuBERT-Soft from `"A Comparison of Discrete and Soft Speech Units for Improved Voice Conversion"`.
+    Args:
+        path (str): path of a pretrained model
+    """
+    hubert = HubertSoft()
+    checkpoint = torch.load(path)
+    consume_prefix_in_state_dict_if_present(checkpoint, "module.")
+    hubert.load_state_dict(checkpoint)
+    hubert.eval()
+    return hubert
diff --git a/AutoCoverTool/ref/so_vits_svc/hubert/put_hubert_ckpt_here b/AutoCoverTool/ref/so_vits_svc/hubert/put_hubert_ckpt_here
new file mode 100644
index 0000000..e69de29
diff --git a/AutoCoverTool/ref/so_vits_svc/inference/__init__.py b/AutoCoverTool/ref/so_vits_svc/inference/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/AutoCoverTool/ref/so_vits_svc/inference/infer_tool.py b/AutoCoverTool/ref/so_vits_svc/inference/infer_tool.py
new file mode 100644
index 0000000..2bfa86d
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/inference/infer_tool.py
@@ -0,0 +1,328 @@
+import hashlib
+import io
+import json
+import logging
+import os
+import time
+from pathlib import Path
+
+import librosa
+import maad
+import numpy as np
+# import onnxruntime
+import parselmouth
+import soundfile
+import torch
+import torchaudio
+
+from hubert import hubert_model
+import utils
+from models import SynthesizerTrn
+
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
+
+
+def read_temp(file_name):
+    if not os.path.exists(file_name):
+        with open(file_name, "w") as f:
+            f.write(json.dumps({"info": "temp_dict"}))
+        return {}
+    else:
+        try:
+            with open(file_name, "r") as f:
+                data = f.read()
+            data_dict = json.loads(data)
+            if os.path.getsize(file_name) > 50 * 1024 * 1024:
+                f_name = file_name.replace("\\", "/").split("/")[-1]
+                print(f"clean {f_name}")
+                for wav_hash in list(data_dict.keys()):
+                    if int(time.time()) - int(data_dict[wav_hash]["time"]) > 14 * 24 * 3600:
+                        del data_dict[wav_hash]
+        except Exception as e:
+            print(e)
+            print(f"{file_name} error, auto rebuild file")
+            data_dict = {"info": "temp_dict"}
+        return data_dict
+
+
+def write_temp(file_name, data):
+    with open(file_name, "w") as f:
+        f.write(json.dumps(data))
+
+
+def timeit(func):
+    def run(*args, **kwargs):
+        t = time.time()
+        res = func(*args, **kwargs)
+        print('executing \'%s\' cost %.3fs' % (func.__name__, time.time() - t))
+        return res
+
+    return run
+
+
+def format_wav(audio_path):
+    if Path(audio_path).suffix == '.wav':
+        return
+    raw_audio, raw_sample_rate = librosa.load(audio_path, mono=True, sr=None)
+    soundfile.write(Path(audio_path).with_suffix(".wav"), raw_audio, raw_sample_rate)
+
+
+def get_end_file(dir_path, end):
+    file_lists = []
+    for root, dirs, files in os.walk(dir_path):
+        files = [f for f in files if f[0] != '.']
+        dirs[:] = [d for d in dirs if d[0] != '.']
+        for f_file in files:
+            if f_file.endswith(end):
+                file_lists.append(os.path.join(root, f_file).replace("\\", "/"))
+    return file_lists
+
+
+def get_md5(content):
+    return hashlib.new("md5", content).hexdigest()
+
+
+def resize2d_f0(x, target_len):
+    source = np.array(x)
+    source[source < 0.001] = np.nan
+    target = np.interp(np.arange(0, len(source) * target_len, len(source)) / target_len, np.arange(0, len(source)),
+                       source)
+    res = np.nan_to_num(target)
+    return res
+
+
+def get_f0(x, p_len, f0_up_key=0):
+    time_step = 160 / 16000 * 1000
+    f0_min = 50
+    f0_max = 1100
+    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+    f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+
+    f0 = parselmouth.Sound(x, 16000).to_pitch_ac(
+        time_step=time_step / 1000, voicing_threshold=0.6,
+        pitch_floor=f0_min, pitch_ceiling=f0_max).selected_array['frequency']
+    if len(f0) > p_len:
+        f0 = f0[:p_len]
+    pad_size = (p_len - len(f0) + 1) // 2
+    if (pad_size > 0 or p_len - len(f0) - pad_size > 0):
+        f0 = np.pad(f0, [[pad_size, p_len - len(f0) - pad_size]], mode='constant')
+
+    f0 *= pow(2, f0_up_key / 12)
+    f0_mel = 1127 * np.log(1 + f0 / 700)
+    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (f0_mel_max - f0_mel_min) + 1
+    f0_mel[f0_mel <= 1] = 1
+    f0_mel[f0_mel > 255] = 255
+    f0_coarse = np.rint(f0_mel).astype(int)
+    return f0_coarse, f0
+
+
+def clean_pitch(input_pitch):
+    num_nan = np.sum(input_pitch == 1)
+    if num_nan / len(input_pitch) > 0.9:
+        input_pitch[input_pitch != 1] = 1
+    return input_pitch
+
+
+def plt_pitch(input_pitch):
+    input_pitch = input_pitch.astype(float)
+    input_pitch[input_pitch == 1] = np.nan
+    return input_pitch
+
+
+def f0_to_pitch(ff):
+    f0_pitch = 69 + 12 * np.log2(ff / 440)
+    return f0_pitch
+
+
+def fill_a_to_b(a, b):
+    if len(a) < len(b):
+        for _ in range(0, len(b) - len(a)):
+            a.append(a[0])
+
+
+def mkdir(paths: list):
+    for path in paths:
+        if not os.path.exists(path):
+            os.mkdir(path)
+
+
+class Svc(object):
+    def __init__(self, net_g_path, config_path, hubert_path="data/models/hubert-soft-0d54a1f4.pt",
+                 onnx=False):
+        self.onnx = onnx
+        self.net_g_path = net_g_path
+        self.hubert_path = hubert_path
+        self.dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.net_g_ms = None
+        self.hps_ms = utils.get_hparams_from_file(config_path)
+        self.target_sample = self.hps_ms.data.sampling_rate
+        self.hop_size = self.hps_ms.data.hop_length
+        self.speakers = {}
+        for spk, sid in self.hps_ms.spk.items():
+            self.speakers[sid] = spk
+        self.spk2id = self.hps_ms.spk
+        # load the hubert model
+        self.hubert_soft = hubert_model.hubert_soft(hubert_path)
+        if torch.cuda.is_available():
+            self.hubert_soft = self.hubert_soft.cuda()
+        self.load_model()
+
+    def load_model(self):
+        # load the model config
+        if self.onnx:
+            raise NotImplementedError
+            # self.net_g_ms = SynthesizerTrnForONNX(
+            #     178,
+            #     self.hps_ms.data.filter_length // 2 + 1,
+            #     self.hps_ms.train.segment_size // self.hps_ms.data.hop_length,
+            #     n_speakers=self.hps_ms.data.n_speakers,
+            #     **self.hps_ms.model)
+            # _ = utils.load_checkpoint(self.net_g_path, self.net_g_ms, None)
+        else:
+            self.net_g_ms = SynthesizerTrn(
+                self.hps_ms.data.filter_length // 2 + 1,
+                self.hps_ms.train.segment_size // self.hps_ms.data.hop_length,
+                **self.hps_ms.model)
+            _ = utils.load_checkpoint(self.net_g_path, self.net_g_ms, None)
+        if "half" in self.net_g_path and torch.cuda.is_available():
+            _ = self.net_g_ms.half().eval().to(self.dev)
+        else:
+            _ = self.net_g_ms.eval().to(self.dev)
+
+    def get_units(self, source, sr):
+
+        source = source.unsqueeze(0).to(self.dev)
+        with torch.inference_mode():
+            start = time.time()
+            units = self.hubert_soft.units(source)
+            use_time = time.time() - start
+            print("hubert use time:{}".format(use_time))
+            return units
+
+    def get_unit_pitch(self, in_path, tran):
+        source, sr = torchaudio.load(in_path)
+        source = torchaudio.functional.resample(source, sr, 16000)
+        if len(source.shape) == 2 and source.shape[1] >= 2:
+            source = torch.mean(source, dim=0).unsqueeze(0)
+        soft = self.get_units(source, sr).squeeze(0).cpu().numpy()
+        f0_coarse, f0 = get_f0(source.cpu().numpy()[0], soft.shape[0] * 2, tran)
+        return soft, f0
+
+    def infer(self, speaker_id, tran, raw_path, dev=False):
+        if type(speaker_id) == str:
+            speaker_id = self.spk2id[speaker_id]
+        sid = torch.LongTensor([int(speaker_id)]).to(self.dev).unsqueeze(0)
+        soft, pitch = self.get_unit_pitch(raw_path, tran)
+        f0 = torch.FloatTensor(clean_pitch(pitch)).unsqueeze(0).to(self.dev)
+        if "half" in self.net_g_path and torch.cuda.is_available():
+            stn_tst = torch.HalfTensor(soft)
+        else:
+            stn_tst = torch.FloatTensor(soft)
+        with torch.no_grad():
+            x_tst = stn_tst.unsqueeze(0).to(self.dev)
+            start = time.time()
+            x_tst = torch.repeat_interleave(x_tst, repeats=2, dim=1).transpose(1, 2)
+            audio = self.net_g_ms.infer(x_tst, f0=f0, g=sid)[0, 0].data.float()
+            use_time = time.time() - start
+            print("vits use time:{}".format(use_time))
+        return audio, audio.shape[-1]
+
+
+# class SvcONNXInferModel(object):
+#     def __init__(self, hubert_onnx, vits_onnx, config_path):
+#         self.config_path = config_path
+#         self.vits_onnx = vits_onnx
+#         self.hubert_onnx = hubert_onnx
+#         self.hubert_onnx_session = onnxruntime.InferenceSession(hubert_onnx, providers=['CUDAExecutionProvider', ])
+#         self.inspect_onnx(self.hubert_onnx_session)
+#         self.vits_onnx_session = onnxruntime.InferenceSession(vits_onnx, providers=['CUDAExecutionProvider', ])
+#         self.inspect_onnx(self.vits_onnx_session)
+#         self.hps_ms = utils.get_hparams_from_file(self.config_path)
+#         self.target_sample = self.hps_ms.data.sampling_rate
+#         self.feature_input = FeatureInput(self.hps_ms.data.sampling_rate, self.hps_ms.data.hop_length)
+#
+#     @staticmethod
+#     def inspect_onnx(session):
+#         for i in session.get_inputs():
+#             print("name:{}\tshape:{}\tdtype:{}".format(i.name, i.shape, i.type))
+#         for i in session.get_outputs():
+#             print("name:{}\tshape:{}\tdtype:{}".format(i.name, i.shape, i.type))
+#
+#     def infer(self, speaker_id, tran, raw_path):
+#         sid = np.array([int(speaker_id)], dtype=np.int64)
+#         soft, pitch = self.get_unit_pitch(raw_path, tran)
+#         pitch = np.expand_dims(pitch, axis=0).astype(np.int64)
+#         stn_tst = soft
+#         x_tst = np.expand_dims(stn_tst, axis=0)
+#         x_tst_lengths = np.array([stn_tst.shape[0]], dtype=np.int64)
+#         # run inference with ONNX Runtime
+#         start = time.time()
+#         audio = self.vits_onnx_session.run(output_names=["audio"],
+#                                            input_feed={
+#                                                "hidden_unit": x_tst,
+#                                                "lengths": x_tst_lengths,
+#                                                "pitch": pitch,
+#                                                "sid": sid,
+#                                            })[0][0, 0]
+#         use_time = time.time() - start
+#         print("vits_onnx_session.run time:{}".format(use_time))
+#         audio = torch.from_numpy(audio)
+#         return audio, audio.shape[-1]
+#
+#     def get_units(self, source, sr):
+#         source = torchaudio.functional.resample(source, sr, 16000)
+#         if len(source.shape) == 2 and source.shape[1] >= 2:
+#             source = torch.mean(source, dim=0).unsqueeze(0)
+#         source = source.unsqueeze(0)
+#         # run inference with ONNX Runtime
+#         start = time.time()
+#         units = self.hubert_onnx_session.run(output_names=["embed"],
+#                                              input_feed={"source": source.numpy()})[0]
+#         use_time = time.time() - start
+#         print("hubert_onnx_session.run time:{}".format(use_time))
+#         return units
+#
+#     def transcribe(self, source, sr, length, transform):
+#         feature_pit = self.feature_input.compute_f0(source, sr)
+#         feature_pit = feature_pit * 2 ** (transform / 12)
+#         feature_pit = resize2d_f0(feature_pit, length)
+#         coarse_pit = self.feature_input.coarse_f0(feature_pit)
+#         return coarse_pit
+#
+#     def get_unit_pitch(self, in_path, tran):
+#         source, sr = torchaudio.load(in_path)
+#         soft = self.get_units(source, sr).squeeze(0)
+#         input_pitch = self.transcribe(source.numpy()[0], sr, soft.shape[0], tran)
+#         return soft, input_pitch
+
+
+class RealTimeVC:
+    def __init__(self):
+        self.last_chunk = None
+        self.last_o = None
+        self.chunk_len = 16000  # chunk length
+        self.pre_len = 3840  # cross-fade length, a multiple of 640
+
+    """Both input and output are 1-D numpy waveform arrays"""
+
+    def process(self, svc_model, speaker_id, f_pitch_change, input_wav_path):
+        audio, sr = torchaudio.load(input_wav_path)
+        audio = audio.cpu().numpy()[0]
+        temp_wav = io.BytesIO()
+        if self.last_chunk is None:
+            input_wav_path.seek(0)
+            audio, sr = svc_model.infer(speaker_id, f_pitch_change, input_wav_path)
+            audio = audio.cpu().numpy()
+            self.last_chunk = audio[-self.pre_len:]
+            self.last_o = audio
+            return audio[-self.chunk_len:]
+        else:
+            audio = np.concatenate([self.last_chunk, audio])
+            soundfile.write(temp_wav, audio, sr, format="wav")
+            temp_wav.seek(0)
+            audio, sr = svc_model.infer(speaker_id, f_pitch_change, temp_wav)
+            audio = audio.cpu().numpy()
+            ret = maad.util.crossfade(self.last_o, audio, self.pre_len)
+            self.last_chunk = audio[-self.pre_len:]
+            self.last_o = audio
+            return ret[self.chunk_len:2 * self.chunk_len]
diff --git a/AutoCoverTool/ref/so_vits_svc/inference/infer_tool_grad.py b/AutoCoverTool/ref/so_vits_svc/inference/infer_tool_grad.py
new file mode 100644
index 0000000..39359a8
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/inference/infer_tool_grad.py
@@ -0,0 +1,160 @@
+import hashlib
+import json
+import logging
+import os
+import time
+from pathlib import Path
+import io
+import librosa
+import maad
+import numpy as np
+from inference import slicer
+import parselmouth
+import soundfile
+import torch
+import torchaudio
+
+from hubert import hubert_model
+import utils
+from models import SynthesizerTrn
+logging.getLogger('numba').setLevel(logging.WARNING)
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
+
+def resize2d_f0(x, target_len):
+    source = np.array(x)
+    source[source < 0.001] = np.nan
+    target = np.interp(np.arange(0, len(source) * target_len, len(source)) / target_len, np.arange(0, len(source)),
+                       source)
+    res = np.nan_to_num(target)
+    return res
+
+def get_f0(x, p_len,f0_up_key=0):
+
+    time_step = 160 / 16000 * 1000
+    f0_min = 50
+    f0_max = 1100
+    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+    f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+
+    f0 = parselmouth.Sound(x, 16000).to_pitch_ac(
+        time_step=time_step / 1000, voicing_threshold=0.6,
+        pitch_floor=f0_min, pitch_ceiling=f0_max).selected_array['frequency']
+
+    pad_size=(p_len - len(f0) + 1) // 2
+    if(pad_size>0 or p_len - len(f0) - pad_size>0):
+        f0 = np.pad(f0,[[pad_size,p_len - len(f0) - pad_size]], mode='constant')
+
+    f0 *= pow(2, f0_up_key / 12)
+    f0_mel = 1127 * np.log(1 + f0 / 700)
+    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (f0_mel_max - f0_mel_min) + 1
+    f0_mel[f0_mel <= 1] = 1
+    f0_mel[f0_mel > 255] = 255
+    f0_coarse = np.rint(f0_mel).astype(int)
+    return f0_coarse, f0
+
+def clean_pitch(input_pitch):
+    num_nan = np.sum(input_pitch == 1)
+    if num_nan / len(input_pitch) > 0.9:
+        input_pitch[input_pitch != 1] = 1
+    return input_pitch
+
+
+def plt_pitch(input_pitch):
+    input_pitch = input_pitch.astype(float)
+    input_pitch[input_pitch == 1] = np.nan
+    return input_pitch
+
+
+def f0_to_pitch(ff):
+    f0_pitch = 69 + 12 * np.log2(ff / 440)
+    return 
f0_pitch + + +def fill_a_to_b(a, b): + if len(a) < len(b): + for _ in range(0, len(b) - len(a)): + a.append(a[0]) + + +def mkdir(paths: list): + for path in paths: + if not os.path.exists(path): + os.mkdir(path) + + +class VitsSvc(object): + def __init__(self): + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + self.SVCVITS = None + self.hps = None + self.speakers = None + self.hubert_soft = hubert_model.hubert_soft("hubert/model.pt") + + def set_device(self, device): + self.device = torch.device(device) + self.hubert_soft.to(self.device) + if self.SVCVITS != None: + self.SVCVITS.to(self.device) + + def loadCheckpoint(self, path): + self.hps = utils.get_hparams_from_file(f"checkpoints/{path}/config.json") + self.SVCVITS = SynthesizerTrn( + self.hps.data.filter_length // 2 + 1, + self.hps.train.segment_size // self.hps.data.hop_length, + **self.hps.model) + _ = utils.load_checkpoint(f"checkpoints/{path}/model.pth", self.SVCVITS, None) + _ = self.SVCVITS.eval().to(self.device) + self.speakers = self.hps.spk + + def get_units(self, source, sr): + source = source.unsqueeze(0).to(self.device) + with torch.inference_mode(): + units = self.hubert_soft.units(source) + return units + + + def get_unit_pitch(self, in_path, tran): + source, sr = torchaudio.load(in_path) + source = torchaudio.functional.resample(source, sr, 16000) + if len(source.shape) == 2 and source.shape[1] >= 2: + source = torch.mean(source, dim=0).unsqueeze(0) + soft = self.get_units(source, sr).squeeze(0).cpu().numpy() + f0_coarse, f0 = get_f0(source.cpu().numpy()[0], soft.shape[0]*2, tran) + return soft, f0 + + def infer(self, speaker_id, tran, raw_path): + speaker_id = self.speakers[speaker_id] + sid = torch.LongTensor([int(speaker_id)]).to(self.device).unsqueeze(0) + soft, pitch = self.get_unit_pitch(raw_path, tran) + f0 = torch.FloatTensor(clean_pitch(pitch)).unsqueeze(0).to(self.device) + stn_tst = torch.FloatTensor(soft) + with torch.no_grad(): + x_tst = stn_tst.unsqueeze(0).to(self.device) + x_tst = torch.repeat_interleave(x_tst, repeats=2, dim=1).transpose(1, 2) + audio = self.SVCVITS.infer(x_tst, f0=f0, g=sid)[0,0].data.float() + return audio, audio.shape[-1] + + def inference(self,srcaudio,chara,tran,slice_db): + sampling_rate, audio = srcaudio + audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32) + if len(audio.shape) > 1: + audio = librosa.to_mono(audio.transpose(1, 0)) + if sampling_rate != 16000: + audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000) + soundfile.write("tmpwav.wav", audio, 16000, format="wav") + chunks = slicer.cut("tmpwav.wav", db_thresh=slice_db) + audio_data, audio_sr = slicer.chunks2audio("tmpwav.wav", chunks) + audio = [] + for (slice_tag, data) in audio_data: + length = int(np.ceil(len(data) / audio_sr * self.hps.data.sampling_rate)) + raw_path = io.BytesIO() + soundfile.write(raw_path, data, audio_sr, format="wav") + raw_path.seek(0) + if slice_tag: + _audio = np.zeros(length) + else: + out_audio, out_sr = self.infer(chara, tran, raw_path) + _audio = out_audio.cpu().numpy() + audio.extend(list(_audio)) + audio = (np.array(audio) * 32768.0).astype('int16') + return (self.hps.data.sampling_rate,audio) diff --git a/AutoCoverTool/ref/so_vits_svc/inference/slicer.py b/AutoCoverTool/ref/so_vits_svc/inference/slicer.py new file mode 100644 index 0000000..b05840b --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/inference/slicer.py @@ -0,0 +1,142 @@ +import librosa +import torch +import torchaudio + + +class Slicer: + def __init__(self, + sr: 
int, + threshold: float = -40., + min_length: int = 5000, + min_interval: int = 300, + hop_size: int = 20, + max_sil_kept: int = 5000): + if not min_length >= min_interval >= hop_size: + raise ValueError('The following condition must be satisfied: min_length >= min_interval >= hop_size') + if not max_sil_kept >= hop_size: + raise ValueError('The following condition must be satisfied: max_sil_kept >= hop_size') + min_interval = sr * min_interval / 1000 + self.threshold = 10 ** (threshold / 20.) + self.hop_size = round(sr * hop_size / 1000) + self.win_size = min(round(min_interval), 4 * self.hop_size) + self.min_length = round(sr * min_length / 1000 / self.hop_size) + self.min_interval = round(min_interval / self.hop_size) + self.max_sil_kept = round(sr * max_sil_kept / 1000 / self.hop_size) + + def _apply_slice(self, waveform, begin, end): + if len(waveform.shape) > 1: + return waveform[:, begin * self.hop_size: min(waveform.shape[1], end * self.hop_size)] + else: + return waveform[begin * self.hop_size: min(waveform.shape[0], end * self.hop_size)] + + # @timeit + def slice(self, waveform): + if len(waveform.shape) > 1: + samples = librosa.to_mono(waveform) + else: + samples = waveform + if samples.shape[0] <= self.min_length: + return {"0": {"slice": False, "split_time": f"0,{len(waveform)}"}} + rms_list = librosa.feature.rms(y=samples, frame_length=self.win_size, hop_length=self.hop_size).squeeze(0) + sil_tags = [] + silence_start = None + clip_start = 0 + for i, rms in enumerate(rms_list): + # Keep looping while frame is silent. + if rms < self.threshold: + # Record start of silent frames. + if silence_start is None: + silence_start = i + continue + # Keep looping while frame is not silent and silence start has not been recorded. + if silence_start is None: + continue + # Clear recorded silence start if interval is not enough or clip is too short + is_leading_silence = silence_start == 0 and i > self.max_sil_kept + need_slice_middle = i - silence_start >= self.min_interval and i - clip_start >= self.min_length + if not is_leading_silence and not need_slice_middle: + silence_start = None + continue + # Need slicing. Record the range of silent frames to be removed. + if i - silence_start <= self.max_sil_kept: + pos = rms_list[silence_start: i + 1].argmin() + silence_start + if silence_start == 0: + sil_tags.append((0, pos)) + else: + sil_tags.append((pos, pos)) + clip_start = pos + elif i - silence_start <= self.max_sil_kept * 2: + pos = rms_list[i - self.max_sil_kept: silence_start + self.max_sil_kept + 1].argmin() + pos += i - self.max_sil_kept + pos_l = rms_list[silence_start: silence_start + self.max_sil_kept + 1].argmin() + silence_start + pos_r = rms_list[i - self.max_sil_kept: i + 1].argmin() + i - self.max_sil_kept + if silence_start == 0: + sil_tags.append((0, pos_r)) + clip_start = pos_r + else: + sil_tags.append((min(pos_l, pos), max(pos_r, pos))) + clip_start = max(pos_r, pos) + else: + pos_l = rms_list[silence_start: silence_start + self.max_sil_kept + 1].argmin() + silence_start + pos_r = rms_list[i - self.max_sil_kept: i + 1].argmin() + i - self.max_sil_kept + if silence_start == 0: + sil_tags.append((0, pos_r)) + else: + sil_tags.append((pos_l, pos_r)) + clip_start = pos_r + silence_start = None + # Deal with trailing silence. 
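+        # (If silence_start is still open here, the clip ended while silent: keep at
+        # most max_sil_kept frames of that tail and mark the rest for removal.)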
+        total_frames = rms_list.shape[0]
+        if silence_start is not None and total_frames - silence_start >= self.min_interval:
+            silence_end = min(total_frames, silence_start + self.max_sil_kept)
+            pos = rms_list[silence_start: silence_end + 1].argmin() + silence_start
+            sil_tags.append((pos, total_frames + 1))
+        # Apply and return slices.
+        if len(sil_tags) == 0:
+            return {"0": {"slice": False, "split_time": f"0,{len(waveform)}"}}
+        else:
+            chunks = []
+            # the first silence does not start at 0, so prepend the leading voiced chunk
+            if sil_tags[0][0]:
+                chunks.append(
+                    {"slice": False, "split_time": f"0,{min(waveform.shape[0], sil_tags[0][0] * self.hop_size)}"})
+            for i in range(0, len(sil_tags)):
+                # mark the voiced chunks (skipping the first one)
+                if i:
+                    chunks.append({"slice": False,
+                                   "split_time": f"{sil_tags[i - 1][1] * self.hop_size},{min(waveform.shape[0], sil_tags[i][0] * self.hop_size)}"})
+                # mark every silent chunk
+                chunks.append({"slice": True,
+                               "split_time": f"{sil_tags[i][0] * self.hop_size},{min(waveform.shape[0], sil_tags[i][1] * self.hop_size)}"})
+            # the last silence does not reach the end, so append the trailing chunk
+            if sil_tags[-1][1] * self.hop_size < len(waveform):
+                chunks.append({"slice": False, "split_time": f"{sil_tags[-1][1] * self.hop_size},{len(waveform)}"})
+            chunk_dict = {}
+            for i in range(len(chunks)):
+                chunk_dict[str(i)] = chunks[i]
+            return chunk_dict
+
+
+def cut(audio_path, db_thresh=-30, min_len=5000):
+    audio, sr = librosa.load(audio_path, sr=None)
+    slicer = Slicer(
+        sr=sr,
+        threshold=db_thresh,
+        min_length=min_len
+    )
+    chunks = slicer.slice(audio)
+    return chunks
+
+
+def chunks2audio(audio_path, chunks):
+    chunks = dict(chunks)
+    audio, sr = torchaudio.load(audio_path)
+    if len(audio.shape) == 2 and audio.shape[1] >= 2:
+        audio = torch.mean(audio, dim=0).unsqueeze(0)
+    audio = audio.cpu().numpy()[0]
+    result = []
+    for k, v in chunks.items():
+        tag = v["split_time"].split(",")
+        if tag[0] != tag[1]:
+            result.append((v["slice"], audio[int(tag[0]):int(tag[1])]))
+    return result, sr
diff --git a/AutoCoverTool/ref/so_vits_svc/inference_main.py b/AutoCoverTool/ref/so_vits_svc/inference_main.py
new file mode 100644
index 0000000..20a9439
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/inference_main.py
@@ -0,0 +1,83 @@
+import io
+import os
+import sys
+import logging
+import time
+from pathlib import Path
+
+import librosa
+import numpy as np
+import soundfile
+
+from inference import infer_tool
+from inference import slicer
+from inference.infer_tool import Svc
+
+logging.getLogger('numba').setLevel(logging.WARNING)
+chunks_dict = infer_tool.read_temp("ref/so-vits-svc/inference/chunks_temp.json")
+
+
+def inf(model_path, config_path, raw_audio_path, dst_path, dev):
+    # model_path = "logs/32k/G_174000-Copy1.pth"
+    # config_path = "configs/config.json"
+    svc_model = Svc(model_path, config_path)
+    out_dir = os.path.dirname(dst_path)
+    print(dst_path)
+    os.makedirs(out_dir, exist_ok=True)
+    # multiple wav files are supported; put them in the raw folder
+    tran = 0
+    spk_list = ['speaker0']  # speakers to synthesize in the same run
+    slice_db = -40  # default -40; -30 for noisy audio, -50 to keep breaths in dry vocals
+    wav_format = 'wav'  # audio output format
+
+    # infer_tool.fill_a_to_b(trans, clean_names)
+    # for clean_name, tran in zip(clean_names, trans):
+    #     raw_audio_path = f"raw/{clean_name}"
+    #     if "." not in raw_audio_path:
+    #         raw_audio_path += ".wav"
+    infer_tool.format_wav(raw_audio_path)
+    wav_path = Path(raw_audio_path).with_suffix('.wav')
+    chunks = slicer.cut(wav_path, db_thresh=slice_db)
+    audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks)
+
+    for spk in spk_list:
+        audio = []
+        for (slice_tag, data) in audio_data:
+            print(f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')
+            length = int(np.ceil(len(data) / audio_sr * svc_model.target_sample))
+            raw_path = io.BytesIO()
+            soundfile.write(raw_path, data, audio_sr, format="wav")
+            raw_path.seek(0)
+            if slice_tag:
+                print('jump empty segment')
+                _audio = np.zeros(length)
+            else:
+                out_audio, out_sr = svc_model.infer(spk, tran, raw_path, dev == "test")
+                _audio = out_audio.cpu().numpy()
+            audio.extend(list(_audio))
+        soundfile.write(dst_path, audio, svc_model.target_sample, format=wav_format)
+
+
+if __name__ == '__main__':
+    g_model = sys.argv[1]  # model path
+    g_config = sys.argv[2]  # config file path
+    g_audio_path = sys.argv[3]  # input audio file path (wav)
+    g_dst_path = sys.argv[4]  # output audio file path
+    if os.path.exists(g_dst_path):
+        print("{} success ...".format(g_dst_path))
+        exit(0)
+
+    g_dev = "prod"
+    if len(sys.argv) > 5:
+        g_dev = sys.argv[5]
+
+    g_aa, g_sr = librosa.load(g_audio_path)
+    d = librosa.get_duration(y=g_aa, sr=g_sr)
+    # if g_dev != "test":
+    #     if d > 250:
+    #         print("{} too long".format(g_audio_path))
+    #         exit(0)
+
+    st = time.time()
+    inf(g_model, g_config, g_audio_path, g_dst_path, g_dev)
+    print("{}, inference sp={}".format(g_audio_path, time.time() - st))
diff --git a/AutoCoverTool/ref/so_vits_svc/logs/32k/put_pretrained_model_here b/AutoCoverTool/ref/so_vits_svc/logs/32k/put_pretrained_model_here
new file mode 100644
index 0000000..e69de29
diff --git a/AutoCoverTool/ref/so_vits_svc/losses.py b/AutoCoverTool/ref/so_vits_svc/losses.py
new file mode 100644
index 0000000..41f9be6
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/losses.py
@@ -0,0 +1,61 @@
+import torch
+from torch.nn import functional as F
+
+import commons
+
+
+def feature_loss(fmap_r, fmap_g):
+    loss = 0
+    for dr, dg in zip(fmap_r, fmap_g):
+        for rl, gl in zip(dr, dg):
+            rl = rl.float().detach()
+            gl = gl.float()
+            loss += torch.mean(torch.abs(rl - gl))
+
+    return loss * 2
+
+
+def discriminator_loss(disc_real_outputs, disc_generated_outputs):
+    loss = 0
+    r_losses = []
+    g_losses = []
+    for dr, dg in zip(disc_real_outputs, disc_generated_outputs):
+        dr = dr.float()
+        dg = dg.float()
+        r_loss = torch.mean((1-dr)**2)
+        g_loss = torch.mean(dg**2)
+        loss += (r_loss + g_loss)
+        r_losses.append(r_loss.item())
+        g_losses.append(g_loss.item())
+
+    return loss, r_losses, g_losses
+
+
+def generator_loss(disc_outputs):
+    loss = 0
+    gen_losses = []
+    for dg in disc_outputs:
+        dg = dg.float()
+        l = torch.mean((1-dg)**2)
+        gen_losses.append(l)
+        loss += l
+
+    return loss, gen_losses
+
+
+def kl_loss(z_p, logs_q, m_p, logs_p, z_mask):
+    """
+    z_p, logs_q: [b, h, t_t]
+    m_p, logs_p: [b, h, t_t]
+    """
+    z_p = z_p.float()
+    logs_q = logs_q.float()
+    m_p = m_p.float()
+    logs_p = logs_p.float()
+    z_mask = z_mask.float()
+    #print(logs_p)
+    kl = logs_p - logs_q - 0.5
+    kl += 0.5 * ((z_p - m_p)**2) * torch.exp(-2. 
* logs_p) + kl = torch.sum(kl * z_mask) + l = kl / torch.sum(z_mask) + return l diff --git a/AutoCoverTool/ref/so_vits_svc/mel_processing.py b/AutoCoverTool/ref/so_vits_svc/mel_processing.py new file mode 100644 index 0000000..99c5b35 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/mel_processing.py @@ -0,0 +1,112 @@ +import math +import os +import random +import torch +from torch import nn +import torch.nn.functional as F +import torch.utils.data +import numpy as np +import librosa +import librosa.util as librosa_util +from librosa.util import normalize, pad_center, tiny +from scipy.signal import get_window +from scipy.io.wavfile import read +from librosa.filters import mel as librosa_mel_fn + +MAX_WAV_VALUE = 32768.0 + + +def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): + """ + PARAMS + ------ + C: compression factor + """ + return torch.log(torch.clamp(x, min=clip_val) * C) + + +def dynamic_range_decompression_torch(x, C=1): + """ + PARAMS + ------ + C: compression factor used to compress + """ + return torch.exp(x) / C + + +def spectral_normalize_torch(magnitudes): + output = dynamic_range_compression_torch(magnitudes) + return output + + +def spectral_de_normalize_torch(magnitudes): + output = dynamic_range_decompression_torch(magnitudes) + return output + + +mel_basis = {} +hann_window = {} + + +def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False): + if torch.min(y) < -1.: + print('min value is ', torch.min(y)) + if torch.max(y) > 1.: + print('max value is ', torch.max(y)) + + global hann_window + dtype_device = str(y.dtype) + '_' + str(y.device) + wnsize_dtype_device = str(win_size) + '_' + dtype_device + if wnsize_dtype_device not in hann_window: + hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) + + y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect') + y = y.squeeze(1) + + spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device], + center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False) + + spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) + return spec + + +def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax): + global mel_basis + dtype_device = str(spec.dtype) + '_' + str(spec.device) + fmax_dtype_device = str(fmax) + '_' + dtype_device + if fmax_dtype_device not in mel_basis: + mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) + mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) + spec = torch.matmul(mel_basis[fmax_dtype_device], spec) + spec = spectral_normalize_torch(spec) + return spec + + +def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False): + if torch.min(y) < -1.: + print('min value is ', torch.min(y)) + if torch.max(y) > 1.: + print('max value is ', torch.max(y)) + + global mel_basis, hann_window + dtype_device = str(y.dtype) + '_' + str(y.device) + fmax_dtype_device = str(fmax) + '_' + dtype_device + wnsize_dtype_device = str(win_size) + '_' + dtype_device + if fmax_dtype_device not in mel_basis: + mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) + mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device) + if wnsize_dtype_device not in hann_window: + hann_window[wnsize_dtype_device] = 
torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) + + y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect') + y = y.squeeze(1) + + spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device], + center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False) + + spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) + + spec = torch.matmul(mel_basis[fmax_dtype_device], spec) + spec = spectral_normalize_torch(spec) + + return spec diff --git a/AutoCoverTool/ref/so_vits_svc/model_onnx.py b/AutoCoverTool/ref/so_vits_svc/model_onnx.py new file mode 100644 index 0000000..eaae733 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/model_onnx.py @@ -0,0 +1,328 @@ +import copy +import math +import torch +from torch import nn +from torch.nn import functional as F + +import attentions +import commons +import modules + +from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d +from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm +from commons import init_weights, get_padding +from vdecoder.hifigan.models import Generator +from utils import f0_to_coarse + +class ResidualCouplingBlock(nn.Module): + def __init__(self, + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + n_flows=4, + gin_channels=0): + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.n_flows = n_flows + self.gin_channels = gin_channels + + self.flows = nn.ModuleList() + for i in range(n_flows): + self.flows.append(modules.ResidualCouplingLayer(channels, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels, mean_only=True)) + self.flows.append(modules.Flip()) + + def forward(self, x, x_mask, g=None, reverse=False): + if not reverse: + for flow in self.flows: + x, _ = flow(x, x_mask, g=g, reverse=reverse) + else: + for flow in reversed(self.flows): + x = flow(x, x_mask, g=g, reverse=reverse) + return x + + +class Encoder(nn.Module): + def __init__(self, + in_channels, + out_channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + + self.pre = nn.Conv1d(in_channels, hidden_channels, 1) + self.enc = modules.WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + + def forward(self, x, x_lengths, g=None): + # print(x.shape,x_lengths.shape) + x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + x = self.pre(x) * x_mask + x = self.enc(x, x_mask, g=g) + stats = self.proj(x) * x_mask + m, logs = torch.split(stats, self.out_channels, dim=1) + z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask + return z, m, logs, x_mask + + +class TextEncoder(nn.Module): + def __init__(self, + in_channels, + out_channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0, + filter_channels=None, + n_heads=None, + p_dropout=None): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + 
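# note: constructor hyperparameters are kept as plain attributes; the 256-entry
+        # f0_emb defined below matches the coarse-f0 quantization (values 1-255) used
+        # by utils.f0_to_coarse elsewhere in this repo (descriptive comment only)
+        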
self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + self.pre = nn.Conv1d(in_channels, hidden_channels, 1) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + self.f0_emb = nn.Embedding(256, hidden_channels) + + self.enc_ = attentions.Encoder( + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout) + + def forward(self, x, x_lengths, f0=None): + x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + x = self.pre(x) * x_mask + x = x + self.f0_emb(f0.long()).transpose(1,2) + x = self.enc_(x * x_mask, x_mask) + stats = self.proj(x) * x_mask + m, logs = torch.split(stats, self.out_channels, dim=1) + z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask + + return z, m, logs, x_mask + + + +class DiscriminatorP(torch.nn.Module): + def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): + super(DiscriminatorP, self).__init__() + self.period = period + self.use_spectral_norm = use_spectral_norm + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(get_padding(kernel_size, 1), 0))), + ]) + self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + + def forward(self, x): + fmap = [] + + # 1d to 2d + b, c, t = x.shape + if t % self.period != 0: # pad first + n_pad = self.period - (t % self.period) + x = F.pad(x, (0, n_pad), "reflect") + t = t + n_pad + x = x.view(b, c, t // self.period, self.period) + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, modules.LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class DiscriminatorS(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(DiscriminatorS, self).__init__() + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv1d(1, 16, 15, 1, padding=7)), + norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)), + norm_f(Conv1d(64, 256, 41, 4, groups=16, padding=20)), + norm_f(Conv1d(256, 1024, 41, 4, groups=64, padding=20)), + norm_f(Conv1d(1024, 1024, 41, 4, groups=256, padding=20)), + norm_f(Conv1d(1024, 1024, 5, 1, padding=2)), + ]) + self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1)) + + def forward(self, x): + fmap = [] + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, modules.LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiPeriodDiscriminator(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(MultiPeriodDiscriminator, self).__init__() + periods = [2,3,5,7,11] + + discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)] + discs = discs + [DiscriminatorP(i, use_spectral_norm=use_spectral_norm) for i in periods] + self.discriminators = nn.ModuleList(discs) + + def forward(self, y, y_hat): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in 
enumerate(self.discriminators): + y_d_r, fmap_r = d(y) + y_d_g, fmap_g = d(y_hat) + y_d_rs.append(y_d_r) + y_d_gs.append(y_d_g) + fmap_rs.append(fmap_r) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +class SpeakerEncoder(torch.nn.Module): + def __init__(self, mel_n_channels=80, model_num_layers=3, model_hidden_size=256, model_embedding_size=256): + super(SpeakerEncoder, self).__init__() + self.lstm = nn.LSTM(mel_n_channels, model_hidden_size, model_num_layers, batch_first=True) + self.linear = nn.Linear(model_hidden_size, model_embedding_size) + self.relu = nn.ReLU() + + def forward(self, mels): + self.lstm.flatten_parameters() + _, (hidden, _) = self.lstm(mels) + embeds_raw = self.relu(self.linear(hidden[-1])) + return embeds_raw / torch.norm(embeds_raw, dim=1, keepdim=True) + + def compute_partial_slices(self, total_frames, partial_frames, partial_hop): + mel_slices = [] + for i in range(0, total_frames-partial_frames, partial_hop): + mel_range = torch.arange(i, i+partial_frames) + mel_slices.append(mel_range) + + return mel_slices + + def embed_utterance(self, mel, partial_frames=128, partial_hop=64): + mel_len = mel.size(1) + last_mel = mel[:,-partial_frames:] + + if mel_len > partial_frames: + mel_slices = self.compute_partial_slices(mel_len, partial_frames, partial_hop) + mels = list(mel[:,s] for s in mel_slices) + mels.append(last_mel) + mels = torch.stack(tuple(mels), 0).squeeze(1) + + with torch.no_grad(): + partial_embeds = self(mels) + embed = torch.mean(partial_embeds, axis=0).unsqueeze(0) + #embed = embed / torch.linalg.norm(embed, 2) + else: + with torch.no_grad(): + embed = self(last_mel) + + return embed + + +class SynthesizerTrn(nn.Module): + """ + Synthesizer for Training + """ + + def __init__(self, + spec_channels, + segment_size, + inter_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels, + ssl_dim, + n_speakers, + **kwargs): + + super().__init__() + self.spec_channels = spec_channels + self.inter_channels = inter_channels + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.resblock = resblock + self.resblock_kernel_sizes = resblock_kernel_sizes + self.resblock_dilation_sizes = resblock_dilation_sizes + self.upsample_rates = upsample_rates + self.upsample_initial_channel = upsample_initial_channel + self.upsample_kernel_sizes = upsample_kernel_sizes + self.segment_size = segment_size + self.gin_channels = gin_channels + self.ssl_dim = ssl_dim + self.emb_g = nn.Embedding(n_speakers, gin_channels) + + self.enc_p_ = TextEncoder(ssl_dim, inter_channels, hidden_channels, 5, 1, 16,0, filter_channels, n_heads, p_dropout) + hps = { + "sampling_rate": 32000, + "inter_channels": 192, + "resblock": "1", + "resblock_kernel_sizes": [3, 7, 11], + "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]], + "upsample_rates": [10, 8, 2, 2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16, 16, 4, 4], + "gin_channels": 256, + } + self.dec = Generator(h=hps) + self.enc_q = Encoder(spec_channels, inter_channels, hidden_channels, 5, 1, 16, gin_channels=gin_channels) + self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels) + + def forward(self, c, 
c_lengths, f0, g=None): + g = self.emb_g(g.unsqueeze(0)).transpose(1,2) + z_p, m_p, logs_p, c_mask = self.enc_p_(c.transpose(1,2), c_lengths, f0=f0_to_coarse(f0)) + z = self.flow(z_p, c_mask, g=g, reverse=True) + o = self.dec(z * c_mask, g=g, f0=f0.float()) + return o + diff --git a/AutoCoverTool/ref/so_vits_svc/model_onnx_48k.py b/AutoCoverTool/ref/so_vits_svc/model_onnx_48k.py new file mode 100644 index 0000000..6d9955b --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/model_onnx_48k.py @@ -0,0 +1,328 @@ +import copy +import math +import torch +from torch import nn +from torch.nn import functional as F + +import attentions +import commons +import modules + +from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d +from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm +from commons import init_weights, get_padding +from vdecoder.hifigan.models import Generator +from utils import f0_to_coarse + +class ResidualCouplingBlock(nn.Module): + def __init__(self, + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + n_flows=4, + gin_channels=0): + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.n_flows = n_flows + self.gin_channels = gin_channels + + self.flows = nn.ModuleList() + for i in range(n_flows): + self.flows.append(modules.ResidualCouplingLayer(channels, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels, mean_only=True)) + self.flows.append(modules.Flip()) + + def forward(self, x, x_mask, g=None, reverse=False): + if not reverse: + for flow in self.flows: + x, _ = flow(x, x_mask, g=g, reverse=reverse) + else: + for flow in reversed(self.flows): + x = flow(x, x_mask, g=g, reverse=reverse) + return x + + +class Encoder(nn.Module): + def __init__(self, + in_channels, + out_channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + + self.pre = nn.Conv1d(in_channels, hidden_channels, 1) + self.enc = modules.WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + + def forward(self, x, x_lengths, g=None): + # print(x.shape,x_lengths.shape) + x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + x = self.pre(x) * x_mask + x = self.enc(x, x_mask, g=g) + stats = self.proj(x) * x_mask + m, logs = torch.split(stats, self.out_channels, dim=1) + z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask + return z, m, logs, x_mask + + +class TextEncoder(nn.Module): + def __init__(self, + in_channels, + out_channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0, + filter_channels=None, + n_heads=None, + p_dropout=None): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + self.pre = nn.Conv1d(in_channels, hidden_channels, 1) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + self.f0_emb = nn.Embedding(256, 
hidden_channels) + + self.enc_ = attentions.Encoder( + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout) + + def forward(self, x, x_lengths, f0=None): + x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + x = self.pre(x) * x_mask + x = x + self.f0_emb(f0.long()).transpose(1,2) + x = self.enc_(x * x_mask, x_mask) + stats = self.proj(x) * x_mask + m, logs = torch.split(stats, self.out_channels, dim=1) + z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask + + return z, m, logs, x_mask + + + +class DiscriminatorP(torch.nn.Module): + def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): + super(DiscriminatorP, self).__init__() + self.period = period + self.use_spectral_norm = use_spectral_norm + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(get_padding(kernel_size, 1), 0))), + ]) + self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + + def forward(self, x): + fmap = [] + + # 1d to 2d + b, c, t = x.shape + if t % self.period != 0: # pad first + n_pad = self.period - (t % self.period) + x = F.pad(x, (0, n_pad), "reflect") + t = t + n_pad + x = x.view(b, c, t // self.period, self.period) + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, modules.LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class DiscriminatorS(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(DiscriminatorS, self).__init__() + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv1d(1, 16, 15, 1, padding=7)), + norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)), + norm_f(Conv1d(64, 256, 41, 4, groups=16, padding=20)), + norm_f(Conv1d(256, 1024, 41, 4, groups=64, padding=20)), + norm_f(Conv1d(1024, 1024, 41, 4, groups=256, padding=20)), + norm_f(Conv1d(1024, 1024, 5, 1, padding=2)), + ]) + self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1)) + + def forward(self, x): + fmap = [] + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, modules.LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiPeriodDiscriminator(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(MultiPeriodDiscriminator, self).__init__() + periods = [2,3,5,7,11] + + discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)] + discs = discs + [DiscriminatorP(i, use_spectral_norm=use_spectral_norm) for i in periods] + self.discriminators = nn.ModuleList(discs) + + def forward(self, y, y_hat): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in enumerate(self.discriminators): + y_d_r, fmap_r = d(y) + y_d_g, fmap_g = d(y_hat) + y_d_rs.append(y_d_r) + y_d_gs.append(y_d_g) + fmap_rs.append(fmap_r) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +class SpeakerEncoder(torch.nn.Module): + def __init__(self, mel_n_channels=80, 
model_num_layers=3, model_hidden_size=256, model_embedding_size=256): + super(SpeakerEncoder, self).__init__() + self.lstm = nn.LSTM(mel_n_channels, model_hidden_size, model_num_layers, batch_first=True) + self.linear = nn.Linear(model_hidden_size, model_embedding_size) + self.relu = nn.ReLU() + + def forward(self, mels): + self.lstm.flatten_parameters() + _, (hidden, _) = self.lstm(mels) + embeds_raw = self.relu(self.linear(hidden[-1])) + return embeds_raw / torch.norm(embeds_raw, dim=1, keepdim=True) + + def compute_partial_slices(self, total_frames, partial_frames, partial_hop): + mel_slices = [] + for i in range(0, total_frames-partial_frames, partial_hop): + mel_range = torch.arange(i, i+partial_frames) + mel_slices.append(mel_range) + + return mel_slices + + def embed_utterance(self, mel, partial_frames=128, partial_hop=64): + mel_len = mel.size(1) + last_mel = mel[:,-partial_frames:] + + if mel_len > partial_frames: + mel_slices = self.compute_partial_slices(mel_len, partial_frames, partial_hop) + mels = list(mel[:,s] for s in mel_slices) + mels.append(last_mel) + mels = torch.stack(tuple(mels), 0).squeeze(1) + + with torch.no_grad(): + partial_embeds = self(mels) + embed = torch.mean(partial_embeds, axis=0).unsqueeze(0) + #embed = embed / torch.linalg.norm(embed, 2) + else: + with torch.no_grad(): + embed = self(last_mel) + + return embed + + +class SynthesizerTrn(nn.Module): + """ + Synthesizer for Training + """ + + def __init__(self, + spec_channels, + segment_size, + inter_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels, + ssl_dim, + n_speakers, + **kwargs): + + super().__init__() + self.spec_channels = spec_channels + self.inter_channels = inter_channels + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.resblock = resblock + self.resblock_kernel_sizes = resblock_kernel_sizes + self.resblock_dilation_sizes = resblock_dilation_sizes + self.upsample_rates = upsample_rates + self.upsample_initial_channel = upsample_initial_channel + self.upsample_kernel_sizes = upsample_kernel_sizes + self.segment_size = segment_size + self.gin_channels = gin_channels + self.ssl_dim = ssl_dim + self.emb_g = nn.Embedding(n_speakers, gin_channels) + + self.enc_p_ = TextEncoder(ssl_dim, inter_channels, hidden_channels, 5, 1, 16,0, filter_channels, n_heads, p_dropout) + hps = { + "sampling_rate": 48000, + "inter_channels": 192, + "resblock": "1", + "resblock_kernel_sizes": [3, 7, 11], + "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]], + "upsample_rates": [10, 8, 2, 2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16, 16, 4, 4], + "gin_channels": 256, + } + self.dec = Generator(h=hps) + self.enc_q = Encoder(spec_channels, inter_channels, hidden_channels, 5, 1, 16, gin_channels=gin_channels) + self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels) + + def forward(self, c, c_lengths, f0, g=None): + g = self.emb_g(g.unsqueeze(0)).transpose(1,2) + z_p, m_p, logs_p, c_mask = self.enc_p_(c.transpose(1,2), c_lengths, f0=f0_to_coarse(f0)) + z = self.flow(z_p, c_mask, g=g, reverse=True) + o = self.dec(z * c_mask, g=g, f0=f0.float()) + return o + diff --git 
a/AutoCoverTool/ref/so_vits_svc/models.py b/AutoCoverTool/ref/so_vits_svc/models.py new file mode 100644 index 0000000..bdbce84 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/models.py @@ -0,0 +1,351 @@ +import copy +import math +import torch +from torch import nn +from torch.nn import functional as F + +import attentions +import commons +import modules + +from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d +from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm +from commons import init_weights, get_padding +from vdecoder.hifigan.models import Generator +from utils import f0_to_coarse + +class ResidualCouplingBlock(nn.Module): + def __init__(self, + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + n_flows=4, + gin_channels=0): + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.n_flows = n_flows + self.gin_channels = gin_channels + + self.flows = nn.ModuleList() + for i in range(n_flows): + self.flows.append(modules.ResidualCouplingLayer(channels, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels, mean_only=True)) + self.flows.append(modules.Flip()) + + def forward(self, x, x_mask, g=None, reverse=False): + if not reverse: + for flow in self.flows: + x, _ = flow(x, x_mask, g=g, reverse=reverse) + else: + for flow in reversed(self.flows): + x = flow(x, x_mask, g=g, reverse=reverse) + return x + + +class Encoder(nn.Module): + def __init__(self, + in_channels, + out_channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + + self.pre = nn.Conv1d(in_channels, hidden_channels, 1) + self.enc = modules.WN(hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=gin_channels) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + + def forward(self, x, x_lengths, g=None): + # print(x.shape,x_lengths.shape) + x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + x = self.pre(x) * x_mask + x = self.enc(x, x_mask, g=g) + stats = self.proj(x) * x_mask + m, logs = torch.split(stats, self.out_channels, dim=1) + z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask + return z, m, logs, x_mask + + +class TextEncoder(nn.Module): + def __init__(self, + in_channels, + out_channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0, + filter_channels=None, + n_heads=None, + p_dropout=None): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + self.pre = nn.Conv1d(in_channels, hidden_channels, 1) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + self.f0_emb = nn.Embedding(256, hidden_channels) + + self.enc_ = attentions.Encoder( + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout) + + def forward(self, x, x_lengths, f0=None): + x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + x = self.pre(x) * x_mask + x = x + 
self.f0_emb(f0).transpose(1,2) + x = self.enc_(x * x_mask, x_mask) + stats = self.proj(x) * x_mask + m, logs = torch.split(stats, self.out_channels, dim=1) + z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask + + return z, m, logs, x_mask + + + +class DiscriminatorP(torch.nn.Module): + def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): + super(DiscriminatorP, self).__init__() + self.period = period + self.use_spectral_norm = use_spectral_norm + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), + norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(get_padding(kernel_size, 1), 0))), + ]) + self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + + def forward(self, x): + fmap = [] + + # 1d to 2d + b, c, t = x.shape + if t % self.period != 0: # pad first + n_pad = self.period - (t % self.period) + x = F.pad(x, (0, n_pad), "reflect") + t = t + n_pad + x = x.view(b, c, t // self.period, self.period) + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, modules.LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class DiscriminatorS(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(DiscriminatorS, self).__init__() + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv1d(1, 16, 15, 1, padding=7)), + norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)), + norm_f(Conv1d(64, 256, 41, 4, groups=16, padding=20)), + norm_f(Conv1d(256, 1024, 41, 4, groups=64, padding=20)), + norm_f(Conv1d(1024, 1024, 41, 4, groups=256, padding=20)), + norm_f(Conv1d(1024, 1024, 5, 1, padding=2)), + ]) + self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1)) + + def forward(self, x): + fmap = [] + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, modules.LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiPeriodDiscriminator(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(MultiPeriodDiscriminator, self).__init__() + periods = [2,3,5,7,11] + + discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)] + discs = discs + [DiscriminatorP(i, use_spectral_norm=use_spectral_norm) for i in periods] + self.discriminators = nn.ModuleList(discs) + + def forward(self, y, y_hat): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in enumerate(self.discriminators): + y_d_r, fmap_r = d(y) + y_d_g, fmap_g = d(y_hat) + y_d_rs.append(y_d_r) + y_d_gs.append(y_d_g) + fmap_rs.append(fmap_r) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +class SpeakerEncoder(torch.nn.Module): + def __init__(self, mel_n_channels=80, model_num_layers=3, model_hidden_size=256, model_embedding_size=256): + super(SpeakerEncoder, self).__init__() + self.lstm = nn.LSTM(mel_n_channels, model_hidden_size, model_num_layers, batch_first=True) + self.linear = nn.Linear(model_hidden_size, model_embedding_size) + self.relu = nn.ReLU() + + def forward(self, 
mels): + self.lstm.flatten_parameters() + _, (hidden, _) = self.lstm(mels) + embeds_raw = self.relu(self.linear(hidden[-1])) + return embeds_raw / torch.norm(embeds_raw, dim=1, keepdim=True) + + def compute_partial_slices(self, total_frames, partial_frames, partial_hop): + mel_slices = [] + for i in range(0, total_frames-partial_frames, partial_hop): + mel_range = torch.arange(i, i+partial_frames) + mel_slices.append(mel_range) + + return mel_slices + + def embed_utterance(self, mel, partial_frames=128, partial_hop=64): + mel_len = mel.size(1) + last_mel = mel[:,-partial_frames:] + + if mel_len > partial_frames: + mel_slices = self.compute_partial_slices(mel_len, partial_frames, partial_hop) + mels = list(mel[:,s] for s in mel_slices) + mels.append(last_mel) + mels = torch.stack(tuple(mels), 0).squeeze(1) + + with torch.no_grad(): + partial_embeds = self(mels) + embed = torch.mean(partial_embeds, axis=0).unsqueeze(0) + #embed = embed / torch.linalg.norm(embed, 2) + else: + with torch.no_grad(): + embed = self(last_mel) + + return embed + + +class SynthesizerTrn(nn.Module): + """ + Synthesizer for Training + """ + + def __init__(self, + spec_channels, + segment_size, + inter_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels, + ssl_dim, + n_speakers, + **kwargs): + + super().__init__() + self.spec_channels = spec_channels + self.inter_channels = inter_channels + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.resblock = resblock + self.resblock_kernel_sizes = resblock_kernel_sizes + self.resblock_dilation_sizes = resblock_dilation_sizes + self.upsample_rates = upsample_rates + self.upsample_initial_channel = upsample_initial_channel + self.upsample_kernel_sizes = upsample_kernel_sizes + self.segment_size = segment_size + self.gin_channels = gin_channels + self.ssl_dim = ssl_dim + self.emb_g = nn.Embedding(n_speakers, gin_channels) + + self.enc_p_ = TextEncoder(ssl_dim, inter_channels, hidden_channels, 5, 1, 16,0, filter_channels, n_heads, p_dropout) + hps = { + "sampling_rate": 32000, + "inter_channels": 192, + "resblock": "1", + "resblock_kernel_sizes": [3, 7, 11], + "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]], + "upsample_rates": [10, 8, 2, 2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16, 16, 4, 4], + "gin_channels": 256, + } + self.dec = Generator(h=hps) + self.enc_q = Encoder(spec_channels, inter_channels, hidden_channels, 5, 1, 16, gin_channels=gin_channels) + self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels) + + def forward(self, c, f0, spec, g=None, mel=None, c_lengths=None, spec_lengths=None): + if c_lengths == None: + c_lengths = (torch.ones(c.size(0)) * c.size(-1)).to(c.device) + if spec_lengths == None: + spec_lengths = (torch.ones(spec.size(0)) * spec.size(-1)).to(spec.device) + + g = self.emb_g(g).transpose(1,2) + + z_ptemp, m_p, logs_p, _ = self.enc_p_(c, c_lengths, f0=f0_to_coarse(f0)) + z, m_q, logs_q, spec_mask = self.enc_q(spec, spec_lengths, g=g) + + z_p = self.flow(z, spec_mask, g=g) + z_slice, pitch_slice, ids_slice = commons.rand_slice_segments_with_pitch(z, f0, spec_lengths, self.segment_size) + + # o = self.dec(z_slice, g=g) + o = 
self.dec(z_slice, g=g, f0=pitch_slice) + + return o, ids_slice, spec_mask, (z, z_p, m_p, logs_p, m_q, logs_q) + + def infer(self, c, f0, g=None, mel=None, c_lengths=None): + if c_lengths == None: + c_lengths = (torch.ones(c.size(0)) * c.size(-1)).to(c.device) + g = self.emb_g(g).transpose(1,2) + + z_p, m_p, logs_p, c_mask = self.enc_p_(c, c_lengths, f0=f0_to_coarse(f0)) + z = self.flow(z_p, c_mask, g=g, reverse=True) + + o = self.dec(z * c_mask, g=g, f0=f0) + + return o diff --git a/AutoCoverTool/ref/so_vits_svc/modules.py b/AutoCoverTool/ref/so_vits_svc/modules.py new file mode 100644 index 0000000..52ee14e --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/modules.py @@ -0,0 +1,342 @@ +import copy +import math +import numpy as np +import scipy +import torch +from torch import nn +from torch.nn import functional as F + +from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d +from torch.nn.utils import weight_norm, remove_weight_norm + +import commons +from commons import init_weights, get_padding + + +LRELU_SLOPE = 0.1 + + +class LayerNorm(nn.Module): + def __init__(self, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.gamma = nn.Parameter(torch.ones(channels)) + self.beta = nn.Parameter(torch.zeros(channels)) + + def forward(self, x): + x = x.transpose(1, -1) + x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) + return x.transpose(1, -1) + + +class ConvReluNorm(nn.Module): + def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_layers, p_dropout): + super().__init__() + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.n_layers = n_layers + self.p_dropout = p_dropout + assert n_layers > 1, "Number of layers should be larger than 0." 
+
+    self.conv_layers = nn.ModuleList()
+    self.norm_layers = nn.ModuleList()
+    self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size//2))
+    self.norm_layers.append(LayerNorm(hidden_channels))
+    self.relu_drop = nn.Sequential(
+        nn.ReLU(),
+        nn.Dropout(p_dropout))
+    for _ in range(n_layers-1):
+      self.conv_layers.append(nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size//2))
+      self.norm_layers.append(LayerNorm(hidden_channels))
+    self.proj = nn.Conv1d(hidden_channels, out_channels, 1)
+    self.proj.weight.data.zero_()
+    self.proj.bias.data.zero_()
+
+  def forward(self, x, x_mask):
+    x_org = x
+    for i in range(self.n_layers):
+      x = self.conv_layers[i](x * x_mask)
+      x = self.norm_layers[i](x)
+      x = self.relu_drop(x)
+    x = x_org + self.proj(x)
+    return x * x_mask
+
+
+class DDSConv(nn.Module):
+  """
+  Dilated and Depth-Separable Convolution
+  """
+  def __init__(self, channels, kernel_size, n_layers, p_dropout=0.):
+    super().__init__()
+    self.channels = channels
+    self.kernel_size = kernel_size
+    self.n_layers = n_layers
+    self.p_dropout = p_dropout
+
+    self.drop = nn.Dropout(p_dropout)
+    self.convs_sep = nn.ModuleList()
+    self.convs_1x1 = nn.ModuleList()
+    self.norms_1 = nn.ModuleList()
+    self.norms_2 = nn.ModuleList()
+    for i in range(n_layers):
+      dilation = kernel_size ** i
+      padding = (kernel_size * dilation - dilation) // 2
+      self.convs_sep.append(nn.Conv1d(channels, channels, kernel_size,
+          groups=channels, dilation=dilation, padding=padding
+      ))
+      self.convs_1x1.append(nn.Conv1d(channels, channels, 1))
+      self.norms_1.append(LayerNorm(channels))
+      self.norms_2.append(LayerNorm(channels))
+
+  def forward(self, x, x_mask, g=None):
+    if g is not None:
+      x = x + g
+    for i in range(self.n_layers):
+      y = self.convs_sep[i](x * x_mask)
+      y = self.norms_1[i](y)
+      y = F.gelu(y)
+      y = self.convs_1x1[i](y)
+      y = self.norms_2[i](y)
+      y = F.gelu(y)
+      y = self.drop(y)
+      x = x + y
+    return x * x_mask
+
+
+class WN(torch.nn.Module):
+  def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0, p_dropout=0):
+    super(WN, self).__init__()
+    assert(kernel_size % 2 == 1)
+    self.hidden_channels = hidden_channels
+    self.kernel_size = kernel_size
+    self.dilation_rate = dilation_rate
+    self.n_layers = n_layers
+    self.gin_channels = gin_channels
+    self.p_dropout = p_dropout
+
+    self.in_layers = torch.nn.ModuleList()
+    self.res_skip_layers = torch.nn.ModuleList()
+    self.drop = nn.Dropout(p_dropout)
+
+    if gin_channels != 0:
+      cond_layer = torch.nn.Conv1d(gin_channels, 2*hidden_channels*n_layers, 1)
+      self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name='weight')
+
+    for i in range(n_layers):
+      dilation = dilation_rate ** i
+      padding = int((kernel_size * dilation - dilation) / 2)
+      in_layer = torch.nn.Conv1d(hidden_channels, 2*hidden_channels, kernel_size,
+                                 dilation=dilation, padding=padding)
+      in_layer = torch.nn.utils.weight_norm(in_layer, name='weight')
+      self.in_layers.append(in_layer)
+
+      # last one is not necessary
+      if i < n_layers - 1:
+        res_skip_channels = 2 * hidden_channels
+      else:
+        res_skip_channels = hidden_channels
+
+      res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1)
+      res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name='weight')
+      self.res_skip_layers.append(res_skip_layer)
+
+  def forward(self, x, x_mask, g=None, **kwargs):
+    output = torch.zeros_like(x)
+    n_channels_tensor = torch.IntTensor([self.hidden_channels])
+
+    if g is not None:
+      g = self.cond_layer(g)
+ + for i in range(self.n_layers): + x_in = self.in_layers[i](x) + if g is not None: + cond_offset = i * 2 * self.hidden_channels + g_l = g[:,cond_offset:cond_offset+2*self.hidden_channels,:] + else: + g_l = torch.zeros_like(x_in) + + acts = commons.fused_add_tanh_sigmoid_multiply( + x_in, + g_l, + n_channels_tensor) + acts = self.drop(acts) + + res_skip_acts = self.res_skip_layers[i](acts) + if i < self.n_layers - 1: + res_acts = res_skip_acts[:,:self.hidden_channels,:] + x = (x + res_acts) * x_mask + output = output + res_skip_acts[:,self.hidden_channels:,:] + else: + output = output + res_skip_acts + return output * x_mask + + def remove_weight_norm(self): + if self.gin_channels != 0: + torch.nn.utils.remove_weight_norm(self.cond_layer) + for l in self.in_layers: + torch.nn.utils.remove_weight_norm(l) + for l in self.res_skip_layers: + torch.nn.utils.remove_weight_norm(l) + + +class ResBlock1(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)): + super(ResBlock1, self).__init__() + self.convs1 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]))) + ]) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))) + ]) + self.convs2.apply(init_weights) + + def forward(self, x, x_mask=None): + for c1, c2 in zip(self.convs1, self.convs2): + xt = F.leaky_relu(x, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c1(xt) + xt = F.leaky_relu(xt, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c2(xt) + x = xt + x + if x_mask is not None: + x = x * x_mask + return x + + def remove_weight_norm(self): + for l in self.convs1: + remove_weight_norm(l) + for l in self.convs2: + remove_weight_norm(l) + + +class ResBlock2(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3)): + super(ResBlock2, self).__init__() + self.convs = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))) + ]) + self.convs.apply(init_weights) + + def forward(self, x, x_mask=None): + for c in self.convs: + xt = F.leaky_relu(x, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c(xt) + x = xt + x + if x_mask is not None: + x = x * x_mask + return x + + def remove_weight_norm(self): + for l in self.convs: + remove_weight_norm(l) + + +class Log(nn.Module): + def forward(self, x, x_mask, reverse=False, **kwargs): + if not reverse: + y = torch.log(torch.clamp_min(x, 1e-5)) * x_mask + logdet = torch.sum(-y, [1, 2]) + return y, logdet + else: + x = torch.exp(x) * x_mask + return x + + +class Flip(nn.Module): + def forward(self, x, *args, reverse=False, **kwargs): + x = torch.flip(x, [1]) + if not reverse: + logdet = 
torch.zeros(x.size(0)).to(dtype=x.dtype, device=x.device) + return x, logdet + else: + return x + + +class ElementwiseAffine(nn.Module): + def __init__(self, channels): + super().__init__() + self.channels = channels + self.m = nn.Parameter(torch.zeros(channels,1)) + self.logs = nn.Parameter(torch.zeros(channels,1)) + + def forward(self, x, x_mask, reverse=False, **kwargs): + if not reverse: + y = self.m + torch.exp(self.logs) * x + y = y * x_mask + logdet = torch.sum(self.logs * x_mask, [1,2]) + return y, logdet + else: + x = (x - self.m) * torch.exp(-self.logs) * x_mask + return x + + +class ResidualCouplingLayer(nn.Module): + def __init__(self, + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + p_dropout=0, + gin_channels=0, + mean_only=False): + assert channels % 2 == 0, "channels should be divisible by 2" + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.half_channels = channels // 2 + self.mean_only = mean_only + + self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1) + self.enc = WN(hidden_channels, kernel_size, dilation_rate, n_layers, p_dropout=p_dropout, gin_channels=gin_channels) + self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1) + self.post.weight.data.zero_() + self.post.bias.data.zero_() + + def forward(self, x, x_mask, g=None, reverse=False): + x0, x1 = torch.split(x, [self.half_channels]*2, 1) + h = self.pre(x0) * x_mask + h = self.enc(h, x_mask, g=g) + stats = self.post(h) * x_mask + if not self.mean_only: + m, logs = torch.split(stats, [self.half_channels]*2, 1) + else: + m = stats + logs = torch.zeros_like(m) + + if not reverse: + x1 = m + x1 * torch.exp(logs) * x_mask + x = torch.cat([x0, x1], 1) + logdet = torch.sum(logs, [1,2]) + return x, logdet + else: + x1 = (x1 - m) * torch.exp(-logs) * x_mask + x = torch.cat([x0, x1], 1) + return x diff --git a/AutoCoverTool/ref/so_vits_svc/onnx_export.py b/AutoCoverTool/ref/so_vits_svc/onnx_export.py new file mode 100644 index 0000000..a762b23 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/onnx_export.py @@ -0,0 +1,73 @@ +import argparse +import time +import numpy as np +import onnx +from onnxsim import simplify +import onnxruntime as ort +import onnxoptimizer +import torch +from model_onnx import SynthesizerTrn +import utils +from hubert import hubert_model_onnx + +def main(HubertExport,NetExport): + + path = "NyaruTaffy" + + if(HubertExport): + device = torch.device("cuda") + hubert_soft = hubert_model_onnx.hubert_soft("hubert/model.pt") + test_input = torch.rand(1, 1, 16000) + input_names = ["source"] + output_names = ["embed"] + torch.onnx.export(hubert_soft.to(device), + test_input.to(device), + "hubert3.0.onnx", + dynamic_axes={ + "source": { + 2: "sample_length" + } + }, + verbose=False, + opset_version=13, + input_names=input_names, + output_names=output_names) + if(NetExport): + device = torch.device("cuda") + hps = utils.get_hparams_from_file(f"checkpoints/{path}/config.json") + SVCVITS = SynthesizerTrn( + hps.data.filter_length // 2 + 1, + hps.train.segment_size // hps.data.hop_length, + **hps.model) + _ = utils.load_checkpoint(f"checkpoints/{path}/model.pth", SVCVITS, None) + _ = SVCVITS.eval().to(device) + for i in SVCVITS.parameters(): + i.requires_grad = False + test_hidden_unit = torch.rand(1, 50, 256) + test_lengths = torch.LongTensor([50]) + test_pitch = torch.rand(1, 50) + test_sid = 
torch.LongTensor([0]) + input_names = ["hidden_unit", "lengths", "pitch", "sid"] + output_names = ["audio", ] + SVCVITS.eval() + torch.onnx.export(SVCVITS, + ( + test_hidden_unit.to(device), + test_lengths.to(device), + test_pitch.to(device), + test_sid.to(device) + ), + f"checkpoints/{path}/model.onnx", + dynamic_axes={ + "hidden_unit": [0, 1], + "pitch": [1] + }, + do_constant_folding=False, + opset_version=16, + verbose=False, + input_names=input_names, + output_names=output_names) + + +if __name__ == '__main__': + main(False,True) diff --git a/AutoCoverTool/ref/so_vits_svc/onnx_export_48k.py b/AutoCoverTool/ref/so_vits_svc/onnx_export_48k.py new file mode 100644 index 0000000..9a04635 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/onnx_export_48k.py @@ -0,0 +1,73 @@ +import argparse +import time +import numpy as np +import onnx +from onnxsim import simplify +import onnxruntime as ort +import onnxoptimizer +import torch +from model_onnx_48k import SynthesizerTrn +import utils +from hubert import hubert_model_onnx + +def main(HubertExport,NetExport): + + path = "NyaruTaffy" + + if(HubertExport): + device = torch.device("cuda") + hubert_soft = hubert_model_onnx.hubert_soft("hubert/model.pt") + test_input = torch.rand(1, 1, 16000) + input_names = ["source"] + output_names = ["embed"] + torch.onnx.export(hubert_soft.to(device), + test_input.to(device), + "hubert3.0.onnx", + dynamic_axes={ + "source": { + 2: "sample_length" + } + }, + verbose=False, + opset_version=13, + input_names=input_names, + output_names=output_names) + if(NetExport): + device = torch.device("cuda") + hps = utils.get_hparams_from_file(f"checkpoints/{path}/config.json") + SVCVITS = SynthesizerTrn( + hps.data.filter_length // 2 + 1, + hps.train.segment_size // hps.data.hop_length, + **hps.model) + _ = utils.load_checkpoint(f"checkpoints/{path}/model.pth", SVCVITS, None) + _ = SVCVITS.eval().to(device) + for i in SVCVITS.parameters(): + i.requires_grad = False + test_hidden_unit = torch.rand(1, 50, 256) + test_lengths = torch.LongTensor([50]) + test_pitch = torch.rand(1, 50) + test_sid = torch.LongTensor([0]) + input_names = ["hidden_unit", "lengths", "pitch", "sid"] + output_names = ["audio", ] + SVCVITS.eval() + torch.onnx.export(SVCVITS, + ( + test_hidden_unit.to(device), + test_lengths.to(device), + test_pitch.to(device), + test_sid.to(device) + ), + f"checkpoints/{path}/model.onnx", + dynamic_axes={ + "hidden_unit": [0, 1], + "pitch": [1] + }, + do_constant_folding=False, + opset_version=16, + verbose=False, + input_names=input_names, + output_names=output_names) + + +if __name__ == '__main__': + main(False,True) diff --git a/AutoCoverTool/ref/so_vits_svc/preprocess_flist_config.py b/AutoCoverTool/ref/so_vits_svc/preprocess_flist_config.py new file mode 100644 index 0000000..5b7e80a --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/preprocess_flist_config.py @@ -0,0 +1,132 @@ +import os +import argparse +import re + +from tqdm import tqdm +from random import shuffle +import json + +config_template = { + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 400, # 由10000->400 + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 12, + "fp16_run": False, + "lr_decay": 0.999875, + "segment_size": 17920, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0, + "use_sr": True, + "max_speclen": 384, + "port": "8001" + }, + "data": { + "training_files": "filelists/train.txt", + "validation_files": "filelists/val.txt", + "max_wav_value": 
32768.0,
+        "sampling_rate": 32000,
+        "filter_length": 1280,
+        "hop_length": 320,
+        "win_length": 1280,
+        "n_mel_channels": 80,
+        "mel_fmin": 0.0,
+        "mel_fmax": None
+    },
+    "model": {
+        "inter_channels": 192,
+        "hidden_channels": 192,
+        "filter_channels": 768,
+        "n_heads": 2,
+        "n_layers": 6,
+        "kernel_size": 3,
+        "p_dropout": 0.1,
+        "resblock": "1",
+        "resblock_kernel_sizes": [3, 7, 11],
+        "resblock_dilation_sizes": [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+        "upsample_rates": [10, 8, 2, 2],
+        "upsample_initial_channel": 512,
+        "upsample_kernel_sizes": [16, 16, 4, 4],
+        "n_layers_q": 3,
+        "use_spectral_norm": False,
+        "gin_channels": 256,
+        "ssl_dim": 256,
+        "n_speakers": 0,
+    },
+    "spk": {
+        "nen": 0,
+        "paimon": 1,
+        "yunhao": 2
+    }
+}
+
+pattern = re.compile(r'^[\.a-zA-Z0-9_\/]+$')
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--train_list", type=str, default="./filelists/train.txt", help="path to train list")
+    parser.add_argument("--val_list", type=str, default="./filelists/val.txt", help="path to val list")
+    parser.add_argument("--test_list", type=str, default="./filelists/test.txt", help="path to test list")
+    parser.add_argument("--source_dir", type=str, default="./dataset/32k", help="path to source dir")
+    parser.add_argument("--config_path", type=str, default="./config/config.json", help="path to output config file")
+    args = parser.parse_args()
+
+    train = []
+    val = []
+    test = []
+    idx = 0
+    spk_dict = {}
+    spk_id = 0
+    for speaker in tqdm(os.listdir(args.source_dir)):
+        spk_dict[speaker] = spk_id
+        spk_id += 1
+        wavs = ["/".join([args.source_dir, speaker, i]) for i in os.listdir(os.path.join(args.source_dir, speaker))]
+        for wavpath in wavs:
+            if not pattern.match(wavpath):
+                print(f"warning: filename {wavpath} contains characters other than letters, digits, '_', '.' and '/', which may cause errors (or may not)")
+        if len(wavs) < 10:
+            print(f"warning: speaker {speaker} has fewer than 10 clips, please add more data")
+        wavs = [i for i in wavs if i.endswith("wav")]
+        shuffle(wavs)
+        train += wavs[2:-2]
+        val += wavs[:2]
+        test += wavs[-2:]
+    n_speakers = len(spk_dict.keys()) * 2
+    shuffle(train)
+    shuffle(val)
+    shuffle(test)
+
+    print("Writing", args.train_list)
+    with open(args.train_list, "w") as f:
+        for fname in tqdm(train):
+            wavpath = fname
+            f.write(wavpath + "\n")
+
+    print("Writing", args.val_list)
+    with open(args.val_list, "w") as f:
+        for fname in tqdm(val):
+            wavpath = fname
+            f.write(wavpath + "\n")
+
+    print("Writing", args.test_list)
+    with open(args.test_list, "w") as f:
+        for fname in tqdm(test):
+            wavpath = fname
+            f.write(wavpath + "\n")
+
+    config_template["model"]["n_speakers"] = n_speakers
+    config_template["spk"] = spk_dict
+    print("Writing", args.config_path)
+
+    # point the config at the generated file lists
+    config_template["data"]["training_files"] = args.train_list
+    config_template["data"]["validation_files"] = args.val_list
+
+    with open(args.config_path, "w") as f:
+        json.dump(config_template, f, indent=2)
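As a quick sanity check after this script runs, the generated config can be inspected directly. A minimal sketch; the path below follows the zjl training example earlier in this readme and is an assumption, not something the script prints:

import json

# Hypothetical path from the zjl training steps above.
with open("data/train_users/zjl/config/config.json") as f:
    cfg = json.load(f)

print(cfg["spk"])                    # speaker name -> id map written by the script
print(cfg["model"]["n_speakers"])    # set to 2 * len(spk_dict) above
with open(cfg["data"]["training_files"]) as f:
    print(sum(1 for _ in f), "training wavs")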
diff --git a/AutoCoverTool/ref/so_vits_svc/preprocess_hubert_f0.py b/AutoCoverTool/ref/so_vits_svc/preprocess_hubert_f0.py
new file mode 100644
index 0000000..4fe7f21
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/preprocess_hubert_f0.py
@@ -0,0 +1,106 @@
+import os
+import argparse
+
+import torch
+import json
+from glob import glob
+
+from pyworld import pyworld
+from tqdm import tqdm
+from scipy.io import wavfile
+
+import utils
+from mel_processing import mel_spectrogram_torch
+#import h5py
+import logging
+logging.getLogger('numba').setLevel(logging.WARNING)
+
+import parselmouth
+import librosa
+import numpy as np
+
+
+def get_f0(path, p_len=None, f0_up_key=0):
+    x, _ = librosa.load(path, sr=32000)
+    if p_len is None:
+        p_len = x.shape[0]//320
+    else:
+        assert abs(p_len-x.shape[0]//320) < 3, (path, p_len, x.shape)
+    time_step = 320 / 32000 * 1000
+    f0_min = 50
+    f0_max = 1100
+    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+    f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+
+    f0 = parselmouth.Sound(x, 32000).to_pitch_ac(
+        time_step=time_step / 1000, voicing_threshold=0.6,
+        pitch_floor=f0_min, pitch_ceiling=f0_max).selected_array['frequency']
+
+    pad_size = (p_len - len(f0) + 1) // 2
+    if pad_size > 0 or p_len - len(f0) - pad_size > 0:
+        f0 = np.pad(f0, [[pad_size, p_len - len(f0) - pad_size]], mode='constant')
+
+    f0bak = f0.copy()
+    f0 *= pow(2, f0_up_key / 12)
+    f0_mel = 1127 * np.log(1 + f0 / 700)
+    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (f0_mel_max - f0_mel_min) + 1
+    f0_mel[f0_mel <= 1] = 1
+    f0_mel[f0_mel > 255] = 255
+    f0_coarse = np.rint(f0_mel).astype(int)
+    return f0_coarse, f0bak
+
+def resize2d(x, target_len):
+    source = np.array(x)
+    source[source < 0.001] = np.nan
+    target = np.interp(np.arange(0, len(source)*target_len, len(source)) / target_len, np.arange(0, len(source)), source)
+    res = np.nan_to_num(target)
+    return res
+
+def compute_f0(path, c_len):
+    x, sr = librosa.load(path, sr=32000)
+    f0, t = pyworld.dio(
+        x.astype(np.double),
+        fs=sr,
+        f0_ceil=800,
+        frame_period=1000 * 320 / sr,
+    )
+    f0 = pyworld.stonemask(x.astype(np.double), f0, t, 32000)
+    for index, pitch in enumerate(f0):
+        f0[index] = round(pitch, 1)
+    assert abs(c_len - x.shape[0]//320) < 3, (c_len, f0.shape)
+
+    return None, resize2d(f0, c_len)
+
+
+def process(filename):
+    print(filename)
+    save_name = filename + ".soft.pt"
+    if not os.path.exists(save_name):
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        wav, _ = librosa.load(filename, sr=16000)
+        wav = torch.from_numpy(wav).unsqueeze(0).to(device)
+        c = utils.get_hubert_content(hmodel, wav)
+        torch.save(c.cpu(), save_name)
+    else:
+        c = torch.load(save_name)
+    f0path = filename + ".f0.npy"
+    if not os.path.exists(f0path):
+        cf0, f0 = compute_f0(filename, c.shape[-1] * 2)
+        np.save(f0path, f0)
+
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--in_dir", type=str, default="dataset/32k", help="path to input dir")
+    args = parser.parse_args()
+
+    print("Loading hubert for content...")
+    hmodel = utils.get_hubert_model(0 if torch.cuda.is_available() else None)
+    print("Loaded hubert.")
+
+    filenames = glob(f'{args.in_dir}/*/*.wav', recursive=True)#[:10]
+
+    for filename in tqdm(filenames):
+        process(filename)
\ No newline at end of file
diff --git a/AutoCoverTool/ref/so_vits_svc/requirements.txt b/AutoCoverTool/ref/so_vits_svc/requirements.txt
new file mode 100644
index 0000000..2f40497
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/requirements.txt
@@ -0,0 +1,19 @@
+Flask==2.1.2
+Flask_Cors==3.0.10
+gradio==3.4.1
+numpy==1.19.2
+playsound==1.3.0
+PyAudio==0.2.12
+pydub==0.25.1
+pyworld==0.3.0
+requests==2.28.1
+scipy==1.7.3
+sounddevice==0.4.5
+SoundFile==0.10.3.post1
+starlette==0.19.1
+tqdm==4.63.0
+scikit-maad
+praat-parselmouth
+onnx
+onnxsim
+onnxoptimizer
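preprocess_hubert_f0.py caches two files next to each training wav: <name>.wav.soft.pt (the HuBERT content tensor) and <name>.wav.f0.npy (the f0 track resized to twice the content length, matching the compute_f0(filename, c.shape[-1] * 2) call above). A minimal sketch for checking that alignment; the wav path is hypothetical:

import numpy as np
import torch

wav = "dataset/32k/speaker0/example.wav"  # hypothetical training clip
c = torch.load(wav + ".soft.pt")          # HuBERT content, last dim = frames
f0 = np.load(wav + ".f0.npy")
assert f0.shape[0] == c.shape[-1] * 2     # the invariant the preprocessing enforces
print(c.shape, f0.shape)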
diff --git a/AutoCoverTool/ref/so_vits_svc/resample.py b/AutoCoverTool/ref/so_vits_svc/resample.py
new file mode 100644
index 0000000..b9b6a4f
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/resample.py
@@ -0,0 +1,50 @@
+import os
+import argparse
+import librosa
+import numpy as np
+from multiprocessing import Pool, cpu_count
+from scipy.io import wavfile
+from tqdm import tqdm
+
+
+def process(item):
+    spkdir, wav_name, args = item
+    # speaker 's5', 'p280', 'p315' are excluded,
+    speaker = spkdir.replace("\\", "/").split("/")[-1]
+    wav_path = os.path.join(args.in_dir, speaker, wav_name)
+    print(wav_path)
+    if os.path.exists(wav_path) and '.wav' in wav_path:
+        os.makedirs(os.path.join(args.out_dir2, speaker), exist_ok=True)
+        wav, sr = librosa.load(wav_path, sr=None)
+        wav, _ = librosa.effects.trim(wav, top_db=20)
+        peak = np.abs(wav).max()
+        if peak > 1.0:
+            wav = 0.98 * wav / peak
+        wav2 = librosa.resample(wav, orig_sr=sr, target_sr=args.sr2)
+        wav2 /= max(wav2.max(), -wav2.min())
+        save_name = wav_name
+        save_path2 = os.path.join(args.out_dir2, speaker, save_name)
+        wavfile.write(
+            save_path2,
+            args.sr2,
+            (wav2 * np.iinfo(np.int16).max).astype(np.int16)
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--sr2", type=int, default=32000, help="sampling rate")
+    parser.add_argument("--in_dir", type=str, default="./dataset_raw", help="path to source dir")
+    parser.add_argument("--out_dir2", type=str, default="./dataset/32k", help="path to target dir")
+    args = parser.parse_args()
+    processes = cpu_count() - 2 if cpu_count() > 4 else 1
+    pool = Pool(processes=processes)
+    speaker = "speaker0"
+    spk_dir = os.path.join(args.in_dir, speaker)
+    # for speaker in os.listdir(args.in_dir):
+    #     spk_dir = os.path.join(args.in_dir, speaker)
+    if os.path.isdir(spk_dir):
+        print(spk_dir)
+        for _ in tqdm(pool.imap_unordered(process,
+                                          [(spk_dir, i, args) for i in os.listdir(spk_dir) if i.endswith("wav")])):
+            pass
diff --git a/AutoCoverTool/ref/so_vits_svc/sovits_gradio.py b/AutoCoverTool/ref/so_vits_svc/sovits_gradio.py
new file mode 100644
index 0000000..9a59e88
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/sovits_gradio.py
@@ -0,0 +1,47 @@
+from inference.infer_tool_grad import VitsSvc
+import gradio as gr
+import os
+
+class VitsGradio:
+    def __init__(self):
+        self.so = VitsSvc()
+        self.lspk = []
+        self.modelPaths = []
+        for root, dirs, files in os.walk("checkpoints"):
+            for dir in dirs:
+                self.modelPaths.append(dir)
+        with gr.Blocks() as self.Vits:
+            with gr.Tab("VoiceConversion"):
+                with gr.Row(visible=False) as self.VoiceConversion:
+                    with gr.Column():
+                        with gr.Row():
+                            with gr.Column():
+                                self.srcaudio = gr.Audio(label="input audio")
+                                self.btnVC = gr.Button("convert speaker")
+                            with gr.Column():
+                                self.dsid = gr.Dropdown(label="target speaker", choices=self.lspk)
+                                self.tran = gr.Slider(label="pitch shift (semitones)", maximum=60, minimum=-60, step=1, value=0)
+                                self.th = gr.Slider(label="slice threshold", maximum=32767, minimum=-32768, step=0.1, value=-40)
+                        with gr.Row():
+                            self.VCOutputs = gr.Audio()
+                    self.btnVC.click(self.so.inference, inputs=[self.srcaudio, self.dsid, self.tran, self.th], outputs=[self.VCOutputs])
+            with gr.Tab("SelectModel"):
+                with gr.Column():
+                    modelstrs = gr.Dropdown(label="model", choices=self.modelPaths, value=self.modelPaths[0], type="value")
+                    devicestrs = gr.Dropdown(label="device", choices=["cpu", "cuda"], value="cpu", type="value")
+                    btnMod = gr.Button("load model")
+                    btnMod.click(self.loadModel, inputs=[modelstrs, devicestrs], outputs=[self.dsid, self.VoiceConversion])
+
+    def loadModel(self, path, device):
+        self.lspk = []
+        self.so.set_device(device)
+        self.so.loadCheckpoint(path)
+        for spk, sid in self.so.hps.spk.items():
+            self.lspk.append(spk)
+        VChange = gr.update(visible=True)
+        SDChange = gr.update(choices=self.lspk, value=self.lspk[0])
+        return [SDChange, VChange]
+
+grVits = VitsGradio()
+
+grVits.Vits.launch()
\ No newline at end of file
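sovits_gradio.py only lists subdirectory names under a local checkpoints/ folder; judging from the checkpoints/{path}/config.json and checkpoints/{path}/model.pth paths used by onnx_export.py above, each model directory is assumed to hold those two files (this is an inference about what VitsSvc.loadCheckpoint reads, not confirmed here). A hedged sketch for checking the layout before launching the app:

import os

for name in sorted(os.listdir("checkpoints")):
    path = os.path.join("checkpoints", name)
    if not os.path.isdir(path):
        continue
    files = set(os.listdir(path))
    # assumed required files per model folder: config.json and model.pth
    print(name, "ok" if {"config.json", "model.pth"} <= files else files)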
diff --git a/AutoCoverTool/ref/so_vits_svc/spec_gen.py b/AutoCoverTool/ref/so_vits_svc/spec_gen.py
new file mode 100644
index 0000000..85ad318
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/spec_gen.py
@@ -0,0 +1,22 @@
+from data_utils import TextAudioSpeakerLoader, EvalDataLoader
+import json
+from tqdm import tqdm
+
+from utils import HParams
+
+config_path = 'configs/config.json'
+with open(config_path, "r") as f:
+    data = f.read()
+config = json.loads(data)
+hps = HParams(**config)
+
+train_dataset = TextAudioSpeakerLoader("filelists/train.txt", hps)
+test_dataset = TextAudioSpeakerLoader("filelists/test.txt", hps)
+eval_dataset = TextAudioSpeakerLoader("filelists/val.txt", hps)
+
+# iterate each split once so the loaders pre-generate their spectrogram caches
+for _ in tqdm(train_dataset):
+    pass
+for _ in tqdm(eval_dataset):
+    pass
+for _ in tqdm(test_dataset):
+    pass
\ No newline at end of file
diff --git a/AutoCoverTool/ref/so_vits_svc/train.py b/AutoCoverTool/ref/so_vits_svc/train.py
new file mode 100644
index 0000000..152bb59
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/train.py
@@ -0,0 +1,283 @@
+import logging
+
+logging.getLogger('matplotlib').setLevel(logging.WARNING)
+import os
+import json
+import argparse
+import itertools
+import math
+import torch
+from torch import nn, optim
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+from torch.utils.tensorboard import SummaryWriter
+import torch.multiprocessing as mp
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.cuda.amp import autocast, GradScaler
+
+import commons
+import utils
+from data_utils import TextAudioSpeakerLoader, EvalDataLoader
+from models import (
+    SynthesizerTrn,
+    MultiPeriodDiscriminator,
+)
+from losses import (
+    kl_loss,
+    generator_loss, discriminator_loss, feature_loss
+)
+
+from mel_processing import mel_spectrogram_torch, spec_to_mel_torch
+
+torch.backends.cudnn.benchmark = True
+global_step = 0
+
+
+# os.environ['TORCH_DISTRIBUTED_DEBUG'] = 'INFO'
+
+
+def main():
+    """Assume Single Node Multi GPUs Training Only"""
+    assert torch.cuda.is_available(), "CPU training is not allowed."
+ hps = utils.get_hparams() + + n_gpus = torch.cuda.device_count() + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = hps.train.port + + mp.spawn(run, nprocs=n_gpus, args=(n_gpus, hps,)) + + +def run(rank, n_gpus, hps): + global global_step + if rank == 0: + logger = utils.get_logger(hps.model_dir) + logger.info(hps) + utils.check_git_hash(hps.model_dir) + writer = SummaryWriter(log_dir=hps.model_dir) + writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval")) + + dist.init_process_group(backend='nccl', init_method='env://', world_size=n_gpus, rank=rank) + torch.manual_seed(hps.train.seed) + torch.cuda.set_device(rank) + + train_dataset = TextAudioSpeakerLoader(hps.data.training_files, hps) + train_loader = DataLoader(train_dataset, num_workers=8, shuffle=False, pin_memory=True, + batch_size=hps.train.batch_size) + if rank == 0: + eval_dataset = EvalDataLoader(hps.data.validation_files, hps) + eval_loader = DataLoader(eval_dataset, num_workers=1, shuffle=False, + batch_size=1, pin_memory=False, + drop_last=False) + + net_g = SynthesizerTrn( + hps.data.filter_length // 2 + 1, + hps.train.segment_size // hps.data.hop_length, + **hps.model).cuda(rank) + net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm).cuda(rank) + optim_g = torch.optim.AdamW( + net_g.parameters(), + hps.train.learning_rate, + betas=hps.train.betas, + eps=hps.train.eps) + optim_d = torch.optim.AdamW( + net_d.parameters(), + hps.train.learning_rate, + betas=hps.train.betas, + eps=hps.train.eps) + net_g = DDP(net_g, device_ids=[rank]) # , find_unused_parameters=True) + net_d = DDP(net_d, device_ids=[rank]) + + try: + _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g, + optim_g) + _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, + optim_d) + global_step = (epoch_str - 1) * len(train_loader) + print("load checkpoint ok !") + except: + epoch_str = 1 + global_step = 0 + + scheduler_g = torch.optim.lr_scheduler.ExponentialLR(optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2) + scheduler_d = torch.optim.lr_scheduler.ExponentialLR(optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2) + + scaler = GradScaler(enabled=hps.train.fp16_run) + + for epoch in range(epoch_str, hps.train.epochs + 1): + if rank == 0: + train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, + [train_loader, eval_loader], logger, [writer, writer_eval]) + else: + train_and_evaluate(rank, epoch, hps, [net_g, net_d], [optim_g, optim_d], [scheduler_g, scheduler_d], scaler, + [train_loader, None], None, None) + scheduler_g.step() + scheduler_d.step() + + +def train_and_evaluate(rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers): + net_g, net_d = nets + optim_g, optim_d = optims + scheduler_g, scheduler_d = schedulers + train_loader, eval_loader = loaders + if writers is not None: + writer, writer_eval = writers + + # train_loader.batch_sampler.set_epoch(epoch) + global global_step + + net_g.train() + net_d.train() + for batch_idx, items in enumerate(train_loader): + c, f0, spec, y, spk = items + g = spk.cuda(rank, non_blocking=True) + spec, y = spec.cuda(rank, non_blocking=True), y.cuda(rank, non_blocking=True) + c = c.cuda(rank, non_blocking=True) + f0 = f0.cuda(rank, non_blocking=True) + mel = spec_to_mel_torch( + spec, + hps.data.filter_length, + hps.data.n_mel_channels, + hps.data.sampling_rate, + 
hps.data.mel_fmin, + hps.data.mel_fmax) + + with autocast(enabled=hps.train.fp16_run): + y_hat, ids_slice, z_mask, \ + (z, z_p, m_p, logs_p, m_q, logs_q) = net_g(c, f0, spec, g=g, mel=mel) + + y_mel = commons.slice_segments(mel, ids_slice, hps.train.segment_size // hps.data.hop_length) + y_hat_mel = mel_spectrogram_torch( + y_hat.squeeze(1), + hps.data.filter_length, + hps.data.n_mel_channels, + hps.data.sampling_rate, + hps.data.hop_length, + hps.data.win_length, + hps.data.mel_fmin, + hps.data.mel_fmax + ) + y = commons.slice_segments(y, ids_slice * hps.data.hop_length, hps.train.segment_size) # slice + + # Discriminator + y_d_hat_r, y_d_hat_g, _, _ = net_d(y, y_hat.detach()) + + with autocast(enabled=False): + loss_disc, losses_disc_r, losses_disc_g = discriminator_loss(y_d_hat_r, y_d_hat_g) + loss_disc_all = loss_disc + + optim_d.zero_grad() + scaler.scale(loss_disc_all).backward() + scaler.unscale_(optim_d) + grad_norm_d = commons.clip_grad_value_(net_d.parameters(), None) + scaler.step(optim_d) + + with autocast(enabled=hps.train.fp16_run): + # Generator + y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(y, y_hat) + with autocast(enabled=False): + loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel + loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl + loss_fm = feature_loss(fmap_r, fmap_g) + loss_gen, losses_gen = generator_loss(y_d_hat_g) + loss_gen_all = loss_gen + loss_fm + loss_mel + loss_kl + optim_g.zero_grad() + scaler.scale(loss_gen_all).backward() + scaler.unscale_(optim_g) + grad_norm_g = commons.clip_grad_value_(net_g.parameters(), None) + scaler.step(optim_g) + scaler.update() + + if rank == 0: + if global_step % hps.train.log_interval == 0: + lr = optim_g.param_groups[0]['lr'] + losses = [loss_disc, loss_gen, loss_fm, loss_mel, loss_kl] + logger.info('Train Epoch: {} [{:.0f}%]'.format( + epoch, + 100. 
* batch_idx / len(train_loader))) + logger.info([x.item() for x in losses] + [global_step, lr]) + + scalar_dict = {"loss/g/total": loss_gen_all, "loss/d/total": loss_disc_all, "learning_rate": lr, + "grad_norm_d": grad_norm_d, "grad_norm_g": grad_norm_g} + scalar_dict.update({"loss/g/fm": loss_fm, "loss/g/mel": loss_mel, "loss/g/kl": loss_kl}) + + scalar_dict.update({"loss/g/{}".format(i): v for i, v in enumerate(losses_gen)}) + scalar_dict.update({"loss/d_r/{}".format(i): v for i, v in enumerate(losses_disc_r)}) + scalar_dict.update({"loss/d_g/{}".format(i): v for i, v in enumerate(losses_disc_g)}) + image_dict = { + "slice/mel_org": utils.plot_spectrogram_to_numpy(y_mel[0].data.cpu().numpy()), + "slice/mel_gen": utils.plot_spectrogram_to_numpy(y_hat_mel[0].data.cpu().numpy()), + "all/mel": utils.plot_spectrogram_to_numpy(mel[0].data.cpu().numpy()), + } + + utils.summarize( + writer=writer, + global_step=global_step, + images=image_dict, + scalars=scalar_dict + ) + + if global_step % hps.train.eval_interval == 0: + evaluate(hps, net_g, eval_loader, writer_eval) + utils.save_checkpoint(net_g, optim_g, hps.train.learning_rate, epoch, + os.path.join(hps.model_dir, "G_{}.pth".format(global_step))) + utils.save_checkpoint(net_d, optim_d, hps.train.learning_rate, epoch, + os.path.join(hps.model_dir, "D_{}.pth".format(global_step))) + global_step += 1 + + if rank == 0: + logger.info('====> Epoch: {},{}'.format(epoch, global_step)) + + +def evaluate(hps, generator, eval_loader, writer_eval): + generator.eval() + image_dict = {} + audio_dict = {} + with torch.no_grad(): + for batch_idx, items in enumerate(eval_loader): + c, f0, spec, y, spk = items + g = spk[:1].cuda(0) + spec, y = spec[:1].cuda(0), y[:1].cuda(0) + c = c[:1].cuda(0) + f0 = f0[:1].cuda(0) + mel = spec_to_mel_torch( + spec, + hps.data.filter_length, + hps.data.n_mel_channels, + hps.data.sampling_rate, + hps.data.mel_fmin, + hps.data.mel_fmax) + y_hat = generator.module.infer(c, f0, g=g, mel=mel) + + y_hat_mel = mel_spectrogram_torch( + y_hat.squeeze(1).float(), + hps.data.filter_length, + hps.data.n_mel_channels, + hps.data.sampling_rate, + hps.data.hop_length, + hps.data.win_length, + hps.data.mel_fmin, + hps.data.mel_fmax + ) + + audio_dict.update({ + f"gen/audio_{batch_idx}": y_hat[0], + f"gt/audio_{batch_idx}": y[0] + }) + image_dict.update({ + f"gen/mel": utils.plot_spectrogram_to_numpy(y_hat_mel[0].cpu().numpy()), + "gt/mel": utils.plot_spectrogram_to_numpy(mel[0].cpu().numpy()) + }) + utils.summarize( + writer=writer_eval, + global_step=global_step, + images=image_dict, + audios=audio_dict, + audio_sampling_rate=hps.data.sampling_rate + ) + generator.train() + + +if __name__ == "__main__": + main() diff --git a/AutoCoverTool/ref/so_vits_svc/utils.py b/AutoCoverTool/ref/so_vits_svc/utils.py new file mode 100644 index 0000000..9eb9679 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/utils.py @@ -0,0 +1,360 @@ +import os +import glob +import re +import sys +import argparse +import logging +import json +import subprocess + +import librosa +import numpy as np +import torchaudio +from scipy.io.wavfile import read +import torch +import torchvision +from torch.nn import functional as F +from commons import sequence_mask +from hubert import hubert_model + +MATPLOTLIB_FLAG = False + +logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) +logger = logging + +f0_bin = 256 +f0_max = 1100.0 +f0_min = 50.0 +f0_mel_min = 1127 * np.log(1 + f0_min / 700) +f0_mel_max = 1127 * np.log(1 + f0_max / 700) + + +def f0_to_coarse(f0): + is_torch 
= isinstance(f0, torch.Tensor)
+    f0_mel = 1127 * (1 + f0 / 700).log() if is_torch else 1127 * np.log(1 + f0 / 700)
+    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * (f0_bin - 2) / (f0_mel_max - f0_mel_min) + 1
+
+    f0_mel[f0_mel <= 1] = 1
+    f0_mel[f0_mel > f0_bin - 1] = f0_bin - 1
+    f0_coarse = (f0_mel + 0.5).long() if is_torch else np.rint(f0_mel).astype(int)
+    assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, (f0_coarse.max(), f0_coarse.min())
+    return f0_coarse
+
+
+def get_hubert_model(rank=None):
+    hubert_soft = hubert_model.hubert_soft("data/models/hubert-soft-0d54a1f4.pt")
+    if rank is not None:
+        hubert_soft = hubert_soft.cuda(rank)
+    return hubert_soft
+
+
+def get_hubert_content(hmodel, y=None, path=None):
+    if path is not None:
+        source, sr = torchaudio.load(path)
+        source = torchaudio.functional.resample(source, sr, 16000)
+        if len(source.shape) == 2 and source.shape[1] >= 2:
+            source = torch.mean(source, dim=0).unsqueeze(0)
+    else:
+        source = y
+    source = source.unsqueeze(0)
+    with torch.inference_mode():
+        units = hmodel.units(source)
+        return units.transpose(1, 2)
+
+
+def get_content(cmodel, y):
+    with torch.no_grad():
+        c = cmodel.extract_features(y.squeeze(1))[0]
+    c = c.transpose(1, 2)
+    return c
+
+
+def transform(mel, height):  # 68-92
+    # r = np.random.random()
+    # rate = r * 0.3 + 0.85  # 0.85-1.15
+    # height = int(mel.size(-2) * rate)
+    tgt = torchvision.transforms.functional.resize(mel, (height, mel.size(-1)))
+    if height >= mel.size(-2):
+        return tgt[:, :mel.size(-2), :]
+    else:
+        silence = tgt[:, -1:, :].repeat(1, mel.size(-2) - height, 1)
+        silence += torch.randn_like(silence) / 10
+        return torch.cat((tgt, silence), 1)
+
+
+def stretch(mel, width):  # 0.5-2
+    return torchvision.transforms.functional.resize(mel, (mel.size(-2), width))
+
+
+def load_checkpoint(checkpoint_path, model, optimizer=None):
+    assert os.path.isfile(checkpoint_path)
+    checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
+    iteration = checkpoint_dict['iteration']
+    learning_rate = checkpoint_dict['learning_rate']
+    if iteration is None:
+        iteration = 1
+    if learning_rate is None:
+        learning_rate = 0.0002
+    if optimizer is not None and checkpoint_dict['optimizer'] is not None:
+        optimizer.load_state_dict(checkpoint_dict['optimizer'])
+    saved_state_dict = checkpoint_dict['model']
+    if hasattr(model, 'module'):
+        state_dict = model.module.state_dict()
+    else:
+        state_dict = model.state_dict()
+    new_state_dict = {}
+    for k, v in state_dict.items():
+        try:
+            new_state_dict[k] = saved_state_dict[k]
+        except KeyError:
+            logger.info("%s is not in the checkpoint" % k)
+            new_state_dict[k] = v
+    if hasattr(model, 'module'):
+        model.module.load_state_dict(new_state_dict)
+    else:
+        model.load_state_dict(new_state_dict)
+    logger.info("Loaded checkpoint '{}' (iteration {})".format(
+        checkpoint_path, iteration))
+    return model, optimizer, learning_rate, iteration
+
+
+def save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path):
+    logger.info("Saving model and optimizer state at iteration {} to {}".format(
+        iteration, checkpoint_path))
+    if hasattr(model, 'module'):
+        state_dict = model.module.state_dict()
+    else:
+        state_dict = model.state_dict()
+    torch.save({'model': state_dict,
+                'iteration': iteration,
+                'optimizer': optimizer.state_dict(),
+                'learning_rate': learning_rate}, checkpoint_path)
+    clean_ckpt = False
+    if clean_ckpt:
+        clean_checkpoints(path_to_models='logs/32k/', n_ckpts_to_keep=3, sort_by_time=True)
+
+
+def clean_checkpoints(path_to_models='logs/48k/', n_ckpts_to_keep=2, sort_by_time=True):
+    """Free up space by deleting saved checkpoints.
+
+    Arguments:
+    path_to_models    --  Path to the model directory
+    n_ckpts_to_keep   --  Number of ckpts to keep, excluding G_0.pth and D_0.pth
+    sort_by_time      --  True -> chronologically delete ckpts
+                          False -> lexicographically delete ckpts
+    """
+    ckpts_files = [f for f in os.listdir(path_to_models) if os.path.isfile(os.path.join(path_to_models, f))]
+    name_key = (lambda _f: int(re.compile(r'._(\d+)\.pth').match(_f).group(1)))
+    time_key = (lambda _f: os.path.getmtime(os.path.join(path_to_models, _f)))
+    sort_key = time_key if sort_by_time else name_key
+    x_sorted = lambda _x: sorted([f for f in ckpts_files if f.startswith(_x) and not f.endswith('_0.pth')],
+                                 key=sort_key)
+    to_del = [os.path.join(path_to_models, fn) for fn in
+              (x_sorted('G')[:-n_ckpts_to_keep] + x_sorted('D')[:-n_ckpts_to_keep])]
+    del_info = lambda fn: logger.info(f".. Free up space by deleting ckpt {fn}")
+    del_routine = lambda x: [os.remove(x), del_info(x)]
+    rs = [del_routine(fn) for fn in to_del]
+
+
+def summarize(writer, global_step, scalars={}, histograms={}, images={}, audios={}, audio_sampling_rate=22050):
+    for k, v in scalars.items():
+        writer.add_scalar(k, v, global_step)
+    for k, v in histograms.items():
+        writer.add_histogram(k, v, global_step)
+    for k, v in images.items():
+        writer.add_image(k, v, global_step, dataformats='HWC')
+    for k, v in audios.items():
+        writer.add_audio(k, v, global_step, audio_sampling_rate)
+
+
+def latest_checkpoint_path(dir_path, regex="G_*.pth"):
+    f_list = glob.glob(os.path.join(dir_path, regex))
+    f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
+    x = f_list[-1]
+    print(x)
+    return x
+
+
+def plot_spectrogram_to_numpy(spectrogram):
+    global MATPLOTLIB_FLAG
+    if not MATPLOTLIB_FLAG:
+        import matplotlib
+        matplotlib.use("Agg")
+        MATPLOTLIB_FLAG = True
+        mpl_logger = logging.getLogger('matplotlib')
+        mpl_logger.setLevel(logging.WARNING)
+    import matplotlib.pylab as plt
+    import numpy as np
+
+    fig, ax = plt.subplots(figsize=(10, 2))
+    im = ax.imshow(spectrogram, aspect="auto", origin="lower",
+                   interpolation='none')
+    plt.colorbar(im, ax=ax)
+    plt.xlabel("Frames")
+    plt.ylabel("Channels")
+    plt.tight_layout()
+
+    fig.canvas.draw()
+    data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
+    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+    plt.close()
+    return data
+
+
+def plot_alignment_to_numpy(alignment, info=None):
+    global MATPLOTLIB_FLAG
+    if not MATPLOTLIB_FLAG:
+        import matplotlib
+        matplotlib.use("Agg")
+        MATPLOTLIB_FLAG = True
+        mpl_logger = logging.getLogger('matplotlib')
+        mpl_logger.setLevel(logging.WARNING)
+    import matplotlib.pylab as plt
+    import numpy as np
+
+    fig, ax = plt.subplots(figsize=(6, 4))
+    im = ax.imshow(alignment.transpose(), aspect='auto', origin='lower',
+                   interpolation='none')
+    fig.colorbar(im, ax=ax)
+    xlabel = 'Decoder timestep'
+    if info is not None:
+        xlabel += '\n\n' + info
+    plt.xlabel(xlabel)
+    plt.ylabel('Encoder timestep')
+    plt.tight_layout()
+
+    fig.canvas.draw()
+    data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
+    data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))
+    plt.close()
+    return data
+
+
+def load_wav_to_torch(full_path):
+    sampling_rate, data = read(full_path)
+    return torch.FloatTensor(data.astype(np.float32)), sampling_rate
+
+
+def load_filepaths_and_text(filename, split="|"):
+    with open(filename, encoding='utf-8')
as f: + filepaths_and_text = [line.strip().split(split) for line in f] + return filepaths_and_text + + +def get_hparams(init=True): + parser = argparse.ArgumentParser() + parser.add_argument('-c', '--config', type=str, default="./configs/base.json", + help='JSON file for configuration') + parser.add_argument('-m', '--model', type=str, required=True, + help='Model name') + parser.add_argument('-l', '--logs', type=str, required=True, + help='log Name') + + args = parser.parse_args() + model_dir = os.path.join(args.logs, args.model) + + if not os.path.exists(model_dir): + os.makedirs(model_dir) + + config_path = args.config + config_save_path = os.path.join(model_dir, "config.json") + if init: + with open(config_path, "r") as f: + data = f.read() + with open(config_save_path, "w") as f: + f.write(data) + else: + with open(config_save_path, "r") as f: + data = f.read() + config = json.loads(data) + + hparams = HParams(**config) + hparams.model_dir = model_dir + return hparams + + +def get_hparams_from_dir(model_dir): + config_save_path = os.path.join(model_dir, "config.json") + with open(config_save_path, "r") as f: + data = f.read() + config = json.loads(data) + + hparams = HParams(**config) + hparams.model_dir = model_dir + return hparams + + +def get_hparams_from_file(config_path): + with open(config_path, "r") as f: + data = f.read() + config = json.loads(data) + + hparams = HParams(**config) + return hparams + + +def check_git_hash(model_dir): + source_dir = os.path.dirname(os.path.realpath(__file__)) + if not os.path.exists(os.path.join(source_dir, ".git")): + logger.warn("{} is not a git repository, therefore hash value comparison will be ignored.".format( + source_dir + )) + return + + cur_hash = subprocess.getoutput("git rev-parse HEAD") + + path = os.path.join(model_dir, "githash") + if os.path.exists(path): + saved_hash = open(path).read() + if saved_hash != cur_hash: + logger.warn("git hash values are different. 
{}(saved) != {}(current)".format( + saved_hash[:8], cur_hash[:8])) + else: + open(path, "w").write(cur_hash) + + +def get_logger(model_dir, filename="train.log"): + global logger + logger = logging.getLogger(os.path.basename(model_dir)) + logger.setLevel(logging.DEBUG) + + formatter = logging.Formatter("%(asctime)s\t%(name)s\t%(levelname)s\t%(message)s") + if not os.path.exists(model_dir): + os.makedirs(model_dir) + h = logging.FileHandler(os.path.join(model_dir, filename)) + h.setLevel(logging.DEBUG) + h.setFormatter(formatter) + logger.addHandler(h) + return logger + + +class HParams(): + def __init__(self, **kwargs): + for k, v in kwargs.items(): + if type(v) == dict: + v = HParams(**v) + self[k] = v + + def keys(self): + return self.__dict__.keys() + + def items(self): + return self.__dict__.items() + + def values(self): + return self.__dict__.values() + + def __len__(self): + return len(self.__dict__) + + def __getitem__(self, key): + return getattr(self, key) + + def __setitem__(self, key, value): + return setattr(self, key, value) + + def __contains__(self, key): + return key in self.__dict__ + + def __repr__(self): + return self.__dict__.__repr__() diff --git a/AutoCoverTool/ref/so_vits_svc/vdecoder/__init__.py b/AutoCoverTool/ref/so_vits_svc/vdecoder/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/env.py b/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/env.py new file mode 100644 index 0000000..2bdbc95 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/env.py @@ -0,0 +1,15 @@ +import os +import shutil + + +class AttrDict(dict): + def __init__(self, *args, **kwargs): + super(AttrDict, self).__init__(*args, **kwargs) + self.__dict__ = self + + +def build_env(config, config_name, path): + t_path = os.path.join(path, config_name) + if config != t_path: + os.makedirs(path, exist_ok=True) + shutil.copyfile(config, os.path.join(path, config_name)) diff --git a/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/models.py b/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/models.py new file mode 100644 index 0000000..9747301 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/models.py @@ -0,0 +1,503 @@ +import os +import json +from .env import AttrDict +import numpy as np +import torch +import torch.nn.functional as F +import torch.nn as nn +from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d +from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm +from .utils import init_weights, get_padding + +LRELU_SLOPE = 0.1 + + +def load_model(model_path, device='cuda'): + config_file = os.path.join(os.path.split(model_path)[0], 'config.json') + with open(config_file) as f: + data = f.read() + + global h + json_config = json.loads(data) + h = AttrDict(json_config) + + generator = Generator(h).to(device) + + cp_dict = torch.load(model_path) + generator.load_state_dict(cp_dict['generator']) + generator.eval() + generator.remove_weight_norm() + del cp_dict + return generator, h + + +class ResBlock1(torch.nn.Module): + def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)): + super(ResBlock1, self).__init__() + self.h = h + self.convs1 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, 
dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]))) + ]) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1, + padding=get_padding(kernel_size, 1))) + ]) + self.convs2.apply(init_weights) + + def forward(self, x): + for c1, c2 in zip(self.convs1, self.convs2): + xt = F.leaky_relu(x, LRELU_SLOPE) + xt = c1(xt) + xt = F.leaky_relu(xt, LRELU_SLOPE) + xt = c2(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for l in self.convs1: + remove_weight_norm(l) + for l in self.convs2: + remove_weight_norm(l) + + +class ResBlock2(torch.nn.Module): + def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)): + super(ResBlock2, self).__init__() + self.h = h + self.convs = nn.ModuleList([ + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]))), + weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]))) + ]) + self.convs.apply(init_weights) + + def forward(self, x): + for c in self.convs: + xt = F.leaky_relu(x, LRELU_SLOPE) + xt = c(xt) + x = xt + x + return x + + def remove_weight_norm(self): + for l in self.convs: + remove_weight_norm(l) + + +def padDiff(x): + return F.pad(F.pad(x, (0,0,-1,1), 'constant', 0) - x, (0,0,0,-1), 'constant', 0) + +class SineGen(torch.nn.Module): + """ Definition of sine generator + SineGen(samp_rate, harmonic_num = 0, + sine_amp = 0.1, noise_std = 0.003, + voiced_threshold = 0, + flag_for_pulse=False) + samp_rate: sampling rate in Hz + harmonic_num: number of harmonic overtones (default 0) + sine_amp: amplitude of sine-wavefrom (default 0.1) + noise_std: std of Gaussian noise (default 0.003) + voiced_thoreshold: F0 threshold for U/V classification (default 0) + flag_for_pulse: this SinGen is used inside PulseGen (default False) + Note: when flag_for_pulse is True, the first time step of a voiced + segment is always sin(np.pi) or cos(0) + """ + + def __init__(self, samp_rate, harmonic_num=0, + sine_amp=0.1, noise_std=0.003, + voiced_threshold=0, + flag_for_pulse=False): + super(SineGen, self).__init__() + self.sine_amp = sine_amp + self.noise_std = noise_std + self.harmonic_num = harmonic_num + self.dim = self.harmonic_num + 1 + self.sampling_rate = samp_rate + self.voiced_threshold = voiced_threshold + self.flag_for_pulse = flag_for_pulse + + def _f02uv(self, f0): + # generate uv signal + uv = (f0 > self.voiced_threshold).type(torch.float32) + return uv + + def _f02sine(self, f0_values): + """ f0_values: (batchsize, length, dim) + where dim indicates fundamental tone and overtones + """ + # convert to F0 in rad. The interger part n can be ignored + # because 2 * np.pi * n doesn't affect phase + rad_values = (f0_values / self.sampling_rate) % 1 + + # initial phase noise (no noise for fundamental component) + rand_ini = torch.rand(f0_values.shape[0], f0_values.shape[2], \ + device=f0_values.device) + rand_ini[:, 0] = 0 + rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini + + # instantanouse phase sine[t] = sin(2*pi \sum_i=1 ^{t} rad) + if not self.flag_for_pulse: + # for normal case + + # To prevent torch.cumsum numerical overflow, + # it is necessary to add -1 whenever \sum_k=1^n rad_value_k > 1. 
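+            # Worked example of the wrap-around trick (illustrative values only):
+            # rad_values = [0.4, 0.4, 0.4] gives cumsum % 1 = [0.4, 0.8, 0.2];
+            # the drop from 0.8 to 0.2 flags an overflow, so cumsum_shift places
+            # a -1 at that step, and subtracting a whole period leaves
+            # sin(2*pi*x) below unchanged.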
+ # Buffer tmp_over_one_idx indicates the time step to add -1. + # This will not change F0 of sine because (x-1) * 2*pi = x * 2*pi + tmp_over_one = torch.cumsum(rad_values, 1) % 1 + tmp_over_one_idx = (padDiff(tmp_over_one)) < 0 + cumsum_shift = torch.zeros_like(rad_values) + cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + + sines = torch.sin(torch.cumsum(rad_values + cumsum_shift, dim=1) + * 2 * np.pi) + else: + # If necessary, make sure that the first time step of every + # voiced segments is sin(pi) or cos(0) + # This is used for pulse-train generation + + # identify the last time step in unvoiced segments + uv = self._f02uv(f0_values) + uv_1 = torch.roll(uv, shifts=-1, dims=1) + uv_1[:, -1, :] = 1 + u_loc = (uv < 1) * (uv_1 > 0) + + # get the instantanouse phase + tmp_cumsum = torch.cumsum(rad_values, dim=1) + # different batch needs to be processed differently + for idx in range(f0_values.shape[0]): + temp_sum = tmp_cumsum[idx, u_loc[idx, :, 0], :] + temp_sum[1:, :] = temp_sum[1:, :] - temp_sum[0:-1, :] + # stores the accumulation of i.phase within + # each voiced segments + tmp_cumsum[idx, :, :] = 0 + tmp_cumsum[idx, u_loc[idx, :, 0], :] = temp_sum + + # rad_values - tmp_cumsum: remove the accumulation of i.phase + # within the previous voiced segment. + i_phase = torch.cumsum(rad_values - tmp_cumsum, dim=1) + + # get the sines + sines = torch.cos(i_phase * 2 * np.pi) + return sines + + def forward(self, f0): + """ sine_tensor, uv = forward(f0) + input F0: tensor(batchsize=1, length, dim=1) + f0 for unvoiced steps should be 0 + output sine_tensor: tensor(batchsize=1, length, dim) + output uv: tensor(batchsize=1, length, 1) + """ + with torch.no_grad(): + f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, + device=f0.device) + # fundamental component + fn = torch.multiply(f0, torch.FloatTensor([[range(1, self.harmonic_num + 2)]]).to(f0.device)) + + # generate sine waveforms + sine_waves = self._f02sine(fn) * self.sine_amp + + # generate uv signal + # uv = torch.ones(f0.shape) + # uv = uv * (f0 > self.voiced_threshold) + uv = self._f02uv(f0) + + # noise: for unvoiced should be similar to sine_amp + # std = self.sine_amp/3 -> max value ~ self.sine_amp + # . 
for voiced regions is self.noise_std + noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3 + noise = noise_amp * torch.randn_like(sine_waves) + + # first: set the unvoiced part to 0 by uv + # then: additive noise + sine_waves = sine_waves * uv + noise + return sine_waves, uv, noise + + +class SourceModuleHnNSF(torch.nn.Module): + """ SourceModule for hn-nsf + SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0) + sampling_rate: sampling_rate in Hz + harmonic_num: number of harmonic above F0 (default: 0) + sine_amp: amplitude of sine source signal (default: 0.1) + add_noise_std: std of additive Gaussian noise (default: 0.003) + note that amplitude of noise in unvoiced is decided + by sine_amp + voiced_threshold: threhold to set U/V given F0 (default: 0) + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + uv (batchsize, length, 1) + """ + + def __init__(self, sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0): + super(SourceModuleHnNSF, self).__init__() + + self.sine_amp = sine_amp + self.noise_std = add_noise_std + + # to produce sine waveforms + self.l_sin_gen = SineGen(sampling_rate, harmonic_num, + sine_amp, add_noise_std, voiced_threshod) + + # to merge source harmonics into a single excitation + self.l_linear = torch.nn.Linear(harmonic_num + 1, 1) + self.l_tanh = torch.nn.Tanh() + + def forward(self, x): + """ + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + """ + # source for harmonic branch + sine_wavs, uv, _ = self.l_sin_gen(x) + sine_merge = self.l_tanh(self.l_linear(sine_wavs)) + + # source for noise branch, in the same shape as uv + noise = torch.randn_like(uv) * self.sine_amp / 3 + return sine_merge, noise, uv + + +class Generator(torch.nn.Module): + def __init__(self, h): + super(Generator, self).__init__() + self.h = h + + self.num_kernels = len(h["resblock_kernel_sizes"]) + self.num_upsamples = len(h["upsample_rates"]) + self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(h["upsample_rates"])) + self.m_source = SourceModuleHnNSF( + sampling_rate=h["sampling_rate"], + harmonic_num=8) + self.noise_convs = nn.ModuleList() + self.conv_pre = weight_norm(Conv1d(h["inter_channels"], h["upsample_initial_channel"], 7, 1, padding=3)) + resblock = ResBlock1 if h["resblock"] == '1' else ResBlock2 + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(h["upsample_rates"], h["upsample_kernel_sizes"])): + c_cur = h["upsample_initial_channel"] // (2 ** (i + 1)) + self.ups.append(weight_norm( + ConvTranspose1d(h["upsample_initial_channel"] // (2 ** i), h["upsample_initial_channel"] // (2 ** (i + 1)), + k, u, padding=(k - u) // 2))) + if i + 1 < len(h["upsample_rates"]): # + stride_f0 = np.prod(h["upsample_rates"][i + 1:]) + self.noise_convs.append(Conv1d( + 1, c_cur, kernel_size=stride_f0 * 2, stride=stride_f0, padding=stride_f0 // 2)) + else: + self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1)) + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = h["upsample_initial_channel"] // (2 ** (i + 1)) + for j, (k, d) in enumerate(zip(h["resblock_kernel_sizes"], h["resblock_dilation_sizes"])): + self.resblocks.append(resblock(h, ch, k, d)) + + self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3)) + 
self.ups.apply(init_weights) + self.conv_post.apply(init_weights) + self.cond = nn.Conv1d(h['gin_channels'], h['upsample_initial_channel'], 1) + + def forward(self, x, f0, g=None): + # print(1,x.shape,f0.shape,f0[:, None].shape) + f0 = self.f0_upsamp(f0[:, None]).transpose(1, 2) # bs,n,t + # print(2,f0.shape) + har_source, noi_source, uv = self.m_source(f0) + har_source = har_source.transpose(1, 2) + x = self.conv_pre(x) + x = x + self.cond(g) + # print(124,x.shape,har_source.shape) + for i in range(self.num_upsamples): + x = F.leaky_relu(x, LRELU_SLOPE) + # print(3,x.shape) + x = self.ups[i](x) + x_source = self.noise_convs[i](har_source) + # print(4,x_source.shape,har_source.shape,x.shape) + x = x + x_source + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i * self.num_kernels + j](x) + else: + xs += self.resblocks[i * self.num_kernels + j](x) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + x = torch.tanh(x) + + return x + + def remove_weight_norm(self): + print('Removing weight norm...') + for l in self.ups: + remove_weight_norm(l) + for l in self.resblocks: + l.remove_weight_norm() + remove_weight_norm(self.conv_pre) + remove_weight_norm(self.conv_post) + + +class DiscriminatorP(torch.nn.Module): + def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): + super(DiscriminatorP, self).__init__() + self.period = period + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))), + norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(2, 0))), + ]) + self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) + + def forward(self, x): + fmap = [] + + # 1d to 2d + b, c, t = x.shape + if t % self.period != 0: # pad first + n_pad = self.period - (t % self.period) + x = F.pad(x, (0, n_pad), "reflect") + t = t + n_pad + x = x.view(b, c, t // self.period, self.period) + + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiPeriodDiscriminator(torch.nn.Module): + def __init__(self, periods=None): + super(MultiPeriodDiscriminator, self).__init__() + self.periods = periods if periods is not None else [2, 3, 5, 7, 11] + self.discriminators = nn.ModuleList() + for period in self.periods: + self.discriminators.append(DiscriminatorP(period)) + + def forward(self, y, y_hat): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in enumerate(self.discriminators): + y_d_r, fmap_r = d(y) + y_d_g, fmap_g = d(y_hat) + y_d_rs.append(y_d_r) + fmap_rs.append(fmap_r) + y_d_gs.append(y_d_g) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +class DiscriminatorS(torch.nn.Module): + def __init__(self, use_spectral_norm=False): + super(DiscriminatorS, self).__init__() + norm_f = weight_norm if use_spectral_norm == False else spectral_norm + self.convs = nn.ModuleList([ + norm_f(Conv1d(1, 128, 15, 1, padding=7)), + norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)), + norm_f(Conv1d(128, 256, 41, 2, groups=16, padding=20)), + norm_f(Conv1d(256, 512, 
41, 4, groups=16, padding=20)), + norm_f(Conv1d(512, 1024, 41, 4, groups=16, padding=20)), + norm_f(Conv1d(1024, 1024, 41, 1, groups=16, padding=20)), + norm_f(Conv1d(1024, 1024, 5, 1, padding=2)), + ]) + self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1)) + + def forward(self, x): + fmap = [] + for l in self.convs: + x = l(x) + x = F.leaky_relu(x, LRELU_SLOPE) + fmap.append(x) + x = self.conv_post(x) + fmap.append(x) + x = torch.flatten(x, 1, -1) + + return x, fmap + + +class MultiScaleDiscriminator(torch.nn.Module): + def __init__(self): + super(MultiScaleDiscriminator, self).__init__() + self.discriminators = nn.ModuleList([ + DiscriminatorS(use_spectral_norm=True), + DiscriminatorS(), + DiscriminatorS(), + ]) + self.meanpools = nn.ModuleList([ + AvgPool1d(4, 2, padding=2), + AvgPool1d(4, 2, padding=2) + ]) + + def forward(self, y, y_hat): + y_d_rs = [] + y_d_gs = [] + fmap_rs = [] + fmap_gs = [] + for i, d in enumerate(self.discriminators): + if i != 0: + y = self.meanpools[i - 1](y) + y_hat = self.meanpools[i - 1](y_hat) + y_d_r, fmap_r = d(y) + y_d_g, fmap_g = d(y_hat) + y_d_rs.append(y_d_r) + fmap_rs.append(fmap_r) + y_d_gs.append(y_d_g) + fmap_gs.append(fmap_g) + + return y_d_rs, y_d_gs, fmap_rs, fmap_gs + + +def feature_loss(fmap_r, fmap_g): + loss = 0 + for dr, dg in zip(fmap_r, fmap_g): + for rl, gl in zip(dr, dg): + loss += torch.mean(torch.abs(rl - gl)) + + return loss * 2 + + +def discriminator_loss(disc_real_outputs, disc_generated_outputs): + loss = 0 + r_losses = [] + g_losses = [] + for dr, dg in zip(disc_real_outputs, disc_generated_outputs): + r_loss = torch.mean((1 - dr) ** 2) + g_loss = torch.mean(dg ** 2) + loss += (r_loss + g_loss) + r_losses.append(r_loss.item()) + g_losses.append(g_loss.item()) + + return loss, r_losses, g_losses + + +def generator_loss(disc_outputs): + loss = 0 + gen_losses = [] + for dg in disc_outputs: + l = torch.mean((1 - dg) ** 2) + gen_losses.append(l) + loss += l + + return loss, gen_losses diff --git a/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/nvSTFT.py b/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/nvSTFT.py new file mode 100644 index 0000000..88597d6 --- /dev/null +++ b/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/nvSTFT.py @@ -0,0 +1,111 @@ +import math +import os +os.environ["LRU_CACHE_CAPACITY"] = "3" +import random +import torch +import torch.utils.data +import numpy as np +import librosa +from librosa.util import normalize +from librosa.filters import mel as librosa_mel_fn +from scipy.io.wavfile import read +import soundfile as sf + +def load_wav_to_torch(full_path, target_sr=None, return_empty_on_exception=False): + sampling_rate = None + try: + data, sampling_rate = sf.read(full_path, always_2d=True)# than soundfile. 
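+        # sf.read(always_2d=True) returns (n_samples, n_channels); the code below
+        # keeps only the first channel and uses max_mag to rescale 16-bit int,
+        # 32-bit int or float input into the [-1, 1] float32 range.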
+ except Exception as ex: + print(f"'{full_path}' failed to load.\nException:") + print(ex) + if return_empty_on_exception: + return [], sampling_rate or target_sr or 32000 + else: + raise Exception(ex) + + if len(data.shape) > 1: + data = data[:, 0] + assert len(data) > 2# check duration of audio file is > 2 samples (because otherwise the slice operation was on the wrong dimension) + + if np.issubdtype(data.dtype, np.integer): # if audio data is type int + max_mag = -np.iinfo(data.dtype).min # maximum magnitude = min possible value of intXX + else: # if audio data is type fp32 + max_mag = max(np.amax(data), -np.amin(data)) + max_mag = (2**31)+1 if max_mag > (2**15) else ((2**15)+1 if max_mag > 1.01 else 1.0) # data should be either 16-bit INT, 32-bit INT or [-1 to 1] float32 + + data = torch.FloatTensor(data.astype(np.float32))/max_mag + + if (torch.isinf(data) | torch.isnan(data)).any() and return_empty_on_exception:# resample will crash with inf/NaN inputs. return_empty_on_exception will return empty arr instead of except + return [], sampling_rate or target_sr or 32000 + if target_sr is not None and sampling_rate != target_sr: + data = torch.from_numpy(librosa.core.resample(data.numpy(), orig_sr=sampling_rate, target_sr=target_sr)) + sampling_rate = target_sr + + return data, sampling_rate + +def dynamic_range_compression(x, C=1, clip_val=1e-5): + return np.log(np.clip(x, a_min=clip_val, a_max=None) * C) + +def dynamic_range_decompression(x, C=1): + return np.exp(x) / C + +def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): + return torch.log(torch.clamp(x, min=clip_val) * C) + +def dynamic_range_decompression_torch(x, C=1): + return torch.exp(x) / C + +class STFT(): + def __init__(self, sr=22050, n_mels=80, n_fft=1024, win_size=1024, hop_length=256, fmin=20, fmax=11025, clip_val=1e-5): + self.target_sr = sr + + self.n_mels = n_mels + self.n_fft = n_fft + self.win_size = win_size + self.hop_length = hop_length + self.fmin = fmin + self.fmax = fmax + self.clip_val = clip_val + self.mel_basis = {} + self.hann_window = {} + + def get_mel(self, y, center=False): + sampling_rate = self.target_sr + n_mels = self.n_mels + n_fft = self.n_fft + win_size = self.win_size + hop_length = self.hop_length + fmin = self.fmin + fmax = self.fmax + clip_val = self.clip_val + + if torch.min(y) < -1.: + print('min value is ', torch.min(y)) + if torch.max(y) > 1.: + print('max value is ', torch.max(y)) + + if fmax not in self.mel_basis: + mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=n_mels, fmin=fmin, fmax=fmax) + self.mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device) + self.hann_window[str(y.device)] = torch.hann_window(self.win_size).to(y.device) + + y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_length)/2), int((n_fft-hop_length)/2)), mode='reflect') + y = y.squeeze(1) + + spec = torch.stft(y, n_fft, hop_length=hop_length, win_length=win_size, window=self.hann_window[str(y.device)], + center=center, pad_mode='reflect', normalized=False, onesided=True) + # print(111,spec) + spec = torch.sqrt(spec.pow(2).sum(-1)+(1e-9)) + # print(222,spec) + spec = torch.matmul(self.mel_basis[str(fmax)+'_'+str(y.device)], spec) + # print(333,spec) + spec = dynamic_range_compression_torch(spec, clip_val=clip_val) + # print(444,spec) + return spec + + def __call__(self, audiopath): + audio, sr = load_wav_to_torch(audiopath, target_sr=self.target_sr) + spect = self.get_mel(audio.unsqueeze(0)).squeeze(0) + return spect + +stft = STFT() diff --git 
a/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/utils.py b/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/utils.py
new file mode 100644
index 0000000..84bff02
--- /dev/null
+++ b/AutoCoverTool/ref/so_vits_svc/vdecoder/hifigan/utils.py
@@ -0,0 +1,68 @@
+import glob
+import os
+import matplotlib
+import torch
+from torch.nn.utils import weight_norm
+matplotlib.use("Agg")
+import matplotlib.pylab as plt
+
+
+def plot_spectrogram(spectrogram):
+    fig, ax = plt.subplots(figsize=(10, 2))
+    im = ax.imshow(spectrogram, aspect="auto", origin="lower",
+                   interpolation='none')
+    plt.colorbar(im, ax=ax)
+
+    fig.canvas.draw()
+    plt.close()
+
+    return fig
+
+
+def init_weights(m, mean=0.0, std=0.01):
+    classname = m.__class__.__name__
+    if classname.find("Conv") != -1:
+        m.weight.data.normal_(mean, std)
+
+
+def apply_weight_norm(m):
+    classname = m.__class__.__name__
+    if classname.find("Conv") != -1:
+        weight_norm(m)
+
+
+def get_padding(kernel_size, dilation=1):
+    return int((kernel_size*dilation - dilation)/2)
+
+
+def load_checkpoint(filepath, device):
+    assert os.path.isfile(filepath)
+    print("Loading '{}'".format(filepath))
+    checkpoint_dict = torch.load(filepath, map_location=device)
+    print("Complete.")
+    return checkpoint_dict
+
+
+def save_checkpoint(filepath, obj):
+    print("Saving checkpoint to {}".format(filepath))
+    torch.save(obj, filepath)
+    print("Complete.")
+
+
+def del_old_checkpoints(cp_dir, prefix, n_models=2):
+    pattern = os.path.join(cp_dir, prefix + '????????')
+    cp_list = glob.glob(pattern)  # get checkpoint paths
+    cp_list = sorted(cp_list)  # sort by iteration number
+    if len(cp_list) > n_models:  # if more than n_models models are found
+        for cp in cp_list[:-n_models]:  # delete the oldest models other than the latest n_models
+            open(cp, 'w').close()  # empty file contents
+            os.unlink(cp)  # delete file (move to trash when using Colab)
+
+
+def scan_checkpoint(cp_dir, prefix):
+    pattern = os.path.join(cp_dir, prefix + '????????')
+    cp_list = glob.glob(pattern)
+    if len(cp_list) == 0:
+        return None
+    return sorted(cp_list)[-1]
+
diff --git a/AutoCoverTool/ref/split_dirty_frame/custom_models/mobilenet_v2_custom.py b/AutoCoverTool/ref/split_dirty_frame/custom_models/mobilenet_v2_custom.py
new file mode 100644
index 0000000..57b1227
--- /dev/null
+++ b/AutoCoverTool/ref/split_dirty_frame/custom_models/mobilenet_v2_custom.py
@@ -0,0 +1,142 @@
+"""
+Code copied directly from the library source.
+Reason: mobilenet_v2 only accepts 3-channel image input, which does not meet our
+requirements, so it was copied out here and modified.
+"""
+
+from torch import nn
+
+
+def _make_divisible(v, divisor, min_value=None):
+    """
+    This function is taken from the original tf repo.
+    It ensures that all layers have a channel number that is divisible by 8
+    It can be seen here:
+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+    :param v:
+    :param divisor:
+    :param min_value:
+    :return:
+    """
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    # Make sure that round down does not go down by more than 10%.
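+    # e.g. _make_divisible(19, 16) == 32: plain rounding would give 16, which is
+    # more than 10% below 19, so the guard below adds one extra divisor step;
+    # _make_divisible(30, 8) rounds straight to 32 and the guard is skipped.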
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+class ConvBNReLU(nn.Sequential):
+    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
+        padding = (kernel_size - 1) // 2
+        super(ConvBNReLU, self).__init__(
+            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
+            nn.BatchNorm2d(out_planes),
+            nn.ReLU6(inplace=True)
+        )
+
+
+class InvertedResidual(nn.Module):
+    def __init__(self, inp, oup, stride, expand_ratio):
+        super(InvertedResidual, self).__init__()
+        self.stride = stride
+        assert stride in [1, 2]
+
+        hidden_dim = int(round(inp * expand_ratio))
+        self.use_res_connect = self.stride == 1 and inp == oup
+
+        layers = []
+        if expand_ratio != 1:
+            # pw
+            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
+        layers.extend([
+            # dw
+            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
+            # pw-linear
+            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(oup),
+        ])
+        self.conv = nn.Sequential(*layers)
+
+    def forward(self, x):
+        if self.use_res_connect:
+            return x + self.conv(x)
+        else:
+            return self.conv(x)
+
+
+class MobileNetV2Custom(nn.Module):
+    def __init__(self, num_classes=2, in_channel=1, width_mult=1.0, inverted_residual_setting=None, round_nearest=8):
+        """
+        MobileNet V2 main class
+
+        Args:
+            num_classes (int): Number of classes
+            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
+            inverted_residual_setting: Network structure
+            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
+            Set to 1 to turn off rounding
+        """
+        super(MobileNetV2Custom, self).__init__()
+        block = InvertedResidual
+        input_channel = 32
+        last_channel = 1280
+
+        if inverted_residual_setting is None:
+            inverted_residual_setting = [
+                # t, c, n, s
+                [1, 16, 1, 1],
+                [6, 24, 2, 2],
+                [6, 32, 3, 2],
+                [6, 64, 4, 2],
+                [6, 96, 3, 1],
+                [6, 160, 3, 2],
+                [6, 320, 1, 1],
+            ]
+
+        # only check the first element, assuming user knows t,c,n,s are required
+        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
+            raise ValueError("inverted_residual_setting should be non-empty "
+                             "or a 4-element list, got {}".format(inverted_residual_setting))
+
+        # building first layer
+        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
+        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
+        # modified here: the upstream version hard-codes in_channel=3
+        features = [ConvBNReLU(in_channel, input_channel, stride=2)]
+        # building inverted residual blocks
+        for t, c, n, s in inverted_residual_setting:
+            output_channel = _make_divisible(c * width_mult, round_nearest)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
+                input_channel = output_channel
+        # building last several layers
+        features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
+        # make it nn.Sequential
+        self.features = nn.Sequential(*features)
+
+        # building classifier
+        self.classifier = nn.Sequential(
+            nn.Dropout(0.2),
+            nn.Linear(self.last_channel, num_classes),
+        )
+
+        # weight initialization
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.ones_(m.weight)
+                nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.zeros_(m.bias)
+
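+    # Shape sketch (hypothetical call): with the default in_channel=1, the
+    # dirty-frame models feed 32x80 single-channel MFCC patches, i.e. x of shape
+    # (N, 1, 32, 80); forward() below pools the feature map with mean([2, 3])
+    # and returns (N, num_classes) logits.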
+    def forward(self, x):
+        x = self.features(x)
+        x = x.mean([2, 3])
+        x = self.classifier(x)
+        return x
diff --git a/AutoCoverTool/ref/split_dirty_frame/custom_models/model.py b/AutoCoverTool/ref/split_dirty_frame/custom_models/model.py
new file mode 100644
index 0000000..e2f21d4
--- /dev/null
+++ b/AutoCoverTool/ref/split_dirty_frame/custom_models/model.py
@@ -0,0 +1,80 @@
+from custom_models.mobilenet_v2_custom import MobileNetV2Custom
+import torch
+import torch.nn as nn
+from torchstat import stat
+
+MFCC_LEN = 80
+FRAME_LEN = 32
+
+
+class MobileNetV2Dirty(MobileNetV2Custom):
+
+    def forward(self, x):
+        x = x.view([-1, 1, FRAME_LEN, MFCC_LEN])
+        return super(MobileNetV2Dirty, self).forward(x)
+
+
+class ModelV1(nn.Module):
+    def __init__(self):
+        super(ModelV1, self).__init__()
+        # input shape: 32 * 80
+        self.model = nn.Sequential(
+            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, stride=2, padding=1),
+            nn.BatchNorm2d(16),
+            nn.ReLU(),
+
+            nn.Conv2d(in_channels=16, out_channels=8, kernel_size=3, stride=2, padding=1),
+            nn.BatchNorm2d(8),
+            nn.ReLU(),
+        )
+
+        self.fc = nn.Sequential(
+            nn.Linear(1280, 256),
+            nn.ReLU(),
+            nn.Linear(256, 16),
+            nn.ReLU(),
+            nn.Linear(16, 2)
+        )
+
+    def forward(self, x):
+        x = x.view([-1, 1, FRAME_LEN, MFCC_LEN])
+        x = self.model(x)
+        x = x.view(-1, 1280)
+        return self.fc(x)
+
+
+class ModelV2(nn.Module):
+    def __init__(self):
+        super(ModelV2, self).__init__()
+        # input shape: 11 * 80
+        # convolve along the length-11 time axis, treating the 80 bins as channels
+        self.model = nn.Sequential(
+            nn.Conv1d(in_channels=80, out_channels=8, kernel_size=3, stride=2, padding=1),
+            nn.BatchNorm1d(8),
+            nn.ReLU(),
+        )
+
+        self.fc = nn.Sequential(
+            nn.Linear(48, 16),
+            nn.ReLU(),
+            nn.Linear(16, 2)
+        )
+
+    def forward(self, x):
+        x = x.view([-1, FRAME_LEN, MFCC_LEN])
+        x = x.permute(0, 2, 1)
+        x = self.model(x)
+        x = x.view(-1, 48)
+        return self.fc(x)
+
+
+def get_cur_model():
+    return ModelV1()
+
+
+if __name__ == '__main__':
+    mv = ModelV1()
+    data = torch.rand(32, 80)
+    stat(mv, (1, 32, 80))
+    out = mv.forward(data)
+    print(out.shape)
diff --git a/AutoCoverTool/ref/split_dirty_frame/dataset/dataset.py b/AutoCoverTool/ref/split_dirty_frame/dataset/dataset.py
new file mode 100644
index 0000000..db9fcfd
--- /dev/null
+++ b/AutoCoverTool/ref/split_dirty_frame/dataset/dataset.py
@@ -0,0 +1,220 @@
+"""
+Dataset layout:
+---dataset
+    ---data
+        xxx.wav
+    ---train.txt
+    ---test.txt
+"""
+
+import os
+import glob
+import librosa
+import numpy as np
+import torch.utils.data as data
+
+gs_frame_num = 32
+
+
+def load_file(filename):
+    target_msg = []
+    other_msg = []
+    other_st = 0
+    with open(filename, "r") as f:
+        while True:
+            line = f.readline()
+            if not line:
+                break
+            line_arr = line.strip().split(",")
+            filename = line_arr[0]
+            tp = int(line_arr[1])
+            st = float(line_arr[2])
+            ed = float(line_arr[3])
+            # format: filename, type, start time, end time
+            target_msg.append([filename, tp, st, ed])
+            if st - other_st > 0.3:
+                other_msg.append([filename, 0, other_st, st])
+            other_st = ed
+    target_frames = get_feature_idx(target_msg)
+    other_frames = get_feature_idx(other_msg)
+    return target_frames, other_frames
+
+
+def load_file_v1(filename, feature_dir, predict=False):
+    """
+    32 frames, corresponding to 512 ms.
+    Problem frames:
+    1. the flagged span covers more than 20% of the window
+    2. the flagged span covers less than 20% of the window, but lies entirely inside it
+    Clean frames:
+    1. contain no flagged span at all
+    :return:
+    """
+    target_msg = {}
+    with open(filename, "r") as f:
+        while True:
+            line = f.readline()
+            if not line:
+                break
+            line_arr = line.strip().split(",")
+            filename = line_arr[0]
+            tp = int(line_arr[1])
+            st = float(line_arr[2])
+            ed = float(line_arr[3])
+            # format: filename, type, start time, end time
+            if filename not in target_msg.keys():
+                target_msg[filename] = []
+            target_msg[filename].append([filename, tp, st, ed])
+    # only type-1 data is used; type-2 data is not
+    target_frame_idx = []
+    other_frame_idx = []
+    frame_ms = 0.016
+    for filename, arr in target_msg.items():
+        file_msg = target_msg[filename]
+        abs_filename = os.path.join(feature_dir, filename + ".npy")
+        data = np.load(abs_filename)
+        for i in range(0, len(data) - gs_frame_num):
+            st_tm = i * frame_ms
+            ed_tm = st_tm + gs_frame_num * frame_ms
+            flag = False
+            for msg in file_msg:
+                _, tp, st, ed = msg
+                # the annotation ends before this window starts, keep scanning forward
+                if ed < st_tm:
+                    continue
+                # the annotation starts after this window ends; later ones start even later, stop
+                if st > ed_tm:
+                    break
+                # reaching here means the annotation overlaps the window
+                flag = True
+                # type >= 2 means the annotators were unsure, so skip it
+                if int(tp) >= 2:
+                    continue
+                if ed - st <= 0:
+                    print("{}, params err!\n".format(msg[0]))
+                    exit(-1)
+
+                # there is definitely overlap now, of length min(ed_tm, ed) - max(st_tm, st);
+                # overlap above the rate, or an annotation fully inside the window, marks a problem window
+                inter = min(ed_tm, ed) - max(st_tm, st)
+                rate = 0.2
+                if predict:
+                    rate = 0.5
+                if (inter / (gs_frame_num * frame_ms)) > rate or (st_tm < st < ed_tm and st_tm < ed < ed_tm):
+                    target_frame_idx.append([filename, tp, int(st_tm / frame_ms), int(ed_tm / frame_ms)])
+            # only a window overlapping none of the annotated spans counts as normal
+            if not flag:
+                other_frame_idx.append([filename, 0, int(st_tm / frame_ms), int(ed_tm / frame_ms)])
+    return target_frame_idx, other_frame_idx
+
+
+def file2mfcc(in_file):
+    mfcc, sr = librosa.load(in_file, sr=16000, mono=True)
+    if len(mfcc) < 512:
+        return []
+    # 32 ms window, one frame every 16 ms
+    mfcc = librosa.feature.mfcc(y=mfcc, sr=sr, n_fft=512, hop_length=256, n_mfcc=80)
+    return mfcc.transpose()
+
+
+def file2stft(in_file):
+    audio, sr = librosa.load(in_file, sr=16000, mono=True)
+    if len(audio) < 512:
+        return []
+    mfcc = np.abs(librosa.stft(audio, n_fft=512, hop_length=256))
+    return mfcc.transpose()
+
+
+def dir2mfcc(wav_dir):
+    files = glob.glob(os.path.join(wav_dir, "*wav"))
+    for file in files:
+        mfcc_file = file.replace(".wav", "")
+        mfcc = file2mfcc(file)
+        # mfcc = file2stft(file)
+        # only usable when longer than 1.6 s
+        if len(mfcc) > 100:
+            np.save(mfcc_file, mfcc)
+
+
+def get_feature_idx(target_msg):
+    feature_idx = []
+    frame_ms = 256 / 16000
+    for idx, msg in enumerate(target_msg):
+        # each entry holds: filename, tp, st, ed
+        # convert st and ed to frame numbers
+        st_frame = int(msg[2] / frame_ms)
+        ed_frame = int(msg[3] / frame_ms)
+        # drop type 2 and above
+        if int(msg[1]) >= 2:
+            continue
+        # include both ends, so windows more than half covered are also flagged
+        for frame_idx in range(st_frame, ed_frame):
+            if frame_idx - 5 < 0:
+                continue
+            feature_idx.append([msg[0], msg[1], frame_idx - 5, frame_idx + 6])  # left-inclusive, right-exclusive
+    return feature_idx
+
+
+def construct(msg):
+    frames = []
+    label = []
+    for line in msg:
+        frames.append([line[0], line[2], line[3]])
+        label.append(line[1])
+    return frames, label
+
+
+class CustomDataset(data.Dataset):
+    def __init__(self, root, label_set='train', predict=False):
+        self.work_dir = root
+        self.feature_dir = os.path.join(root, "data")
+        self.predict = predict
+        filename = os.path.join(root, '{}.txt'.format(label_set))
+
+        # target_frames, other_frames = load_file(filename)
+        target_frames, other_frames = load_file_v1(filename, self.feature_dir)
+        print("before, len: {}, {}".format(len(target_frames), len(other_frames)))
+        if len(other_frames) > 3 *
len(target_frames) and not self.predict:
+            np.random.shuffle(other_frames)
+            other_frames = other_frames[:3 * len(target_frames)]
+        print("after, len: {}, {}".format(len(target_frames), len(other_frames)))
+        # build the final sample set
+        target_frames.extend(other_frames)
+        np.random.shuffle(target_frames)
+        self.frames, self.label = construct(target_frames)
+
+    def __len__(self):
+        return len(self.label)
+
+    def __getitem__(self, idx):
+        msg = self.frames[idx]
+        filename = os.path.join(self.feature_dir, str(msg[0]) + ".npy")
+        mfcc = np.load(filename)
+        st_frame = msg[1]
+        ed_frame = msg[2]
+        if len(mfcc[st_frame:ed_frame]) != 32:
+            # print("err: idx={},{},{},{},{}".format(idx, filename, st_frame, ed_frame, len(mfcc)))
+            idx = np.random.randint(0, self.__len__())
+            return self.__getitem__(idx)
+        item = mfcc[st_frame:ed_frame]
+        label = int(self.label[idx] != 0)
+        if self.predict:
+            return item, label, filename, st_frame, ed_frame
+        return item, label
+
+
+if __name__ == '__main__':
+    file2mfcc("")
+    # out = file2stft("/data/rsync/jiang.yang/dataset/dataset_dev/data_wav/4_5629499489839033.wav")
+    # print(out.shape)
+    # dir2mfcc("/data/rsync/jiang.yang/dataset/dataset_dev/data_wav")
+    # load_file_v1("/data/rsync/jianli.yang/AutoCoverTool/ref/split_dirty_frame/tmp/11.txt",
+    #              "/data/rsync/jianli.yang/AutoCoverTool/data/dataset_dev/data")
+
+    # dir2mfcc(
+    #     "/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/AutoCoverTool/resource/dataset_dev/4_wav")
+    # root = "/data/rsync/jianli.yang/AutoCoverTool/data/dataset_dev"
+    # dat = CustomDataset(root)
+    # it, lb = dat.__getitem__(0)
+    # print(it.shape)
+    # print(lb.shape)
diff --git a/AutoCoverTool/ref/split_dirty_frame/readme.txt b/AutoCoverTool/ref/split_dirty_frame/readme.txt
new file mode 100644
index 0000000..c4a56c8
--- /dev/null
+++ b/AutoCoverTool/ref/split_dirty_frame/readme.txt
@@ -0,0 +1,76 @@
+Current dataset: 46 songs for training, 10 for testing
+base_line:
+v1 results:
+    t_loss:0.350482 t_acc:84.79 v_loss:0.520381 v_acc:76.58 time:145.974121 epoch:2
+Experiment: treat label-2 segments as not abnormal:
+    t_loss:0.285704 t_acc:87.83 v_loss:0.662188 v_acc:73.78 time:114.760812 epoch:4
+
+Experiment 3:
+Hypothesis: the model is too complex; on top of experiment 2, reduce complexity to a 2-D conv plus two FC layers
+t_loss:0.352131 t_acc:84.88 v_loss:0.542022 v_acc:78.05 time:86.826910 epoch:5
+Conclusion: improvement; validation accuracy up nearly 4.27%
+
+Experiment 4:
+Hypothesis: the model is too complex; on top of experiment 2, reduce complexity to a 1-D conv plus two FC layers
+t_loss:0.352438 t_acc:84.72 v_loss:0.567134 v_acc:75.52 time:86.549661 epoch:19
+Conclusion: slightly worse than experiment 3, possibly because capacity is now too low
+
+Experiment 5: on top of experiment 3, add 9 more audio files to the training set
+t_loss:0.410002 t_acc:80.69 v_loss:0.584062 v_acc:74.71 time:164.807675 epoch:5
+
+Experiment 5_1: on top of 5, increase model complexity
+t_loss:0.396976 t_acc:81.49 v_loss:0.585027 v_acc:74.58 time:164.734624 epoch:2
+Conclusion: no value
+
+Experiment 5_2: on top of 5, change the data format and modify the model structure
+t_loss:0.357168 t_acc:85.44 v_loss:0.723428 v_acc:64.19 epoch:16
+Conclusion: very poor
+--------------------------------------------------->>>>>>
+Plan: analyse the misclassified data first; fix two bugs in the data
+Experiment 5_2: after the bug fixes
+t_loss:0.335836 t_acc:85.05 v_loss:0.438867 v_acc:83.24 time:176.928082 epoch:1
+Conclusion: clear improvement; keep analysing the data
+Fix obvious problems in the dataset:
+1. rows whose time span is <= 0
+Data fixed in this round:
+19_10414574138721494.wav
+47_1688849864840588.wav
+54_3634463651.wav
+8_10414574140317353.wav
+Experiment 5_3: after an initial pass of dataset fixes
+t_loss:0.245894 t_acc:89.91 v_loss:0.390855 v_acc:85.28 time:155.996837 epoch:3
+
+Experiment 5_3_1: fine-tune the model
+1. switch to the 1-D CNN model
+2. increase the complexity of the 2-D CNN
+
+Experiment 5_3_2: after fixing the 88_ data
+88_10414574138721494
+t_loss:0.297692 t_acc:87.22 v_loss:0.400453 v_acc:85.44 time:148.522071 epoch:1
+Conclusion: no obvious effect; keep fixing
+21_10414574140317353
+t_loss:0.315018 t_acc:86.42 v_loss:0.364222 v_acc:85.79 time:146.168289 epoch:1
+3_6755399374234747
+15_8162774327817435
+t_loss:0.209222 t_acc:91.69 v_loss:0.376367 v_acc:85.87 time:143.195726 epoch:3
+Conclusion: slight gain, in line with expectations
+On the validation set, at whole-song level with every segment participating:
+acc:0.3458628198149156, recall:0.8955528930861936
+
+Experiment 5_3_3: flag a segment as abnormal at 50% overlap instead
+t_loss:0.291052 t_acc:87.80 v_loss:0.360148 v_acc:86.08 time:116.858534 epoch:9
+Conclusion: slight gain, not significant
+Experiment 5_3_4: on top of 2, raise the number of normal samples to 3x the problem samples
+t_loss:0.257707 t_acc:89.17 v_loss:0.309852 v_acc:87.43 time:282.694043 epoch:5
+On the validation set, at whole-song level with every segment participating:
+acc:0.5000699398517275, recall:0.7557074408117249
+
+
+
+
+
+Experiment 5:
+Hypothesis: the current total window is too short; re-partition as follows
+    1. if an annotated span is fully contained by the 500 ms slice, the slice is considered problematic
+    2. if abnormal data covers more than 1/5 of the 500 ms slice, it is also considered abnormal
+    3. only slices containing no abnormal data at all are considered normal
\ No newline at end of file
diff --git a/AutoCoverTool/ref/split_dirty_frame/script/ana_err_log.py b/AutoCoverTool/ref/split_dirty_frame/script/ana_err_log.py
new file mode 100644
index 0000000..33efef1
--- /dev/null
+++ b/AutoCoverTool/ref/split_dirty_frame/script/ana_err_log.py
@@ -0,0 +1,85 @@
+"""
+Inspect the misclassified cases on the validation set
+"""
+import os
+import torch
+import numpy as np
+from tqdm import tqdm
+from torch.utils.data import DataLoader
+
+from models.model import get_cur_model
+from dataset.dataset import load_file_v1, construct, CustomDataset
+
+
+def get_frames_and_labels(root):
+    feature_dir = os.path.join(root, "data")
+    filename = os.path.join(root, 'train.txt')
+    target_frames, other_frames = load_file_v1(filename, feature_dir)
+    print("before, len: {}, {}".format(len(target_frames), len(other_frames)))
+    if len(other_frames) > len(target_frames):
+        np.random.shuffle(other_frames)
+        other_frames = other_frames[:len(target_frames)]
+    print("after, len: {}, {}".format(len(target_frames), len(other_frames)))
+    # build the final sample set
+    target_frames.extend(other_frames)
+    np.random.shuffle(target_frames)
+    return construct(target_frames)
+
+
+def get_one_frame(filename, st_frame, ed_frame, label):
+    mfcc = np.load(filename)
+    if len(mfcc[st_frame:ed_frame]) != 32:
+        return None, None
+    item = torch.tensor([mfcc[st_frame:ed_frame]]).to('cuda')
+    label = int(label != 0)
+    return item, label
+
+
+def val():
+    device = "cuda"
+    model = get_cur_model()
+    # model_path = "output_v5_2/epoch_1_0.8324131096481914.pth"
+    # model_path = "output_v5_2/epoch_10_0.808451900615842.pth"
+    # model_path = "output_v5_3/epoch_3_0.8527927799886299.pth"
+    model_path = "output_v5_3_4/epoch_5_0.874253837407618.pth"
+    params = torch.load(model_path, map_location=torch.device(device))
+    model.load_state_dict(state_dict=params)
+    model.eval()
+    model.to('cuda')
+    out_dict = {
+        0: {
+            0: 0,  # key is the ground-truth label, val is the prediction
+            1: 0
+        },
+        1: {
+            0: 0,
+            1: 0
+        }
+    }
+    predict = CustomDataset("/data/rsync/jianli.yang/AutoCoverTool/data/dataset_dev", "val", True)
+    predict_loader = DataLoader(predict, batch_size=32, shuffle=True, num_workers=8)
+    for images, labels, filename, st_frame, ed_frame in predict_loader:
+        # batch_size = images.size(0)
+        images = images.to(device)
+        labels = labels.to(device)
+        predicts = model(images)
+
+        _, predicts = predicts.max(dim=1)
+        for lb, pe, f, s, e in zip(labels.cpu().numpy(), predicts.cpu().numpy(), filename, st_frame, ed_frame):
+            out_dict[int(lb)][int(pe)] += 1
+            # filename, label, st_frame, ed_frame, true/false - true when label == prediction
+            print(
+                "{},{},{},{},{}".format(f, int(lb), round(int(s) * 0.016, 3),
round(int(e) * 0.016, 3), int(lb == pe))) + + print("---------------->>") + tt = out_dict[1][1] + tf = out_dict[1][0] + ft = out_dict[0][1] + ff = out_dict[0][0] + print("{},{}".format(tt, tf)) + print("{},{}".format(ft, ff)) + print("acc:{}, recall:{}".format(tt / (tt + ft), tt / (tt + tf))) + + +if __name__ == '__main__': + val() diff --git a/AutoCoverTool/ref/split_dirty_frame/script/get_durations.py b/AutoCoverTool/ref/split_dirty_frame/script/get_durations.py new file mode 100644 index 0000000..d570fcb --- /dev/null +++ b/AutoCoverTool/ref/split_dirty_frame/script/get_durations.py @@ -0,0 +1,36 @@ +import os +import glob +import time +import json +import librosa + + +def exec_cmd(cmd): + r = os.popen(cmd) + text = r.read() + r.close() + return text + + +def get_d(audio_path): + cmd = "/usr/local/bin/ffprobe -v quiet -print_format json -show_format -show_streams {}".format(audio_path) + data = exec_cmd(cmd) + data = json.loads(data) + return float(data["format"]["duration"]) + + +def get_duration(): + dirs = glob.glob("/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/me_top500/jianli/*") + for dir in dirs: + st = time.time() + wavs = glob.glob(os.path.join(dir, "*wav")) + a_duration = get_d(wavs[0]) + acc_duration = get_d(os.path.join(dir, "acc.mp3")) + vocal_duration = get_d(os.path.join(dir, "vocal.mp3")) + if a_duration > acc_duration + 5 or a_duration > vocal_duration + 5: + print("ERROR: {}".format(dir)) + print("dir={},sp={}".format(dir, time.time() - st)) + + +if __name__ == '__main__': + get_duration() diff --git a/AutoCoverTool/ref/split_dirty_frame/script/label_format.py b/AutoCoverTool/ref/split_dirty_frame/script/label_format.py new file mode 100644 index 0000000..34ccac8 --- /dev/null +++ b/AutoCoverTool/ref/split_dirty_frame/script/label_format.py @@ -0,0 +1,168 @@ +""" +格式转换 +""" +import os + + +def tm2sec(tm): + """ + 分:秒.xxx 转为 tm + :param tm: + :return: + """ + tm_arr = str(tm).split(":") + return int(int(tm_arr[0]) * 60) + float(tm_arr[1]) + + +def sec2tm(sec): + sec = float(sec) + m = sec // 60 + s = sec - m * 60 + return "{}:{}".format(int(m), round(s, 3)) + + +def custom_label2au_format(in_file): + """ + Name Start Duration Time Format Type Description + 1 0:56.429 0:04.824 decimal Cue + :param in_file: + :return: + """ + + lines = [] + header = False + with open(in_file, "r") as f: + while True: + line = f.readline() + if not line: + break + if header: + header = False + continue + line = line.strip().split(",") + tp = line[1] + st_tm = tm2sec(line[2]) + ed_tm = st_tm + tm2sec(line[3]) + lines.append([tp, sec2tm(st_tm), sec2tm(ed_tm - st_tm)]) + + with open(in_file + "_out.csv", "w") as f: + f.write("Name\tStart\tDuration\tTime Format\tType\n") + for line in lines: + strr = "{}\t{}\t{}\t{}\n".format(line[0], line[1], line[2], "decimal\tCue\t") + f.write(strr) + # f.write("{}\t{}".format("\t".join(line), "decimal\tCue\t\t\n")) + # print("{}\t{}".format("\t".join(line), "decimal Cue\n")) + + +def label2txt(in_file): + lines = [] + header = True + filename = os.path.basename(in_file).replace(".csv", "") + with open(in_file, "r") as f: + while True: + line = f.readline() + if not line: + break + if header: + header = False + continue + line = line.split("\t") + tp = line[0] + st_tm = tm2sec(line[1]) + ed_tm = st_tm + tm2sec(line[2]) + lines.append([filename, tp, str(round(st_tm, 3)), str(round(ed_tm, 3))]) + with open(in_file + "_out.csv", "w") as f: + for line in lines: + print(line) + f.write("{}\n".format(",".join(line))) + + +def 
label12txt(in_file): + lines = [] + with open(in_file, "r") as f: + while True: + line = f.readline() + if not line: + break + line = line.split(",") + if len(line) < 4: + continue + tp = line[1] + st_tm = tm2sec(line[2]) + ed_tm = tm2sec(line[3]) + lines.append([line[0], tp, str(round(st_tm, 3)), str(round(ed_tm, 3))]) + with open(in_file + "_out.csv", "w") as f: + for line in lines: + print(line) + f.write("{}\n".format(",".join(line))) + + +if __name__ == '__main__': + # custom_label2au_format( + # "/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/AutoCoverTool/resource/dataset_dev/4/8/8_10414574140317353.txt") + label2txt( + "/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/AutoCoverTool/resource/dataset_dev/data/out1/15_8162774327817435.csv") + # label12txt( + # "/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/AutoCoverTool/resource/dataset_dev/4/all.csv") + + # arr = [ + # "10_5629499489839033.csv", + # "11_10414574140317353.csv", + # "13_1688849864840588.csv", + # "14_8162774327817435.csv", + # "15_8162774327817435.csv", + # "16_8162774327817435.csv", + # "21_10414574140317353.csv", + # "24_6755399374234747.csv", + # "25_8162774327817435.csv", + # "26_6755399374234747.csv", + # "28_8162774327817435.csv", + # "29_3634463651.csv", + # "2_8162774329368194.csv", + # "30_5910973794723621.csv", + # "31_10414574140317353.csv", + # "32_10414574140317353.csv", + # "33_3634463651.csv", + # "35_3634463651.csv", + # "36_5910973794723621.csv", + # "38_8162774329368194.csv", + # "40_6755399374234747.csv", + # "41_5629499489839033.csv", + # "42_10414574138721494.csv", + # "44_3634463651.csv", + # "48_5629499489839033.csv", + # "49_5910973794723621.csv", + # "4_5629499489839033.csv", + # "50_3634463651.csv", + # "51_1688849864840588.csv", + # "52_6755399374234747.csv", + # "53_10414574140317353.csv", + # "55_10414574138721494.csv", + # "56_1688849864840588.csv", + # "57_5629499489839033.csv", + # "59_3634463651.csv", + # "5_10414574138721494.csv", + # "60_5910973794723621.csv", + # "7_10414574140317353.csv", + # "82_8162774329368194.csv", + # "83_10414574138721494.csv", + # "84_8162774329368194.csv", + # "85_1688849864840588.csv", + # "86_8162774329368194.csv", + # "87_6755399374234747.csv", + # "89_10414574138721494.csv", + # "90_8162774327817435.csv", + # "91_8162774327817435.csv", + # "92_10414574138721494.csv", + # "93_1688849864840588.csv", + # "94_3634463651.csv", + # "96_5629499489839033.csv", + # "97_5910973794723621.csv", + # "99_5910973794723621.csv", + # "9_10414574138721494.csv" + # ] + # # ii = "/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/AutoCoverTool/resource/dataset_dev/11.txt" + # base = "/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/AutoCoverTool/resource/dataset_dev/data" + # for file in arr: + # out_file = os.path.join(base, file) + # label2txt(out_file) diff --git a/AutoCoverTool/ref/split_dirty_frame/script/preprocess.py b/AutoCoverTool/ref/split_dirty_frame/script/preprocess.py new file mode 100644 index 0000000..c6fb08c --- /dev/null +++ b/AutoCoverTool/ref/split_dirty_frame/script/preprocess.py @@ -0,0 +1,418 @@ +""" +1. 降噪 +2. 
拉伸 +""" +import os +import time + +gs_denoise_exe = "/opt/soft/bin/denoise_exe" +gs_draw_volume_exe = "/opt/soft/bin/draw_volume" + + +def process(): + # input_wavs = [ + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105027601574/611752105027601574_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105029951597/611752105029951597_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105029951624/611752105029951624_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105029951624/611752105029951624_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105029951624/611752105029951624_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248982/611752105030248982_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248982/611752105030248982_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248982/611752105030248982_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248988/611752105030248988_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248988/611752105030248988_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248988/611752105030248988_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248992/611752105030248992_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248992/611752105030248992_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248992/611752105030248992_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248994/611752105030248994_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248994/611752105030248994_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248994/611752105030248994_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248995/611752105030248995_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248995/611752105030248995_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248995/611752105030248995_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249000/611752105030249000_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249000/611752105030249000_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249000/611752105030249000_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249001/611752105030249001_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249010/611752105030249010_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249010/611752105030249010_1688849864840588.wav", + # 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249011/611752105030249011_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249011/611752105030249011_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249011/611752105030249011_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249016/611752105030249016_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249016/611752105030249016_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249019/611752105030249019_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249022/611752105030249022_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249022/611752105030249022_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249022/611752105030249022_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249030/611752105030249030_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249031/611752105030249031_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249031/611752105030249031_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249031/611752105030249031_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249032/611752105030249032_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249032/611752105030249032_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249034/611752105030249034_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249034/611752105030249034_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249034/611752105030249034_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249035/611752105030249035_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249035/611752105030249035_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249036/611752105030249036_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249036/611752105030249036_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249036/611752105030249036_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249037/611752105030249037_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249037/611752105030249037_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249037/611752105030249037_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249038/611752105030249038_6755399374234747.wav", + # 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249040/611752105030249040_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249040/611752105030249040_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249041/611752105030249041_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249041/611752105030249041_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249048/611752105030249048_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249049/611752105030249049_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249049/611752105030249049_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249052/611752105030249052_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249052/611752105030249052_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249053/611752105030249053_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249053/611752105030249053_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249053/611752105030249053_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249055/611752105030249055_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249055/611752105030249055_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249055/611752105030249055_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249067/611752105030249067_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249067/611752105030249067_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249067/611752105030249067_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249070/611752105030249070_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249071/611752105030249071_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249073/611752105030249073_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249073/611752105030249073_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249074/611752105030249074_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249074/611752105030249074_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249074/611752105030249074_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249091/611752105030249091_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249091/611752105030249091_8162774327817435.wav", + # 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249091/611752105030249091_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249104/611752105030249104_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249104/611752105030249104_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249105/611752105030249105_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249112/611752105030249112_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249112/611752105030249112_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249112/611752105030249112_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249113/611752105030249113_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249113/611752105030249113_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249118/611752105030249118_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249118/611752105030249118_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249127/611752105030249127_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249127/611752105030249127_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249128/611752105030249128_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249134/611752105030249134_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249134/611752105030249134_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249134/611752105030249134_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249136/611752105030249136_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249136/611752105030249136_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249136/611752105030249136_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249160/611752105030249160_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249173/611752105030249173_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249173/611752105030249173_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249173/611752105030249173_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249174/611752105030249174_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249174/611752105030249174_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249174/611752105030249174_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249175/611752105030249175_3634463651.wav", 
+ # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249175/611752105030249175_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249176/611752105030249176_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249176/611752105030249176_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249176/611752105030249176_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249177/611752105030249177_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249177/611752105030249177_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249177/611752105030249177_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249195/611752105030249195_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249195/611752105030249195_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249206/611752105030249206_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249206/611752105030249206_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249206/611752105030249206_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249213/611752105030249213_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249213/611752105030249213_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249216/611752105030249216_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249216/611752105030249216_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249216/611752105030249216_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249227/611752105030249227_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249227/611752105030249227_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249233/611752105030249233_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249233/611752105030249233_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249233/611752105030249233_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249240/611752105030249240_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249240/611752105030249240_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249243/611752105030249243_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249243/611752105030249243_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249250/611752105030249250_10414574138721494.wav", + # 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249250/611752105030249250_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249250/611752105030249250_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249258/611752105030249258_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249264/611752105030249264_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249267/611752105030249267_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249273/611752105030249273_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249273/611752105030249273_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249273/611752105030249273_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249275/611752105030249275_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249275/611752105030249275_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249278/611752105030249278_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249278/611752105030249278_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249278/611752105030249278_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249280/611752105030249280_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249280/611752105030249280_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249280/611752105030249280_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249281/611752105030249281_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249281/611752105030249281_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249281/611752105030249281_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249282/611752105030249282_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249282/611752105030249282_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249283/611752105030249283_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249284/611752105030249284_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249284/611752105030249284_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249287/611752105030249287_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249287/611752105030249287_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249289/611752105030249289_3634463651.wav", + # 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249289/611752105030249289_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249289/611752105030249289_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249292/611752105030249292_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249292/611752105030249292_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249292/611752105030249292_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249293/611752105030249293_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249293/611752105030249293_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249293/611752105030249293_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249295/611752105030249295_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249295/611752105030249295_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249295/611752105030249295_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249296/611752105030249296_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249302/611752105030249302_5910973794723621.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249302/611752105030249302_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249302/611752105030249302_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250690/611752105030250690_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250690/611752105030250690_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250690/611752105030250690_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250691/611752105030250691_10414574140317353.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250691/611752105030250691_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250691/611752105030250691_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250695/611752105030250695_3634463651.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250695/611752105030250695_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250695/611752105030250695_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250699/611752105030250699_5629499489839033.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250699/611752105030250699_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250699/611752105030250699_8162774329368194.wav", + # 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250702/611752105030250702_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250702/611752105030250702_6755399374234747.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250704/611752105030250704_1688849864840588.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250704/611752105030250704_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250704/611752105030250704_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250711/611752105030250711_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250711/611752105030250711_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250715/611752105030250715_10414574138721494.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250715/611752105030250715_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250715/611752105030250715_8162774329368194.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250717/611752105030250717_8162774327817435.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250717/611752105030250717_8162774329368194.wav" + # ] + + input_wavs = [ + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105028480653/611752105028480653_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105028480653/611752105028480653_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105028480653/611752105028480653_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248965/611752105030248965_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248972/611752105030248972_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248973/611752105030248973_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248973/611752105030248973_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030248974/611752105030248974_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249319/611752105030249319_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249322/611752105030249322_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249322/611752105030249322_3634463651.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249324/611752105030249324_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249324/611752105030249324_5629499489839033.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249324/611752105030249324_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249330/611752105030249330_5910973794723621.wav", + 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249333/611752105030249333_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249333/611752105030249333_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249334/611752105030249334_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249336/611752105030249336_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249337/611752105030249337_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249338/611752105030249338_3634463651.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249338/611752105030249338_5629499489839033.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249338/611752105030249338_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249340/611752105030249340_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249340/611752105030249340_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249342/611752105030249342_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249342/611752105030249342_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249342/611752105030249342_5629499489839033.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249347/611752105030249347_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249347/611752105030249347_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249347/611752105030249347_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249348/611752105030249348_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249352/611752105030249352_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249353/611752105030249353_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249353/611752105030249353_5629499489839033.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249353/611752105030249353_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249354/611752105030249354_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249354/611752105030249354_3634463651.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249355/611752105030249355_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249355/611752105030249355_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249355/611752105030249355_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249356/611752105030249356_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249359/611752105030249359_5910973794723621.wav", + 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249361/611752105030249361_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249361/611752105030249361_3634463651.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249364/611752105030249364_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249364/611752105030249364_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249364/611752105030249364_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249365/611752105030249365_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249365/611752105030249365_3634463651.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249365/611752105030249365_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249366/611752105030249366_5629499489839033.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249366/611752105030249366_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249366/611752105030249366_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249368/611752105030249368_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249368/611752105030249368_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249370/611752105030249370_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249370/611752105030249370_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249371/611752105030249371_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249371/611752105030249371_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249371/611752105030249371_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249372/611752105030249372_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249372/611752105030249372_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249374/611752105030249374_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249374/611752105030249374_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249375/611752105030249375_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249375/611752105030249375_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249377/611752105030249377_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249377/611752105030249377_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249383/611752105030249383_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249383/611752105030249383_6755399374234747.wav", + 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249383/611752105030249383_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249384/611752105030249384_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249385/611752105030249385_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249385/611752105030249385_5629499489839033.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249386/611752105030249386_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249387/611752105030249387_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249387/611752105030249387_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249388/611752105030249388_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249388/611752105030249388_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249388/611752105030249388_3634463651.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249391/611752105030249391_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249391/611752105030249391_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249393/611752105030249393_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249393/611752105030249393_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249394/611752105030249394_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249394/611752105030249394_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249395/611752105030249395_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249395/611752105030249395_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249397/611752105030249397_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249397/611752105030249397_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249398/611752105030249398_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249398/611752105030249398_3634463651.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249401/611752105030249401_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249401/611752105030249401_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249401/611752105030249401_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249402/611752105030249402_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249402/611752105030249402_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249403/611752105030249403_10414574138721494.wav", + 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249403/611752105030249403_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249405/611752105030249405_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249406/611752105030249406_3634463651.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249406/611752105030249406_5629499489839033.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249409/611752105030249409_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249409/611752105030249409_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249410/611752105030249410_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249414/611752105030249414_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249414/611752105030249414_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249414/611752105030249414_5629499489839033.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249417/611752105030249417_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249417/611752105030249417_6755399374234747.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249417/611752105030249417_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249431/611752105030249431_10414574140317353.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249431/611752105030249431_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030249431/611752105030249431_3634463651.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250728/611752105030250728_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250728/611752105030250728_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250730/611752105030250730_5910973794723621.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250730/611752105030250730_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250733/611752105030250733_8162774327817435.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250733/611752105030250733_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250735/611752105030250735_10414574138721494.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250735/611752105030250735_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250735/611752105030250735_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250739/611752105030250739_1688849864840588.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250741/611752105030250741_8162774329368194.wav", + "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250743/611752105030250743_5629499489839033.wav", + 
"/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500/611752105030250743/611752105030250743_5910973794723621.wav" + ] + dst_base_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/user_out_data/me_top500/step2" + for i in range(len(input_wavs)): + # input_wavs[i] = os.path.join("/data/rsync/jianli.yang/AutoCoverTool", input_wavs[i]) + in_f = input_wavs[i] + in_v_f = input_wavs[i] + v_f = "/".join(input_wavs[i].replace("out_data", "inf_users").split("/")[:-1]) + "/vocal.wav" + + st = time.time() + # 降噪 + denoise_path = in_f.replace(".wav", "_d.wav") + cmd = "{} {} {}".format(gs_denoise_exe, in_f, denoise_path) + os.system(cmd) + if not os.path.exists(denoise_path): + print("{} sp={} err 1".format(in_f, time.time() - st)) + continue + # 拉伸 + out_path = in_f.replace(".wav", "_dv.wav") + cmd = "{} {} {} {}".format(gs_draw_volume_exe, denoise_path, v_f, out_path) + os.system(cmd) + if not os.path.exists(out_path): + print("{} sp={} err 2".format(in_f, time.time() - st)) + continue + + # 重采样到44k单声道 + out441_path = in_f.replace(".wav", "_dv441.wav") + cmd = "ffmpeg -i {} -ar 44100 -ac 1 {}".format(out_path, out441_path) + os.system(cmd) + if not os.path.exists(out441_path): + print("{} sp={} err 7".format(in_f, time.time() - st)) + os.unlink(out_path) + continue + + # 拷贝数据到目标位置 + song_id = in_f.split("/")[-2] + dst_dir = os.path.join(dst_base_dir, song_id) + if not os.path.exists(dst_dir): + os.makedirs(dst_dir) + # 拷贝人声和伴奏 + dst_vocal_path = os.path.join(dst_dir, "vocal.mp3") + dst_acc_path = os.path.join(dst_dir, "acc.mp3") + dst_mp3_path = os.path.join(dst_dir, "src.mp3") + if not os.path.exists(dst_vocal_path): + cmd = "ffmpeg -i {} -ab 320k {}".format(v_f, dst_vocal_path) + os.system(cmd) + if not os.path.exists(dst_vocal_path): + print("{} sp={} err 3".format(in_f, time.time() - st)) + continue + + if not os.path.exists(dst_acc_path): + cmd = "ffmpeg -i {} -ab 320k {}".format(v_f.replace("vocal.wav", "acc.wav"), dst_acc_path) + os.system(cmd) + if not os.path.exists(dst_acc_path): + print("{} sp={} err 4".format(in_f, time.time() - st)) + continue + if not os.path.exists(dst_mp3_path): + cmd = "cp {} {}".format(v_f.replace("vocal.wav", "src.mp3"), dst_mp3_path) + os.system(cmd) + if not os.path.exists(dst_mp3_path): + print("{} sp={} err 5".format(in_f, time.time() - st)) + continue + name = out_path.split("/")[-1] + dst_path = os.path.join(dst_dir, name) + cmd = "cp {} {}".format(out_path, dst_path) + os.system(cmd) + if not os.path.exists(dst_path): + print("{} sp={} err 6".format(in_f, time.time() - st)) + continue + print("{} sp={} finish".format(in_f, time.time() - st)) + + +if __name__ == '__main__': + process() diff --git a/AutoCoverTool/ref/split_dirty_frame/script/process_one.py b/AutoCoverTool/ref/split_dirty_frame/script/process_one.py new file mode 100644 index 0000000..5754df4 --- /dev/null +++ b/AutoCoverTool/ref/split_dirty_frame/script/process_one.py @@ -0,0 +1,1842 @@ +""" +处理单个音频文件的操作 +1. 查找到异常的位置 +2. 统计异常的位置 并确定出要替换的位置 +3. 使用原唱进行替换 +""" +import os +import time +import glob +import torch +import madmom +import librosa +import soundfile +import numpy as np + +from custom_models.model import get_cur_model +from dataset.dataset import file2mfcc, gs_frame_num + +gs_err_code_success = 0 +gs_err_code_no_file = 1 +gs_err_code_file_too_short = 2 + + +def construct_power_fragment(points): + fragments = [] + st_frame = -1 + tot_rate = [] + for idx, cur_tp_rate in enumerate(points): + """ + 1. 当前tp==1: + 前面也是1,意味着继续 + 前面不是1,意味着从当前开始 + 2. 
+
+
+def construct_power_fragment(points):
+    fragments = []
+    st_frame = -1
+    tot_rate = []
+    for idx, cur_tp_rate in enumerate(points):
+        """
+        1. current tp == 1:
+            previous frame was 1 too -> the segment continues
+            previous frame was not 1 -> a segment starts here
+        2. current tp == 0:
+            previous frame was 0 -> continue
+            previous frame was not 0 -> the segment can be cut here
+        """
+        tp = int(cur_tp_rate < 0.01)
+        if int(tp) == 1:
+            tot_rate.append(cur_tp_rate)
+
+        # segment starts here
+        if st_frame == -1 and int(tp) == 1:
+            st_frame = idx
+            continue
+        # segment ends here
+        if st_frame != -1 and int(tp) == 0:
+            fragments.append([st_frame, idx - st_frame, sum(tot_rate) / len(tot_rate)])
+            st_frame = -1
+            tot_rate = []
+
+    # merge pass: if the gap between two adjacent segments is under 100 ms (10 frames)
+    # and the mean amplitude inside the gap is below 0.015, merge the two segments
+    idx = 1
+    while idx < len(fragments):
+        last_fragment = fragments[idx - 1]
+        cur_fragment = fragments[idx]
+        cur_duration = cur_fragment[0] - (last_fragment[0] + last_fragment[1])
+        if 10 > cur_duration > 0 and \
+                np.mean(points[last_fragment[0] + last_fragment[1]:last_fragment[0] + last_fragment[
+                    1] + cur_duration]) < 0.015:
+            fragments[idx - 1][1] = cur_fragment[0] + cur_fragment[1] - fragments[idx - 1][0]
+            del fragments[idx]
+            idx -= 1
+        idx += 1
+    return fragments
+
+
+# energy-based segmentation
+def split_vocal2fragment(in_file):
+    """
+    Normalize first, then pick suitable split points by energy
+    :param in_file:
+    :return:
+    """
+    audio, sr = librosa.load(in_file, sr=16000, mono=True)
+    audio = librosa.util.normalize(audio)
+    # 10 ms frames (160 samples at 16 kHz), 10 ms hop: mean absolute amplitude per frame
+    power_arr = []
+    for i in range(0, len(audio) - 1600, 160):
+        power_arr.append(np.sum(np.abs(audio[i:i + 160])) / 160)
+
+    # turn stretches whose frame energy is below 0.01 into silence segments
+    power_arr = construct_power_fragment(power_arr)
+    fragments = []
+    out_file = in_file + "_power.csv"
+    with open(out_file, "w") as f:
+        f.write("Name\tStart\tDuration\tTime Format\tType\n")
+        for idx, line in enumerate(power_arr):
+            start = round(float(line[0]) * 0.01, 3)
+            duration = round(float(line[1]) * 0.01, 3)
+            fragments.append([start, duration])
+            strr = "{}\t{}\t{}\t{}\n".format(str(round(line[2] * 1000, 2)), start, duration, "decimal\tCue\t")
+            f.write(strr)
+    return fragments
+
+
+def get_onsets(in_file):
+    """
+    Get the onset times
+    :param in_file:
+    :return:
+    """
+    proc = madmom.features.OnsetPeakPickingProcessor(fps=100)
+    act = madmom.features.RNNOnsetProcessor()(in_file)
+    times = proc(act)
+    return times
+
+
+def get_downbeats(in_file):
+    """
+    Get the downbeat times
+    :param in_file:
+    :return:
+    """
+    proc = madmom.features.DBNDownBeatTrackingProcessor(beats_per_bar=[3, 4], fps=100)
+    act = madmom.features.RNNDownBeatProcessor()(in_file)
+    res = proc(act)
+    # times = np.array(res[:, 0])
+    # keep only the time points whose beat index == 1 (bar starts)
+    downbeats = []
+    for i in range(0, len(res)):
+        if res[i][1] == 1:
+            downbeats.append(res[i][0])
+    downbeats = np.array(downbeats)
+    bpm = 60 / (np.mean(np.diff(downbeats)) / 4)
+    return downbeats, bpm
+
+
+def split_vocal2fragment_v1(in_file):
+    onsets_times_s = get_onsets(in_file)
+    print("len={}".format(len(onsets_times_s)))
+    with open(in_file + "_onsets.csv", "w") as f:
+        f.write("Name\tStart\tDuration\tTime Format\tType\n")
+        for idx, line in enumerate(onsets_times_s):
+            start = round(float(onsets_times_s[idx]), 3)
+            duration = 0.01
+            strr = "{}\t{}\t{}\t{}\n".format("bb", start, duration, "decimal\tCue\t")
+            f.write(strr)
+
+
+class ReplaceVocalFrame:
+    def __init__(self, model_path):
+        st = time.time()
+        model = get_cur_model()
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        params = torch.load(model_path, map_location=torch.device(device))
+        model.load_state_dict(state_dict=params)
+        model.eval()
+        model.to(device)
+        self.model = model
+        self.device = device
+        print("load model sp={}".format(time.time() - st))
+
+    def get_batch_frames(self, mfcc):
+        # slice into windows of 32 frames
+        predict_mfcc = []
+        for i in range(0, len(mfcc) - gs_frame_num):
+            predict_mfcc.append(mfcc[i:i + gs_frame_num])
+        # batched inference
+        batch_num = 32
+        real_predict_mfcc = []
+        i = 0
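+        # Group the overlapping windows (stride 1 frame) into mini-batches of
+        # batch_num so a full song can be scored without exhausting GPU memory;
+        # the final group may be shorter than batch_num.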
while i < len(predict_mfcc): + real_predict_mfcc.append(predict_mfcc[i:i + batch_num]) + i += batch_num + return real_predict_mfcc + + def construct_fragment(self, vocal_result, slience_fragment): + """ + 构造出连续的段 + :param vocal_result: + :return: + """ + # 静音段不会是异常段 + vocal_result = np.array(vocal_result) + for fragment in slience_fragment: + st_s = int(fragment[0] / 0.016) + ed_s = int((fragment[0] + fragment[1]) / 0.016) + vocal_result[st_s:ed_s] = 0 + + fragments = [] + st_frame = -1 + tot_rate = [] + for idx, cur_tp_rate in enumerate(vocal_result): + """ + 1. 当前tp==1: + 前面也是1,意味着继续 + 前面不是1,意味着从当前开始 + 2. 当前tp==0: + 前面是0,继续 + 前面不是0,意味着可以截断 + """ + tp = int(cur_tp_rate > 0.5) + if int(tp) == 1: + tot_rate.append(cur_tp_rate) + + # 从此处开始 + if st_frame == -1 and int(tp) == 1: + st_frame = idx + continue + # 到此处截断 + if st_frame != -1 and int(tp) == 0: + fragments.append( + [float(st_frame) * 0.016, float(idx - st_frame) * 0.016, sum(tot_rate) / len(tot_rate)]) + st_frame = -1 + tot_rate = [] + + return fragments + + def fragment_filter(self, fragments): + """ + 1. 过滤掉不符合常理的段 + 2. 合并相近的段 + 如果当前段小于64ms,并且,前后相近的64ms内均没有段,则不用该段 + :return: + """ + idx = 0 + while idx < len(fragments): + fragment = fragments[idx] + st_idx = fragment[0] + duration = fragment[1] + ed_idx = fragment[0] + fragment[1] + if duration < 0.064: + # 前面有,则向前面探查 + # 后面有,则向后面探查 + # 两者都有,选择距离最近的 + before_idx = 10000 + after_idx = 10000 + if idx > 0: + before_idx = st_idx - (fragments[idx - 1][0] + fragments[idx - 1][1]) + if idx + 1 < len(fragments): + after_idx = fragments[idx + 1][0] - ed_idx + + # 如果前面比后面小,并且前面相距小于64ms + if before_idx < after_idx and before_idx < 0.064: + fragments[idx - 1][1] = ed_idx - fragments[idx - 1][0] + + # 归属到后面 + if before_idx > after_idx and after_idx < 0.064: + fragments[idx + 1][0] = st_idx + fragments[idx + 1][1] += after_idx + duration + del fragments[idx] + idx -= 1 + idx += 1 + return fragments + + def fragment_filter_by_power(self, fragments, in_file): + """ + 使用能量卡一下分段,从而保证不会带有平滑段 + :param fragments: + :return: + """ + audio, sr = librosa.load(in_file, sr=16000, mono=True) + hop_len = 160 + win_len = 160 * 5 + threshold = 0.015 + i = 0 + while i < len(fragments): + st_s = fragments[i][0] + ed_s = fragments[i][0] + fragments[i][1] + st_pos = int(st_s * sr + 0.5) + ed_pos = int(ed_s * sr + 0.5) + # 对于每个分段从两段向中间卡,10ms帧移,50ms帧长,均值小于15就卡掉 + # 如果本段不够50ms,则评估能量,均值小于15就直接干掉 + # 如果整体能量太低,也会被卡掉 + + if ed_pos - st_pos < win_len: + if np.mean(np.abs(audio[st_pos:ed_pos])) < threshold: + print("Remove:{},{}".format(st_s, ed_s, )) + del fragments[i] + i -= 1 + else: + # 从左边向右边卡 + left_pos = st_pos + for idx in range(st_pos, ed_pos - win_len, hop_len): + cur_win_len = win_len + if idx + win_len > ed_pos: + cur_win_len = ed_pos - idx + if np.mean(np.abs(audio[idx:idx + cur_win_len])) < threshold: + left_pos = idx + continue + break + + right_pos = ed_pos + for idx in range(ed_pos - win_len, st_pos - 1, -hop_len): + cur_win_len = win_len + if idx + win_len > ed_pos: + cur_win_len = ed_pos - idx + if np.mean(np.abs(audio[idx:idx + cur_win_len])) < threshold: + right_pos = idx + continue + break + # 左右的探寻方式不同,如果两者交叉,说明本段能量较小,得干掉 + if right_pos < left_pos: + print("RemoveM:{},{}".format(st_s, ed_s)) + del fragments[i] + i -= 1 + else: + fragments[i][0] = left_pos / sr + fragments[i][1] = (right_pos - left_pos) / sr + i += 1 + + # 检查一遍,是否有重合项 + for i in range(1, len(fragments)): + if fragments[i][1] < 0: + print("ERROR, {} < 0!".format(fragments[i][1])) + exit(-1) + if fragments[i][0] < (fragments[i - 1][0] + 
+    def fragment_filter_by_power(self, fragments, in_file):
+        """
+        Trim the segments by energy so that no smoothed/faded audio is kept.
+        :param fragments:
+        :return:
+        """
+        audio, sr = librosa.load(in_file, sr=16000, mono=True)
+        hop_len = 160
+        win_len = 160 * 5
+        threshold = 0.015
+        i = 0
+        while i < len(fragments):
+            st_s = fragments[i][0]
+            ed_s = fragments[i][0] + fragments[i][1]
+            st_pos = int(st_s * sr + 0.5)
+            ed_pos = int(ed_s * sr + 0.5)
+            # Trim each segment from both ends towards the middle with a 10 ms
+            # hop and a 50 ms window; windows whose mean amplitude falls below
+            # the threshold are cut away.
+            # If the segment is shorter than 50 ms, evaluate its energy and
+            # drop it outright when the mean falls below the threshold.
+            # A segment whose overall energy is too low is dropped as well.
+            if ed_pos - st_pos < win_len:
+                if np.mean(np.abs(audio[st_pos:ed_pos])) < threshold:
+                    print("Remove:{},{}".format(st_s, ed_s))
+                    del fragments[i]
+                    i -= 1
+            else:
+                # Trim from the left towards the right.
+                left_pos = st_pos
+                for idx in range(st_pos, ed_pos - win_len, hop_len):
+                    cur_win_len = win_len
+                    if idx + win_len > ed_pos:
+                        cur_win_len = ed_pos - idx
+                    if np.mean(np.abs(audio[idx:idx + cur_win_len])) < threshold:
+                        left_pos = idx
+                        continue
+                    break
+
+                # Trim from the right towards the left.
+                right_pos = ed_pos
+                for idx in range(ed_pos - win_len, st_pos - 1, -hop_len):
+                    cur_win_len = win_len
+                    if idx + win_len > ed_pos:
+                        cur_win_len = ed_pos - idx
+                    if np.mean(np.abs(audio[idx:idx + cur_win_len])) < threshold:
+                        right_pos = idx
+                        continue
+                    break
+                # The two scans probe differently; if they cross, the whole
+                # segment is low-energy and has to go.
+                if right_pos < left_pos:
+                    print("RemoveM:{},{}".format(st_s, ed_s))
+                    del fragments[i]
+                    i -= 1
+                else:
+                    fragments[i][0] = left_pos / sr
+                    fragments[i][1] = (right_pos - left_pos) / sr
+            i += 1
+
+        # Sanity check: no negative durations and no overlapping segments.
+        for i in range(1, len(fragments)):
+            if fragments[i][1] < 0:
+                print("ERROR, {} < 0!".format(fragments[i][1]))
+                exit(-1)
+            if fragments[i][0] < (fragments[i - 1][0] + fragments[i - 1][1]):
+                print("ERROR!, {} < {}!".format(fragments[i][0], fragments[i - 1][0] + fragments[i - 1][1]))
+                exit(-1)
+        return fragments
+
+    def write_fragments2file(self, fragments, out_file):
+        with open(out_file + "_out.csv", "w") as f:
+            f.write("Name\tStart\tDuration\tTime Format\tType\n")
+            for line in fragments:
+                start = str(round(float(line[0]), 3))
+                duration = str(round(float(line[1]), 3))
+                strr = "{}\t{}\t{}\t{}\n".format(str(round(line[2], 2)), start, duration, "decimal\tCue\t")
+                f.write(strr)
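+    # The file written above is a tab-separated marker list, one "Cue" row per
+    # fragment named with its rate (the layout appears to match an Adobe
+    # Audition marker export, though that is an inference). A single fragment
+    # [1.2, 0.8, 0.91] would come out as:
+    #   Name    Start   Duration    Time Format    Type
+    #   0.91    1.2     0.8         decimal        Cue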
+
+    def get_vocal_frames(self, in_file, silence_fragment):
+        if not os.path.exists(in_file):
+            return gs_err_code_no_file, []
+        st = time.time()
+        mfcc = file2mfcc(in_file)
+        print("{} file2mfcc sp={}".format(in_file, time.time() - st))
+        if len(mfcc) < gs_frame_num:
+            return gs_err_code_file_too_short, []
+
+        st = time.time()
+        real_predict_mfcc = self.get_batch_frames(mfcc)
+        print("{} get_batch_frames sp={}".format(in_file, time.time() - st))
+
+        # Inference
+        st = time.time()
+        vocal_result = []
+        with torch.no_grad():
+            for batch in real_predict_mfcc:
+                result = self.model(torch.tensor(batch).to(self.device))
+                result = result.softmax(dim=1)[:, 1]  # only keep the probability of class 1
+                # _, result = result.max(dim=1)  # 0 means normal, 1 means abnormal
+                vocal_result.extend(list(result.cpu().numpy()))
+        print("{} model sp={}".format(in_file, time.time() - st))
+        # Build continuous segments.
+        st = time.time()
+        fragments = self.construct_fragment(vocal_result, silence_fragment)
+        fragments = self.fragment_filter_by_power(fragments, in_file)
+        fragments = self.fragment_filter(fragments)
+        self.write_fragments2file(fragments, in_file + "_label.csv")
+        print("{} write_fragments2file sp={}".format(in_file, time.time() - st))
+        return gs_err_code_success, fragments
+
+    def replace_logic_rate(self, in_file, frames, fragments):
+        replace_fragment = []
+        for idx, frame in enumerate(frames):
+            start_s = frame[0]
+            duration_s = frame[1]
+            ed_s = start_s + duration_s
+            rate = frame[2]
+            if rate < 0.85:
+                continue
+            # Find the segment that can contain this time span.
+            # "Segment" here means: starting from this span, expand to both
+            # sides until a silence boundary is reached.
+            left_start_s = -1
+            left_start_idx = -1
+            for ii, fragment in enumerate(fragments):
+                f_start_s = fragment[0]
+                if f_start_s > start_s:
+                    left_start_s = fragments[ii - 1][0]
+                    left_start_idx = ii - 1
+                    break
+
+            right_ed_s = -1
+            right_ed_idx = -1
+            for ii in range(len(fragments) - 1, -1, -1):
+                f_ed_s = fragments[ii][0] + fragments[ii][1]
+                if f_ed_s < ed_s and ii + 1 < len(fragments):
+                    right_ed_s = fragments[ii + 1][0] + fragments[ii + 1][1]
+                    right_ed_idx = ii + 1
+                    break
+            max_sec = 3
+            if left_start_s != -1 and right_ed_s != -1:
+                # Repair logic: probe up to 3 s away from the current segment
+                # on each side and pick the longest split segment found.
+                # Probe to the left.
+                left_max_sed_idx = left_start_idx
+                for ii in range(left_start_idx, -1, -1):
+                    if fragments[ii][1] > fragments[left_max_sed_idx][1]:
+                        left_max_sed_idx = ii
+                    if fragments[left_start_idx][0] - fragments[ii][0] >= max_sec:
+                        break
+                # Probe to the right.
+                right_max_sed_idx = right_ed_idx
+                for ii in range(right_ed_idx, len(fragments), 1):
+                    if fragments[ii][1] > fragments[right_max_sed_idx][1]:
+                        right_max_sed_idx = ii
+                    if fragments[ii][0] - fragments[right_ed_idx][0] >= max_sec:
+                        break
+                left_start_s = fragments[left_max_sed_idx][0]
+                right_ed_s = fragments[right_max_sed_idx][0] + fragments[right_max_sed_idx][1]
+                # Store start time, duration, rate, and the lengths of the
+                # silence segments at the left and right ends.
+                replace_fragment.append([left_start_s, right_ed_s - left_start_s, rate,
+                                         fragments[left_max_sed_idx][1],
+                                         fragments[right_max_sed_idx][1]])
+
+        # Merge segments. Two adjacent segments may overlap; in that case merge
+        # them directly. Since the same segment information is used throughout,
+        # two segments can never end up merely close without overlapping.
+        idx = 1
+        while idx < len(replace_fragment):
+            last_fragment = replace_fragment[idx - 1]
+            if last_fragment[0] + last_fragment[1] > replace_fragment[idx][0]:
+                replace_fragment[idx - 1][1] = replace_fragment[idx][0] + replace_fragment[idx][1] - \
+                                               replace_fragment[idx - 1][0]
+                del replace_fragment[idx]
+                idx -= 1
+            idx += 1
+
+        # Drop segments that are too short: less than 1 s once the flanking
+        # silence segments are excluded.
+        audio, sr = librosa.load(in_file, sr=44100, mono=True)
+        replace_tot_sec = 0
+        idx = 0
+        while idx < len(replace_fragment):
+            fragment = replace_fragment[idx]
+            if fragment[1] - (fragment[3] + fragment[4]) < 1:
+                print("TooShort: {},{}".format(fragment[0], fragment[1]))
+                del replace_fragment[idx]
+                continue
+            replace_tot_sec += fragment[1] - (fragment[3] + fragment[4])
+            idx += 1
+        print("{}, final: len={}, tot_sec={}, replace={}, rate={}".format(
+            in_file, len(replace_fragment), len(audio) / sr, replace_tot_sec,
+            replace_tot_sec / (len(audio) / sr)))
+        rate = replace_tot_sec / (len(audio) / sr)
+        return replace_fragment, rate
+
+    def replace_logic(self, in_file, vocal_file, frames, fragments):
+        """
+        Replace the abnormal segments.
+        :param in_file:
+        :param vocal_file:
+        :param frames:
+        :param fragments:
+        :return:
+        """
+        audio, sr = librosa.load(in_file, sr=44100, mono=True)
+        replace_fragment, rate = self.replace_logic_rate(in_file, frames, fragments)
+        # Write the markers to a file.
+        with open(in_file + "_replace.csv", "w") as f:
+            f.write("Name\tStart\tDuration\tTime Format\tType\n")
+            for idx, line in enumerate(replace_fragment):
+                start = str(round(line[0], 3))
+                duration = str(round(line[1], 3))
+                strr = "{}\t{}\t{}\t{}\n".format(str(round(line[2], 3)), start, duration, "decimal\tCue\t")
+                f.write(strr)
+            for idx, line in enumerate(frames):
+                start = str(round(line[0], 3))
+                duration = str(round(line[1], 3))
+                if line[2] < 0.85:
+                    continue
+                strr = "ll_{}\t{}\t{}\t{}\n".format(str(round(line[2], 3)), start, duration, "decimal\tCue\t")
+                f.write(strr)
+
+        # Splice the matching sentences in from the vocal track.
+        vocal, sr = librosa.load(vocal_file, sr=44100, mono=True)
+        fade_len = int(sr * 0.05)
+        for idx, fragment in enumerate(replace_fragment):
+            st_pos = int(fragment[0] * sr + 0.5)
+            ed_pos = st_pos + int(fragment[1] * sr + 0.5)
+            # Replacement uses a linear fade-in/fade-out at the boundaries.
+            for kk in range(0, fade_len):
+                audio[st_pos + kk] = vocal[st_pos + kk] * (kk / fade_len) + audio[st_pos + kk] * (1 - kk / fade_len)
+                audio[ed_pos - kk] = vocal[ed_pos - kk] * (kk / fade_len) + audio[ed_pos - kk] * (1 - kk / fade_len)
+
+            audio[st_pos + fade_len:ed_pos - fade_len] = vocal[st_pos + fade_len:ed_pos - fade_len]
+        soundfile.write(in_file + "_replace.wav", audio, 44100, format="wav")
+
+    def process(self, in_file, in_volume_file, vocal_file):
+        # Get the split segments, in seconds:
+        # [[start, duration], ..., [start, duration]]
+        fragments = split_vocal2fragment(in_file)
+
+        # [[start, duration, rate]], in seconds
+        err, frames = self.get_vocal_frames(in_file, fragments)
+        if err != gs_err_code_success:
+            return err
+
+        # Replacement logic
+        self.replace_logic(in_volume_file, vocal_file, frames, fragments)
+        print("replace ....")
+
+    def get_rate(self, in_file):
+        # Get the split segments, in seconds:
+        # [[start, duration], ..., [start, duration]]
+        fragments = split_vocal2fragment(in_file)
+        # [[start, duration, rate]], in seconds
+        err, frames = self.get_vocal_frames(in_file, fragments)
+        if err != gs_err_code_success:
+            return err
+        replace_fragments, rate = self.replace_logic_rate(in_file, frames, fragments)
+        return rate
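+
+# Minimal usage sketch (illustrative paths, not files shipped with this repo):
+#   re = ReplaceVocalFrame("output_v5_3/epoch_3_0.8527927799886299.pth")
+#   rate = re.get_rate("song_dv.wav")   # fraction of the audio that would be replaced
+#   re.process("song_dv.wav", "song_dv.wav", "vocal.wav")
+# process() writes song_dv.wav_replace.wav next to the input, plus the CSV
+# marker files described above.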
"611752105030249034", + "611752105030249035" + ] + m_path = "output_v5_3/epoch_3_0.8527927799886299.pth" + simple_mixer_path = "/opt/soft/bin/simple_mixer" + input_wavs_base = "/data/rsync/jianli.yang/AutoCoverTool/data/user_out_data/me_top500/out_test/v0" + # for input_wav in input_wavs: + # input_wav = os.path.join(input_wavs_base, input_wav) + # print("cp -r {} ./ \n".format(input_wav)) + # exit(-1) + + re = ReplaceVocalFrame(m_path) + for i in range(0, len(input_wavs)): + wavs = glob.glob(os.path.join(os.path.join(input_wavs_base, input_wavs[i]), "*dv.wav")) + for wav in wavs: + i_v = wav + e_v = wav + v_v = os.path.join( + os.path.join(os.path.join("/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_top500", + input_wavs[i])), "vocal.wav") + st = time.time() + re.process(i_v, e_v, v_v) + + dst_path = e_v + "_replace.wav" + if not os.path.exists(dst_path): + print("ERROR:={}, replace sp={}".format(i_v, time.time() - st)) + continue + + # 先转码 + dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav") + if not os.path.exists(dst_path_442): + cmd = "ffmpeg -i {} -ar 44100 -ac 2 {}".format(dst_path, dst_path_442) + os.system(cmd) + if not os.path.exists(dst_path_442): + print("ERROR:={}, resample sp={}".format(i_v, time.time() - st)) + continue + mix_path = dst_path_442.replace("_replace442.wav", "_replace442_mix.wav") + cmd = "{} {} {} {}".format(simple_mixer_path, dst_path_442, v_v.replace("vocal.wav", "acc.wav"), mix_path) + print("{}".format(cmd)) + os.system(cmd) + if not os.path.exists(mix_path): + print("ERROR:={}, mix sp={}".format(i_v, time.time() - st)) + continue + print("sp={}".format(time.time() - st)) + + +def real_process(): + input_wavs_base = "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500" + input_wavs = [ + "611752105030249022/611752105030249022_3634463651.wav", + "611752105030249038/611752105030249038_6755399374234747.wav", + "611752105030249067/611752105030249067_5910973794723621.wav", + "611752105030249112/611752105030249112_5910973794723621.wav", + "611752105030249173/611752105030249173_3634463651.wav", + "611752105030249216/611752105030249216_1688849864840588.wav", + "611752105030249267/611752105030249267_10414574138721494.wav", + "611752105030249284/611752105030249284_1688849864840588.wav", + "611752105030250690/611752105030250690_5629499489839033.wav" + ] + + effect_wavs_base = "/data/rsync/jianli.yang/AutoCoverTool/data/user_effect_out_data/step1/cyl" + effect_wavs = [ + "611752105030249022_3634463651_human.wav", + "611752105030249038_6755399374234747_human.wav", + "611752105030249067_5910973794723621_human.wav", + "611752105030249112_5910973794723621_human.wav", + "611752105030249173_3634463651_human.wav", + "611752105030249216_1688849864840588_human.wav", + "611752105030249267_10414574138721494_human.wav", + "611752105030249284_1688849864840588_human.wav", + "611752105030250690_5629499489839033_human.wav" + ] + + vocal_base_path = "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_top500" + vocal_wavs = [ + "611752105030249022/vocal.wav", + "611752105030249038/vocal.wav", + "611752105030249067/vocal.wav", + "611752105030249112/vocal.wav", + "611752105030249173/vocal.wav", + "611752105030249216/vocal.wav", + "611752105030249267/vocal.wav", + "611752105030249284/vocal.wav", + "611752105030250690/vocal.wav" + ] + + m_path = "output_v5_3/epoch_3_0.8527927799886299.pth" + simple_mixer_path = "/opt/soft/bin/simple_mixer" + re = ReplaceVocalFrame(m_path) + for i_v, e_v, v_v in zip(input_wavs, effect_wavs, vocal_wavs): + i_v = 
+
+
+def real_process():
+    input_wavs_base = "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/me_top500"
+    input_wavs = [
+        "611752105030249022/611752105030249022_3634463651.wav",
+        "611752105030249038/611752105030249038_6755399374234747.wav",
+        "611752105030249067/611752105030249067_5910973794723621.wav",
+        "611752105030249112/611752105030249112_5910973794723621.wav",
+        "611752105030249173/611752105030249173_3634463651.wav",
+        "611752105030249216/611752105030249216_1688849864840588.wav",
+        "611752105030249267/611752105030249267_10414574138721494.wav",
+        "611752105030249284/611752105030249284_1688849864840588.wav",
+        "611752105030250690/611752105030250690_5629499489839033.wav"
+    ]
+
+    effect_wavs_base = "/data/rsync/jianli.yang/AutoCoverTool/data/user_effect_out_data/step1/cyl"
+    effect_wavs = [
+        "611752105030249022_3634463651_human.wav",
+        "611752105030249038_6755399374234747_human.wav",
+        "611752105030249067_5910973794723621_human.wav",
+        "611752105030249112_5910973794723621_human.wav",
+        "611752105030249173_3634463651_human.wav",
+        "611752105030249216_1688849864840588_human.wav",
+        "611752105030249267_10414574138721494_human.wav",
+        "611752105030249284_1688849864840588_human.wav",
+        "611752105030250690_5629499489839033_human.wav"
+    ]
+
+    vocal_base_path = "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/me_top500"
+    vocal_wavs = [
+        "611752105030249022/vocal.wav",
+        "611752105030249038/vocal.wav",
+        "611752105030249067/vocal.wav",
+        "611752105030249112/vocal.wav",
+        "611752105030249173/vocal.wav",
+        "611752105030249216/vocal.wav",
+        "611752105030249267/vocal.wav",
+        "611752105030249284/vocal.wav",
+        "611752105030250690/vocal.wav"
+    ]
+
+    m_path = "output_v5_3/epoch_3_0.8527927799886299.pth"
+    simple_mixer_path = "/opt/soft/bin/simple_mixer"
+    re = ReplaceVocalFrame(m_path)
+    for i_v, e_v, v_v in zip(input_wavs, effect_wavs, vocal_wavs):
+        i_v = os.path.join(input_wavs_base, i_v)
+        e_v = os.path.join(effect_wavs_base, e_v)
+        v_v = os.path.join(vocal_base_path, v_v)
+        st = time.time()
+        re.process(i_v, e_v, v_v)
+
+        dst_path = e_v + "_replace.wav"
+        if not os.path.exists(dst_path):
+            print("ERROR:={}, replace sp={}".format(i_v, time.time() - st))
+            continue
+
+        # Transcode first.
+        dst_path_442 = dst_path.replace("_replace.wav", "_replace442.wav")
+        if not os.path.exists(dst_path_442):
+            cmd = "ffmpeg -i {} -ar 44100 -ac 2 {}".format(dst_path, dst_path_442)
+            os.system(cmd)
+        if not os.path.exists(dst_path_442):
+            print("ERROR:={}, resample sp={}".format(i_v, time.time() - st))
+            continue
+        mix_path = dst_path_442.replace("_replace442.wav", "_replace442_mix.wav")
+        cmd = "{} {} {} {}".format(simple_mixer_path, dst_path_442, v_v.replace("vocal.wav", "acc.wav"), mix_path)
+        os.system(cmd)
+        if not os.path.exists(mix_path):
+            print("ERROR:={}, mix sp={}".format(i_v, time.time() - st))
+            continue
+        print("sp={}".format(time.time() - st))
+
+
+if __name__ == '__main__':
+    real_process()
+    # process_v0()
+    # test()
+    # split_vocal2fragment_v1("/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/9/9_10414574138721494.wav")
+    # exit(-1)
+    # m_path = "output_v5_3/epoch_3_0.8527927799886299.pth"
+    # re = ReplaceVocalFrame(m_path)
"data/out_data/me_top500/611752105030248988/611752105030248988_10414574138721494.wav", + # "data/out_data/me_top500/611752105030248988/611752105030248988_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248988/611752105030248988_8162774329368194.wav", + # "data/out_data/me_top500/611752105030248992/611752105030248992_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248992/611752105030248992_6755399374234747.wav", + # "data/out_data/me_top500/611752105030248992/611752105030248992_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248994/611752105030248994_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248994/611752105030248994_3634463651.wav", + # "data/out_data/me_top500/611752105030248994/611752105030248994_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248995/611752105030248995_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248995/611752105030248995_6755399374234747.wav", + # "data/out_data/me_top500/611752105030248995/611752105030248995_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249000/611752105030249000_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249000/611752105030249000_3634463651.wav", + # "data/out_data/me_top500/611752105030249000/611752105030249000_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249001/611752105030249001_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249001/611752105030249001_3634463651.wav", + # "data/out_data/me_top500/611752105030249001/611752105030249001_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249007/611752105030249007_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249007/611752105030249007_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249007/611752105030249007_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249009/611752105030249009_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249009/611752105030249009_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249009/611752105030249009_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249010/611752105030249010_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249010/611752105030249010_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249010/611752105030249010_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249011/611752105030249011_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249011/611752105030249011_3634463651.wav", + # "data/out_data/me_top500/611752105030249011/611752105030249011_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249016/611752105030249016_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249016/611752105030249016_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249016/611752105030249016_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249018/611752105030249018_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249018/611752105030249018_3634463651.wav", + # "data/out_data/me_top500/611752105030249018/611752105030249018_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249019/611752105030249019_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249019/611752105030249019_3634463651.wav", + # "data/out_data/me_top500/611752105030249019/611752105030249019_5910973794723621.wav", + # 
"data/out_data/me_top500/611752105030249020/611752105030249020_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249020/611752105030249020_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249020/611752105030249020_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249022/611752105030249022_3634463651.wav", + # "data/out_data/me_top500/611752105030249022/611752105030249022_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249022/611752105030249022_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249025/611752105030249025_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249025/611752105030249025_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249025/611752105030249025_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249030/611752105030249030_3634463651.wav", + # "data/out_data/me_top500/611752105030249030/611752105030249030_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249030/611752105030249030_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249031/611752105030249031_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249031/611752105030249031_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249031/611752105030249031_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249032/611752105030249032_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249032/611752105030249032_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249032/611752105030249032_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249034/611752105030249034_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249034/611752105030249034_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249034/611752105030249034_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249035/611752105030249035_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249035/611752105030249035_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249035/611752105030249035_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249036/611752105030249036_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249036/611752105030249036_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249036/611752105030249036_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249037/611752105030249037_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249037/611752105030249037_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249037/611752105030249037_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249038/611752105030249038_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249038/611752105030249038_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249038/611752105030249038_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249040/611752105030249040_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249040/611752105030249040_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249040/611752105030249040_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249041/611752105030249041_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249041/611752105030249041_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249041/611752105030249041_5910973794723621.wav", + # 
"data/out_data/me_top500/611752105030249048/611752105030249048_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249048/611752105030249048_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249048/611752105030249048_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249049/611752105030249049_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249049/611752105030249049_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249049/611752105030249049_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249052/611752105030249052_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249052/611752105030249052_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249052/611752105030249052_3634463651.wav", + # "data/out_data/me_top500/611752105030249053/611752105030249053_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249053/611752105030249053_3634463651.wav", + # "data/out_data/me_top500/611752105030249053/611752105030249053_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249055/611752105030249055_3634463651.wav", + # "data/out_data/me_top500/611752105030249055/611752105030249055_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249055/611752105030249055_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249057/611752105030249057_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249057/611752105030249057_3634463651.wav", + # "data/out_data/me_top500/611752105030249057/611752105030249057_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249058/611752105030249058_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249058/611752105030249058_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249058/611752105030249058_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249065/611752105030249065_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249065/611752105030249065_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249065/611752105030249065_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249067/611752105030249067_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249067/611752105030249067_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249067/611752105030249067_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249070/611752105030249070_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249070/611752105030249070_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249070/611752105030249070_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249071/611752105030249071_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249071/611752105030249071_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249071/611752105030249071_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249073/611752105030249073_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249073/611752105030249073_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249073/611752105030249073_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249074/611752105030249074_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249074/611752105030249074_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249074/611752105030249074_8162774327817435.wav", + # 
"data/out_data/me_top500/611752105030249077/611752105030249077_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249077/611752105030249077_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249077/611752105030249077_3634463651.wav", + # "data/out_data/me_top500/611752105030249079/611752105030249079_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249079/611752105030249079_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249079/611752105030249079_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249082/611752105030249082_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249082/611752105030249082_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249082/611752105030249082_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249091/611752105030249091_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249091/611752105030249091_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249091/611752105030249091_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249094/611752105030249094_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249094/611752105030249094_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249094/611752105030249094_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249099/611752105030249099_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249099/611752105030249099_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249099/611752105030249099_3634463651.wav", + # "data/out_data/me_top500/611752105030249104/611752105030249104_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249104/611752105030249104_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249104/611752105030249104_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249105/611752105030249105_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249105/611752105030249105_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249105/611752105030249105_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249111/611752105030249111_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249111/611752105030249111_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249111/611752105030249111_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249112/611752105030249112_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249112/611752105030249112_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249112/611752105030249112_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249113/611752105030249113_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249113/611752105030249113_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249113/611752105030249113_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249118/611752105030249118_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249118/611752105030249118_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249118/611752105030249118_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249121/611752105030249121_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249121/611752105030249121_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249121/611752105030249121_8162774327817435.wav", + # 
"data/out_data/me_top500/611752105030249127/611752105030249127_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249127/611752105030249127_3634463651.wav", + # "data/out_data/me_top500/611752105030249127/611752105030249127_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249128/611752105030249128_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249128/611752105030249128_3634463651.wav", + # "data/out_data/me_top500/611752105030249128/611752105030249128_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249133/611752105030249133_3634463651.wav", + # "data/out_data/me_top500/611752105030249133/611752105030249133_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249133/611752105030249133_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249134/611752105030249134_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249134/611752105030249134_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249134/611752105030249134_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249136/611752105030249136_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249136/611752105030249136_3634463651.wav", + # "data/out_data/me_top500/611752105030249136/611752105030249136_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249143/611752105030249143_3634463651.wav", + # "data/out_data/me_top500/611752105030249143/611752105030249143_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249143/611752105030249143_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249148/611752105030249148_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249148/611752105030249148_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249148/611752105030249148_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249153/611752105030249153_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249153/611752105030249153_3634463651.wav", + # "data/out_data/me_top500/611752105030249153/611752105030249153_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249157/611752105030249157_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249157/611752105030249157_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249157/611752105030249157_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249160/611752105030249160_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249160/611752105030249160_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249160/611752105030249160_3634463651.wav", + # "data/out_data/me_top500/611752105030249161/611752105030249161_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249161/611752105030249161_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249161/611752105030249161_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249162/611752105030249162_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249162/611752105030249162_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249162/611752105030249162_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249163/611752105030249163_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249163/611752105030249163_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249163/611752105030249163_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030249171/611752105030249171_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249171/611752105030249171_3634463651.wav", + # "data/out_data/me_top500/611752105030249171/611752105030249171_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249173/611752105030249173_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249173/611752105030249173_3634463651.wav", + # "data/out_data/me_top500/611752105030249173/611752105030249173_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249174/611752105030249174_3634463651.wav", + # "data/out_data/me_top500/611752105030249174/611752105030249174_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249174/611752105030249174_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249175/611752105030249175_3634463651.wav", + # "data/out_data/me_top500/611752105030249175/611752105030249175_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249175/611752105030249175_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249176/611752105030249176_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249176/611752105030249176_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249176/611752105030249176_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249177/611752105030249177_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249177/611752105030249177_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249177/611752105030249177_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249178/611752105030249178_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249178/611752105030249178_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249178/611752105030249178_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249181/611752105030249181_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249181/611752105030249181_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249181/611752105030249181_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249191/611752105030249191_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249191/611752105030249191_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249191/611752105030249191_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249195/611752105030249195_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249195/611752105030249195_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249195/611752105030249195_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249200/611752105030249200_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249200/611752105030249200_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249200/611752105030249200_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249201/611752105030249201_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249201/611752105030249201_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249201/611752105030249201_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249206/611752105030249206_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249206/611752105030249206_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249206/611752105030249206_8162774327817435.wav", + # 
"data/out_data/me_top500/611752105030249209/611752105030249209_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249209/611752105030249209_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249209/611752105030249209_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249211/611752105030249211_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249211/611752105030249211_3634463651.wav", + # "data/out_data/me_top500/611752105030249211/611752105030249211_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249213/611752105030249213_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249213/611752105030249213_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249213/611752105030249213_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249216/611752105030249216_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249216/611752105030249216_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249216/611752105030249216_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249218/611752105030249218_3634463651.wav", + # "data/out_data/me_top500/611752105030249218/611752105030249218_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249218/611752105030249218_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249224/611752105030249224_3634463651.wav", + # "data/out_data/me_top500/611752105030249224/611752105030249224_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249224/611752105030249224_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249227/611752105030249227_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249227/611752105030249227_3634463651.wav", + # "data/out_data/me_top500/611752105030249227/611752105030249227_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249233/611752105030249233_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249233/611752105030249233_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249233/611752105030249233_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249237/611752105030249237_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249237/611752105030249237_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249237/611752105030249237_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249240/611752105030249240_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249240/611752105030249240_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249240/611752105030249240_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249243/611752105030249243_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249243/611752105030249243_3634463651.wav", + # "data/out_data/me_top500/611752105030249243/611752105030249243_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249244/611752105030249244_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249244/611752105030249244_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249244/611752105030249244_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249245/611752105030249245_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249245/611752105030249245_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249245/611752105030249245_8162774327817435.wav", + # 
"data/out_data/me_top500/611752105030249250/611752105030249250_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249250/611752105030249250_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249250/611752105030249250_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249255/611752105030249255_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249255/611752105030249255_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249255/611752105030249255_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249258/611752105030249258_3634463651.wav", + # "data/out_data/me_top500/611752105030249258/611752105030249258_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249258/611752105030249258_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249264/611752105030249264_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249264/611752105030249264_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249264/611752105030249264_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249267/611752105030249267_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249267/611752105030249267_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249267/611752105030249267_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249273/611752105030249273_3634463651.wav", + # "data/out_data/me_top500/611752105030249273/611752105030249273_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249273/611752105030249273_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249275/611752105030249275_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249275/611752105030249275_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249275/611752105030249275_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249278/611752105030249278_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249278/611752105030249278_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249278/611752105030249278_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249280/611752105030249280_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249280/611752105030249280_3634463651.wav", + # "data/out_data/me_top500/611752105030249280/611752105030249280_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249281/611752105030249281_3634463651.wav", + # "data/out_data/me_top500/611752105030249281/611752105030249281_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249281/611752105030249281_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249282/611752105030249282_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249282/611752105030249282_3634463651.wav", + # "data/out_data/me_top500/611752105030249282/611752105030249282_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249283/611752105030249283_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249283/611752105030249283_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249283/611752105030249283_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249284/611752105030249284_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249284/611752105030249284_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249284/611752105030249284_6755399374234747.wav", + # 
"data/out_data/me_top500/611752105030249287/611752105030249287_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249287/611752105030249287_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249287/611752105030249287_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249288/611752105030249288_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249288/611752105030249288_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249288/611752105030249288_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249289/611752105030249289_3634463651.wav", + # "data/out_data/me_top500/611752105030249289/611752105030249289_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249289/611752105030249289_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249292/611752105030249292_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249292/611752105030249292_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249292/611752105030249292_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249293/611752105030249293_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249293/611752105030249293_3634463651.wav", + # "data/out_data/me_top500/611752105030249293/611752105030249293_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249295/611752105030249295_3634463651.wav", + # "data/out_data/me_top500/611752105030249295/611752105030249295_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249295/611752105030249295_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249296/611752105030249296_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249296/611752105030249296_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249296/611752105030249296_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249299/611752105030249299_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249299/611752105030249299_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249299/611752105030249299_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249302/611752105030249302_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249302/611752105030249302_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249302/611752105030249302_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249307/611752105030249307_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249307/611752105030249307_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249307/611752105030249307_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249309/611752105030249309_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249309/611752105030249309_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249309/611752105030249309_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250690/611752105030250690_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250690/611752105030250690_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250690/611752105030250690_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250691/611752105030250691_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250691/611752105030250691_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250691/611752105030250691_5629499489839033.wav", + # 
"data/out_data/me_top500/611752105030250695/611752105030250695_3634463651.wav", + # "data/out_data/me_top500/611752105030250695/611752105030250695_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250695/611752105030250695_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250698/611752105030250698_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250698/611752105030250698_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250698/611752105030250698_5910973794723621.wav", + # "data/out_data/me_top500/611752105030250699/611752105030250699_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250699/611752105030250699_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250699/611752105030250699_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250701/611752105030250701_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250701/611752105030250701_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250701/611752105030250701_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250702/611752105030250702_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250702/611752105030250702_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250702/611752105030250702_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250704/611752105030250704_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250704/611752105030250704_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250704/611752105030250704_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250711/611752105030250711_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250711/611752105030250711_3634463651.wav", + # "data/out_data/me_top500/611752105030250711/611752105030250711_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250715/611752105030250715_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250715/611752105030250715_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250715/611752105030250715_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250716/611752105030250716_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250716/611752105030250716_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250716/611752105030250716_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250717/611752105030250717_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250717/611752105030250717_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250717/611752105030250717_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250718/611752105030250718_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250718/611752105030250718_3634463651.wav", + # "data/out_data/me_top500/611752105030250718/611752105030250718_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250720/611752105030250720_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250720/611752105030250720_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250720/611752105030250720_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250721/611752105030250721_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250721/611752105030250721_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250721/611752105030250721_6755399374234747.wav", + # 
"data/out_data/me_top500/611752105030250725/611752105030250725_5910973794723621.wav", + # "data/out_data/me_top500/611752105030250725/611752105030250725_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250725/611752105030250725_8162774327817435.wav" + # ] + # + # input_wavs = [ + # "data/out_data/me_top500/611752105027601574/611752105027601574_10414574138721494.wav", + # "data/out_data/me_top500/611752105027601574/611752105027601574_10414574140317353.wav", + # "data/out_data/me_top500/611752105027601574/611752105027601574_1688849864840588.wav", + # "data/out_data/me_top500/611752105028392007/611752105028392007_10414574138721494.wav", + # "data/out_data/me_top500/611752105028392007/611752105028392007_10414574140317353.wav", + # "data/out_data/me_top500/611752105028392007/611752105028392007_5910973794723621.wav", + # "data/out_data/me_top500/611752105028480056/611752105028480056_3634463651.wav", + # "data/out_data/me_top500/611752105028480056/611752105028480056_6755399374234747.wav", + # "data/out_data/me_top500/611752105028480056/611752105028480056_8162774327817435.wav", + # "data/out_data/me_top500/611752105028480653/611752105028480653_5910973794723621.wav", + # "data/out_data/me_top500/611752105028480653/611752105028480653_6755399374234747.wav", + # "data/out_data/me_top500/611752105028480653/611752105028480653_8162774329368194.wav", + # "data/out_data/me_top500/611752105029951597/611752105029951597_1688849864840588.wav", + # "data/out_data/me_top500/611752105029951597/611752105029951597_5629499489839033.wav", + # "data/out_data/me_top500/611752105029951597/611752105029951597_8162774329368194.wav", + # "data/out_data/me_top500/611752105029951624/611752105029951624_10414574138721494.wav", + # "data/out_data/me_top500/611752105029951624/611752105029951624_6755399374234747.wav", + # "data/out_data/me_top500/611752105029951624/611752105029951624_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248965/611752105030248965_10414574140317353.wav", + # "data/out_data/me_top500/611752105030248965/611752105030248965_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248965/611752105030248965_5910973794723621.wav", + # "data/out_data/me_top500/611752105030248971/611752105030248971_10414574138721494.wav", + # "data/out_data/me_top500/611752105030248971/611752105030248971_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248971/611752105030248971_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248972/611752105030248972_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248972/611752105030248972_3634463651.wav", + # "data/out_data/me_top500/611752105030248972/611752105030248972_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248973/611752105030248973_10414574138721494.wav", + # "data/out_data/me_top500/611752105030248973/611752105030248973_10414574140317353.wav", + # "data/out_data/me_top500/611752105030248973/611752105030248973_5910973794723621.wav", + # "data/out_data/me_top500/611752105030248974/611752105030248974_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248974/611752105030248974_3634463651.wav", + # "data/out_data/me_top500/611752105030248974/611752105030248974_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248977/611752105030248977_10414574138721494.wav", + # "data/out_data/me_top500/611752105030248977/611752105030248977_3634463651.wav", + # "data/out_data/me_top500/611752105030248977/611752105030248977_6755399374234747.wav", + # 
"data/out_data/me_top500/611752105030248981/611752105030248981_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248981/611752105030248981_5910973794723621.wav", + # "data/out_data/me_top500/611752105030248981/611752105030248981_8162774329368194.wav", + # "data/out_data/me_top500/611752105030248982/611752105030248982_10414574138721494.wav", + # "data/out_data/me_top500/611752105030248982/611752105030248982_10414574140317353.wav", + # "data/out_data/me_top500/611752105030248982/611752105030248982_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248988/611752105030248988_10414574138721494.wav", + # "data/out_data/me_top500/611752105030248988/611752105030248988_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248988/611752105030248988_8162774329368194.wav", + # "data/out_data/me_top500/611752105030248992/611752105030248992_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248992/611752105030248992_6755399374234747.wav", + # "data/out_data/me_top500/611752105030248992/611752105030248992_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248994/611752105030248994_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248994/611752105030248994_3634463651.wav", + # "data/out_data/me_top500/611752105030248994/611752105030248994_8162774327817435.wav", + # "data/out_data/me_top500/611752105030248995/611752105030248995_1688849864840588.wav", + # "data/out_data/me_top500/611752105030248995/611752105030248995_6755399374234747.wav", + # "data/out_data/me_top500/611752105030248995/611752105030248995_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249000/611752105030249000_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249000/611752105030249000_3634463651.wav", + # "data/out_data/me_top500/611752105030249000/611752105030249000_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249001/611752105030249001_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249001/611752105030249001_3634463651.wav", + # "data/out_data/me_top500/611752105030249001/611752105030249001_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249007/611752105030249007_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249007/611752105030249007_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249007/611752105030249007_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249009/611752105030249009_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249009/611752105030249009_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249009/611752105030249009_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249010/611752105030249010_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249010/611752105030249010_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249010/611752105030249010_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249011/611752105030249011_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249011/611752105030249011_3634463651.wav", + # "data/out_data/me_top500/611752105030249011/611752105030249011_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249016/611752105030249016_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249016/611752105030249016_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249016/611752105030249016_8162774327817435.wav", + # 
"data/out_data/me_top500/611752105030249018/611752105030249018_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249018/611752105030249018_3634463651.wav", + # "data/out_data/me_top500/611752105030249018/611752105030249018_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249019/611752105030249019_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249019/611752105030249019_3634463651.wav", + # "data/out_data/me_top500/611752105030249019/611752105030249019_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249020/611752105030249020_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249020/611752105030249020_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249020/611752105030249020_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249022/611752105030249022_3634463651.wav", + # "data/out_data/me_top500/611752105030249022/611752105030249022_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249022/611752105030249022_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249025/611752105030249025_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249025/611752105030249025_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249025/611752105030249025_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249030/611752105030249030_3634463651.wav", + # "data/out_data/me_top500/611752105030249030/611752105030249030_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249030/611752105030249030_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249031/611752105030249031_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249031/611752105030249031_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249031/611752105030249031_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249032/611752105030249032_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249032/611752105030249032_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249032/611752105030249032_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249034/611752105030249034_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249034/611752105030249034_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249034/611752105030249034_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249035/611752105030249035_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249035/611752105030249035_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249035/611752105030249035_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249036/611752105030249036_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249036/611752105030249036_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249036/611752105030249036_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249037/611752105030249037_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249037/611752105030249037_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249037/611752105030249037_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249038/611752105030249038_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249038/611752105030249038_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249038/611752105030249038_6755399374234747.wav", + # 
"data/out_data/me_top500/611752105030249040/611752105030249040_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249040/611752105030249040_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249040/611752105030249040_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249041/611752105030249041_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249041/611752105030249041_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249041/611752105030249041_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249048/611752105030249048_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249048/611752105030249048_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249048/611752105030249048_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249049/611752105030249049_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249049/611752105030249049_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249049/611752105030249049_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249052/611752105030249052_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249052/611752105030249052_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249052/611752105030249052_3634463651.wav", + # "data/out_data/me_top500/611752105030249053/611752105030249053_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249053/611752105030249053_3634463651.wav", + # "data/out_data/me_top500/611752105030249053/611752105030249053_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249055/611752105030249055_3634463651.wav", + # "data/out_data/me_top500/611752105030249055/611752105030249055_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249055/611752105030249055_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249057/611752105030249057_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249057/611752105030249057_3634463651.wav", + # "data/out_data/me_top500/611752105030249057/611752105030249057_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249058/611752105030249058_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249058/611752105030249058_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249058/611752105030249058_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249065/611752105030249065_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249065/611752105030249065_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249065/611752105030249065_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249067/611752105030249067_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249067/611752105030249067_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249067/611752105030249067_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249070/611752105030249070_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249070/611752105030249070_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249070/611752105030249070_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249071/611752105030249071_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249071/611752105030249071_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249071/611752105030249071_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030249073/611752105030249073_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249073/611752105030249073_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249073/611752105030249073_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249074/611752105030249074_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249074/611752105030249074_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249074/611752105030249074_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249077/611752105030249077_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249077/611752105030249077_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249077/611752105030249077_3634463651.wav", + # "data/out_data/me_top500/611752105030249079/611752105030249079_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249079/611752105030249079_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249079/611752105030249079_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249082/611752105030249082_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249082/611752105030249082_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249082/611752105030249082_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249091/611752105030249091_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249091/611752105030249091_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249091/611752105030249091_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249094/611752105030249094_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249094/611752105030249094_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249094/611752105030249094_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249099/611752105030249099_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249099/611752105030249099_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249099/611752105030249099_3634463651.wav", + # "data/out_data/me_top500/611752105030249104/611752105030249104_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249104/611752105030249104_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249104/611752105030249104_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249105/611752105030249105_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249105/611752105030249105_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249105/611752105030249105_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249111/611752105030249111_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249111/611752105030249111_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249111/611752105030249111_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249112/611752105030249112_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249112/611752105030249112_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249112/611752105030249112_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249113/611752105030249113_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249113/611752105030249113_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249113/611752105030249113_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030249118/611752105030249118_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249118/611752105030249118_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249118/611752105030249118_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249121/611752105030249121_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249121/611752105030249121_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249121/611752105030249121_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249127/611752105030249127_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249127/611752105030249127_3634463651.wav", + # "data/out_data/me_top500/611752105030249127/611752105030249127_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249128/611752105030249128_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249128/611752105030249128_3634463651.wav", + # "data/out_data/me_top500/611752105030249128/611752105030249128_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249133/611752105030249133_3634463651.wav", + # "data/out_data/me_top500/611752105030249133/611752105030249133_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249133/611752105030249133_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249134/611752105030249134_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249134/611752105030249134_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249134/611752105030249134_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249136/611752105030249136_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249136/611752105030249136_3634463651.wav", + # "data/out_data/me_top500/611752105030249136/611752105030249136_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249143/611752105030249143_3634463651.wav", + # "data/out_data/me_top500/611752105030249143/611752105030249143_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249143/611752105030249143_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249148/611752105030249148_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249148/611752105030249148_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249148/611752105030249148_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249153/611752105030249153_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249153/611752105030249153_3634463651.wav", + # "data/out_data/me_top500/611752105030249153/611752105030249153_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249157/611752105030249157_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249157/611752105030249157_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249157/611752105030249157_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249160/611752105030249160_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249160/611752105030249160_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249160/611752105030249160_3634463651.wav", + # "data/out_data/me_top500/611752105030249161/611752105030249161_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249161/611752105030249161_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249161/611752105030249161_8162774327817435.wav", + # 
"data/out_data/me_top500/611752105030249162/611752105030249162_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249162/611752105030249162_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249162/611752105030249162_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249163/611752105030249163_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249163/611752105030249163_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249163/611752105030249163_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249171/611752105030249171_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249171/611752105030249171_3634463651.wav", + # "data/out_data/me_top500/611752105030249171/611752105030249171_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249173/611752105030249173_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249173/611752105030249173_3634463651.wav", + # "data/out_data/me_top500/611752105030249173/611752105030249173_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249174/611752105030249174_3634463651.wav", + # "data/out_data/me_top500/611752105030249174/611752105030249174_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249174/611752105030249174_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249175/611752105030249175_3634463651.wav", + # "data/out_data/me_top500/611752105030249175/611752105030249175_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249175/611752105030249175_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249176/611752105030249176_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249176/611752105030249176_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249176/611752105030249176_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249177/611752105030249177_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249177/611752105030249177_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249177/611752105030249177_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249178/611752105030249178_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249178/611752105030249178_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249178/611752105030249178_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249181/611752105030249181_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249181/611752105030249181_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249181/611752105030249181_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249191/611752105030249191_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249191/611752105030249191_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249191/611752105030249191_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249195/611752105030249195_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249195/611752105030249195_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249195/611752105030249195_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249200/611752105030249200_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249200/611752105030249200_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249200/611752105030249200_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030249201/611752105030249201_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249201/611752105030249201_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249201/611752105030249201_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249206/611752105030249206_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249206/611752105030249206_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249206/611752105030249206_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249209/611752105030249209_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249209/611752105030249209_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249209/611752105030249209_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249211/611752105030249211_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249211/611752105030249211_3634463651.wav", + # "data/out_data/me_top500/611752105030249211/611752105030249211_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249213/611752105030249213_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249213/611752105030249213_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249213/611752105030249213_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249216/611752105030249216_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249216/611752105030249216_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249216/611752105030249216_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249218/611752105030249218_3634463651.wav", + # "data/out_data/me_top500/611752105030249218/611752105030249218_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249218/611752105030249218_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249224/611752105030249224_3634463651.wav", + # "data/out_data/me_top500/611752105030249224/611752105030249224_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249224/611752105030249224_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249227/611752105030249227_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249227/611752105030249227_3634463651.wav", + # "data/out_data/me_top500/611752105030249227/611752105030249227_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249233/611752105030249233_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249233/611752105030249233_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249233/611752105030249233_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249237/611752105030249237_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249237/611752105030249237_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249237/611752105030249237_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249240/611752105030249240_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249240/611752105030249240_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249240/611752105030249240_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249243/611752105030249243_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249243/611752105030249243_3634463651.wav", + # "data/out_data/me_top500/611752105030249243/611752105030249243_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030249244/611752105030249244_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249244/611752105030249244_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249244/611752105030249244_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249245/611752105030249245_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249245/611752105030249245_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249245/611752105030249245_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249250/611752105030249250_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249250/611752105030249250_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249250/611752105030249250_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249255/611752105030249255_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249255/611752105030249255_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249255/611752105030249255_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249258/611752105030249258_3634463651.wav", + # "data/out_data/me_top500/611752105030249258/611752105030249258_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249258/611752105030249258_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249264/611752105030249264_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249264/611752105030249264_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249264/611752105030249264_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249267/611752105030249267_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249267/611752105030249267_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249267/611752105030249267_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249273/611752105030249273_3634463651.wav", + # "data/out_data/me_top500/611752105030249273/611752105030249273_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249273/611752105030249273_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249275/611752105030249275_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249275/611752105030249275_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249275/611752105030249275_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249278/611752105030249278_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249278/611752105030249278_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249278/611752105030249278_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249280/611752105030249280_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249280/611752105030249280_3634463651.wav", + # "data/out_data/me_top500/611752105030249280/611752105030249280_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249281/611752105030249281_3634463651.wav", + # "data/out_data/me_top500/611752105030249281/611752105030249281_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249281/611752105030249281_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249282/611752105030249282_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249282/611752105030249282_3634463651.wav", + # "data/out_data/me_top500/611752105030249282/611752105030249282_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030249283/611752105030249283_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249283/611752105030249283_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249283/611752105030249283_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249284/611752105030249284_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249284/611752105030249284_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249284/611752105030249284_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249287/611752105030249287_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249287/611752105030249287_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249287/611752105030249287_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249288/611752105030249288_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249288/611752105030249288_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249288/611752105030249288_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249289/611752105030249289_3634463651.wav", + # "data/out_data/me_top500/611752105030249289/611752105030249289_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249289/611752105030249289_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249292/611752105030249292_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249292/611752105030249292_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249292/611752105030249292_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249293/611752105030249293_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249293/611752105030249293_3634463651.wav", + # "data/out_data/me_top500/611752105030249293/611752105030249293_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249295/611752105030249295_3634463651.wav", + # "data/out_data/me_top500/611752105030249295/611752105030249295_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249295/611752105030249295_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249296/611752105030249296_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249296/611752105030249296_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249296/611752105030249296_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249299/611752105030249299_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249299/611752105030249299_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249299/611752105030249299_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249302/611752105030249302_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249302/611752105030249302_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249302/611752105030249302_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249307/611752105030249307_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249307/611752105030249307_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249307/611752105030249307_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249309/611752105030249309_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249309/611752105030249309_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249309/611752105030249309_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030249318/611752105030249318_3634463651.wav", + # "data/out_data/me_top500/611752105030249318/611752105030249318_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249318/611752105030249318_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249319/611752105030249319_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249319/611752105030249319_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249319/611752105030249319_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249320/611752105030249320_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249320/611752105030249320_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249320/611752105030249320_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249321/611752105030249321_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249321/611752105030249321_3634463651.wav", + # "data/out_data/me_top500/611752105030249321/611752105030249321_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249322/611752105030249322_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249322/611752105030249322_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249322/611752105030249322_3634463651.wav", + # "data/out_data/me_top500/611752105030249323/611752105030249323_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249323/611752105030249323_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249323/611752105030249323_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249324/611752105030249324_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249324/611752105030249324_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249324/611752105030249324_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249325/611752105030249325_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249325/611752105030249325_3634463651.wav", + # "data/out_data/me_top500/611752105030249325/611752105030249325_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249327/611752105030249327_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249327/611752105030249327_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249327/611752105030249327_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249329/611752105030249329_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249329/611752105030249329_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249329/611752105030249329_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249330/611752105030249330_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249330/611752105030249330_3634463651.wav", + # "data/out_data/me_top500/611752105030249330/611752105030249330_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249332/611752105030249332_3634463651.wav", + # "data/out_data/me_top500/611752105030249332/611752105030249332_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249332/611752105030249332_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249333/611752105030249333_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249333/611752105030249333_3634463651.wav", + # "data/out_data/me_top500/611752105030249333/611752105030249333_5910973794723621.wav", + # 
"data/out_data/me_top500/611752105030249334/611752105030249334_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249334/611752105030249334_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249334/611752105030249334_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249336/611752105030249336_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249336/611752105030249336_3634463651.wav", + # "data/out_data/me_top500/611752105030249336/611752105030249336_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249337/611752105030249337_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249337/611752105030249337_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249337/611752105030249337_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249338/611752105030249338_3634463651.wav", + # "data/out_data/me_top500/611752105030249338/611752105030249338_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249338/611752105030249338_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249339/611752105030249339_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249339/611752105030249339_3634463651.wav", + # "data/out_data/me_top500/611752105030249339/611752105030249339_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249340/611752105030249340_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249340/611752105030249340_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249340/611752105030249340_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249341/611752105030249341_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249341/611752105030249341_3634463651.wav", + # "data/out_data/me_top500/611752105030249341/611752105030249341_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249342/611752105030249342_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249342/611752105030249342_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249342/611752105030249342_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249343/611752105030249343_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249343/611752105030249343_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249343/611752105030249343_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249345/611752105030249345_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249345/611752105030249345_3634463651.wav", + # "data/out_data/me_top500/611752105030249345/611752105030249345_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249346/611752105030249346_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249346/611752105030249346_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249346/611752105030249346_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249347/611752105030249347_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249347/611752105030249347_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249347/611752105030249347_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249348/611752105030249348_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249348/611752105030249348_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249348/611752105030249348_8162774327817435.wav", + # 
"data/out_data/me_top500/611752105030249349/611752105030249349_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249349/611752105030249349_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249349/611752105030249349_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249350/611752105030249350_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249350/611752105030249350_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249350/611752105030249350_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249352/611752105030249352_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249352/611752105030249352_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249352/611752105030249352_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249353/611752105030249353_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249353/611752105030249353_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249353/611752105030249353_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249354/611752105030249354_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249354/611752105030249354_3634463651.wav", + # "data/out_data/me_top500/611752105030249354/611752105030249354_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249355/611752105030249355_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249355/611752105030249355_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249355/611752105030249355_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249356/611752105030249356_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249356/611752105030249356_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249356/611752105030249356_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249357/611752105030249357_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249357/611752105030249357_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249357/611752105030249357_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249358/611752105030249358_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249358/611752105030249358_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249358/611752105030249358_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249359/611752105030249359_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249359/611752105030249359_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249359/611752105030249359_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249361/611752105030249361_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249361/611752105030249361_3634463651.wav", + # "data/out_data/me_top500/611752105030249361/611752105030249361_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249362/611752105030249362_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249362/611752105030249362_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249362/611752105030249362_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249363/611752105030249363_3634463651.wav", + # "data/out_data/me_top500/611752105030249363/611752105030249363_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249363/611752105030249363_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030249364/611752105030249364_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249364/611752105030249364_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249364/611752105030249364_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249365/611752105030249365_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249365/611752105030249365_3634463651.wav", + # "data/out_data/me_top500/611752105030249365/611752105030249365_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249366/611752105030249366_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249366/611752105030249366_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249366/611752105030249366_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249367/611752105030249367_3634463651.wav", + # "data/out_data/me_top500/611752105030249367/611752105030249367_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249367/611752105030249367_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249368/611752105030249368_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249368/611752105030249368_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249368/611752105030249368_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249369/611752105030249369_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249369/611752105030249369_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249369/611752105030249369_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249370/611752105030249370_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249370/611752105030249370_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249370/611752105030249370_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249371/611752105030249371_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249371/611752105030249371_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249371/611752105030249371_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249372/611752105030249372_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249372/611752105030249372_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249372/611752105030249372_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249373/611752105030249373_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249373/611752105030249373_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249373/611752105030249373_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249374/611752105030249374_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249374/611752105030249374_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249374/611752105030249374_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249375/611752105030249375_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249375/611752105030249375_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249375/611752105030249375_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249377/611752105030249377_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249377/611752105030249377_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249377/611752105030249377_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030249378/611752105030249378_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249378/611752105030249378_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249378/611752105030249378_3634463651.wav", + # "data/out_data/me_top500/611752105030249379/611752105030249379_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249379/611752105030249379_3634463651.wav", + # "data/out_data/me_top500/611752105030249379/611752105030249379_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249381/611752105030249381_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249381/611752105030249381_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249381/611752105030249381_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249383/611752105030249383_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249383/611752105030249383_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249383/611752105030249383_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249384/611752105030249384_3634463651.wav", + # "data/out_data/me_top500/611752105030249384/611752105030249384_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249384/611752105030249384_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249385/611752105030249385_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249385/611752105030249385_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249385/611752105030249385_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249386/611752105030249386_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249386/611752105030249386_3634463651.wav", + # "data/out_data/me_top500/611752105030249386/611752105030249386_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249387/611752105030249387_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249387/611752105030249387_3634463651.wav", + # "data/out_data/me_top500/611752105030249387/611752105030249387_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249388/611752105030249388_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249388/611752105030249388_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249388/611752105030249388_3634463651.wav", + # "data/out_data/me_top500/611752105030249390/611752105030249390_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249390/611752105030249390_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249390/611752105030249390_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249391/611752105030249391_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249391/611752105030249391_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249391/611752105030249391_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249392/611752105030249392_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249392/611752105030249392_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249392/611752105030249392_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249393/611752105030249393_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249393/611752105030249393_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249393/611752105030249393_8162774327817435.wav", + # 
"data/out_data/me_top500/611752105030249394/611752105030249394_3634463651.wav", + # "data/out_data/me_top500/611752105030249394/611752105030249394_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249394/611752105030249394_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249395/611752105030249395_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249395/611752105030249395_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249395/611752105030249395_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249396/611752105030249396_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249396/611752105030249396_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249396/611752105030249396_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249397/611752105030249397_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249397/611752105030249397_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249397/611752105030249397_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249398/611752105030249398_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249398/611752105030249398_3634463651.wav", + # "data/out_data/me_top500/611752105030249398/611752105030249398_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249399/611752105030249399_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249399/611752105030249399_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249399/611752105030249399_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249401/611752105030249401_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249401/611752105030249401_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249401/611752105030249401_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249402/611752105030249402_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249402/611752105030249402_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249402/611752105030249402_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249403/611752105030249403_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249403/611752105030249403_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249403/611752105030249403_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249404/611752105030249404_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249404/611752105030249404_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249404/611752105030249404_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249405/611752105030249405_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249405/611752105030249405_3634463651.wav", + # "data/out_data/me_top500/611752105030249405/611752105030249405_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249406/611752105030249406_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249406/611752105030249406_3634463651.wav", + # "data/out_data/me_top500/611752105030249406/611752105030249406_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249407/611752105030249407_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249407/611752105030249407_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249407/611752105030249407_8162774327817435.wav", + # 
"data/out_data/me_top500/611752105030249408/611752105030249408_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249408/611752105030249408_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249408/611752105030249408_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249409/611752105030249409_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249409/611752105030249409_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249409/611752105030249409_8162774327817435.wav", + # "data/out_data/me_top500/611752105030249410/611752105030249410_3634463651.wav", + # "data/out_data/me_top500/611752105030249410/611752105030249410_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249410/611752105030249410_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249412/611752105030249412_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249412/611752105030249412_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249412/611752105030249412_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249413/611752105030249413_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249413/611752105030249413_3634463651.wav", + # "data/out_data/me_top500/611752105030249413/611752105030249413_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249414/611752105030249414_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249414/611752105030249414_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249414/611752105030249414_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249415/611752105030249415_3634463651.wav", + # "data/out_data/me_top500/611752105030249415/611752105030249415_5910973794723621.wav", + # "data/out_data/me_top500/611752105030249415/611752105030249415_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249416/611752105030249416_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249416/611752105030249416_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249416/611752105030249416_3634463651.wav", + # "data/out_data/me_top500/611752105030249417/611752105030249417_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249417/611752105030249417_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249417/611752105030249417_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249418/611752105030249418_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249418/611752105030249418_3634463651.wav", + # "data/out_data/me_top500/611752105030249418/611752105030249418_8162774329368194.wav", + # "data/out_data/me_top500/611752105030249419/611752105030249419_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249419/611752105030249419_3634463651.wav", + # "data/out_data/me_top500/611752105030249419/611752105030249419_5629499489839033.wav", + # "data/out_data/me_top500/611752105030249420/611752105030249420_10414574138721494.wav", + # "data/out_data/me_top500/611752105030249420/611752105030249420_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249420/611752105030249420_6755399374234747.wav", + # "data/out_data/me_top500/611752105030249431/611752105030249431_10414574140317353.wav", + # "data/out_data/me_top500/611752105030249431/611752105030249431_1688849864840588.wav", + # "data/out_data/me_top500/611752105030249431/611752105030249431_3634463651.wav", + # 
"data/out_data/me_top500/611752105030250690/611752105030250690_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250690/611752105030250690_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250690/611752105030250690_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250691/611752105030250691_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250691/611752105030250691_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250691/611752105030250691_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250695/611752105030250695_3634463651.wav", + # "data/out_data/me_top500/611752105030250695/611752105030250695_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250695/611752105030250695_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250698/611752105030250698_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250698/611752105030250698_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250698/611752105030250698_5910973794723621.wav", + # "data/out_data/me_top500/611752105030250699/611752105030250699_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250699/611752105030250699_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250699/611752105030250699_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250701/611752105030250701_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250701/611752105030250701_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250701/611752105030250701_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250702/611752105030250702_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250702/611752105030250702_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250702/611752105030250702_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250704/611752105030250704_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250704/611752105030250704_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250704/611752105030250704_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250711/611752105030250711_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250711/611752105030250711_3634463651.wav", + # "data/out_data/me_top500/611752105030250711/611752105030250711_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250715/611752105030250715_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250715/611752105030250715_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250715/611752105030250715_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250716/611752105030250716_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250716/611752105030250716_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250716/611752105030250716_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250717/611752105030250717_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250717/611752105030250717_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250717/611752105030250717_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250718/611752105030250718_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250718/611752105030250718_3634463651.wav", + # "data/out_data/me_top500/611752105030250718/611752105030250718_5629499489839033.wav", + # 
"data/out_data/me_top500/611752105030250720/611752105030250720_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250720/611752105030250720_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250720/611752105030250720_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250721/611752105030250721_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250721/611752105030250721_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250721/611752105030250721_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250725/611752105030250725_5910973794723621.wav", + # "data/out_data/me_top500/611752105030250725/611752105030250725_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250725/611752105030250725_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250726/611752105030250726_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250726/611752105030250726_3634463651.wav", + # "data/out_data/me_top500/611752105030250726/611752105030250726_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250728/611752105030250728_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250728/611752105030250728_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250728/611752105030250728_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250729/611752105030250729_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250729/611752105030250729_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250729/611752105030250729_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250730/611752105030250730_3634463651.wav", + # "data/out_data/me_top500/611752105030250730/611752105030250730_5910973794723621.wav", + # "data/out_data/me_top500/611752105030250730/611752105030250730_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250732/611752105030250732_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250732/611752105030250732_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250732/611752105030250732_3634463651.wav", + # "data/out_data/me_top500/611752105030250733/611752105030250733_3634463651.wav", + # "data/out_data/me_top500/611752105030250733/611752105030250733_8162774327817435.wav", + # "data/out_data/me_top500/611752105030250733/611752105030250733_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250735/611752105030250735_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250735/611752105030250735_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250735/611752105030250735_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250736/611752105030250736_10414574140317353.wav", + # "data/out_data/me_top500/611752105030250736/611752105030250736_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250736/611752105030250736_8162774329368194.wav", + # "data/out_data/me_top500/611752105030250739/611752105030250739_10414574138721494.wav", + # "data/out_data/me_top500/611752105030250739/611752105030250739_1688849864840588.wav", + # "data/out_data/me_top500/611752105030250739/611752105030250739_5910973794723621.wav", + # "data/out_data/me_top500/611752105030250741/611752105030250741_3634463651.wav", + # "data/out_data/me_top500/611752105030250741/611752105030250741_6755399374234747.wav", + # "data/out_data/me_top500/611752105030250741/611752105030250741_8162774329368194.wav", + # 
"data/out_data/me_top500/611752105030250743/611752105030250743_3634463651.wav", + # "data/out_data/me_top500/611752105030250743/611752105030250743_5629499489839033.wav", + # "data/out_data/me_top500/611752105030250743/611752105030250743_5910973794723621.wav" + # ] + # input_wavs_volume = [ + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/90/90_dev.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/91/91_dev.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/92/92_dev.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/93/93_dev.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/94/94_dev.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/96/96_dev.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/97/97_dev.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/99/99_dev.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/9/9_dev.wav", + # ] + # + # vocals = [ + # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/youtube_me_100/90/vocal.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/youtube_me_100/91/vocal.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/youtube_me_100/92/vocal.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/youtube_me_100/93/vocal.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/youtube_me_100/94/vocal.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/youtube_me_100/96/vocal.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/youtube_me_100/97/vocal.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/youtube_me_100/99/vocal.wav", + # "/data/rsync/jianli.yang/AutoCoverTool/data/inf_users/youtube_me_100/9/vocal.wav" + # ] + + # for in_f, in_v_f, v_f in zip(input_wavs, input_wavs_volume, vocals): + # for i in range(len(input_wavs)): + # if input_wavs[i] in old_input_wavs: + # continue + # input_wavs[i] = os.path.join("/data/rsync/jianli.yang/AutoCoverTool", input_wavs[i]) + # in_f = input_wavs[i] + # in_v_f = input_wavs[i] + # v_f = "/".join(input_wavs[i].replace("out_data", "inf_users").split("/")[:-1]) + "/vocal.wav" + # st = time.time() + # re.process(in_f, in_v_f, v_f) + # print("sp={}".format(time.time() - st)) + # split_vocal2fragment("/data/rsync/jianli.yang/AutoCoverTool/data/out_data/youtube_me_100/94/94_3634463651.wav") diff --git a/AutoCoverTool/ref/split_dirty_frame/split_dirty_frame.py b/AutoCoverTool/ref/split_dirty_frame/split_dirty_frame.py new file mode 100644 index 0000000..4ab89f4 --- /dev/null +++ b/AutoCoverTool/ref/split_dirty_frame/split_dirty_frame.py @@ -0,0 +1,87 @@ +""" +切割出有问题的段 +1. 载入音频 +2. 分帧,采样率44100,帧长2048,帧移1024, 23ms一帧 +3. 
Aggregate statistics over every 11 frames as the result for the middle frame, and print each frame's mean and standard deviation
+"""
+import librosa
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+def tm2sec(tm):
+    """
+    Convert "min:sec.xxx" to seconds
+    :param tm:
+    :return:
+    """
+    tm_arr = str(tm).split(":")
+    return int(int(tm_arr[0]) * 60) + float(tm_arr[1])
+
+
+def load_data():
+    label_txt = "/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/AutoCoverTool/resource/test1/test6/top_100/example/example.txt"
+    # filename => [[st, end, label]]
+    msg_dict = {}
+    with open(label_txt) as f:
+        lines = f.readlines()
+        for line in lines:
+            line = line.strip().split(",")
+            if len(line) != 4:
+                continue
+            filename = line[0]
+            tp = line[1]
+            st = tm2sec(line[2])
+            ed = tm2sec(line[3])
+            if filename not in msg_dict.keys():
+                msg_dict[filename] = []
+            msg_dict[filename].append([st, ed, tp])
+    return msg_dict
+
+
+def get_feature(path):
+    data, sr = librosa.load(path, sr=44100, mono=True)
+    stft_data = np.abs(librosa.stft(data, win_length=2048, hop_length=1024))
+    return stft_data.transpose()
+
+
+def process(path):
+    msg_dict = load_data()
+    msg = msg_dict["611752105020942848_10414574140317353"]
+    data = get_feature(path)
+    frame_ms = 1024 / 44100  # hop duration in seconds, despite the *_ms name
+    st_ms = frame_ms * 5
+
+    normal_data = []
+    color_data = []
+    for i in range(5, len(data) - 6):
+        cur_data = data[i - 5:i + 6]
+        tm = round(st_ms, 2)
+        mean = round(np.mean(np.mean(cur_data, axis=1)), 4)
+        std = round(np.mean(np.std(cur_data, axis=1)), 4)
+        power = np.sum(cur_data)
+        st_ms += frame_ms
+        color = 'black'
+        for idx in range(len(msg)):
+            st = msg[idx][0]
+            ed = msg[idx][1]
+            # inside a labeled interval
+            if st <= st_ms <= ed:
+                color = "red"
+                break
+            # intervals are sorted: once an interval starts after the current time, searching further is pointless
+            if st > st_ms:
+                break
+
+        normal_data.append([float(tm), float(mean), float(std), float(power)])
+        color_data.append(color)
+    normal_data = np.array(normal_data)
+    color_data = np.array(color_data)
+    print(normal_data.shape)
+    plt.scatter(normal_data[::2, 0], normal_data[::2, 3], c=color_data[::2])
+    plt.show()
+
+
+if __name__ == '__main__':
+    pp = "/Users/yangjianli/starmaker-work/research/tmp_code/SVC方案调研/prod/AutoCoverTool/resource/test1/test6/top_100/example/611752105020942848/611752105020942848_10414574140317353.wav"
+    process(pp)
diff --git a/AutoCoverTool/ref/split_dirty_frame/train.py b/AutoCoverTool/ref/split_dirty_frame/train.py
new file mode 100644
index 0000000..582ae81
--- /dev/null
+++ b/AutoCoverTool/ref/split_dirty_frame/train.py
@@ -0,0 +1,142 @@
+from dataset.dataset import *
+from models.model import *
+
+import torch.nn as nn
+import torch
+import torch.nn.functional as functional
+from tqdm import tqdm
+import os
+import sys
+from torch.optim.lr_scheduler import MultiStepLR
+import time
+from torch.utils.data import DataLoader
+import torch.utils.data as data
+
+# Common configuration
+BatchSize = 32
+ThreadNum = 8
+# Training hyperparameters
+MaxEpoch = 200
+LR = 1e-3
+Momentum = 0
+WeightDecay = 0
+
+Milestones = [20, 30, 50, 100]
+Gamma = 0.1
+
+
+def get_dataloader(root):
+    train_set = CustomDataset(root, "train")
+    train_loader = DataLoader(train_set, batch_size=BatchSize, shuffle=True, num_workers=ThreadNum)
+
+    val_set = CustomDataset(root, "val")
+    val_loader = DataLoader(val_set, batch_size=BatchSize, shuffle=False, num_workers=ThreadNum)
+
+    return train_loader, val_loader
+
+
+def train_one_epoch(model, device, loader, optimizer, criterion):
+    model.train()
+
+    total_num = 0
+    total_loss = 0
+    correct = 0
+
+    for images, labels in tqdm(loader):
+        batch_size = images.size(0)
+        images = images.to(device)
+        labels = labels.to(device)
+
+        predicts = model(images)
+
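+        # Standard supervised step: the forward pass above produced the logits;
+        # zero the stale gradients, compute the loss, backpropagate, and update.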
+        optimizer.zero_grad()
+        loss = criterion(predicts, labels)
+        loss.backward()
+        optimizer.step()
+
+        total_num += batch_size
+        total_loss += loss.item() * batch_size
+
+        _, predicts = predicts.max(dim=1)
+        correct += predicts.eq(labels).sum().item()
+
+    if total_num != 0:
+        total_loss = total_loss / total_num
+        correct = correct / total_num
+
+    return total_loss, correct
+
+
+def val_one_epoch(model, device, loader, criterion):
+    model.eval()
+
+    total_num = 0
+    total_loss = 0
+    correct = 0
+
+    with torch.no_grad():
+        for images, labels in loader:
+            batch_size = images.size(0)
+            images = images.to(device)
+            labels = labels.to(device)
+            predicts = model(images)
+
+            loss = criterion(predicts, labels)
+
+            total_num += batch_size
+            total_loss += loss.item() * batch_size
+
+            _, predicts = predicts.max(dim=1)
+            correct += predicts.eq(labels).sum().item()
+
+    if total_num != 0:
+        total_loss = total_loss / total_num
+        correct = correct / total_num
+
+    return total_loss, correct
+
+
+def train():
+    set_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/dataset_dev"
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    model = get_cur_model()
+    model.to(device)
+
+    # Checkpoint directory
+    save_directory = "output_v5_3_4"
+    if not os.path.exists(save_directory):
+        os.makedirs(save_directory)
+
+    optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=Momentum, weight_decay=WeightDecay)
+    # optimizer = torch.optim.Adam(model.parameters(), lr=LR)
+    criterion = nn.CrossEntropyLoss()
+    scheduler = MultiStepLR(optimizer, Milestones, Gamma)
+
+    # Data loaders
+    train_loader, val_loader = get_dataloader(set_dir)
+
+    max_acc = 0
+    for i in range(MaxEpoch):
+        start = time.time()
+        t_loss, t_acc = train_one_epoch(model, device, train_loader, optimizer, criterion)
+        v_loss, v_acc = val_one_epoch(model, device, val_loader, criterion)
+        end = time.time()
+
+        scheduler.step(i)
+
+        msg = 't_loss:%f\tt_acc:%.2f' % (t_loss, t_acc * 100)
+        msg += '\tv_loss:%f\tv_acc:%.2f' % (v_loss, v_acc * 100)
+        msg += '\ttime:%f\tepoch:%d' % (end - start, i)
+        print(msg)
+
+        params = model.state_dict()
+        save_path = os.path.join(save_directory, 'epoch_' + str(i) + '_' + str(v_acc) + '.pth')
+        torch.save(params, save_path)
+
+        max_acc = max(max_acc, v_acc)
+
+    print('best val acc:', max_acc)
+
+
+if __name__ == '__main__':
+    train()
diff --git a/AutoCoverTool/ref/tools/mixer/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/CMakeLists.txt
new file mode 100644
index 0000000..1be8553
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/CMakeLists.txt
@@ -0,0 +1,116 @@
+cmake_minimum_required(VERSION 2.8)
+project(mixer)
+
+set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) # directory the built libs are written to
+set(CMAKE_CXX_STANDARD 11)
+
+include_directories(alimter/inc)
+include_directories(waves/inc)
+include_directories(ebur128/inc)
+include_directories(audio_mixer/inc)
+include_directories(audio_effects_lib/inc audio_effects_lib/example)
+include_directories(denoise/webrtc/include)
+add_subdirectory(denoise)
+
+# Pull in audio_effects_lib
+include_directories(audio_effects_lib)
+include_directories(audio_effects_lib/src)
+include_directories(audio_effects_lib/inc)
+include_directories(audio_effects_lib/common)
+include_directories(audio_effects_lib/ref)
+include_directories(audio_effects_lib/ref/al_reverb/inc)
+include_directories(audio_effects_lib/ref/al_reverb/src)
+include_directories(audio_effects_lib/ref/autotune/inc)
+include_directories(audio_effects_lib/ref/autotune/src)
+include_directories(audio_effects_lib/ref/iir_eq/inc)
+include_directories(audio_effects_lib/ref/iir_eq/src) +include_directories(audio_effects_lib/ref/phonograph/inc) +include_directories(audio_effects_lib/ref/phonograph/src) +include_directories(audio_effects_lib/ref/reverb/inc) +include_directories(audio_effects_lib/ref/reverb/src) +include_directories(audio_effects_lib/ref/saudio_effects/inc) +include_directories(audio_effects_lib/ref/saudio_effects/src) +include_directories(audio_effects_lib/ref/slow_flanging/inc) +include_directories(audio_effects_lib/ref/slow_flanging/src) +include_directories(audio_effects_lib/ref/tone_shift/inc) +include_directories(audio_effects_lib/ref/tone_shift/src) +include_directories(audio_effects_lib/ref/common) + +include_directories(audio_effects_lib/ref/al_reverb/src/biquad_filters) +include_directories(audio_effects_lib/ref/al_reverb/src/fast_delay) +include_directories(audio_effects_lib/ref/al_reverb/src/filter) +include_directories(audio_effects_lib/ref/al_reverb/src/AlReverbApi.cpp) +include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb) +include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_biquad) +include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_common) +include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_early_reflection) +include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_echo) +include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_late_allpass) +include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_late_lowpass) +include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_late_reverb) +include_directories(audio_effects_lib/ref/al_reverb/src/al_reverb_modulation) +include_directories(audio_effects_lib/ref/iir_eq/src/audacious_arma) +include_directories(audio_effects_lib/ref/iir_eq/src/audacious_eq) +include_directories(audio_effects_lib/ref/saudio_effects/src/all_plat audio_effects_lib/ref/saudio_effects/src/audio_effect audio_effects_lib/ref/saudio_effects/src/biquad + audio_effects_lib/ref/saudio_effects/src/buffer audio_effects_lib/ref/saudio_effects/src/damper audio_effects_lib/ref/saudio_effects/src/delay audio_effects_lib/ref/saudio_effects/src/delayi audio_effects_lib/ref/saudio_effects/src/envelope_follower + audio_effects_lib/ref/saudio_effects/src/equalizer audio_effects_lib/ref/saudio_effects/src/reverb audio_effects_lib/ref/saudio_effects/src/simple_delay_effect audio_effects_lib/ref/saudio_effects/src/simple_reverb_effect) +include_directories(audio_effects_lib/ref/tone_shift/src/aa_filter) +include_directories(audio_effects_lib/ref/tone_shift/src/bpm_detect) +include_directories(audio_effects_lib/ref/tone_shift/src/cpu_detect) +include_directories(audio_effects_lib/ref/tone_shift/src/fifo_sample_buffer) +include_directories(audio_effects_lib/ref/tone_shift/src/fir_filter) +include_directories(audio_effects_lib/ref/tone_shift/src/peak_finder) +include_directories(audio_effects_lib/ref/tone_shift/src/rate_transposer) +include_directories(audio_effects_lib/ref/tone_shift/src/sound_touch) +include_directories(audio_effects_lib/ref/tone_shift/src/td_stretch) +include_directories(audio_effects_lib/ref/supersound/inc) +include_directories(audio_effects_lib/ref/supersound/src) +include_directories(audio_effects_lib/ref/supersound/src/common) +include_directories(audio_effects_lib/ref/supersound/src/impulse) +include_directories(audio_effects_lib/ref/supersound/ref) +include_directories(audio_effects_lib/ref/supersound/ref/kiss_fft) + +AUX_SOURCE_DIRECTORY(audio_effects_lib/common 
AE_SRC_COMMON_DIR)
+file(GLOB_RECURSE AE_CPP_SRC_DIR audio_effects_lib/src/*cpp)
+file(GLOB_RECURSE AE_CPP_REF_DIR audio_effects_lib/ref/*cpp)
+file(GLOB_RECURSE AE_C_REF_DIR audio_effects_lib/ref/*c)
+
+include_directories(audio_effects_lib/ref/waves/inc)
+list(REMOVE_ITEM AE_CPP_REF_DIR "${CMAKE_CURRENT_SOURCE_DIR}/audio_effects_lib/ref/audio_resample/src/FfmpegResampler.cpp")
+
+
+AUX_SOURCE_DIRECTORY(alimter/src DIR_ALIMTER_SRCS)
+AUX_SOURCE_DIRECTORY(waves/src DIR_WAVES_SRCS)
+AUX_SOURCE_DIRECTORY(ebur128/src DIR_EBUR128_SRCS)
+AUX_SOURCE_DIRECTORY(audio_mixer/src DIR_AUDIO_MIXER_SRCS)
+
+#add_executable(mixer main.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS}
+#        ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR}
+#        audio_effects_lib/example/ae_server/CAeServer.cpp)
+#target_link_libraries(mixer ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread)
+
+
+# Volume stretching
+add_executable(draw_volume draw_volume.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS}
+        ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR}
+        audio_effects_lib/example/ae_server/CAeServer.cpp)
+target_link_libraries(draw_volume ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread)
+
+# Denoising
+add_executable(denoise_exe denoise.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS}
+        ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR}
+        audio_effects_lib/example/ae_server/CAeServer.cpp)
+target_link_libraries(denoise_exe ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread)
+
+
+# Simple mixing
+add_executable(simple_mixer simple_mixer.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS}
+        ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR}
+        audio_effects_lib/example/ae_server/CAeServer.cpp)
+target_link_libraries(simple_mixer ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread)
+
+# Effects
+add_executable(im_effect_exe im_effect.cpp ${DIR_ALIMTER_SRCS} ${DIR_WAVES_SRCS} ${DIR_EBUR128_SRCS} ${DIR_AUDIO_MIXER_SRCS}
+        ${AE_CPP_SRC_DIR} ${AE_CPP_REF_DIR} ${AE_C_REF_DIR} ${AE_SRC_COMMON_DIR}
+        audio_effects_lib/example/ae_server/CAeServer.cpp)
+target_link_libraries(im_effect_exe ${LIBRARY_OUTPUT_PATH}/libwebrtc.a -lpthread)
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/alimter/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/alimter/CMakeLists.txt
new file mode 100644
index 0000000..9748c4d
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/alimter/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_ALIMTER_SRCS)
+add_library(alimiter ${DIR_ALIMTER_SRCS})
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/alimter/inc/alimiter.h b/AutoCoverTool/ref/tools/mixer/alimter/inc/alimiter.h
new file mode 100755
index 0000000..8022d39
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/alimter/inc/alimiter.h
@@ -0,0 +1,99 @@
+
+/***************************************************************************
+* email : yijiangyang@tencent.com *
+***************************************************************************/
+
+//+ ----------------------------------------------------+
+//+                    _oo0oo_                          +
+//+                   o8888888o                         +
+//+                   88" . "88                         +
+//+                   (| -_- |)                         +
+//+                   0\  =  /0                         +
+//+                 ___/`---'\___                       +
+//+               .' \\|     |// '.                     +
+//+              / \\|||  :  |||// \                    +
+//+             / _||||| -:- |||||- \                   +
+//+            |   | \\\  -  /// |   |                  +
+//+            | \_|  ''\---/''  |_/ |                  +
+//+            \  .-\__  '-'  ___/-. /                  +
+//+          ___'. .'  /--.--\  `. .'___                +
+//+       ."" '<  `.___\_<|>_/___.' >' "".             +
+//+      | | :  `- \`.;`\ _ /`;.`/ - ` : | |           +
+//+      \  \ `_.   \_ __\ /__ _/   .-` /  /           +
+//+  =====`-.____`.___ \_____/___.-`___.-'=====        +
+//+                    `=---='                         +
+//+                                                    +
+//+                                                    +
+//+      ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~   +
+//+                                                    +
+//+          Buddha bless us: no bugs, ever            +
+//+ ----------------------------------------------------+
+
+// Port of the FFmpeg limiter; it is friendly to the spectrum but limits quite aggressively
+
+#ifndef __ALIMITER_H__
+#define __ALIMITER_H__
+
+#include <stdint.h>
+#define ERROR_SUPERSOUND_SUCCESS 0
+#define ERROR_SUPERSOUND_PARAM -1
+#define ERROR_SUPERSOUND_MEMORY -2
+typedef struct AudioLimiterContext
+{
+    float limit;
+    float attack;
+    float release;
+    float att;
+    float level_in;
+    float level_out;
+    int32_t auto_release;
+    int32_t auto_level;
+    float asc;
+    int32_t asc_c;
+    int32_t asc_pos;
+    float asc_coeff;
+
+    float *buffer;
+    int32_t buffer_size;
+    int32_t buffer_max_size;
+    int32_t pos;
+    int32_t *nextpos;
+    float *nextdelta;
+
+    float delta;
+    int32_t nextiter;
+    int32_t nextlen;
+    int32_t asc_changed;
+}AudioLimiterContext;
+
+namespace SUPERSOUND
+{
+
+
+class Alimiter
+{
+public:
+    Alimiter();
+    ~Alimiter();
+
+public:
+    void Flush();
+    int32_t GetLatecy();
+    int32_t SetParam(int32_t fs, int32_t channels);
+    void Filter(float * input, float * output, int32_t num);
+
+private:
+    void Uninit();
+    int32_t config_input();
+    float get_rdelta(AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc);
+
+private:
+    AudioLimiterContext m_alimiterCtx;
+    int m_nChannels;
+    int m_nFs;
+};
+
+
+}
+
+#endif /* __ALIMITER_H__ */
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/alimter/src/alimiter.cpp b/AutoCoverTool/ref/tools/mixer/alimter/src/alimiter.cpp
new file mode 100755
index 0000000..abbd622
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/alimter/src/alimiter.cpp
@@ -0,0 +1,306 @@
+
+#include "alimiter.h"
+#include <math.h>
+#include <string.h>
+#include <stdlib.h>
+#include <new>
+
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ?
(a) : (b)) +#define MIDDLE(x, y, z) ((x)<(y)?((y)<(z)?(y):(x)<(z)?(z):(x)):((y)>(z)?(y):(x)>(z)?(z):(x))) +#define SAFE_DELETE_PTR(ptr) \ +{ \ + if(ptr) \ + { \ + delete [] ptr; \ + ptr = NULL; \ + } \ +} + +namespace SUPERSOUND +{ + + +Alimiter::Alimiter() +{ + memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx)); + + m_nChannels = 0; + m_nFs = 0; + + Flush(); +} + +Alimiter::~Alimiter() +{ + Uninit(); +} + +void Alimiter::Flush() +{ + float * buffer = m_alimiterCtx.buffer; + float * nextdelta = m_alimiterCtx.nextdelta; + int32_t * nextpos = m_alimiterCtx.nextpos; + int32_t buffer_max_size = m_alimiterCtx.buffer_max_size; + int32_t buffer_size = m_alimiterCtx.buffer_size; + + if(buffer) + memset(buffer, 0, sizeof(float) * buffer_max_size); + if(nextdelta) + memset(nextdelta, 0, sizeof(float) * buffer_max_size); + if(nextpos) + memset(nextpos, -1, sizeof(float) * buffer_max_size); + + memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx)); + + m_alimiterCtx.level_in = 1; + m_alimiterCtx.level_out = 32000 / 32768.0; + m_alimiterCtx.limit = 1; + m_alimiterCtx.attack = 5; + m_alimiterCtx.release = 50; + m_alimiterCtx.auto_release = 0; + m_alimiterCtx.asc_coeff = 0.5; + m_alimiterCtx.auto_level = 1; + + m_alimiterCtx.attack /= 1000; + m_alimiterCtx.release /= 1000; + m_alimiterCtx.att = 1; + m_alimiterCtx.asc_pos = -1; + m_alimiterCtx.asc_coeff = pow(0.5f, m_alimiterCtx.asc_coeff - 0.5f) * 2 * -1; + + m_alimiterCtx.buffer = buffer; + m_alimiterCtx.nextdelta = nextdelta; + m_alimiterCtx.nextpos = nextpos; + m_alimiterCtx.buffer_max_size = buffer_max_size; + m_alimiterCtx.buffer_size = buffer_size; +} + +int32_t Alimiter::GetLatecy() +{ + return m_alimiterCtx.buffer_size / m_nChannels; +} + +int32_t Alimiter::SetParam( int32_t fs, int32_t channels ) +{ + if((fs == m_nFs) && (channels == m_nChannels)) + return ERROR_SUPERSOUND_SUCCESS; + + m_nChannels = channels; + m_nFs = fs; + + return config_input(); +} + +void Alimiter::Filter( float * input, float * output, int32_t num ) +{ + num = num / m_nChannels; + int channels = m_nChannels; + int buffer_size = m_alimiterCtx.buffer_size; + float * buffer = m_alimiterCtx.buffer; + float release = m_alimiterCtx.release; + float limit = m_alimiterCtx.limit; + float * nextdelta = m_alimiterCtx.nextdelta; + float level = m_alimiterCtx.auto_level ? 1 / limit : 1; + float level_out = m_alimiterCtx.level_out; + float level_in = m_alimiterCtx.level_in; + int *nextpos = m_alimiterCtx.nextpos; + + float * buf; + float * dst; + float * src; + int n, c, i; + AudioLimiterContext * s = &m_alimiterCtx; + + dst = output; + src = input; + + for (n = 0; n < num; n++) { + float peak = 0; + + for (c = 0; c < channels; c++) { + float sample = src[c] * level_in; + + buffer[s->pos + c] = sample; + peak = MAX(peak, fabs(sample)); + } + + if (s->auto_release && peak > limit) { + s->asc += peak; + s->asc_c++; + } + + if (peak > limit) { + float patt = MIN(limit / peak, 1); + float rdelta = get_rdelta(s, release, m_nFs, + peak, limit, patt, 0); + float delta = (limit / peak - s->att) / buffer_size * channels; + int found = 0; + + if (delta < s->delta) { + s->delta = delta; + nextpos[0] = s->pos; + nextpos[1] = -1; + nextdelta[0] = rdelta; + s->nextlen = 1; + s->nextiter= 0; + } else { + for (i = s->nextiter; i < s->nextiter + s->nextlen; i++) { + int j = i % buffer_size; + float ppeak, pdelta; + + ppeak = fabs(buffer[nextpos[j]]) > fabs(buffer[nextpos[j] + 1]) ? 
+ fabs(buffer[nextpos[j]]) : fabs(buffer[nextpos[j] + 1]); + pdelta = (limit / peak - limit / ppeak) / (((buffer_size - nextpos[j] + s->pos) % buffer_size) / channels); + if (pdelta < nextdelta[j]) { + nextdelta[j] = pdelta; + found = 1; + break; + } + } + if (found) { + s->nextlen = i - s->nextiter + 1; + nextpos[(s->nextiter + s->nextlen) % buffer_size] = s->pos; + nextdelta[(s->nextiter + s->nextlen) % buffer_size] = rdelta; + nextpos[(s->nextiter + s->nextlen + 1) % buffer_size] = -1; + s->nextlen++; + } + } + } + + buf = &s->buffer[(s->pos + channels) % buffer_size]; + peak = 0; + for (c = 0; c < channels; c++) { + float sample = buf[c]; + + peak = MAX(peak, fabs(sample)); + } + + if (s->pos == s->asc_pos && !s->asc_changed) + s->asc_pos = -1; + + if (s->auto_release && s->asc_pos == -1 && peak > limit) { + s->asc -= peak; + s->asc_c--; + } + + s->att += s->delta; + + for (c = 0; c < channels; c++) + dst[c] = buf[c] * s->att; + + if ((s->pos + channels) % buffer_size == nextpos[s->nextiter]) { + if (s->auto_release) { + s->delta = get_rdelta(s, release, m_nFs, + peak, limit, s->att, 1); + if (s->nextlen > 1) { + int pnextpos = nextpos[(s->nextiter + 1) % buffer_size]; + float ppeak = fabs(buffer[pnextpos]) > fabs(buffer[pnextpos + 1]) ? + fabs(buffer[pnextpos]) : + fabs(buffer[pnextpos + 1]); + float pdelta = (limit / ppeak - s->att) / + (((buffer_size + pnextpos - + ((s->pos + channels) % buffer_size)) % + buffer_size) / channels); + if (pdelta < s->delta) + s->delta = pdelta; + } + } else { + s->delta = nextdelta[s->nextiter]; + s->att = limit / peak; + } + + s->nextlen -= 1; + nextpos[s->nextiter] = -1; + s->nextiter = (s->nextiter + 1) % buffer_size; + } + + if (s->att > 1.) { + s->att = 1.; + s->delta = 0.; + s->nextiter = 0; + s->nextlen = 0; + nextpos[0] = -1; + } + + if (s->att <= 0.) { + s->att = 0.000001f; + s->delta = (1 - s->att) / (m_nFs * release); + } + + if (s->att != 1 && (1 - s->att) < 0.000001f) + s->att = 1; + + if (s->delta != 0 && fabs(s->delta) < 0.000001f) + s->delta = 0; + + for (c = 0; c < channels; c++) + dst[c] = MIDDLE(dst[c], -limit, limit) * level * level_out; + + s->pos = (s->pos + channels) % buffer_size; + src += channels; + dst += channels; + } +} + +void Alimiter::Uninit() +{ + SAFE_DELETE_PTR(m_alimiterCtx.buffer); + SAFE_DELETE_PTR(m_alimiterCtx.nextdelta); + SAFE_DELETE_PTR(m_alimiterCtx.nextpos); +} + +int32_t Alimiter::config_input() +{ + int obuffer_size = int(m_nFs * m_nChannels * 100 / 1000. 
+ m_nChannels);
+    if(obuffer_size < m_nChannels)
+        return ERROR_SUPERSOUND_PARAM;
+
+    if(obuffer_size > m_alimiterCtx.buffer_max_size)
+    {
+        SAFE_DELETE_PTR(m_alimiterCtx.buffer);
+        m_alimiterCtx.buffer = new(std::nothrow) float[obuffer_size];
+        if(m_alimiterCtx.buffer == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        memset(m_alimiterCtx.buffer, 0, sizeof(float) * obuffer_size);
+
+        SAFE_DELETE_PTR(m_alimiterCtx.nextdelta);
+        m_alimiterCtx.nextdelta = new(std::nothrow) float[obuffer_size];
+        if(m_alimiterCtx.nextdelta == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        memset(m_alimiterCtx.nextdelta, 0, sizeof(float) * obuffer_size);
+
+        SAFE_DELETE_PTR(m_alimiterCtx.nextpos);
+        m_alimiterCtx.nextpos = new(std::nothrow) int32_t[obuffer_size];
+        if(m_alimiterCtx.nextpos == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        memset(m_alimiterCtx.nextpos, -1, obuffer_size*sizeof(int32_t));
+
+        m_alimiterCtx.buffer_max_size = obuffer_size;
+    }
+
+    m_alimiterCtx.buffer_size = int(m_nFs * m_alimiterCtx.attack * m_nChannels);
+    m_alimiterCtx.buffer_size -= m_alimiterCtx.buffer_size % m_nChannels;
+
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+float Alimiter::get_rdelta( AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc )
+{
+    float rdelta = (1 - patt) / (sample_rate * release);
+
+    if (asc && s->auto_release && s->asc_c > 0) {
+        float a_att = limit / (s->asc_coeff * s->asc) * (float)s->asc_c;
+
+        if (a_att > patt) {
+            float delta = MAX((a_att - patt) / (sample_rate * release), rdelta / 10);
+
+            if (delta < rdelta)
+                rdelta = delta;
+        }
+    }
+
+    return rdelta;
+}
+
+
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/CMakeLists.txt
new file mode 100644
index 0000000..6724db8
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/CMakeLists.txt
@@ -0,0 +1,183 @@
+cmake_minimum_required(VERSION 3.4)
+project(audio_effects_lib)
+#set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+
+option(AELIB_BUILD_WHOLE_LIBS "Audio Effect Lib build as a whole lib" ON)
+option(WITH_FFT "Audio Effect Lib build with fft" ON)
+
+#add_definitions(-DAE_CONFUSE_CODE)
+include_directories(./)
+include_directories(inc common src)
+include_directories(ref)
+
+# Path to the local ffmpeg; not needed when building for ARM, since the repo ships its own copy
+#include_directories(/Users/yangjianli/starMaker/ffmpeg_lib/ffmpeg-4.3.1/mac/include)
+#set(FFMPEG_LIB /Users/yangjianli/starMaker/ffmpeg_lib/ffmpeg-4.3.1/mac/lib)
+
+IF(NOT AELIB_BUILD_WHOLE_LIBS)
+    # Subprojects
+    add_subdirectory(ref)
+
+    AUX_SOURCE_DIRECTORY(common SRC_COMMON_DIR)
+    file(GLOB_RECURSE CPP_SRC_DIR src/*cpp)
+    add_library(audio_effects_lib ${CPP_SRC_DIR} ${SRC_COMMON_DIR})
+ELSE()
+    include_directories(ref/al_reverb/inc)
+    include_directories(ref/al_reverb/src)
+    include_directories(ref/autotune/inc)
+    include_directories(ref/autotune/src)
+    include_directories(ref/iir_eq/inc)
+    include_directories(ref/iir_eq/src)
+    include_directories(ref/phonograph/inc)
+    include_directories(ref/phonograph/src)
+    include_directories(ref/reverb/inc)
+    include_directories(ref/reverb/src)
+    include_directories(ref/saudio_effects/inc)
+    include_directories(ref/saudio_effects/src)
+    include_directories(ref/slow_flanging/inc)
+    include_directories(ref/slow_flanging/src)
+    include_directories(ref/tone_shift/inc)
+    include_directories(ref/tone_shift/src)
+    #include_directories(ref/waves/inc)
+    #include_directories(ref/waves/src)
+
+    include_directories(ref/common)
+
+    include_directories(ref/al_reverb/src/biquad_filters)
+    include_directories(ref/al_reverb/src/fast_delay)
+    include_directories(ref/al_reverb/src/filter)
+    include_directories(ref/al_reverb/src/AlReverbApi.cpp)
+    include_directories(ref/al_reverb/src/al_reverb)
+    include_directories(ref/al_reverb/src/al_reverb_biquad)
+    include_directories(ref/al_reverb/src/al_reverb_common)
+    include_directories(ref/al_reverb/src/al_reverb_early_reflection)
+    include_directories(ref/al_reverb/src/al_reverb_echo)
+    include_directories(ref/al_reverb/src/al_reverb_late_allpass)
+    include_directories(ref/al_reverb/src/al_reverb_late_lowpass)
+    include_directories(ref/al_reverb/src/al_reverb_late_reverb)
+    include_directories(ref/al_reverb/src/al_reverb_modulation)
+
+    include_directories(ref/iir_eq/src/audacious_arma)
+    include_directories(ref/iir_eq/src/audacious_eq)
+
+    include_directories(ref/saudio_effects/src/all_plat ref/saudio_effects/src/audio_effect ref/saudio_effects/src/biquad
+            ref/saudio_effects/src/buffer ref/saudio_effects/src/damper ref/saudio_effects/src/delay ref/saudio_effects/src/delayi ref/saudio_effects/src/envelope_follower
+            ref/saudio_effects/src/equalizer ref/saudio_effects/src/reverb ref/saudio_effects/src/simple_delay_effect ref/saudio_effects/src/simple_reverb_effect)
+
+    include_directories(ref/tone_shift/src/aa_filter)
+    include_directories(ref/tone_shift/src/bpm_detect)
+    include_directories(ref/tone_shift/src/cpu_detect)
+    include_directories(ref/tone_shift/src/fifo_sample_buffer)
+    include_directories(ref/tone_shift/src/fir_filter)
+    include_directories(ref/tone_shift/src/peak_finder)
+    include_directories(ref/tone_shift/src/rate_transposer)
+    include_directories(ref/tone_shift/src/sound_touch)
+    include_directories(ref/tone_shift/src/td_stretch)
+
+    include_directories(ref/supersound/inc)
+    include_directories(ref/supersound/src)
+    include_directories(ref/supersound/src/common)
+    include_directories(ref/supersound/src/impulse)
+    include_directories(ref/supersound/ref)
+    include_directories(ref/supersound/ref/kiss_fft)
+
+#    include_directories(ref/audio_resample/inc)
+
+    AUX_SOURCE_DIRECTORY(common SRC_COMMON_DIR)
+    file(GLOB_RECURSE CPP_SRC_DIR src/*cpp)
+    file(GLOB_RECURSE CPP_REF_DIR ref/*cpp)
+    file(GLOB_RECURSE C_REF_DIR ref/*c)
+
+    include_directories(ref/waves/inc)
+#    include_directories(ref/audio_codec/inc)
+#    list(REMOVE_ITEM CPP_REF_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ref/waves/src/STWaveFile.cpp")
+#    list(REMOVE_ITEM CPP_REF_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ref/autotune/src/common/util/util.cpp")
+    list(REMOVE_ITEM CPP_REF_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ref/audio_resample/src/FfmpegResampler.cpp")
+
+
+#    IF(NOT WITH_FFT)
+#        list(REMOVE_ITEM CPP_REF_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ref/supersound/ref/kiss_fft/kiss_fft.cpp")
+#        list(REMOVE_ITEM CPP_REF_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ref/supersound/ref/kiss_fft/kiss_fftr.cpp")
+#        list(REMOVE_ITEM CPP_REF_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ref/supersound/ref/kiss_fft/kiss_fftnd.c")
+#    endif()
+
+    add_library(audio_effects_lib ${CPP_SRC_DIR} ${CPP_REF_DIR} ${C_REF_DIR} ${SRC_COMMON_DIR})
+
+#    set_target_properties(audio_effects_lib PROPERTIES CXX_VISIBILITY_PRESET hidden)
+ENDIF()
+
+
+#set_target_properties(audio_effects_lib PROPERTIES CXX_VISIBILITY_PRESET hidden)
+#add_executable(main example/main.cpp example/ae_server/CAeServer.cpp)
+
+
+# Force-link every object in the .a; otherwise the effect classes never self-register into the map and cannot be reached from outside
+# (when linking against a static library, the linker drops objects that are never referenced directly)
+#if(APPLE)
+#    target_link_libraries(main
+#            -Wl,-all_load ${LIBRARY_OUTPUT_PATH}/libaudio_codec.a -Wl,-noall_load
+#    )
+#    target_link_libraries(main
+#            -Wl,-all_load ${LIBRARY_OUTPUT_PATH}/libaudio_effects_lib.a -Wl,-noall_load
+#    )
+#else()
+#    target_link_libraries(main
+#            -Wl,--whole-archive ${LIBRARY_OUTPUT_PATH}/libaudio_codec.a -Wl,--no-whole-archive
+#    )
+#    target_link_libraries(main
+#            -Wl,-all_load ${LIBRARY_OUTPUT_PATH}/libaudio_effects_lib.a -Wl,-noall_load
+#    )
+#endif()
+
+
+#add_executable(effect_im_tool example/effect_im_tool.cpp example/ae_server/CAeServer.cpp ${CPP_SRC_DIR} ${CPP_REF_DIR} ${C_REF_DIR} ${SRC_COMMON_DIR})
+#if(APPLE)
+#    target_link_libraries(effect_im_tool
+#            -Wl,-all_load ${LIBRARY_OUTPUT_PATH}/libaudio_effects_lib.a -Wl,-noall_load
+#    )
+#else()
+#    target_link_libraries(effect_im_tool
+#            -Wl,--whole-archive ${LIBRARY_OUTPUT_PATH}/libaudio_effects_lib.a -Wl,--whole-archive
+#    )
+#endif()
+#target_link_libraries(effect_im_tool
+#        -lpthread -lz -lbz2 -ldl
+#        )
+
+#target_link_libraries(main
+#        ${LIBRARY_OUTPUT_PATH}/libaudio_effects_lib.a
+#        ${LIBRARY_OUTPUT_PATH}/libwaves.a
+#        ${LIBRARY_OUTPUT_PATH}/libiir_eq.a
+#        ${LIBRARY_OUTPUT_PATH}/libsaudio_effects.a
+#        ${LIBRARY_OUTPUT_PATH}/libautotune.a
+#        ${LIBRARY_OUTPUT_PATH}/libreverb.a
+#        ${LIBRARY_OUTPUT_PATH}/libal_reverb.a
+#        ${LIBRARY_OUTPUT_PATH}/libphonograph.a
+#        ${LIBRARY_OUTPUT_PATH}/libslow_flanging.a
+#        ${LIBRARY_OUTPUT_PATH}/libref_common.a
+#        ${LIBRARY_OUTPUT_PATH}/libtone_shift.a
+#        )
+#
+#target_link_libraries(main
+#        ${FFMPEG_LIB}/libavfilter.a
+#        ${FFMPEG_LIB}/libavformat.a
+#        ${FFMPEG_LIB}/libavcodec.a
+#        ${FFMPEG_LIB}/libswresample.a
+#        ${FFMPEG_LIB}/libswscale.a
+#        ${FFMPEG_LIB}/libavutil.a
+#        -lz -lbz2 -liconv -llzma
+#        "-framework VideoToolbox"
+#        "-framework Security"
+#        "-framework CoreFoundation"
+#        "-framework CoreMedia"
+#        "-framework CoreVideo"
+#        "-framework VideoDecodeAcceleration"
+#        "-framework AVFoundation"
+#        "-framework CoreGraphics"
+#        "-framework Foundation"
+#        "-framework CoreServices"
+#
+##        ${LIBRARY_OUTPUT_PATH}/libwaves.a
+#        )
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/README.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/README.txt
new file mode 100644
index 0000000..b5e2234
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/README.txt
@@ -0,0 +1,24 @@
+Project overview:
+    Audio-effects library; it mainly provides the various vocal effects plus equalizer presets.
+
+Code layout:
+    --inc       public headers
+    --src       source code
+        --audio_eq  a thin wrapper around each effect in ref, so they can all be used uniformly
+        --manager   effect registration and lookup of the registered effects
+    --common    parts shared across modules
+    --ref       subprojects -- [each individual effect is its own subproject]
+        --common    parts shared between the subprojects
+    --example   test folder for the project
+        --main.cpp      test entry point
+        --ae_server     sample wrapper showing how the library is used in the current product
+
+Usage:
+    Taking Android as an example:
+    run build_android.sh; the per-platform .a files appear under
+    ./build/libs/android/ and can be copied into the Android project.
+    Use the headers from inc.
+
+Adding a new effect:
+    Add a new subdirectory under ref [the new effect],
+    then add a wrapper for it to inc and src, following the existing effects.
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/build_android.sh b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/build_android.sh
new file mode 100755
index 0000000..6b81de3
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/build_android.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+
+# @Time    : 2019-06-18 17:50
+# @Author  : AlanWang
+# @FileName: build_android.sh
+
+# MY_NDK and MY_CMAKE must point at the matching directories of your own NDK install
+#MY_NDK="/Users/yangjianli/Library/Android/sdk/ndk-bundle"
+MY_CMAKE="/Users/yangjianli/Library/Android/sdk/cmake/3.6.4111459/bin/cmake"
+MY_NDK="/Users/yangjianli/android-ndk/android-ndk-r16b"
+#MY_CMAKE="/Users/wangjianjun/AndroidDev/sdk/cmake/3.10.2.4988404/bin/cmake"
+
+if [ 
-z "$MY_NDK" ]; then + echo "Please set MY_NDK to the Android NDK folder" + exit 1 +fi + +if [ -z "$MY_CMAKE" ]; then + echo "Please set MY_CMAKE to the Android CMake folder" + exit 1 +fi + +OUTPUT_LIBS="./build/libs/android" +# arme_abis=(armeabi armeabi-v7a arm64-v8a x86 x86_64 mips mips64) + +function build_with_armeabi() { + ARME_ABI=$1 + echo ${ARME_ABI} + ANDROID_NATIVE_API_LEVEL="android-$2" + echo ${ANDROID_NATIVE_API_LEVEL} + + BUILD_DIR="./build/android/${ARME_ABI}" + BUILD_REF_DIR="./build/android/${ARME_ABI}/ref" + OUTPUT_SO_DIR="${BUILD_DIR}/build/android/libs/${ARME_ABI}" + + PRE_EXE_DIR=$(pwd) + echo ${PRE_EXE_DIR} + + ${MY_CMAKE} \ + -H"./" \ + -B"${BUILD_DIR}" \ + -DANDROID_ABI="${ARME_ABI}" \ + -DANDROID_NDK="${MY_NDK}" \ + -DCMAKE_LIBRARY_OUTPUT_DIRECTORY="./build/android/libs/${ARME_ABI}" \ + -DCMAKE_BUILD_TYPE="Release" \ + -DCMAKE_TOOLCHAIN_FILE="${MY_NDK}/build/cmake/android.toolchain.cmake" \ + -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} \ + -DANDROID_TOOLCHAIN="clang" \ + -DCMAKE_C_FLAGS="-fpic -fexceptions -frtti -Wno-narrowing" \ + -DCMAKE_CXX_FLAGS="-fpic -fexceptions -frtti -Wno-narrowing" \ + -DANDROID_STL="c++_static" \ + + + cd ${BUILD_DIR} + make + + cd ${PRE_EXE_DIR} + mkdir -p ${OUTPUT_LIBS}/${ARME_ABI}/ + mv ${PRE_EXE_DIR}/lib/* ${OUTPUT_LIBS}/${ARME_ABI}/ + rm -r ./build/android +} + +build_with_armeabi armeabi-v7a 16 +build_with_armeabi arm64-v8a 21 diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/build_ios.sh b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/build_ios.sh new file mode 100755 index 0000000..f228a1f --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/build_ios.sh @@ -0,0 +1,39 @@ +#!/bin/sh + +OUTPUT_LIBS="./build/libs/ios" + +function build_with_platform_and_armeabi() { + PLATFORM=$1 + ARME_ABI=$2 + echo ${PLATFORM} + echo ${ARME_ABI} + + BUILD_DIR="./build/ios/${ARME_ABI}" + PRE_EXE_DIR=$(pwd) + echo ${PRE_EXE_DIR} + + cmake \ + -H"./" \ + -B"${BUILD_DIR}" \ + -DCMAKE_BUILD_TYPE="Release" \ + -DCMAKE_TOOLCHAIN_FILE="./toolchain/ios.toolchain.cmake" \ + -DIOS_PLATFORM=${PLATFORM} \ +# -DIOS_ARCH=${ARME_ABI} + + # 生成目标文件 + cd ${BUILD_DIR} + make + + # 将目标文件移至指定目录 + cd ${PRE_EXE_DIR} + mkdir -p ${OUTPUT_LIBS}/${ARME_ABI}/ + mv ${PRE_EXE_DIR}/lib/* ${OUTPUT_LIBS}/${ARME_ABI}/ + rm -r ./build/ios +} + +build_with_platform_and_armeabi "OS" "all" +#build_with_platform_and_armeabi "OS" "armv7s" +#build_with_platform_and_armeabi "OS" "arm64" + +#build_with_platform_and_armeabi "SIMULATOR64" "x86_64" +#build_with_platform_and_armeabi "SIMULATOR" "i386" diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/ae_server/CAeServer.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/ae_server/CAeServer.cpp new file mode 100644 index 0000000..1406fad --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/ae_server/CAeServer.cpp @@ -0,0 +1,480 @@ +// +// Created by yangjianli on 2020-01-16. 
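+// CAeServer: product-side wrapper around the effect chain. It maps each AE_TYPE
+// to one chain instance, keeps mutually exclusive effects in groups so that
+// setting one replaces the others of its group, and caches impulse-response
+// parameters per effect file path.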
+// + +#include "CAeServer.h" +#include "cstring" +#include "WaveFile.h" + +#ifdef FFMEPG +#include "DecoderWrapper.h" +#endif + +//////////////////////////////////////////内部函数////////////////////////////////////////////////////////////// + +int32_t CAeServer::get_im_params_by_ffmpeg(void* params, Impulse_Param* new_param) +{ + AE_PARAMS_IM_EFFECT* cur_params = (AE_PARAMS_IM_EFFECT*) params; + + // 空字符串则直接返回一个结构出去 + if (cur_params->effect_path.empty()) + { + return AE_ERR_PARAMS_ERR; + } + + std::string audio_path(cur_params->effect_path); + if (m_im_path2params.find(audio_path) == m_im_path2params.end()) + { +#ifdef FFMPEG + // 载入数据 + CDecoderWrapper* m_decode_inst = new(std::nothrow) CDecoderWrapper(); + if (NULL == m_decode_inst) + { + return AE_ERR_NO_BUFFER; + } + MediaParam param; + param.duration = 0; + param.prelude_time = 0; + param.start_time = 0; + param.end_time = 0; + param.need_decrypt = false; + param.path = cur_params->effect_path.c_str(); + + MediaInfo info; + int errcode = m_decode_inst->init(¶m, m_sample_rate, m_channel, CONTEXT_FFMPEG, DECODER_FFMPEG, PROTOCOL_TYPE_FILE); + if(0 != errcode) + { + m_decode_inst->uninit(); + delete m_decode_inst; + return errcode; + } + m_decode_inst->get_media_info(&info); + + // 解码音频 + int frame_len = 512 * m_channel; + int cap_len = int(info.duration * m_sample_rate / 1000.0 * m_channel) + 10; + cap_len = (cap_len / frame_len + 1) * frame_len; + int out_len = 0; + float* out_buf = new float[cap_len]; + + AudioFrameBuffer m_tmp_buffer; + m_tmp_buffer.init(512 * m_channel); + errcode = m_decode_inst->decode(&m_tmp_buffer); + while(E_NATIVE_DECODER_SUCCESS == errcode) + { + // 这种情况基本不会出现 + if (cap_len < out_len + m_tmp_buffer.get_size()) + { + cap_len = out_len + m_tmp_buffer.get_size(); + float* tmp_out = new float[cap_len]; + memcpy(tmp_out, out_buf, sizeof(float) * out_len); + delete[] out_buf; + out_buf = tmp_out; + } + memcpy(out_buf+out_len, m_tmp_buffer.get_buffer(), sizeof(float) * m_tmp_buffer.get_size()); + out_len += m_tmp_buffer.get_size(); + memset(m_tmp_buffer.get_buffer(), 0, sizeof(float) * m_tmp_buffer.get_size()); + errcode = m_decode_inst->decode(&m_tmp_buffer); + } + + delete m_decode_inst; + if (errcode != E_NATIVE_DECODER_END) + { + delete [] out_buf; + return errcode; + } +#else + CWaveFile wave_im = CWaveFile(cur_params->effect_path.c_str(), false); + if(!wave_im.GetStatus()) + { + return AE_ERR_NO_BUFFER; + } + + int channel = wave_im.GetChannels(); + int sample_rate = wave_im.GetSampleRate(); + if (sample_rate != m_sample_rate || channel != m_channel) + { + printf("impluse params err!\n"); + return AE_ERR_NO_BUFFER; + } + int out_len = wave_im.GetTotalFrames() * channel; + float* out_buf = new float[out_len]; + wave_im.ReadFrameAsfloat(out_buf, out_len / channel); +#endif + Impulse_Param* im_params = new Impulse_Param(); + im_params->in_channels = m_channel; + im_params->out_channels = m_channel; + im_params->fs = m_sample_rate; + im_params->im_response = out_buf; + im_params->response_len = out_len / m_channel; + im_params->response_channels = m_channel; + // 一般最小是192的buffer_size,fft搞小一点,更合适 + im_params->window_bits = 9; + im_params->process_buffer_len = m_buffer_size; + im_params->high_performance = true; + im_params->effect_path = cur_params->effect_path; + m_im_path2params[audio_path] = im_params; + +// STCWaveFile wave_out = STCWaveFile("/Users/yangjianli/starmaker-work/research/tmp_code/音效相关/test1/t1.wav", true); +// wave_out.SetChannels(m_channel); +// wave_out.SetSampleRate(m_sample_rate); +// 
wave_out.SetSampleFormat(SF_IEEE_FLOAT); +// wave_out.SetupDone(); +// wave_out.WriteFrame(out_buf, out_len / m_channel); + } + copy_impluse_params(new_param, m_im_path2params[audio_path]); + return AE_ERR_SUCCESS; +} + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +CAeServer::CAeServer() +{ + m_chain = nullptr; + m_ae2group_id.clear(); + m_ae2inst_map.clear(); + m_ae_group.clear(); +} + +CAeServer::~CAeServer() +{ + uninit(); +} + +int32_t CAeServer::init(int sample_rate, int channel, int buffer_size) +{ + m_chain = ae_create_object(); + ae_init(m_chain, sample_rate, channel); + AE_TYPE ae_types[] = {AE_TYPE_NONE, AE_TYPE_KTV, AE_TYPE_AUTOTUNE,AE_TYPE_DISTANT, + AE_TYPE_WARM,AE_TYPE_PHONOGRAPH,AE_TYPE_MAGNETIC,AE_TYPE_ETHEREAL, + AE_TYPE_DIZZY,AE_TYPE_NEW_DISTANT,AE_TYPE_TONE_SHIFT,AE_TYPE_CUSTOM, + AE_TYPE_KARAOKE,SAE_POP,SAE_STUDIO, AE_TYPE_IM_EFFECT}; + + combine_group(ae_types, 16); + AE_TYPE ae_types1[] = {EQ_TYPE_BEGIN, EQ_TYPE_END}; + combine_group(ae_types1, 2); + m_sample_rate = sample_rate; + m_channel = channel; + m_buffer_size = buffer_size; + return AE_ERR_SUCCESS; +} + +int32_t CAeServer::uninit() +{ + if(nullptr != m_chain) + { + ae_destory_object(m_chain); + m_chain = nullptr; + } + m_ae_group.clear(); + m_ae2inst_map.clear(); + m_ae2group_id.clear(); + + // 删除im效果器中所有参数映射过的结果 + std::map::iterator itt; + for(itt = m_im_path2params.begin(); itt != m_im_path2params.end(); itt++) + { + if (itt->second != NULL) + { + if (itt->second->im_response != NULL) + { + delete[] itt->second->im_response; + itt->second->im_response = NULL; + } + delete itt->second; + itt->second = NULL; + } + } + m_im_path2params.clear(); + return AE_ERR_SUCCESS; +} + +int CAeServer::get_latency_ms() +{ + return ae_get_latency_ms(m_chain); +} + +int32_t CAeServer::reset() +{ + return ae_reset(m_chain); +} + +int32_t CAeServer::get_ae_effect_params(AE_TYPE ae_type, void *ae_params, AE_EFFECT_TYPE &ae_effect_type, void*& ret) +{ + pAECustomParam tp_custom; + AE_PARAMS_REVERB* tp_reverb; + int err_code = AE_ERR_SUCCESS; + switch(ae_type){ + case AE_TYPE_KTV: + ae_effect_type = AE_EFFECT_TYPE_REVERB; + ret = new AE_PARAMS_REVERB(); + memcpy(ret, &gs_ae_params_reverb_params[AE_PARAMS_TYPE_REVERB_ID_11 - AE_PARAMS_TYPE_RERVERB], + sizeof(AE_PARAMS_REVERB)); + break; + case AE_TYPE_AUTOTUNE: + ae_effect_type = AE_EFFECT_TYPE_AUTOTUNE; + break; + case AE_TYPE_DISTANT: + ae_effect_type = AE_EFFECT_TYPE_REVERB; + ret = new AE_PARAMS_REVERB(); + memcpy(ret, &gs_ae_params_reverb_params[AE_PARAMS_TYPE_REVERB_ID_15 - AE_PARAMS_TYPE_RERVERB], + sizeof(AE_PARAMS_REVERB)); + break; + case AE_TYPE_WARM: + ae_effect_type = AE_EFFECT_TYPE_AL_REVERB; + ret = new AE_PARAMS_AL_REVERB(); + memcpy(ret, &gs_ae_params_al_reverbs[AE_PARAMS_TYPE_AL_REVERB_CITY_STREETS - AE_PARAMS_TYPE_AL_REVERB], + sizeof(AE_PARAMS_AL_REVERB)); + break; + case AE_TYPE_PHONOGRAPH: + ae_effect_type = AE_EFFECT_TYPE_PHONOGRAPH; + break; + case AE_TYPE_MAGNETIC: + ae_effect_type = AE_EFFECT_TYPE_AL_REVERB; + ret = new AE_PARAMS_AL_REVERB(); + memcpy(ret, &gs_ae_params_al_reverbs[AE_PARAMS_TYPE_AL_REVERB_GENERIC_1 - AE_PARAMS_TYPE_AL_REVERB], + sizeof(AE_PARAMS_AL_REVERB)); + break; + case AE_TYPE_ETHEREAL: + ae_effect_type = AE_EFFECT_TYPE_AL_REVERB; + ret = new AE_PARAMS_AL_REVERB(); + memcpy(ret, &gs_ae_params_al_reverbs[AE_PARAMS_TYPE_AL_REVERB_CASTLE_COURTYARD - AE_PARAMS_TYPE_AL_REVERB], + sizeof(AE_PARAMS_AL_REVERB)); + break; + case AE_TYPE_DIZZY: + ae_effect_type = 
AE_EFFECT_TYPE_SLOWFLANGING; + break; + case AE_TYPE_NEW_DISTANT: + ae_effect_type = AE_EFFECT_TYPE_REVERB; + ret = new AE_PARAMS_REVERB(); + memcpy(ret, &gs_ae_params_reverb_params[AE_PARAMS_TYPE_REVERB_NEW_CONCERT - AE_PARAMS_TYPE_RERVERB], + sizeof(AE_PARAMS_REVERB)); + break; + case AE_TYPE_TONE_SHIFT: + ae_effect_type = AE_EFFECT_TYPE_TONE_SHIFT; + ret = new AE_PARAMS_TONE_SHIFT(); + ((AE_PARAMS_TONE_SHIFT*) ret)->shift_value = ((AEToneShiftParam*) ae_params)->tone_shift; + break; + case AE_TYPE_CUSTOM: + ae_effect_type = AE_EFFECT_TYPE_REVERB; + ret = new AE_PARAMS_REVERB(); + memcpy(ret, &gs_ae_params_reverb_params[AE_PARAMS_TYPE_REVERB_ID_18 - AE_PARAMS_TYPE_RERVERB], + sizeof(AE_PARAMS_REVERB)); + tp_custom = (pAECustomParam) ae_params; + tp_reverb = (AE_PARAMS_REVERB*) ret; + tp_reverb->wet = tp_custom->reverb_wet / 3.0; // 兼容android当前的使用方式 + tp_reverb->room_size = tp_custom->room_size; + break; + case AE_TYPE_KARAOKE: + ae_effect_type = AE_EFFECT_TYPE_REVERB; + ret = new AE_PARAMS_REVERB(); + memcpy(ret, &gs_ae_params_reverb_params[AE_PARAMS_TYPE_REVERB_ID_18 - AE_PARAMS_TYPE_RERVERB], + sizeof(AE_PARAMS_REVERB)); + break; + case SAE_POP: + ae_effect_type = AE_EFFECT_TYPE_SAE; + ret = new AE_PARAMS_SAE(); + ((AE_PARAMS_SAE*)ret)->params_list.assign( + gs_sae_params[AE_PARAMS_TYPE_SAE_POP - AE_PARAMS_TYPE_SAE].params_list.begin(), + gs_sae_params[AE_PARAMS_TYPE_SAE_POP - AE_PARAMS_TYPE_SAE].params_list.end() + ); + break; + case SAE_STUDIO: + ae_effect_type = AE_EFFECT_TYPE_SAE; + ret = new AE_PARAMS_SAE(); + ((AE_PARAMS_SAE*)ret)->params_list.assign( + gs_sae_params[AE_PARAMS_TYPE_SAE_STUDIO - AE_PARAMS_TYPE_SAE].params_list.begin(), + gs_sae_params[AE_PARAMS_TYPE_SAE_STUDIO - AE_PARAMS_TYPE_SAE].params_list.end() + ); + break; + case EQ_TYPE_END: + ae_effect_type = AE_EFFECT_TYPE_EQ; + ret = new AE_PARAMS_EQ(); + memcpy(ret, ae_params, sizeof(AE_PARAMS_EQ)); + break; + case AE_TYPE_IM_EFFECT: + ae_effect_type = AE_EFFECT_TYPE_IM_EFFECT; + ret = new Impulse_Param (); // 内部的对象地址是复制的,不会被释放,将由外部做释放 + err_code = get_im_params_by_ffmpeg(ae_params, (Impulse_Param*)ret); + break; + default: + ae_effect_type = AE_EFFECT_TYPE_NONE; + } + return err_code; +} + +int32_t CAeServer::combine_group(AE_TYPE *ae_types, int size) +{ + int group_id = m_ae_group.size(); + for(int i=0;i::iterator iter = m_ae2group_id.find(ae_type); + if(iter != m_ae2group_id.end()) + { + int group_id = iter->second; + if(group_id >= 0 && group_id < m_ae_group.size()) + { + // 删除ae2inst中效果 + std::map::iterator itt; + for(itt = m_ae2inst_map.begin(); itt != m_ae2inst_map.end(); itt++) + { + if(m_ae_group[group_id] == itt->second) + { + m_ae2inst_map.erase(itt); + break; + } + } + // 删除效果,去掉type2ins映射 + ae_delete_effect(m_chain, m_ae_group[group_id]); + m_ae_group[group_id] = nullptr; // 这块搞空 + } + } + return AE_ERR_SUCCESS; + } + + // 判断是否在效果链上 + std::map::iterator it = m_ae2inst_map.find(ae_type); + if(it != m_ae2inst_map.end()) + { + // 在效果链上 + ae_set_params(m_chain, it->second, ae_params); + }else + { + // 没在链上,添加效果 + void* new_effect_ptr = ae_add_effect(m_chain, effect_type); + if(nullptr == new_effect_ptr) + { + // 没有该效果 + if(nullptr != ae_params) + { + delete ae_params; + } + return AE_ERR_NO_EFFECTS; + } + ae_set_params(m_chain, new_effect_ptr, ae_params); + m_ae2inst_map.insert(std::make_pair(ae_type, new_effect_ptr)); + + // 删除该效果所在分组的其他效果 + std::map::iterator iter = m_ae2group_id.find(ae_type); + if(iter != m_ae2group_id.end()) + { + int group_id = iter->second; + if(group_id >= 0 && group_id < 
m_ae_group.size()) + { + // 删除分组音效,存放上自己的音效 + if(nullptr != m_ae_group[group_id]) + { + // 删除ae2inst中效果 + std::map::iterator itt; + for(itt = m_ae2inst_map.begin(); itt != m_ae2inst_map.end(); itt++) + { + if(m_ae_group[group_id] == itt->second) + { + m_ae2inst_map.erase(itt); + break; + } + } + + // 删除效果,去掉type2ins映射 + ae_delete_effect(m_chain, m_ae_group[group_id]); + m_ae_group[group_id] = nullptr; // 这块搞空 + } + + m_ae_group[group_id] = new_effect_ptr; + } + } + } + + // 在上面return 不需要处理的原因是,当出现上述条件时,ae_params + // 没有被创建 + if(nullptr != ae_params) + { + delete ae_params; + } + return AE_ERR_SUCCESS; +} + +int32_t CAeServer::get_params(AE_TYPE ae_type, void *params) +{ + std::map::iterator it = m_ae2inst_map.find(ae_type); + if(it != m_ae2inst_map.end()) + { + // 在效果链上才有结果 + // 根据对应的类型选择参数结构 + AE_EFFECT_TYPE effect_type; + void* cur_params = nullptr; + int err_code = get_ae_effect_params(ae_type, params, effect_type, cur_params); + if (cur_params == nullptr) + { + return err_code; + } + AE_PARAMS* ae_params = (AE_PARAMS*)cur_params; + ae_get_params(m_chain, it->second, (AE_PARAMS*) ae_params); + + switch (ae_type) + { + case AE_TYPE_TONE_SHIFT: + { + pAEToneShiftParam tp_server = (pAEToneShiftParam) params; + AE_PARAMS_TONE_SHIFT* tp_system = (AE_PARAMS_TONE_SHIFT*) ae_params; + tp_server->tone_shift = int(tp_system->shift_value); + tp_server->max_shift = int(tp_system->max_value); + tp_server->min_shift = int(tp_system->min_value); + break; + } + case AE_TYPE_CUSTOM: + { + pAECustomParam tp_server = (pAECustomParam) params; + AE_PARAMS_REVERB* tp_system = (AE_PARAMS_REVERB*) ae_params; + tp_server->reverb_wet = tp_system->wet; + tp_server->room_size = tp_system->room_size; + break; + } + case AE_TYPE_IM_EFFECT: + { + AE_PARAMS_IM_EFFECT* tp_server = (AE_PARAMS_IM_EFFECT*) params; + Impulse_Param * tp_system = (Impulse_Param*) ae_params; + tp_server->effect_path = tp_system->effect_path; + break; + } + case EQ_TYPE_END: + { + pAECustomEqParam tp_server = (pAECustomEqParam) params; + AE_PARAMS_EQ* tp_system = (AE_PARAMS_EQ*) ae_params; + memcpy(tp_server->params, tp_system->params, sizeof(float) * 10); + } + } + delete ae_params; + return AE_ERR_SUCCESS; + } + return AE_ERR_EFFECT_NOT_IN_CHAIN; +} + +int32_t CAeServer::process(float *in_buf, float *out_buf, int length) +{ + return ae_process(m_chain, in_buf, out_buf, length); +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/ae_server/CAeServer.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/ae_server/CAeServer.h new file mode 100644 index 0000000..844c525 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/ae_server/CAeServer.h @@ -0,0 +1,191 @@ +// +// Created by yangjianli on 2020-01-16. 
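+//
+// Minimal usage sketch (illustrative only; it assumes a 44.1 kHz stereo stream
+// processed in 1024-frame blocks over an interleaved float buffer `buf`, with
+// error handling omitted):
+//
+//     CAeServer ae;
+//     ae.init(44100, 2, 1024);
+//     AECustomParam p = { 0.8f, 0.8f };   // reverb_wet, room_size
+//     ae.set_params(AE_TYPE_CUSTOM, &p);
+//     ae.process(buf, buf, 1024 * 2);
+//     ae.uninit();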
+// + +#ifndef AUDIO_EFFECTS_LIB_CAESERVER_H +#define AUDIO_EFFECTS_LIB_CAESERVER_H + +/** + * 音效系统业务类  + * 这部分是参考当前业务场景给出的工作类 + * 可能会经常修改,跟业务代码一起编译 + * + * 当前的业务逻辑是: + * 每个AE_TYPE对应一个效果器,其中部分效果器互斥,内部互相不可以叠加, + * 与其他可叠加 + */ + +#include "CAudioEffectsChainApi.h" +#include "map" +#include "vector" + +enum AE_TYPE +{ + AE_TYPE_BEGIN, + AE_TYPE_NONE = AE_TYPE_BEGIN, + AE_TYPE_KTV, + AE_TYPE_AUTOTUNE, + AE_TYPE_DISTANT, + AE_TYPE_WARM, + AE_TYPE_PHONOGRAPH, + AE_TYPE_MAGNETIC, + AE_TYPE_ETHEREAL, + AE_TYPE_DIZZY, + AE_TYPE_NEW_DISTANT, + AE_TYPE_TONE_SHIFT, + AE_TYPE_CUSTOM, + AE_TYPE_SPEED_SHIFT, + AE_TYPE_KARAOKE, + SAE_POP, + SAE_STUDIO, + AE_TYPE_END, + AE_TYPE_IM_EFFECT, + EQ_TYPE_BEGIN = 10000, + EQ_TYPE_END = 20000, +}; + +// ToneShift 音效参数 +typedef struct _AEToneShiftParam +{ + //tone shift + int tone_shift; + int min_shift; + int max_shift; +}AEToneShiftParam, *pAEToneShiftParam; + +// custom 音效参数 +typedef struct _AECustomParam +{ + float reverb_wet; + float room_size; +}AECustomParam, *pAECustomParam; + +// 均衡器参数 +typedef struct _AECustomEqParam +{ + float params[10]; +}AECustomEqParam, *pAECustomEqParam; + + +class CAeServer +{ +public: + CAeServer(); + ~CAeServer(); + +public: + + /** + * 初始化函数 + * 作用: 初始化内部成员 + * 注意: 创建新类之后,必须进行init才可以进行其他操作 + * @param sample_rate 采样率 + * @param channel 通道数 + * @param buffer_size 单次处理的音频的buffer的长度[单声道*通道数] + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + int32_t init(int sample_rate, int channel, int buffer_size); + + /** + * 逆初始化函数 + * 作用: 销毁内部成员,释放空间 + * 注意:在结束类处理之前,必须进行uninit释放空间,否则可能会有内存泄漏的风险 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + int32_t uninit(); + + /** + * 重置函数 + * 作用: 清空内部缓存 + * 注意: 一般是在输入数据发生变化(比如播放音乐的seek操作)之后,使用该函数 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + int32_t reset(); + + /** + * 获取本系统的延迟时间(ms) + * 作用: 获取延迟时间 + * 注意: 在内部音效切换或者叠加时有可能引发音效延迟改变 + * @return 返回延迟时间(ms) + */ + int get_latency_ms(); + + /** + * 音效的添加修改或者删除 + * 作用: 系统中音效状态控制[增删改] + * 注意: 选择需要的音效添加即可[具体删除和修改参见combine_group] + * @param ae_type AE_TYPE + * @param params 根据AE_TYPE 选择具体结构体 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + int32_t set_params(AE_TYPE ae_type, void* params); + + /** + * 音效参数的获取 + * 作用: 获取目前已经在效果链上的给定音效的参数 + * 注意: 如果给定的音效不在效果链上,那么返回值不可信 + * @param ae_type AE_TYPE + * @param params 根据AE_TYPE 选择具体结构体 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + int32_t get_params(AE_TYPE ae_type, void* params); + + /** + * 处理函数 + * 作用: 音效系统主处理函数 + * 注意: in_buf和out_buf可以是同一块buf[建议不是同一块,防止之后添加新效果,新效果有此要求] + * in_buf 和 out_buf 必须一致,且长度为length + * 输入和输出是一致的 + * @param in_buf + * @param out_buf + * @param length buf长度 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + int32_t process(float* in_buf, float* out_buf, int length); + + /** + * 绑定音效组[每次输入需要绑定的一组音效] + * 作用: 将不同的音效绑定成为一组,同一组内的音效不能共存,不同组的音效可以叠加,同一个音效无法在多个组 + * 注意: 在init函数中已经根据Android App进行了一次预先绑定,需要的时候可以直接在init函数中修改 + * Tips: 通过在每一组中增加了一个None音效保证可以删除本组音效 + * @param ae_types + * @param size + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + int32_t combine_group(AE_TYPE* ae_types, int size); + +private: + + /** + * 获取外部音效类型与内部音效类型结构的转换函数[不对外暴露] + * 作用: 转化外部与内部结构 + * 注意: 新增音效时需要增加对应关系 + * @param ae_type + * @param ae_params + * @param ae_effect_type + * @param ret_params 内部创建空间(因为外部不知道具体类型) + * @return 0 表示正常 + */ + int32_t get_ae_effect_params(AE_TYPE ae_type,void* ae_params, AE_EFFECT_TYPE & ae_effect_type, void*& ret); + + + /** + * 根据外部输入的音频地址读取数据并进行处理 + * 1. 进行重采样 + * 2. 
+
+
+    /**
+     * Reads and prepares the audio at the externally supplied path:
+     * 1. resampling
+     * 2. channel-count conversion
+     * @param params AE_PARAMS_IM_EFFECT*, Impulse_Param* (out)
+     * @return 0 on success
+     */
+    int32_t get_im_params_by_ffmpeg(void* params, Impulse_Param* new_param);
+private:
+    void* m_chain;                                          // effect-chain instance
+    std::map<AE_TYPE, void*> m_ae2inst_map;                 // maps an effect type to its effect instance
+    std::map<AE_TYPE, int> m_ae2group_id;                   // group id each effect type belongs to
+    std::vector<void*> m_ae_group;                          // at most one effect per group is active, so storing one is enough
+    int32_t m_sample_rate;                                  // sampling rate
+    int32_t m_channel;                                      // channel count
+    int32_t m_buffer_size;                                  // per-channel length of each processed block
+    std::map<std::string, Impulse_Param*> m_im_path2params; // cached parameters for each local IM effect file
+};
+
+#endif //AUDIO_EFFECTS_LIB_CAESERVER_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/effect_im_tool.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/effect_im_tool.cpp
new file mode 100644
index 0000000..578e8e5
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/effect_im_tool.cpp
@@ -0,0 +1,72 @@
+//
+// Created by yangjianli on 2023/2/16.
+//
+
+#include "waves/inc/STWaveFile.h"
+#include "CAudioEffectsChainApi.h"
+#include "string"
+#include "ae_server/CAeServer.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <sys/time.h>
+void cae_server_float_im(std::string in_file, std::string effect_file, std::string out_file)
+{
+    STCWaveFile oWaveFile = STCWaveFile(in_file.c_str(), false);
+    int length = oWaveFile.GetTotalFrames() * oWaveFile.GetChannels();
+    int sample_rate = oWaveFile.GetSampleRate();
+    int channel = oWaveFile.GetChannels();
+    float* in_buf = new float[length];
+    oWaveFile.ReadFrameAsfloat(in_buf, oWaveFile.GetTotalFrames());
+
+    // Processing logic
+    int32_t process_buffer_len = 2048;
+    CAeServer cAeServer;
+    cAeServer.init(sample_rate, channel, process_buffer_len);
+
+    AE_PARAMS_IM_EFFECT im_params = {
+            .effect_path = effect_file,
+    };
+    int ret = cAeServer.set_params(AE_TYPE_IM_EFFECT, (void*) &im_params);
+    printf("set params: %d\n", ret);
+    int step = process_buffer_len * channel;
+
+    struct timeval start;
+    struct timeval end;
+    gettimeofday(&start, NULL);
+    for(int i=0,frame=0;i<length;i+=step,frame++)
+    {
+        if (i + step > length) step = length - i;
+        cAeServer.process(in_buf + i, in_buf + i, step);
+    }
+
+    cAeServer.get_params(AE_TYPE_IM_EFFECT, (void*)&im_params);
+    gettimeofday(&end, NULL);
+    printf("Out====>%s, %f latency=%d\n", im_params.effect_path.c_str(), (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0, cAeServer.get_latency_ms());
+    cAeServer.uninit();
+
+    // Save the result
+    STCWaveFile out_file_inst = STCWaveFile(out_file.c_str(), true);
+    out_file_inst.SetSampleRate(sample_rate);
+    out_file_inst.SetSampleFormat(SF_IEEE_FLOAT);
+    out_file_inst.SetChannels(channel);
+    out_file_inst.SetupDone();
+    out_file_inst.WriteFrame(in_buf, length / 2);
+}
+
+int main(int argc, char* argv[])
+{
+
+    if(argc != 4)
+    {
+        printf("input err! ./main s_audio_path s_audio_im_path s_audio_out\n");
+        return -1;
+    }
+
+    std::string sAudio = argv[1];
+    std::string sAudioIm = argv[2];
+    std::string sAudioOut = argv[3];
+    cae_server_float_im(sAudio, sAudioIm, sAudioOut);
+    return 0;
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/main.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/main.cpp
new file mode 100644
index 0000000..728664b
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/example/main.cpp
@@ -0,0 +1,674 @@
+//
+// Created by yangjianli on 2020-01-09.
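+// Test driver for the effects library: each function below loads a wav, converts
+// the short samples to float, runs one effect (EQ presets, autotune, reverb,
+// phonograph, slow flanging, tone shift, AL reverb, SAE presets, impulse
+// responses, or a whole chain) block by block, and converts back.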
+// + +#include "CAudioEffectsChainApi.h" +#include "iir_eq/inc/CAudaciousEqApi.h" +#include "waves/inc/STWaveFile.h" +#include "string" +#include "saudio_effects/inc/SAudioEffectsApi.h" +#include "autotune/inc/ATndkWrapper.h" +#include "reverb/inc/CReverb.h" +#include "al_reverb/inc/AlReverbApi.h" +#include "phonograph/inc/CPhonograph.h" +#include "slow_flanging/inc/CSlowFlanging.h" +#include "tone_shift/inc/CToneShift.h" +#include "tone_shift/inc/CSpeedShift.h" +#include "ae_server/CAeServer.h" +#include +#include +#include +#include +#include +#define STYLES_LEN 25 +static const float STYLES[STYLES_LEN][10] = { + {4,2,0,-3,-6,-6,-3,0,1,3}, // pop 0 + {7,6,3,0,0,-4,-6,-6,0,0}, // dance 1 + {3,6,8,3,-2,0,4,7,9,10}, // blues 2 + {0,0,0,0,0,0,-6,-6,-6,-8}, // classic 3 + {0,0,1,4,4,4,0,1,3,3}, // jazz 4 + {5,4,2,0,-2,0,3,6,7,8}, // slow 5 + {6,5,0,-5,-4,0,6,8,8,7}, // electronica 6 + {7,4,-4,7,-2,1,5,7,9,9}, // rock 7 + {5,6,2,-5,1,1,-5,3,8,5}, // country 8 + {-2,-1,-1,0,3,4,3,0,0,1},// voice 9 + {5.8,5.8,3,0,-1.5,-1.5,0,0,0,0},//低音 10 + {13.8,12.6,6.3,0,-1.5,-1.5,0,0,0,0},//超重低音 11 + {5.8,5.8,3,0,-1.5,-1.5,0,1.5,5.8,5.8},//低音&高音 12 + {-3,-3,-3,-3,-1.5,-1.5,0,6.3,9.6,12.3},//高音 13 + {0,0,0,0,0,0,-3,-3,-3,-4.5},//经典 14 + {5.8,3.2,2.1,0,0,-2.7,-2,-2.2,-0.6,-0.1},//舞曲 15 + {5.8,3.2,1.3,-3,-2.3,2.2,3.6,5.8,5.8,5.8},//摇滚 16 + {5.8,5.8,0,-2.9,-2.1,0,3.6,7.4,7.7,7.5},//电子 17 + {-2.9,-2.9,2.5,3.6,4.8,4.7,6,6,3,3},//扬声器(响亮) 18 + {-4.5,-3.8,2.2,2.2,2.2,2.2,2.1,1.5,1.5,1.5},//现场 19 + {-4.5,-4.5,-1.5,-1.5,4.5,4.5,1.5,0,-4.5,-6},//中音 20 + {1.5,4.5,5.8,3,1.5,0,0,0,1.5,3},//流行 21 + {5.5,2.5,0.9,-2.6,-5,-10.6,-12.4,-13.3,-10.6,-6.7},//柔和 22 + {3,3,3,0,-3,-3,0,0,0,0},//柔和低音 23 + {0,0,-4.5,-4.5,-4.5,-4.5,-3,0,4.5,4.5},//柔和高音 24 +}; +#define MAX_IM_NUMS 9 +Impulse_Param impulseParam[MAX_IM_NUMS]; +std::string paths[MAX_IM_NUMS] = { + "/Users/yangjianli/starmaker-work/research/qh_code/av_tools/audio_effects/ImpluseEffecLibs/responses/impulse/2f82_l.wav", + "/Users/yangjianli/starmaker-work/research/qh_code/av_tools/audio_effects/ImpluseEffecLibs/responses/impulse/36af_l.wav", + "/Users/yangjianli/starmaker-work/research/qh_code/av_tools/audio_effects/ImpluseEffecLibs/responses/impulse/43e7.wav", + "/Users/yangjianli/starmaker-work/research/qh_code/av_tools/audio_effects/ImpluseEffecLibs/responses/impulse/4d11_l.wav", + "/Users/yangjianli/starmaker-work/research/qh_code/av_tools/audio_effects/ImpluseEffecLibs/responses/impulse/8da7_l.wav", + "/Users/yangjianli/starmaker-work/research/qh_code/av_tools/audio_effects/ImpluseEffecLibs/responses/impulse/b7c9.wav", + "/Users/yangjianli/starmaker-work/research/qh_code/av_tools/audio_effects/ImpluseEffecLibs/responses/impulse/c2e6.wav", + "/Users/yangjianli/starmaker-work/research/qh_code/av_tools/audio_effects/ImpluseEffecLibs/responses/impulse/d8d3_l.wav", + "/Users/yangjianli/starmaker-work/research/qh_code/av_tools/audio_effects/ImpluseEffecLibs/supersound/resource/impluse_test_2.wav", +}; + +void get_params() +{ + for(int i=0;iGetTotalFrames() * cWaveFile->GetChannels(); + float* response = new float[nLength]; + cWaveFile->ReadFrameAsfloat(response, nLength); + + impulseParam[i] = Impulse_Param(); + impulseParam[i].in_channels = 2; + impulseParam[i].out_channels = 2; + impulseParam[i].fs = 44100; + impulseParam[i].im_response = response; + impulseParam[i].response_len = nLength / cWaveFile->GetChannels(); + impulseParam[i].response_channels = cWaveFile->GetChannels(); + impulseParam[i].window_bits = 10; // 代表了单声道的BufferSize + + delete cWaveFile; + } +} + +void 
eq(short* pInBuf, int nLength, int nSampleRate, int nChannel) +{ + + CAudaciousEqApi eqApi; + eqApi.init(nSampleRate, nChannel); + eqApi.set_param(0); + + float* buf = new float[nLength]; + for(int i=0;i nLength) nStep = nLength - i; + if(nCnt % 100 == 0) + { + eqApi.reset(); + eqApi.set_param(nCnt % 26); + } + eqApi.process(buf + i, buf + i, nStep); + for(int j=0;jinit(nSampleRate, nChannel); + int nStep = 512 * nChannel; + for(int i=0, nCnt = 0;i nLength) nStep = nLength - i; + autotune->process(buf+i, buf+i, nStep, 0); + for(int j=0;juninit(); + delete autotune; + delete[] buf; +} + +void reverb(short* pInBuf, int nLength, int nSampleRate, int nChannel) +{ + float* buf = new float[nLength]; + for(int i=0;iinit(nSampleRate, nChannel); +// cReverb->set_type_id(KALA_VB_ID_11); + AE_PARAMS_REVERB rb = gs_ae_params_reverb_params[AE_PARAMS_TYPE_REVERB_ID_18 - AE_PARAMS_TYPE_RERVERB]; + rb.room_size = 0.8; + rb.wet = 0.8 / 3; + cReverb->set_params(&rb); + int nStep = 512 * nChannel; + for(int i=0, nCnt = 0;i nLength) nStep = nLength - i; +// printf("%d\n", nStep); + cReverb->process(buf + i, nStep, buf + i, nStep); + for(int j=0;juninit(); + delete cReverb; +} + +void phonograph(short* pInBuf, int nLength, int nSampleRate, int nChannel) +{ + float* buf = new float[nLength]; + for(int i=0;iinit(nSampleRate, nChannel); + int nStep = 512 * nChannel; + for(int i=0, nCnt = 0;i nLength) nStep = nLength - i; + printf("%d\n", nStep); + cReverb->process(buf + i, nStep); + for(int j=0;juninit(); + delete cReverb; +} + +void slow_flanging(short* pInBuf, int nLength, int nSampleRate, int nChannel) +{ + float* buf = new float[nLength]; + for(int i=0;iinit(nChannel, nSampleRate); + int nStep = 512 * nChannel; + for(int i=0, nCnt = 0;i nLength) nStep = nLength - i; + printf("%d\n", nStep); + cReverb->process(buf + i, buf + i, nStep); + for(int j=0;juninit(); + delete cReverb; +} + +void sound_touch(short* pInBuf, int nLength, int nSampleRate, int nChannel) +{ + float* buf = new float[nLength]; + float* buf_out = new float[nLength]; + for(int i=0;iinit(nSampleRate, nChannel); + cReverb->set_shift_value(2); + cReverb->reset(); + int nStep = 512 * nChannel; + for(int i=0, nCnt = 0;i nLength) nStep = nLength - i; + int ret = cReverb->process(buf + i, nStep, buf + i, nStep); + printf("%d %d\n", nStep, ret); + for(int j=0;juninit(); + delete cReverb; + delete[] buf; + delete[] buf_out; +} + +void al_reverb(short* pInBuf, int nLength, int nSampleRate, int nChannel) +{ + float* buf = new float[nLength]; + for(int i=0;iinit(nSampleRate,nChannel, 0); + AE_PARAMS_AL_REVERB aeParamsAlReverb = + gs_ae_params_al_reverbs[AE_PARAMS_TYPE_AL_REVERB_DIZZY_NEW - AE_PARAMS_TYPE_AL_REVERB]; +// cReverb->set_type_id(KALA_VB_ID_11); + cReverb->set_param(&aeParamsAlReverb); + int nStep = 512 * nChannel; + for(int i=0, nCnt = 0;i nLength) nStep = nLength - i; + printf("%d\n", nStep); + cReverb->process(buf+i, nStep); + for(int j=0;juninit(); + delete cReverb; +} + +//void sm_audio_effects_single(short* pInBuf, int nLength, int nSampleRate, int nChannel) +//{ +// AE_EFFECT_TYPE type = AE_EFFECT_TYPE_TONE_SHIFT; +// +// void* p = ae_create_object(); +// AE_EFFECT_TYPE tps[] = {type}; +// AE_PARAMS_TONE_SHIFT rb = { +// .shift_value = 2 +// }; +// +// ae_init(p,tps,1, nSampleRate, nChannel); +// ae_set_params(p, type, (AE_PARAMS*) (&rb)); +// float* buf = new float[nLength]; +// float* buf_out = new float[nLength]; +// for(int i=0;i nLength) nStep = nLength - i; +// if(nCnt % 500 == 0) +// { +// if(b_on) +// { +// b_on = false; +//// 
delete_effect(p, type); +// ae_set_params(p, type, nullptr); +// }else +// { +// b_on = true; +// int t = nLength / nStep * 12; +// rb.shift_value = t; +// ae_set_params(p, type, (AE_PARAMS*)&rb); +// } +// } +// ae_process(p, buf+i, buf_out+i, nStep); +// for(int j=0;jinit(nSampleRate, nChannel); + + float* buf = new float[nLength]; + for(int i=0;i nLength) nStep = nLength - i; + if(nCnt % 100 == 0) + { + if(type == AE_PARAMS_TYPE_SAE_POP) + { + type = AE_PARAMS_TYPE_SAE_STUDIO; + }else if(type == AE_PARAMS_TYPE_SAE_STUDIO) + { + type = AE_PARAMS_TYPE_SAE_POP; + } + const AE_PARAMS_SAE* saudio_effect_params = + &gs_sae_params[type - AE_PARAMS_TYPE_SAE]; + sAudioEffectsApi->set_audio_effect((AE_PARAMS*) saudio_effect_params); + } + sAudioEffectsApi->process(buf + i, buf + i, nStep); + for(int j=0;juninit(); + delete sAudioEffectsApi; + delete[] buf; +} + +void chain(short* in_buf, int length, int sample_rate, int channel) +{ + // 转换short->float + float* buf = new float[length]; + for(int i=0;i length) step = length - i; + if(frame == 400) + { + effects[elen++] = ae_add_effect(chain, AE_EFFECT_TYPE_AUTOTUNE); + ae_set_params(chain, effects[elen - 1], nullptr); + }else if(frame == 800) + { + effects[elen++] = ae_add_effect(chain, AE_EFFECT_TYPE_EQ); + ae_set_params(chain, effects[elen - 1], nullptr); + ae_delete_effect(chain, effects[0]); + } else if(frame == 1200) + { + AE_PARAMS_EQ params_eq; + memcpy(params_eq.params, STYLES[11], sizeof(float)*10); + ae_set_params(chain, effects[elen - 1], (AE_PARAMS *) ¶ms_eq); + } else if(frame == 1400) + { + effects[elen++] = ae_add_effect(chain, AE_EFFECT_TYPE_EQ); + AE_PARAMS_EQ params_eq; + memcpy(params_eq.params, STYLES[13], sizeof(float)*10); + ae_set_params(chain, effects[elen - 1], (AE_PARAMS *) ¶ms_eq); + } + ae_process(chain, buf + i, buf + i, step); + } + ae_destory_object(chain); + + // 转换float->short + for(int i=0;i length) nStep = length - i; +// if(nCnt % 400 == 0) +// { +// if(nCC % nums == nums - 1) +// { +// AE_PARAMS* tp = NULL; +//// set_params(p, AE_TYPE_EQ,tp); +// ae_set_params(p, AE_EFFECT_TYPE_AL_REVERB, tp); +//// delete_effect(p, AE_TYPE_SAE); +// +// }else +// { +//// memcpy(audioEffectsEqParams.params, STYLES[nCnt % 26], +//// sizeof(float) * 10); +//// set_params(p, AE_TYPE_EQ, +//// (AE_PARAMS*)&audioEffectsEqParams); +// const AE_PARAMS_AL_REVERB* saudio_effect_params = +// &gs_ae_params_al_reverbs[(AE_EFFECT_TYPE_AL_REVERB + nCC % nums) - AE_EFFECT_TYPE_AL_REVERB]; +// ae_set_params(p, AE_EFFECT_TYPE_AL_REVERB, (AE_PARAMS*)saudio_effect_params); +// } +// nCC++; +// } +// ae_process(p, buf + i, buf + i, nStep); +// for(int j=0;j length) nStep = length - i; +// if(nCnt % 400 == 0) +// { +// if(nCC % nums == nums - 1) +// { +// AE_PARAMS* tp = NULL; +// ae_set_params(p, AE_EFFECT_TYPE_AL_REVERB, tp); +// +// }else +// { +// const AE_PARAMS_AL_REVERB* saudio_effect_params = +// &gs_ae_params_al_reverbs[(AE_EFFECT_TYPE_AL_REVERB + nCC % nums) - AE_EFFECT_TYPE_AL_REVERB]; +// ae_set_params(p, AE_EFFECT_TYPE_AL_REVERB, (AE_PARAMS*)saudio_effect_params); +// } +// +// +// if(nCC % nums == 0) +// { +// // EQ +// AE_PARAMS_EQ eq; +// memcpy(eq.params, STYLES[2], sizeof(float) * 10); +// ae_set_params(p, AE_EFFECT_TYPE_EQ, (AE_PARAMS*) &eq); +// }else +// if(nCC % nums == 1) +// { +// ae_set_params(p, AE_EFFECT_TYPE_PHONOGRAPH, nullptr); +// }else if(nCC % nums == 2) +// { +// ae_set_params(p, AE_EFFECT_TYPE_AUTOTUNE, nullptr); +// } +// nCC++; +// } +// ae_process(p, buf + i, buf + i, nStep); +// for(int j=0;jfloat + float* buf 
= new float[length]; + for(int i=0;i length) step = length - i; + cAeServer.process(buf + i, buf + i, step); + fwrite(buf+i, sizeof(float) * step, 1, file); + } + + cAeServer.uninit(); + // 转换float->short + for(int i=0;i length) step = length - i; +// if (800 * 5 == frame) +// { +// im_params.effect_path = cd_string; +// cAeServer.set_params(AE_TYPE_IM_EFFECT, (void*) &im_params); +// } +// +// if (frame == 400 * 5) +// { +// im_params.effect_path = studio_string; +// cAeServer.set_params(AE_TYPE_IM_EFFECT, (void*) &im_params); +// } + cAeServer.process(in_buf + i, in_buf + i, step); + } + + im_params.effect_path = "dsdsd"; + cAeServer.get_params(AE_TYPE_IM_EFFECT, (void*)&im_params); + gettimeofday(&end, NULL); + printf("Out====>%s, %f latency=%d\n", im_params.effect_path.c_str(), (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0, cAeServer.get_latency_ms()); + cAeServer.uninit(); + + // 保存起来 + STCWaveFile out_file_inst = STCWaveFile(out_file, true); + out_file_inst.SetSampleRate(sample_rate); + out_file_inst.SetSampleFormat(SF_IEEE_FLOAT); + out_file_inst.SetChannels(channel); + out_file_inst.SetupDone(); + out_file_inst.WriteFrame(in_buf, length / 2); +} + +void cae_server_float_im(std::string in_file, std::string effect_file, std::string out_file) +{ + STCWaveFile oWaveFile = STCWaveFile(in_file.c_str(), false); + int length = oWaveFile.GetTotalFrames() * oWaveFile.GetChannels(); + int sample_rate = oWaveFile.GetSampleRate(); + int channel = oWaveFile.GetChannels(); + float* in_buf = new float[length]; + oWaveFile.ReadFrameAsfloat(in_buf, oWaveFile.GetTotalFrames()); + + // 处理逻辑 + int32_t process_buffer_len = 2048; + CAeServer cAeServer; + cAeServer.init(sample_rate, channel, process_buffer_len); + + AE_PARAMS_IM_EFFECT im_params = { + .effect_path = effect_file, + }; + int ret = cAeServer.set_params(AE_TYPE_IM_EFFECT, (void*) &im_params); + printf("set params: %d\n", ret); + int step = process_buffer_len * channel; + + struct timeval start; + struct timeval end; + gettimeofday(&start, NULL); + for(int i=0,frame=0;i length) step = length - i; + cAeServer.process(in_buf + i, in_buf + i, step); + } + + cAeServer.get_params(AE_TYPE_IM_EFFECT, (void*)&im_params); + gettimeofday(&end, NULL); + printf("Out====>%s, %f latency=%d\n", im_params.effect_path.c_str(), (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0, cAeServer.get_latency_ms()); + cAeServer.uninit(); + + // 保存起来 + STCWaveFile out_file_inst = STCWaveFile(out_file.c_str(), true); + out_file_inst.SetSampleRate(sample_rate); + out_file_inst.SetSampleFormat(SF_IEEE_FLOAT); + out_file_inst.SetChannels(channel); + out_file_inst.SetupDone(); + out_file_inst.WriteFrame(in_buf, length / 2); +} +int main(int argc, char* argv[]) +{ + + if(argc != 2) + { + printf("input err! ./main s_audio_path\n"); + return -1; + } + + std::string sAudio = argv[1]; + std::string sAudioIm = argv[2]; + std::string sAudioOut = argv[3]; + cae_server_float_im(sAudio, sAudioIm, sAudioOut); + return 0; +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/AudioEffectsConf.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/AudioEffectsConf.h new file mode 100644 index 0000000..2668156 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/AudioEffectsConf.h @@ -0,0 +1,112 @@ +// +// Created by yangjianli on 2020/10/27. 
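+// When AE_CONFUSE_CODE is defined, every public class name below is rewritten
+// through AE_NEW_NAME into an obfuscated symbol by token pasting, e.g.:
+//   #define AE_NEW_NAME(NAME) AE_ ## NAME
+//   #define AlReverb AE_NEW_NAME(C_ALR_30001)   // AlReverb -> AE_C_ALR_30001
+// presumably so that release binaries do not expose readable effect-class names.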
+// + +#ifndef AUDIO_EFFECTS_LIB_AUDIOEFFECTSCONF_H +#define AUDIO_EFFECTS_LIB_AUDIOEFFECTSCONF_H +#include +#include +//#define AE_CONFUSE_CODE +#ifdef AE_CONFUSE_CODE + #define AE_NEW_NAME(NAME) AE_ ## NAME + + // AlReverb + #define AlReverb AE_NEW_NAME(C_ALR_30001) + #define AlReverbApi AE_NEW_NAME(C_ALRA_30002) + #define AlReverbBiquad AE_NEW_NAME(C_ALRB_30003) + #define AlReverbBiquad AE_NEW_NAME(C_ALRB_30003) + #define AlReverbEarlyReflection AE_NEW_NAME(C_ALRER_30004) + #define AlReverbEcho AE_NEW_NAME(C_ALRE_30005) + #define AlReverbLateAllpass AE_NEW_NAME(C_ALRLA_30006) + #define AlReverbLateLowpass AE_NEW_NAME(C_ALRLL_30007) + #define AlReverbLateReverb AE_NEW_NAME(C_ALRLR_30008) + #define AlReverbModulation AE_NEW_NAME(C_ALRM_30009) + #define SuperSoundFastDelay AE_NEW_NAME(C_SSFD_30010) + #define CFilters AE_NEW_NAME(C_CF_30011) + + // autotune + #define CATndkWrapper AE_NEW_NAME(C_ATW_40001) + #define CAutoTuneWrapper AE_NEW_NAME(C_ATW_40002) + #define CAutoTune AE_NEW_NAME(C_AT_40003) + + // common + #define APFilter AE_NEW_NAME(C_APF_50001) + #define CBaseFilter AE_NEW_NAME(C_F_50002) + #define LPFilter AE_NEW_NAME(C_LPF_50003) + #define HPFilter AE_NEW_NAME(C_LPF_50004) + #define BPFilter AE_NEW_NAME(C_LPF_50005) + #define NTFilter AE_NEW_NAME(C_NTF_50006) + #define LSFilter AE_NEW_NAME(C_LSF_50007) + #define HSFilter AE_NEW_NAME(C_HSF_50008) + #define PKFilter AE_NEW_NAME(C_PKF_50009) + #define APFilter AE_NEW_NAME(C_APF_50010) + + // iir_eq + #define CAudaciousEq AE_NEW_NAME(C_AEQ_60000) + #define CAudaciousEqApi AE_NEW_NAME(C_AEQA_60001) + #define CAudaciousArma AE_NEW_NAME(C_AEQA_60002) + + // phonograph + #define CPhonograph AE_NEW_NAME(C_CPG_70000) + #define CPhonographFilters AE_NEW_NAME(C_CPGF_70001) + + // CReverb + #define CReverb AE_NEW_NAME(C_CRB_80001) + #define allpass AE_NEW_NAME(C_AP_80002) + #define comb AE_NEW_NAME(C_CB_80003) + #define revmodel AE_NEW_NAME(C_RM_80004) + + // Equalizer + #define Equalizer AE_NEW_NAME(C_EQ_90001) + #define SimpleDelayEffect AE_NEW_NAME(C_SDE_90002) + #define SimpleReverbEffect AE_NEW_NAME(C_SRE_90003) + #define SAudioEffectsApi AE_NEW_NAME(C_SEA_90004) + #define Allplat AE_NEW_NAME(C_AP_90005) + #define Parameter AE_NEW_NAME(C_PM_90006) + #define MidiContext AE_NEW_NAME(C_MC_90007) + #define FXConfigInfo AE_NEW_NAME(C_FCI_90008) + #define AudioEffect AE_NEW_NAME(C_AE_90009) + #define Biquad AE_NEW_NAME(C_BQ_90010) + #define Buffer AE_NEW_NAME(C_BF_90011) + #define Damper AE_NEW_NAME(C_DP_90012) + #define Delay AE_NEW_NAME(C_DL_90013) + #define DelayI AE_NEW_NAME(C_DLI_90014) + #define EnvelopeFollower AE_NEW_NAME(C_EF_90015) + #define Reverb AE_NEW_NAME(C_RB_90016) + + // CSlowFlanging + #define CSlowFlanging AE_NEW_NAME(C_CSF_100000) + + // ToneShift + #define CToneShift AE_NEW_NAME(C_CTS_110000) + #define ToneShiftInterface AE_NEW_NAME(C_TSI_110001) + #define CSpeedShift AE_NEW_NAME(C_CSS_110002) + #define SpeedShiftInterface AE_NEW_NAME(C_SSI_110003) + + // src + #define IAudioEffects AE_NEW_NAME(C_IAE_120000) + #define CAudioAlReverb AE_NEW_NAME(C_CAALR_120001) + #define CAudioAlReverbCreator AE_NEW_NAME(C_CAALRC_120002) + #define CAudioAutoTune AE_NEW_NAME(C_IAE_120003) + #define CAudioAutoTuneCreator AE_NEW_NAME(C_IAEC_120004) + #define CAudioEffectsChain AE_NEW_NAME(C_IAEC_120005) + #define CAudioEqApi AE_NEW_NAME(C_CAEQA_120006) + #define CEqApiCreator AE_NEW_NAME(C_CAEQAC_120007) + #define CAudioPhonograph AE_NEW_NAME(C_CAPG_120008) + #define CAudioPhonographCreator AE_NEW_NAME(C_CAPGC_120009) + #define 
CAudioReverbApi AE_NEW_NAME(C_CARA_120010) + #define CAudioReverbApiCreator AE_NEW_NAME(C_CARAC_120011) + #define CAudioSAudioEffectsApi AE_NEW_NAME(C_CASAEA_120012) + #define CAudioSAudioEffectsCreator AE_NEW_NAME(C_CASAEAC_120013) + #define CAudioSlowFlanging AE_NEW_NAME(C_CASF_120014) + #define CAudioSlowFlangingCreator AE_NEW_NAME(C_CASFC_120015) + #define CAudioSmoothWrapper AE_NEW_NAME(C_CASW_120016) + #define CAudioToneShift AE_NEW_NAME(C_CATS_120017) + #define CKeepVolumeSteady AE_NEW_NAME(C_CKVS_120018) + #define ICreator AE_NEW_NAME(C_IC_120019) + + // ae_server + #define CAeServer AE_NEW_NAME(C_CAS_130000) + +#endif +#endif //AUDIO_EFFECTS_LIB_AUDIOEFFECTSCONF_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/AudioEffectsDef.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/AudioEffectsDef.h new file mode 100644 index 0000000..c8ade5f --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/AudioEffectsDef.h @@ -0,0 +1,97 @@ +// +// Created by yangjianli on 2020-01-09. +// + +#ifndef AUDIO_EFFECTS_LIB_AUDIOEFFECTSDEF_H +#define AUDIO_EFFECTS_LIB_AUDIOEFFECTSDEF_H + +#include "AudioEffectsConf.h" +#include "SAudioEffectsParams.h" +#include "CReverbParams.h" +#include "CAlReverbParams.h" +#include "CImEffectParams.h" +#include "ae_defs.h" + +// 效果器类型 +enum AE_EFFECT_TYPE +{ + AE_EFFECT_TYPE_NONE = 0, // 无效果 + AE_EFFECT_TYPE_EQ = 1, + AE_EFFECT_TYPE_SAE = 10000, // 使用自定义参数 + AE_EFFECT_TYPE_AUTOTUNE = 20000, + AE_EFFECT_TYPE_REVERB, + AE_EFFECT_TYPE_AL_REVERB = 30000, + AE_EFFECT_TYPE_PHONOGRAPH = 40000, + AE_EFFECT_TYPE_SLOWFLANGING, // AE_TYPE_DIZZY + AE_EFFECT_TYPE_TONE_SHIFT, + AE_EFFECT_TYPE_IM_EFFECT, +}; + +// 参数预定义类型 +enum AE_PARAMS_TYPE +{ + // SAE + AE_PARAMS_TYPE_SAE = AE_EFFECT_TYPE::AE_EFFECT_TYPE_SAE + 1, + AE_PARAMS_TYPE_SAE_POP = AE_PARAMS_TYPE_SAE, + AE_PARAMS_TYPE_SAE_STUDIO, + + AE_PARAMS_TYPE_RERVERB = AE_EFFECT_TYPE::AE_EFFECT_TYPE_REVERB + 1, // 使用自定义参数 + AE_PARAMS_TYPE_REVERB_RECORD_STUDIO = AE_PARAMS_TYPE_RERVERB, + AE_PARAMS_TYPE_REVERB_KTV, + AE_PARAMS_TYPE_REVERB_CONCERT, + AE_PARAMS_TYPE_REVERB_THEATER, + AE_PARAMS_TYPE_REVERB_NEW_KTV, + AE_PARAMS_TYPE_REVERB_NEW_CONCERT, // AE_TYPE_NEW_DISTANT + AE_PARAMS_TYPE_REVERB_NEW_THEATER, + AE_PARAMS_TYPE_REVERB_ID_7, + AE_PARAMS_TYPE_REVERB_ID_8, + AE_PARAMS_TYPE_REVERB_ID_9, + AE_PARAMS_TYPE_REVERB_ID_10, + AE_PARAMS_TYPE_REVERB_ID_11, // AE_TYPE_KTV + AE_PARAMS_TYPE_REVERB_ID_12, + AE_PARAMS_TYPE_REVERB_ID_13, + AE_PARAMS_TYPE_REVERB_ID_14, + AE_PARAMS_TYPE_REVERB_ID_15, // AE_TYPE_DISTANT + AE_PARAMS_TYPE_REVERB_ID_16, + AE_PARAMS_TYPE_REVERB_ID_17, + AE_PARAMS_TYPE_REVERB_ID_18, // AE_TYPE_KARAOKE/ [AE_TYPE_CUSTOM, 更新其wet和dry] + + AE_PARAMS_TYPE_AL_REVERB = AE_EFFECT_TYPE::AE_EFFECT_TYPE_AL_REVERB + 1, + AE_PARAMS_TYPE_AL_REVERB_GENERIC = AE_PARAMS_TYPE_AL_REVERB, + AE_PARAMS_TYPE_AL_REVERB_GENERIC_1, // AE_TYPE_MAGNETIC + AE_PARAMS_TYPE_AL_REVERB_CITY_STREETS, // AE_TYPE_WRAM + AE_PARAMS_TYPE_AL_REVERB_CASTLE_COURTYARD, // AE_TYPE_ETHEREAL + AE_PARAMS_TYPE_AL_REVERB_CASTLE_HALL, + AE_PARAMS_TYPE_AL_REVERB_DIZZY_NEW, + AE_PARAMS_TYPE_AL_REVERB_CASTLE_HALL_NEW, +}; + +// 错误码 +enum AE_ERR +{ + AE_ERR_SUCCESS = 0, + AE_ERR_NO_BUFFER = -1, + AE_ERR_NO_EFFECTS = -2, // 没有该音效[两个原因:1有可能没有进行静态链接库使用全符号链接,2:某音效没有做全局变量] + AE_ERR_NO_INIT = -3, // 没有做初始化 + AE_ERR_EFFECT_NOT_IN_CHAIN = -4, // 该效果器不在链上 + AE_ERR_PARAMS_ERR = -5, // 参数设置错误 +}; + +ST_AE_LIB_API struct AE_PARAMS +{ + +}; + +// EQ 参数结构 +ST_AE_LIB_API struct AE_PARAMS_EQ +{ + float params[10]; +}; + +ST_AE_LIB_API struct 
AE_PARAMS_TONE_SHIFT +{ + float shift_value; // [-12,12] + float max_value; + float min_value; +}; +#endif //AUDIO_EFFECTS_LIB_AUDIOEFFECTSDEF_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CAlReverbParams.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CAlReverbParams.h new file mode 100644 index 0000000..a1b648c --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CAlReverbParams.h @@ -0,0 +1,256 @@ +// +// Created by yangjianli on 2020-01-14. +// + +#ifndef AUDIO_EFFECTS_LIB_CALREVERBPARAMS_H +#define AUDIO_EFFECTS_LIB_CALREVERBPARAMS_H + +#include "ae_defs.h" + +//需要传入的参数 +ST_AE_LIB_API typedef struct AE_PARAMS_AL_REVERB +{ + //采样率,单位 hz + int32_t fs; + //输入声道数 + int32_t in_channels; + //输出声道数,需和输入通道相同 + int32_t out_channels; + //密度 + float density; + //衍射 + float diffusion; + //混响增益 + float reverbGain; + //高低切增益 + float lowpass_gain; + float highpass_gain; + //衰减时间 + float decay_time; + //高切截止比例 + float highpass_ratio; + float lowpass_ratio; + //前期反射和后期混响增益 + float early_gain; + float late_gain; + //高低切频率 + float lowpass_reference; + float highpass_reference; + //回声时间和回声深度 + float echo_time; + float echo_depth; + //调制时间和调制深度 + float modulation_time; + float modulation_depth; + //前期反射和后期混响时间 + float early_delay; + float late_delay; + //低通空气衰减增益 + float lowpass_air_absorption_gain; + //是否启用低切衰减限制 + int32_t b_decay_lowpass_limit; + // 是否进行滤波 + bool b_need_filter; +}; +ST_AE_LIB_API const AE_PARAMS_AL_REVERB gs_ae_params_al_reverb_generic = +{ + .fs = 0, + .in_channels = 0, + .out_channels = 0, + .density=1.000000, + .diffusion=1.000000, + .reverbGain=0.010000, + .lowpass_gain=1.000000, + .highpass_gain=0.891300, + .decay_time=1.490000, + .highpass_ratio=0.830000, + .lowpass_ratio=1.000000, + .early_gain=0.050000, + .late_gain=1.258900, + .lowpass_reference=250.000000, + .highpass_reference=5000.000000, + .echo_time=0.250000, + .echo_depth=0.000000, + .modulation_time=0.250000, + .modulation_depth=0.000000, + .early_delay=0.007000, + .late_delay=0.011000, + .lowpass_air_absorption_gain=0.994300, + .b_decay_lowpass_limit=1, + .b_need_filter = false +}; +ST_AE_LIB_API const AE_PARAMS_AL_REVERB gs_ae_params_al_reverb_generic_1 = +{ + .fs = 0, + .in_channels = 0, + .out_channels = 0, + .density=1.000000, + .diffusion=1.000000, + .reverbGain=0.10000, + .lowpass_gain=1.000000, + .highpass_gain=0.891300, + .decay_time=1.490000, + .highpass_ratio=0.830000, + .lowpass_ratio=1.000000, + .early_gain=0.050000, + .late_gain=1.258900, + .lowpass_reference=250.000000, + .highpass_reference=5000.000000, + .echo_time=0.250000, + .echo_depth=0.000000, + .modulation_time=0.250000, + .modulation_depth=0.000000, + .early_delay=0.007000, + .late_delay=0.011000, + .lowpass_air_absorption_gain=0.994300, + .b_decay_lowpass_limit=1, + .b_need_filter = false +}; +ST_AE_LIB_API const AE_PARAMS_AL_REVERB gs_ae_params_al_reverb_city_streets = +{ + .fs = 0, + .in_channels = 0, + .out_channels = 0, + .density=1.000000, + .diffusion=0.780000, + .reverbGain=0.100000, + .lowpass_gain=0.891300, + .highpass_gain=0.707900, + .decay_time=1.790000, + .highpass_ratio=1.120000, + .lowpass_ratio=0.910000, + .early_gain=0.281800, + .late_gain=0.199500, + .lowpass_reference=250.000000, + .highpass_reference=5000.000000, + .echo_time=0.250000, + .echo_depth=0.200000, + .modulation_time=0.250000, + .modulation_depth=0.000000, + .early_delay=0.046000, + .late_delay=0.028000, + .lowpass_air_absorption_gain=0.994300, + .b_decay_lowpass_limit=1, + .b_need_filter = true +}; + +ST_AE_LIB_API 
const AE_PARAMS_AL_REVERB gs_ae_params_al_reverb_castle_countryard = +{ + .fs = 0, + .in_channels = 0, + .out_channels = 0, + .density=1.000000, + .diffusion=0.420000, + .reverbGain=0.116200, + .lowpass_gain=0.199500, + .highpass_gain=0.446700, + .decay_time=2.130000, + .highpass_ratio=0.610000, + .lowpass_ratio=0.230000, + .early_gain=0.223900, + .late_gain=0.707900, + .lowpass_reference=250.000000, + .highpass_reference=5000.000000, + .echo_time=0.250000, + .echo_depth=0.370000, + .modulation_time=0.250000, + .modulation_depth=0.000000, + .early_delay=0.160000, + .late_delay=0.036000, + .lowpass_air_absorption_gain=0.994300, + .b_decay_lowpass_limit=0, + .b_need_filter = false +}; +ST_AE_LIB_API const AE_PARAMS_AL_REVERB gs_ae_params_al_reverb_hall = +{ + .fs = 0, + .in_channels = 0, + .out_channels = 0, + .density=1.000000, + .diffusion=0.810000, + .reverbGain=0.116200, + .lowpass_gain=0.177800, + .highpass_gain=0.281800, + .decay_time=7.540000, + .highpass_ratio=0.790000, + .lowpass_ratio=0.620000, + .early_gain=0.177800, + .late_gain=1.122000, + .lowpass_reference=139.500000, + .highpass_reference=5168.600098, + .echo_time=0.250000, + .echo_depth=0.500000, + .modulation_time=0.250000, + .modulation_depth=0.000000, + .early_delay=0.056000, + .late_delay=0.024000, + .lowpass_air_absorption_gain=0.994300, + .b_decay_lowpass_limit=1, + .b_need_filter = false +}; +ST_AE_LIB_API const AE_PARAMS_AL_REVERB gs_ae_params_al_reverb_new = +{ + .fs = 0, + .in_channels = 0, + .out_channels = 0, + .density=0.364500, + .diffusion=0.600000, + .reverbGain=0.100000, + .lowpass_gain=1.000000, + .highpass_gain=0.631000, + .decay_time=6.230000, + .highpass_ratio=0.560000, + .lowpass_ratio=1.000000, + .early_gain=0.139200, + .late_gain=0.493700, + .lowpass_reference=250.000000, + .highpass_reference=5000.000000, + .echo_time=0.850000, + .echo_depth=0.600000, + .modulation_time=0.810000, + .modulation_depth=0.310000, + .early_delay=0.020000, + .late_delay=0.030000, + .lowpass_air_absorption_gain=0.994300, + .b_decay_lowpass_limit=0, + .b_need_filter = false +}; +ST_AE_LIB_API const AE_PARAMS_AL_REVERB gs_ae_params_al_reverb_hall_new = +{ + .fs = 0, + .in_channels = 0, + .out_channels = 0, + .density=1.000000, + .diffusion=0.810000, + .reverbGain=0.116200, + .lowpass_gain=0.177800, + .highpass_gain=0.281800, + .decay_time=10.400000, + .highpass_ratio=0.790000, + .lowpass_ratio=0.620000, + .early_gain=0.177800, + .late_gain=1.122000, + .lowpass_reference=139.500000, + .highpass_reference=5168.600098, + .echo_time=0.250000, + .echo_depth=0.950000, + .modulation_time=0.250000, + .modulation_depth=0.000000, + .early_delay=0.056000, + .late_delay=0.024000, + .lowpass_air_absorption_gain=0.994300, + .b_decay_lowpass_limit=1, + .b_need_filter = false +}; + +ST_AE_LIB_API const AE_PARAMS_AL_REVERB gs_ae_params_al_reverbs[] = +{ + gs_ae_params_al_reverb_generic, + gs_ae_params_al_reverb_generic_1, + gs_ae_params_al_reverb_city_streets, + gs_ae_params_al_reverb_castle_countryard, + gs_ae_params_al_reverb_hall, + gs_ae_params_al_reverb_new, + gs_ae_params_al_reverb_hall_new +}; +#endif //AUDIO_EFFECTS_LIB_CALREVERBPARAMS_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CAudioEffectsChainApi.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CAudioEffectsChainApi.h new file mode 100644 index 0000000..02242a4 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CAudioEffectsChainApi.h @@ -0,0 +1,131 @@ +// +// Created by yangjianli on 2020-01-09. 
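+// Minimal call-order sketch, adapted from chain() in example/main.cpp
+// (error checks omitted; buffers are interleaved float, and in/out may alias,
+// though separate buffers are recommended per the notes below):
+//   void* chain = ae_create_object();
+//   ae_init(chain, 44100, 2);
+//   void* eq = ae_add_effect(chain, AE_EFFECT_TYPE_EQ);
+//   AE_PARAMS_EQ eq_params;                      // 10-band gain table
+//   memcpy(eq_params.params, STYLES[0], sizeof(float) * 10);
+//   ae_set_params(chain, eq, (AE_PARAMS*)&eq_params);
+//   ae_process(chain, buf, buf, buf_len);
+//   ae_delete_effect(chain, eq);
+//   ae_uninit(chain);
+//   ae_destory_object(chain);
+// STYLES here is the EQ preset table defined in example/main.cpp.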
+// + +#ifndef AUDIO_EFFECTS_LIB_CAUDIOEFFECTSCHAINAPI_H +#define AUDIO_EFFECTS_LIB_CAUDIOEFFECTSCHAINAPI_H +/** + * 音效效果链使用方式 + * ae_create_object -> ae_init() -> 各项操作 ->ae_uninit()->ae_destory_object + */ +#include "AudioEffectsDef.h" +#include "ae_defs.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + + /** + * 创建类实例 + * @return 获取类实例 + */ + ST_AE_LIB_API void* ae_create_object(); + + /** + * 销毁类释放空间 + * @param p ae_create_object中创建出的类 + */ + ST_AE_LIB_API void ae_destory_object(void *p); + + + /** + * 初始化类 + * 作用: 初始化内部成员 + * 注意: 创建新类之后,必须进行init才可以进行其他操作 + * @param p ae_create_object中创建出的类 + * @param sample_rate 采样率 + * @param channel 通道数 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + ST_AE_LIB_API AE_ERR ae_init(void *p, int sample_rate, int channel); + + /** + * 逆初始化类 + * 作用: 销毁类内部成员,释放空间 + * 注意: destory_object之前必须进行uninit释放空间 + * @param p ae_create_object中创建出的类 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + ST_AE_LIB_API AE_ERR ae_uninit(void *p); + + /** + * 重置函数 + * 作用: 清空类内部缓存buffer + * 注意: 一般是在输入数据发生变化(比如播放音乐的seek操作)之后,使用该函数 + * @param p ae_create_object中创建出的类 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + ST_AE_LIB_API AE_ERR ae_reset(void *p); + + /** + * 获取本系统的延迟时间(ms) + * 作用: 获取延迟时间 + * 注意: 在内部音效切换或者叠加时有可能引发音效延迟改变 + * @param p ae_create_object中创建出的类 + * @return 返回延迟时间(ms) + */ + ST_AE_LIB_API int ae_get_latency_ms(void *p); + + /** + * 对效果链上的音效参数进行设置 + * 作用: 修改音效参数,设置音效参数 + * @param p 效果链实例 + * @param casw 音效实例 + * @param params 需要设置的参数结构体 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + ST_AE_LIB_API AE_ERR ae_set_params(void *p, void *casw, AE_PARAMS *params); + + /** + * 获取效果链上的效果器参数 + * 作用: 获取效果器参数 + * 注意: 外部需要创建好空间 + * @param p 效果链实例 + * @param casw 音效实例 + * @param params 获取出的效果器参数 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + ST_AE_LIB_API AE_ERR ae_get_params(void *p, void *casw, AE_PARAMS *params); + + /** + * 处理函数 + * 作用: 音效系统主处理函数 + * 注意: inbuf和out_buf可以是同一块buf[建议不是同一块,防止之后添加新效果,新效果有此要求] + * in_buf 和 out_buf 必须一致,且长度为length + * @param p 效果链实例 + * @param in_buf 输入buf + * @param out_buf 输出buf + * @param length buf长度 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + ST_AE_LIB_API AE_ERR ae_process(void *p, float *in_buf, float *out_buf, int length); + + /** + * 添加音效 + * 作用: 创建一个效果器并添加到效果链上 + * 注意: 如果创建成功会添加到效果链并返回效果器地址,如果没有,则返回一个空指针 + * @param p 效果链实例 + * @param effects_number 效果器类型 + * @return 效果器的地址/nullptr + */ + ST_AE_LIB_API void* ae_add_effect(void *p, AE_EFFECT_TYPE effects_number); + + /** + * 删除音效 + * 作用: 关闭效果链上的效果器 + * 注意: 本函数只是关闭了效果器,并没有真正将其从效果链上删除,会在process的过程中自动删除 + * @param p 效果链实例 + * @param casw 效果器实例 + * @return 返回值是AE_ERR AE_ERR_SUCCESS是正常返回 + */ + ST_AE_LIB_API AE_ERR ae_delete_effect(void *p, void *casw); + +#ifdef __cplusplus +} +#endif + + + + +#endif //AUDIO_EFFECTS_LIB_CAUDIOEFFECTSCHAINAPI_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CImEffectParams.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CImEffectParams.h new file mode 100644 index 0000000..96f1358 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CImEffectParams.h @@ -0,0 +1,62 @@ +// +// Created by yangjianli on 2022/9/8. 
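+// Typical use through CAeServer, a sketch based on example/effect_im_tool.cpp
+// (the IR path is illustrative):
+//   AE_PARAMS_IM_EFFECT im_params = {
+//       .effect_path = "responses/impulse/2f82_l.wav", // IR, same fs as input
+//       .high_performance = false,  // true is faster but adds algorithm latency
+//   };
+//   cAeServer.set_params(AE_TYPE_IM_EFFECT, (void*)&im_params);
+// The server expands effect_path into a full Impulse_Param (see
+// get_im_params_by_ffmpeg() and m_im_path2params in CAeServer.h).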
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CIMEFFECTPARAMS_H
+#define AUDIO_EFFECTS_LIB_CIMEFFECTPARAMS_H
+#include "ae_defs.h"
+#include <string>
+#include <cstdint>
+
+ST_AE_LIB_API typedef struct _Impulse_Param
+{
+    //采样率,单位 hz
+    // 外围保证处理时的音频的采样率和卷积的采样率一致
+    int32_t fs;
+    //输入声道数
+    // 处理音频的输入通道数和初始化的时候一致即可[内部会自动赋值]
+    int32_t in_channels;
+    // 处理音频的输出通道数和初始化的时候一致即可[内部会自动赋值]
+    //输出声道数,需和输入通道相同
+    int32_t out_channels;
+    // 直接给10,后续在内部会根据输入长度做调整
+    //FIR 窗 bit 长度,最小为 10
+    int32_t window_bits;
+    //im 响应,外围需要保证其采样率和输入的采样率一致
+    float* im_response;
+    //响应的长度
+    int32_t response_len;
+    //响应的通道数,该通道数必须与输入通道数相同或为1,为1时表示输入的所有通道使用相同的响应
+    int32_t response_channels;
+    // 每次处理时输入的音频的长度(单声道的长度),用于计算延迟
+    int32_t process_buffer_len;
+    // true则高性能计算,存在算法延迟
+    bool high_performance;
+    // 音频文件地址
+    std::string effect_path;
+} Impulse_Param;
+
+inline void copy_impluse_params(Impulse_Param* dst, Impulse_Param* src)
+{
+    dst->fs = src->fs;
+    dst->in_channels = src->in_channels;
+    dst->out_channels = src->out_channels;
+    dst->window_bits = src->window_bits;
+    // 只拷贝首地址,减少拷贝时的内存消耗
+    dst->im_response = src->im_response;
+    dst->response_len = src->response_len;
+    dst->high_performance = src->high_performance;
+    dst->response_channels = src->response_channels;
+    dst->process_buffer_len = src->process_buffer_len;
+    dst->effect_path = src->effect_path;
+}
+
+ST_AE_LIB_API struct AE_PARAMS_IM_EFFECT
+{
+    // 音效的地址
+    std::string effect_path;
+    // true则高性能计算,存在算法延迟
+    bool high_performance;
+};
+
+#endif //AUDIO_EFFECTS_LIB_CIMEFFECTPARAMS_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CReverbParams.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CReverbParams.h
new file mode 100644
index 0000000..f9748cd
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/CReverbParams.h
@@ -0,0 +1,214 @@
+//
+// Created by yangjianli on 2020-01-13.
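+// The presets below are indexed by AE_PARAMS_TYPE offset, and individual
+// fields may be tweaked before applying, e.g. (from reverb() in example/main.cpp):
+//   AE_PARAMS_REVERB rb =
+//       gs_ae_params_reverb_params[AE_PARAMS_TYPE_REVERB_ID_18 - AE_PARAMS_TYPE_RERVERB];
+//   rb.room_size = 0.8;
+//   rb.wet = 0.8 / 3;
+//   cReverb->set_params(&rb);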
+// + +#ifndef AUDIO_EFFECTS_LIB_CREVERBPARAMS_H +#define AUDIO_EFFECTS_LIB_CREVERBPARAMS_H + +#include "ae_defs.h" + +ST_AE_LIB_API struct AE_PARAMS_REVERB +{ + float mode; + float room_size; + float damp; + float wet; + float dry; + float width; +}; + +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_record_studio = +{ + .mode=0.2f, + .room_size=0, + .damp=0, + .wet=0, + .dry=0.8f, + .width=0.5f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_ktv = +{ + .mode=0.2f, + .room_size=0.35f, + .damp=0.5f, + .wet=0.20f, + .dry=0.5f, + .width=0.5f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_concert = +{ + .mode=0.3f, + .room_size=0.50f, + .damp=0.6f, + .wet=0.3f, + .dry=0.6f, + .width=0.56f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_theater = +{ + .mode=0.0, + .room_size=0.6f, + .damp=0.2f, + .wet=0.4f, + .dry=0.5f, + .width=0.20f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_new_ktv = + { + .mode=0.2f, + .room_size=0.7f, + .damp=0.9f, + .wet=0.11f, + .dry=0.44f, + .width=0.5f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_new_concert = +{ + .mode=0.3f, + .room_size=0.60f, + .damp=0.6f, + .wet=0.18f, + .dry=0.37f, + .width=0.56f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_new_theater = +{ + .mode=0.0, + .room_size=0.8f, + .damp=0.8f, + .wet=0.248f, + .dry=0.182f, + .width=0.50f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_7 = +{ + .mode=0.2f, + .room_size=0.35f, + .damp=0.5f, + .wet=0.20f, + .dry=0.5f, + .width=0.5f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_8 = +{ + .mode=0.2f, + .room_size=0.35f, + .damp=0.5f, + .wet=0.20f, + .dry=0.5f, + .width=0.5f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_9 = +{ + .mode=0.2f, + .room_size=0.35f, + .damp=0.5f, + .wet=0.20f, + .dry=0.5f, + .width=0.5f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_10 = +{ + .mode=0.2f, + .room_size=0.35f, + .damp=0.5f, + .wet=0.20f, + .dry=0.5f, + .width=0.5f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_11 = +{ + .mode=0.2f, + .room_size=0.35f, + .damp=0.5f, + .wet=0.20f, + .dry=0.5f, + .width=0.5f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_12 = +{ + .mode=0.2f, + .room_size=0.4f, + .damp=0.69f, + .wet=0.14f, + .dry=0.22f, + .width=0.82f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_13 = + { + .mode=0.2f, + .room_size=0.41f, + .damp=0.18f, + .wet=0.16f, + .dry=0.24f, + .width=0.68f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_14 = +{ + .mode=0.2f, + .room_size=0.79f, + .damp=0.63f, + .wet=0.14f, + .dry=0.26f, + .width=0.92f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_15 = +{ + .mode=0.2f, + .room_size=0.84f, + .damp=0.25f, + .wet=0.12f, + .dry=0.22f, + .width=0.76f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_16 = +{ + .mode=0.2f, + .room_size=0.88f, + .damp=0.42f, + .wet=0.12f, + .dry=0.17f, + .width=0.84f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_17 = +{ + .mode=0.2f, + .room_size=0.35f, + .damp=0.5f, + .wet=0.20f, + .dry=0.5f, + .width=0.5f, +}; +ST_AE_LIB_API const AE_PARAMS_REVERB gs_reverb_id_18 = +{ + .mode=0.2f, + .room_size=0.65f, + .damp=0.25f, + .wet=0.25f, + .dry=0.6f, + .width=0.95f, +}; + +ST_AE_LIB_API const AE_PARAMS_REVERB gs_ae_params_reverb_params[] = { + gs_reverb_record_studio, + gs_reverb_ktv, + gs_reverb_concert, + gs_reverb_theater, + gs_reverb_new_ktv, + gs_reverb_new_concert, + gs_reverb_new_theater, + gs_reverb_id_7, + gs_reverb_id_8, + gs_reverb_id_9, + gs_reverb_id_10, + gs_reverb_id_11, + gs_reverb_id_12, + gs_reverb_id_13, + 
gs_reverb_id_14, + gs_reverb_id_15, + gs_reverb_id_16, + gs_reverb_id_17, + gs_reverb_id_18, +}; + +#endif //AUDIO_EFFECTS_LIB_CREVERBPARAMS_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/SAudioEffectsParams.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/SAudioEffectsParams.h new file mode 100644 index 0000000..1fcbe75 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/SAudioEffectsParams.h @@ -0,0 +1,322 @@ +// +// Created by yangjianli on 2020-01-10. +// + +#ifndef AUDIO_EFFECTS_LIB_SAUDIOEFFECTSPARAMS_H +#define AUDIO_EFFECTS_LIB_SAUDIOEFFECTSPARAMS_H + +/** + * 定义关于SAudioEffects的相关参数以及预定义的参数 + */ +#include "vector" +#include "string" +#include "ae_defs.h" + +// 定义参数需要的类名称,函数名称 +#define SAE_CLASS_EQ "Eq" +#define SAE_CLASS_DELAY "Delay" +#define SAE_CLASS_REVERB "Reverb" + +// 函数常量 +#define SAE_FUNC_CENTER_FREQ "Center Freq." +#define SAE_FUNC_GAIN "Gain" +#define SAE_FUNC_Q "Q" +#define SAE_FUNC_DELAY "Delay" +#define SAE_FUNC_FEEDBACK "Feedback" +#define SAE_FUNC_FEEDBACK_DAMPING "Feedback Damping" +#define SAE_FUNC_MIX "Mix" +#define SAE_FUNC_DECAY "Decay" +#define SAE_FUNC_DRY "Dry" +#define SAE_FUNC_INPUT_DAMPING "Input Damping" +#define SAE_FUNC_WET "Wet" +#define SAE_FUNC_IN_CH "In Ch." +#define SAE_FUNC_OUT_CH "Out Ch." + +// 根据方法名,选择对应类型的参数 +ST_AE_LIB_API struct AE_PARAMS_SAE_CONTENT +{ + std::string class_name; + std::string function_name; + int idx; + float f_value; + bool b_value; +}; + +// saudio_effects 参数 +ST_AE_LIB_API struct AE_PARAMS_SAE +{ + std::vector params_list; +}; + +ST_AE_LIB_API const AE_PARAMS_SAE_CONTENT gs_sae_pop[] = +{ + { + .class_name = SAE_CLASS_EQ, + .function_name= SAE_FUNC_CENTER_FREQ, + .idx=0, + .f_value=25.0, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name= SAE_FUNC_CENTER_FREQ, + .idx=1, + .f_value=1204.0, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name= SAE_FUNC_CENTER_FREQ, + .idx=2, + .f_value=10000.000000, + .b_value= false, + + }, + { + .class_name = SAE_CLASS_EQ, + .function_name= SAE_FUNC_GAIN, + .idx=0, + .f_value=-20.000000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name= SAE_FUNC_GAIN, + .idx=1, + .f_value=-3.500000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name= SAE_FUNC_GAIN, + .idx=2, + .f_value=2.200000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name= SAE_FUNC_Q, + .idx=0, + .f_value=1.000000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name= SAE_FUNC_Q, + .idx=1, + .f_value=3.100000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name= SAE_FUNC_Q, + .idx=2, + .f_value=1.000000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_DELAY, + .function_name= SAE_FUNC_DELAY, + .idx=0, + .f_value=160.000000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_DELAY, + .function_name= SAE_FUNC_FEEDBACK, + .idx=0, + .f_value=6.000000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_DELAY, + .function_name= SAE_FUNC_MIX, + .idx=0, + .f_value=5.500000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name= SAE_FUNC_DECAY, + .idx=0, + .f_value=0.700000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name= SAE_FUNC_DRY, + .idx=0, + .f_value=0.900000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name= SAE_FUNC_FEEDBACK_DAMPING, + .idx=0, + .f_value=0.000500, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + 
.function_name= SAE_FUNC_INPUT_DAMPING, + .idx=0, + .f_value=0.000500, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name= SAE_FUNC_WET, + .idx=0, + .f_value=0.100000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name= SAE_FUNC_IN_CH, + .idx=0, + .f_value=0.000000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name= SAE_FUNC_OUT_CH, + .idx=0, + .f_value=0.000000, + .b_value= true, + } +}; + +ST_AE_LIB_API const AE_PARAMS_SAE_CONTENT gs_sae_studio[] = +{ + { + .class_name = SAE_CLASS_EQ, + .function_name = SAE_FUNC_CENTER_FREQ, + .idx = 0, + .f_value = 25.0, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name = SAE_FUNC_CENTER_FREQ, + .idx = 1, + .f_value = 2229.0, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name = SAE_FUNC_CENTER_FREQ, + .idx = 2, + .f_value = 8105.0, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name = SAE_FUNC_GAIN, + .idx = 0, + .f_value = -20.0, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name = SAE_FUNC_GAIN, + .idx = 1, + .f_value = 3.0, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name = SAE_FUNC_GAIN, + .idx = 2, + .f_value = 1.8, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name = SAE_FUNC_Q, + .idx = 0, + .f_value = 0.999000, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name = SAE_FUNC_Q, + .idx = 1, + .f_value = 0.6, + .b_value= false, + }, + { + .class_name = SAE_CLASS_EQ, + .function_name = SAE_FUNC_Q, + .idx = 2, + .f_value = 0.1, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name = SAE_FUNC_DECAY, + .idx = 0, + .f_value = 0.7, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name = SAE_FUNC_DRY, + .idx = 0, + .f_value = 0.7, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name = SAE_FUNC_FEEDBACK_DAMPING, + .idx = 0, + .f_value = 0.000500f, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name = SAE_FUNC_INPUT_DAMPING, + .idx = 0, + .f_value = 0.000500f, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name = SAE_FUNC_WET, + .idx = 0, + .f_value = 0.2, + .b_value= false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name = SAE_FUNC_IN_CH, + .idx = 0, + .f_value = 0.0, + .b_value = false, + }, + { + .class_name = SAE_CLASS_REVERB, + .function_name = SAE_FUNC_OUT_CH, + .idx = 0, + .f_value = 0.0, + .b_value = true, + }, +}; +ST_AE_LIB_API const AE_PARAMS_SAE gs_sae_params_pop = +{ + .params_list = std::vector(gs_sae_pop, + gs_sae_pop+19), +}; + +ST_AE_LIB_API const AE_PARAMS_SAE gs_sae_params_studio = +{ + .params_list = std::vector(gs_sae_studio, + gs_sae_studio+16), +}; + + +ST_AE_LIB_API const AE_PARAMS_SAE gs_sae_params[] = +{ + gs_sae_params_pop,gs_sae_params_studio +}; + +#endif //AUDIO_EFFECTS_LIB_SAUDIOEFFECTSPARAMS_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/ae_defs.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/ae_defs.h new file mode 100644 index 0000000..8c9aff1 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/inc/ae_defs.h @@ -0,0 +1,12 @@ +/** + * Author: AlanWang4523. + * Date: 2021/12/29 11:38. 
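+ *
+ * ST_AE_LIB_API expands to __attribute__((visibility("default"))), keeping the
+ * tagged symbols exported when the effect libraries are built with hidden
+ * symbol visibility (cf. the commented CXX_VISIBILITY_PRESET line in
+ * ref/al_reverb/CMakeLists.txt).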
+ * Mail: alanwang4523@gmail.com + */ + +#ifndef AVAUDIO_EFFECT_LIBS_DEFINES_H +#define AVAUDIO_EFFECT_LIBS_DEFINES_H + +#define ST_AE_LIB_API __attribute__ ((visibility("default"))) + +#endif //AVAUDIO_EFFECT_LIBS_DEFINES_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/py/1.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/py/1.txt new file mode 100644 index 0000000..f9865ec --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/py/1.txt @@ -0,0 +1,261 @@ + + /***************** v1.0, id 0~3, add here ************************************/ + case KALA_VB_RECORD_STUDIO: + { + pmd->setmode(0.2f); + pmd->setroomsize(0); + pmd->setdamp(0); + pmd->setwet(0); + pmd->setdry(0.8f); + pmd->setwidth(0.5f); + break; + } + case KALA_VB_KTV: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + + case KALA_VB_CONCERT: + { + // for concert ok + pmd->setmode(0.3f); + pmd->setroomsize(0.50f); + pmd->setdamp(0.6f); + pmd->setwet(0.3f); + pmd->setdry(0.6f); + pmd->setwidth(0.56f); + break; + } + + case KALA_VB_THEATER: + { + // for theater + pmd->setmode(0.0); + pmd->setroomsize(0.6f); + pmd->setdamp(0.2f); + pmd->setwet(0.4f); + pmd->setdry(0.5f); + pmd->setwidth(0.20f); + break; + } + /***************** kala v2.6, id 0\4\5\6, add here ************************************/ + case KALA_VB_NEW_KTV: + { + + // v2.2 + pmd->setmode(0.2f); + pmd->setroomsize(0.7f); + pmd->setdamp(0.9f); + pmd->setwet(0.11f); + pmd->setdry(0.44f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_NEW_CONCERT: + { + // v2.2 + pmd->setmode(0.3f); + pmd->setroomsize(0.60f); + pmd->setdamp(0.6f); + pmd->setwet(0.18f); + pmd->setdry(0.37f); + pmd->setwidth(0.56f); + + break; + } + case KALA_VB_NEW_THEATER: + { + + //old version + //pmd->setmode(0.0); + //pmd->setroomsize(0.6f); + //pmd->setdamp(0.2f); + //pmd->setwet(0.4f); + //pmd->setdry(0.5f); + //pmd->setwidth(0.20f); + + // new 1.0 + //pmd->setmode(0.0); + //pmd->setroomsize(0.8f); + //pmd->setdamp(0.8f); + //pmd->setwet(0.3f); + //pmd->setdry(0.4f); + //pmd->setwidth(0.50f); + + // v2.0 + //pmd->setmode(0.0); + //pmd->setroomsize(0.8f); + //pmd->setdamp(0.8f); + //pmd->setwet(0.22f); + //pmd->setdry(0.14f); + //pmd->setwidth(0.50f); + + //v2.2 + pmd->setmode(0.0); + pmd->setroomsize(0.8f); + pmd->setdamp(0.8f); + pmd->setwet(0.248f); + pmd->setdry(0.182f); + pmd->setwidth(0.50f); + + break; + } + /***************** kala v2.6, id 0\4\5\6, add here ************************************/ + case KALA_VB_ID_7: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_8: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_9: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_10: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_11: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case 
KALA_VB_ID_12: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.4f); + pmd->setdamp(0.69f); + pmd->setwet(0.14f); + pmd->setdry(0.22f); + pmd->setwidth(0.82f); + + break; + } + case KALA_VB_ID_13: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.41f); + pmd->setdamp(0.18f); + pmd->setwet(0.16f); + pmd->setdry(0.24f); + pmd->setwidth(0.68f); + + break; + } + case KALA_VB_ID_14: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.79f); + pmd->setdamp(0.63f); + pmd->setwet(0.14f); + pmd->setdry(0.26f); + pmd->setwidth(0.92f); + + break; + } + case KALA_VB_ID_15: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.84f); + pmd->setdamp(0.25f); + pmd->setwet(0.12f); + pmd->setdry(0.22f); + pmd->setwidth(0.76f); + + break; + } + case KALA_VB_ID_16: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.88f); + pmd->setdamp(0.42f); + pmd->setwet(0.12f); + pmd->setdry(0.17f); + pmd->setwidth(0.84f); + + break; + } + case KALA_VB_ID_17: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_18: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.65f); + pmd->setdamp(0.25f); + pmd->setwet(0.25f); + pmd->setdry(0.6f); + pmd->setwidth(0.95f); + + break; + } + default: + { + pmd->setmode(1); + break; + } \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/py/trans.py b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/py/trans.py new file mode 100644 index 0000000..fc65690 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/py/trans.py @@ -0,0 +1,49 @@ +import os + + +def read_file(file_path): + res = {} + current_key = "" + with open(file_path, "r") as f: + while True: + line = f.readline() + if not line: + return res + line = line.strip().split() + if not line: + continue + if line[0][0] not in ["p", "c"]: + continue + if line[0] == "case": + current_key = line[1].split(':')[0] + continue + + line = line[0].split('->')[1].replace('set', "").split(':')[0] + func, para = line.split('(') + para = str(para).replace(');', "") + if current_key not in res: + res[current_key] = {} + if func == "roomsize": + func = "room_size" + res[current_key][func] = para + return res + + +if __name__ == "__main__": + res = read_file("1.txt") + for k, v in res.items(): + print("const AE_PARAMS_REVERB gs_{}".format(k.lower().replace("kala_vb", "reverb"))) + print("{") + for kk, vv in v.items(): + print("\t.{}={},".format(kk, vv)) + print("};") + + print("AE_PARAMS_REVERB gs_reverb_params[] = {") + for k, v in res.items(): + print("gs_{},".format(k.lower().replace("kala_vb","reverb"))) + print("};") + + for k, v in res.items(): + print("AE_PARAMS_TYPE_{},".format(k.replace("KALA_VB", "REVERB"))) + + diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/CMakeLists.txt new file mode 100644 index 0000000..4e84812 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/CMakeLists.txt @@ -0,0 +1,12 @@ +include_directories(common) +add_subdirectory(common) +add_subdirectory(tone_shift) +add_subdirectory(slow_flanging) +add_subdirectory(phonograph) +add_subdirectory(al_reverb) +add_subdirectory(saudio_effects) +add_subdirectory(autotune) +add_subdirectory(waves) +add_subdirectory(iir_eq) +add_subdirectory(reverb) +add_executable(supersound) \ No newline at end of file diff --git 
a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/CMakeLists.txt new file mode 100644 index 0000000..7196bb9 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/CMakeLists.txt @@ -0,0 +1,23 @@ +include_directories(./) +include_directories(inc) + +include_directories(src) +include_directories(src/biquad_filters) +include_directories(src/fast_delay) +include_directories(src/filter) +include_directories(src/AlReverbApi.cpp) +include_directories(src/al_reverb) +include_directories(src/al_reverb_biquad) +include_directories(src/al_reverb_common) +include_directories(src/al_reverb_early_reflection) +include_directories(src/al_reverb_echo) +include_directories(src/al_reverb_late_allpass) +include_directories(src/al_reverb_late_lowpass) +include_directories(src/al_reverb_late_reverb) +include_directories(src/al_reverb_modulation) + + +file(GLOB_RECURSE SRC_ALREVERB_DIR src/*cpp) + +add_library(al_reverb ${SRC_ALREVERB_DIR}) +#set_target_properties(al_reverb PROPERTIES CXX_VISIBILITY_PRESET hidden) \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/inc/AlReverbApi.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/inc/AlReverbApi.h new file mode 100644 index 0000000..87ae900 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/inc/AlReverbApi.h @@ -0,0 +1,47 @@ +// +// Created by yangjianli on 2020-01-14. +// + +#ifndef AUDIO_EFFECTS_LIB_ALREVERBAPI_H +#define AUDIO_EFFECTS_LIB_ALREVERBAPI_H + +#include "AlReverbDefs.h" +#include "vector" + +class CFilters; +namespace SUPERSOUND +{ +namespace ALREVERB +{ +class AlReverb; +class AlReverbApi +{ +public: + AlReverbApi(); + ~AlReverbApi(); + +public: + void flush(); + int32_t get_latency(); + int32_t set_param(AE_PARAMS_AL_REVERB *param); + + // 线上接口 + int32_t init(int32_t sample_rate, int32_t channel, int32_t n_type_id); + void reset(); + int32_t process(float *data, int len); + int32_t uninit(); +private: + AlReverb * m_reverb; + CFilters * m_filter; + + int m_sample_rate; + int m_channel; + + bool m_need_filter; // 是否需要滤波 + bool m_need_process; // 是否需要处理 +}; +} +} + + +#endif //AUDIO_EFFECTS_LIB_ALREVERBAPI_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/inc/AlReverbDefs.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/inc/AlReverbDefs.h new file mode 100755 index 0000000..df0a4c1 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/inc/AlReverbDefs.h @@ -0,0 +1,138 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//在 alreverb 工程中使用到的常量和结构 + +#ifndef __AL_REVERB_TYPES_H__ +#define __AL_REVERB_TYPES_H__ + +#include +#include "CAlReverbParams.h" +#include "AudioEffectsConf.h" +class Arp_param +{ +public: + float fl_density; + float fl_diffusion; + float fl_gain; + float fl_gain_hf; + float fl_gain_lf; + float fl_decay_time; + float fl_decayhf_ratio; + float fl_decay_lf_ratio; + float fl_reflections_gain; + float fl_reflections_delay; + float fl_reflections_pan[3]; + float fl_late_reverb_gain; + float fl_late_reverb_delay; + float fl_late_reverb_pan[3]; + float fl_echo_time; + float fl_echo_depth; + float fl_modulation_time; + float fl_modulation_depth; + float fl_air_absorption_gain_hf; + float fl_hf_reference; + float fl_lf_reference; + float fl_room_rolloff_factor; + int i_decay_hf_limit; +}; + +namespace SUPERSOUND +{ + + +//安全删除指针 +#ifndef SAFE_DELETE_PTR +#define SAFE_DELETE_PTR(a) \ +{ \ + if(a) \ + { \ + delete [] a; \ + a = NULL; \ + } \ +} +#endif /* SAFE_DELETE_PTR */ + +#ifndef SAFE_DELETE_OBJ +#define SAFE_DELETE_OBJ(a) \ +{ \ + if(a) \ + { \ + delete a; \ + a = NULL; \ + } \ +} +#endif /* SAFE_DELETE_OBJ */ + +#ifndef MIN +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif /* MIN */ + +#ifndef MAX +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) +#endif /* MAX */ + +//将 a 规范到 [b, c] 之间 +#ifndef MIDDLE +#define MIDDLE(a, b, c) (MIN(c, MAX(a, b))) +#endif /* MIDDLE */ + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif /* M_PI */ + +#ifndef FLOAT_EQUAL +#define FLOAT_EQUAL(a, b) (fabs((a) - (b)) <= 1E-5) +#endif /* FLOAT_EQUAL */ + +//必须是 2 的幂次,这样方便计算一些 +#define SUPERSOUND_WAV_BUF_STEP_LEN 1024 +#define SUPERSOUND_DEFAULT_FFT_LEN SUPERSOUND_WAV_BUF_STEP_LEN +#define SUPERSOUND_CHANNEL_PROC_LEN SUPERSOUND_DEFAULT_FFT_LEN + +} + +//成功 +#define ERROR_SUPERSOUND_SUCCESS 0 + +//输入参数有误 +#define ERROR_SUPERSOUND_PARAM 2000 +//内存不够 +#define ERROR_SUPERSOUND_MEMORY 2001 + +enum ALRB_ERR { + ALRB_ERR_SUCCESS = 0, + ALRB_ERR_PARAM = -1, + ALRB_ERR_BASE_H_MALLOC_NULL = -2, +}; + + +#endif /* __AL_REVERB_TYPES_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/AlReverbApi.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/AlReverbApi.cpp new file mode 100644 index 0000000..78e253a --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/AlReverbApi.cpp @@ -0,0 +1,103 @@ +// +// Created by yangjianli on 2020-01-14. 
+// + +#include "AlReverbApi.h" +#include "AlReverb.h" +#include "filter/CFilters.h" +namespace SUPERSOUND +{ + namespace ALREVERB + { + AlReverbApi::AlReverbApi() + { + m_reverb = nullptr; + m_filter = nullptr; + } + + AlReverbApi::~AlReverbApi() + { + uninit(); + } + + int32_t AlReverbApi::init(int32_t sample_rate, int32_t channel, int32_t n_type_id) + { + m_sample_rate = sample_rate; + m_channel = channel; + + m_reverb = new AlReverb(); + m_reverb->init(channel, sample_rate, n_type_id); + + m_filter = new CFilters(); + m_filter->Init(sample_rate, channel); + m_filter->setFilterType(LOW_PASS_FILTER); + m_need_filter = false; + m_need_process = false; + return ALRB_ERR_SUCCESS; + } + + int32_t AlReverbApi::uninit() + { + if(nullptr != m_reverb) + { + m_reverb->uninit(); + delete m_reverb; + m_reverb = nullptr; + } + + if(nullptr != m_filter) + { + m_filter->Uninit(); + delete m_filter; + m_filter = nullptr; + } + return ALRB_ERR_SUCCESS; + } + + void AlReverbApi::reset() + { + m_filter->Reset(); + m_reverb->reset(); + } + + int32_t AlReverbApi::set_param(AE_PARAMS_AL_REVERB *param) + { + if(nullptr == param) + { + m_need_process = false; + return ALRB_ERR_SUCCESS; + } + m_need_process = true; + m_need_filter = param->b_need_filter; + param->fs = m_sample_rate; + param->in_channels = m_channel; + param->out_channels = m_channel; + return m_reverb->set_param(param); + } + + void AlReverbApi::flush() + { + m_filter->Reset(); + m_reverb->flush(); + } + + int32_t AlReverbApi::get_latency() + { + return m_reverb->get_latecy(); + } + + int32_t AlReverbApi::process(float *data, int len) + { + if(!m_need_process) + { + return ALRB_ERR_SUCCESS; + } + + if(m_need_filter) + { + m_filter->Process(data, len); + } + return m_reverb->process(data, len); + } + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb/AlReverb.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb/AlReverb.cpp new file mode 100755 index 0000000..fce36e3 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb/AlReverb.cpp @@ -0,0 +1,415 @@ + +#include "AlReverb.h" +#include "AlReverbDefs.h" +#include "AlReverbCommon.h" +#include +#include +#include +#include + +#define ARP_EFFECT_ID_MAX 8 +namespace SUPERSOUND +{ +namespace ALREVERB +{ + +static Arp_param aEaxPreset[ARP_EFFECT_ID_MAX] = + { + // EFX_REVERB_PRESET_GENERIC 0 + { 1.0000f, 1.0000f, 0.01f, 0.8913f, 1.0000f, 1.4900f, 0.8300f, 1.0000f, 0.0500f, 0.0070f, { 0.0000f, 0.0000f, 0.0000f }, 1.2589f, 0.0110f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, +// { 1.0000f, 1.0000f, 0.3162f, 0.8913f, 1.0000f, 1.4900f, 0.8300f, 1.0000f, 0.0500f, 0.0070f, { 0.0000f, 0.0000f, 0.0000f }, 1.2589f, 0.0110f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, + //{ 1.0000f, 1.0000f, 0.1f, 0.8913f, 1.0000f, 1.4900f, 0.8300f, 1.0000f, 0.0500f, 0.0070f, { 0.0000f, 0.0000f, 0.0000f }, 1.2589f, 0.0110f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, + //EFX_REVERB_PRESET_GENERIC + { 1.0000f, 1.0000f, 0.1f, 0.8913f, 1.0000f, 1.4900f, 0.8300f, 1.0000f, 0.0500f, 0.0070f, { 0.0000f, 0.0000f, 0.0000f }, 1.2589f, 0.0110f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, + + 
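+        // Row layout follows Arp_param (AlReverbDefs.h): density, diffusion,
+        // gain, gain_hf, gain_lf, decay_time, decay_hf_ratio, decay_lf_ratio,
+        // reflections_gain, reflections_delay, reflections_pan[3],
+        // late_reverb_gain, late_reverb_delay, late_reverb_pan[3], echo_time,
+        // echo_depth, modulation_time, modulation_depth,
+        // air_absorption_gain_hf, hf_reference, lf_reference,
+        // room_rolloff_factor, decay_hf_limit.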
//EFX_REVERB_PRESET_CITY_STREETS + { 1.0000f, 0.7800f, 0.1f, 0.7079f, 0.8913f, 1.7900f, 1.1200f, 0.9100f, 0.2818f, 0.0460f, { 0.0000f, 0.0000f, 0.0000f }, 0.1995f, 0.0280f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.2000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, + + //EFX_REVERB_PRESET_GENERIC + { 1.0000f, 1.0000f, 0.1f, 0.8913f, 1.0000f, 1.4900f, 0.8300f, 1.0000f, 0.0500f, 0.0070f, { 0.0000f, 0.0000f, 0.0000f }, 1.2589f, 0.0110f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, + + //EFX_REVERB_PRESET_CASTLE_COURTYARD + { 1.0000f, 0.4200f, 0.1162f, 0.4467f, 0.1995f, 2.1300f, 0.6100f, 0.2300f, 0.2239f, 0.1600f, { 0.0000f, 0.0000f, 0.0000f }, 0.7079f, 0.0360f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.3700f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x0 }, + + //EFX_REVERB_PRESET_CASTLE_HALL + { 1.0000f, 0.8100f, 0.1162f, 0.2818f, 0.1778f, 7.5400f, 0.7900f, 0.6200f, 0.1778f, 0.0560f, { 0.0000f, 0.0000f, 0.0000f }, 1.1220f, 0.0240f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.5000f, 0.2500f, 0.0000f, 0.9943f, 5168.6001f, 139.5000f, 0.0000f, 0x1 }, + + + //EFX_REVERB_PRESET_DIZZY_NEW + { 0.3645f, 0.6000f, 0.1f, 0.6310f, 1.0000f, 6.2300f, 0.5600f, 1.0000f, 0.1392f, 0.0200f, { 0.0000f, 0.0000f, 0.0000f }, 0.4937f, 0.0300f, { 0.0000f, 0.0000f, 0.0000f }, 0.8500f, 0.6000f, 0.8100f, 0.3100f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x0 }, + + //EFX_REVERB_PRESET_CASTLE_HALL_NEW + { 1.0000f, 0.8100f, 0.1162f, 0.2818f, 0.1778f, 10.400f, 0.7900f, 0.6200f, 0.1778f, 0.0560f, { 0.0000f, 0.0000f, 0.0000f }, 1.1220f, 0.0240f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.9500f, 0.2500f, 0.0000f, 0.9943f, 5168.6001f, 139.5000f, 0.0000f, 0x1 }, + + + // EFX_REVERB_PRESET_ROOM 2 + //{ 0.4287f, 1.0000f, 0.3162f, 0.5929f, 1.0000f, 0.4000f, 0.8300f, 1.0000f, 0.1503f, 0.0020f, { 0.0000f, 0.0000f, 0.0000f }, 1.0629f, 0.0030f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, + + // EFX_REVERB_PRESET_CONCERTHALL 7 + //{ 1.0000f, 1.0000f, 0.3162f, 0.5623f, 1.0000f, 3.9200f, 0.7000f, 1.0000f, 0.2427f, 0.0200f, { 0.0000f, 0.0000f, 0.0000f }, 0.9977f, 0.0290f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, + + // EFX_REVERB_PRESET_ARENA 9 + //{ 1.0000f, 1.0000f, 0.3162f, 0.4477f, 1.0000f, 7.2400f, 0.3300f, 1.0000f, 0.2612f, 0.0200f, { 0.0000f, 0.0000f, 0.0000f }, 1.0186f, 0.0300f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, + + // EFX_REVERB_PRESET_ICEPALACE_HALL 49 + //{ 1.0000f, 0.7600f, 0.3162f, 0.4467f, 0.5623f, 5.4900f, 1.5300f, 0.3800f, 0.1122f, 0.0540f, { 0.0000f, 0.0000f, 0.0000f }, 0.6310f, 0.0520f, { 0.0000f, 0.0000f, 0.0000f }, 0.2260f, 0.1100f, 0.2500f, 0.0000f, 0.9943f, 12428.5000f, 99.6000f, 0.0000f, 0x1 }, + + // EFX_REVERB_PRESET_SPACESTATION_CUPBOARD 59 + //{ 0.1715f, 0.5600f, 0.3162f, 0.7079f, 0.8913f, 0.7900f, 0.8100f, 0.5500f, 1.4125f, 0.0070f, { 0.0000f, 0.0000f, 0.0000f }, 1.7783f, 0.0180f, { 0.0000f, 0.0000f, 0.0000f }, 0.1810f, 0.3100f, 0.2500f, 0.0000f, 0.9943f, 3316.1001f, 458.2000f, 0.0000f, 0x1 }, + + + // EFX_REVERB_PRESET_STONEROOM 5 + //{ 1.0000f, 1.0000f, 0.3162f, 0.7079f, 1.0000f, 2.3100f, 0.6400f, 1.0000f, 0.4411f, 0.0120f, { 0.0000f, 0.0000f, 0.0000f }, 1.1003f, 0.0170f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 }, 
+
+	// EFX_REVERB_PRESET_AUDITORIUM 6
+	//{ 1.0000f, 1.0000f, 0.3162f, 0.5781f, 1.0000f, 4.3200f, 0.5900f, 1.0000f, 0.4032f, 0.0200f, { 0.0000f, 0.0000f, 0.0000f }, 0.7170f, 0.0300f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 },
+
+	// EFX_REVERB_PRESET_CAVE 8
+	//{ 1.0000f, 1.0000f, 0.3162f, 1.0000f, 1.0000f, 2.9100f, 1.3000f, 1.0000f, 0.5000f, 0.0150f, { 0.0000f, 0.0000f, 0.0000f }, 0.7063f, 0.0220f, { 0.0000f, 0.0000f, 0.0000f }, 0.2500f, 0.0000f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x0 },
+
+	// EFX_REVERB_PRESET_ALLEY 14
+	//{ 1.0000f, 0.3000f, 0.3162f, 0.7328f, 1.0000f, 1.4900f, 0.8600f, 1.0000f, 0.2500f, 0.0070f, { 0.0000f, 0.0000f, 0.0000f }, 0.9954f, 0.0110f, { 0.0000f, 0.0000f, 0.0000f }, 0.1250f, 0.9500f, 0.2500f, 0.0000f, 0.9943f, 5000.0000f, 250.0000f, 0.0000f, 0x1 },
+
+	// EFX_REVERB_PRESET_FACTORY_LARGEROOM 38, // Factory, large room; used here as a stand-in for a parking garage
+	//{ 0.4287f, 0.7500f, 0.2512f, 0.7079f, 0.6310f, 4.2400f, 0.5100f, 1.3100f, 0.1778f, 0.0390f, { 0.0000f, 0.0000f, 0.0000f }, 1.1220f, 0.0230f, { 0.0000f, 0.0000f, 0.0000f }, 0.2310f, 0.0700f, 0.2500f, 0.0000f, 0.9943f, 3762.6001f, 362.5000f, 0.0000f, 0x1 },
+
+	// EFX_REVERB_PRESET_CITY_LIBRARY 107, // City, library
+	//{ 1.0000f, 0.8200f, 0.3162f, 0.2818f, 0.0891f, 2.7600f, 0.8900f, 0.4100f, 0.3548f, 0.0290f, { 0.0000f, 0.0000f, -0.0000f }, 0.8913f, 0.0200f, { 0.0000f, 0.0000f, 0.0000f }, 0.1300f, 0.1700f, 0.2500f, 0.0000f, 0.9943f, 2854.3999f, 107.5000f, 0.0000f, 0x0 },
+
+	};
+
+AlReverb::AlReverb()
+{
+	m_late_desity_gain = 1;
+	m_b_update = true;
+	m_current_id = 0;
+	memset(&m_current_param, 0, sizeof(m_current_param));
+	// Default configuration (the values mirror the CONCERTHALL preset)
+	m_new_param.fs = 44100;
+	m_new_param.in_channels = 2;
+	m_new_param.out_channels = 2;
+	m_new_param.density = 1;
+	m_new_param.diffusion = 1;
+	m_new_param.reverbGain = 0.3162f;
+	m_new_param.lowpass_gain = 0.5623f;
+	m_new_param.highpass_gain = 1;
+	m_new_param.decay_time = 3.92f;
+	m_new_param.lowpass_ratio = 0.7f;
+	m_new_param.highpass_ratio = 0.7f; // was left uninitialized, but update() reads it before any preset is applied
+	m_new_param.early_gain = 0.2427f;
+	m_new_param.late_gain = 0.9977f;
+	m_new_param.lowpass_reference = 5000;
+	m_new_param.highpass_reference = 250;
+	m_new_param.echo_time = 0.25f;
+	m_new_param.echo_depth = 0;
+	m_new_param.modulation_time = 0.25f;
+	m_new_param.modulation_depth = 0;
+	m_new_param.early_delay = 0.02f;
+	m_new_param.late_delay = 0.029f;
+	m_new_param.lowpass_air_absorption_gain = 0.9943f;
+	m_new_param.b_decay_lowpass_limit = 1;
+}
+
+AlReverb::~AlReverb()
+{
+
+}
+
+void AlReverb::flush()
+{
+	m_lowpass.flush();
+	m_highpass.flush();
+	m_modulation.flush();
+	m_early_delay.flush();
+	m_late_delay.flush();
+	m_early_reflection.flush();
+	m_late_reverb.flush();
+	m_echo.flush();
+
+	for(int32_t i = 0; i < 3; i++)
+	{
+		m_decorrelator[i].flush();
+	}
+}
+
+int32_t AlReverb::get_latecy()
+{
+	// The dry signal is passed straight through, so no latency is reported
+	return 0;
+}
+
+void AlReverb::control_update()
+{
+	m_b_update = true;
+}
+
+int32_t AlReverb::set_param(AE_PARAMS_AL_REVERB *param)
+{
+	m_new_param = *param;
+	control_update();
+	return ERROR_SUPERSOUND_SUCCESS;
+}
+
+int32_t AlReverb::process(std::vector<float *> &buf_vector, int32_t in_num)
+{
+	int32_t nRet = ERROR_SUPERSOUND_SUCCESS;
+
+	float early[4], late[4];
+	float taps[4];
+	float in;
+	float * left = buf_vector[0];
+	float * right = NULL;
+
+	// Re-derive the internal filters if the parameters changed
+	if(m_b_update)
+	{
+		nRet = update();
+		m_b_update = false;
+		if(nRet != ERROR_SUPERSOUND_SUCCESS)
+			return nRet;
+	}
+
+	// Bind the right channel for stereo input
+	if(m_current_param.in_channels == 2)
+		right = buf_vector[1];
+
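+	// Per-sample signal flow (see the loop below): mono downmix ->
+	// shelving EQ (m_lowpass / m_highpass) -> modulation -> early delay
+	// feeding the 4-tap early reflections -> late delay feeding three
+	// decorrelation taps and the 4-line FDN late reverb -> echo, after
+	// which the wet taps are mixed back onto the dry input.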
+	// The block was originally sized for stereo; the input is downmixed to
+	// mono here before entering the reverb network
+	for(int32_t i = 0; i < in_num; i++)
+	{
+		if(right == NULL)
+			in = left[i] / 4;
+		else
+			in = (left[i] + right[i]) / 8;
+
+		in = m_lowpass.filtef(in);
+		in = m_highpass.filtef(in);
+
+		in = m_modulation.filter(in);
+		// Early reflection stage
+		in = m_early_delay.filter(in);
+		m_early_reflection.filter(in, early);
+		// Late reverb stage
+		in = m_late_delay.filter(in);
+		taps[0] = in * m_late_desity_gain;
+		taps[1] = m_decorrelator[0].filter(taps[0]);
+		taps[2] = m_decorrelator[1].filter(taps[0]);
+		taps[3] = m_decorrelator[2].filter(taps[0]);
+
+		m_late_reverb.filter(taps, late);
+		m_echo.Filter(in, late);
+		if(right == NULL)
+		{
+			left[i] = left[i] + (early[2] + late[2]) * 4;
+		}
+		else
+		{
+			left[i] = left[i] + (early[0] + late[0]) * 8;
+			right[i] = right[i] + (early[1] + late[1]) * 8;
+		}
+	}
+	return nRet;
+}
+
+int32_t AlReverb::update()
+{
+	int32_t nRet = ERROR_SUPERSOUND_SUCCESS;
+
+	// Low-pass leg (a high shelf acting above highpass_reference)
+	nRet = m_lowpass.set_high_shelf_param(m_new_param.fs,
+		m_new_param.highpass_reference,
+		m_new_param.highpass_gain,
+		0.75f);
+
+	if(nRet != ERROR_SUPERSOUND_SUCCESS)
+		return nRet;
+
+	// High-pass leg (a low shelf acting below lowpass_reference)
+	nRet = m_highpass.set_low_shelf_param(m_new_param.fs,
+		m_new_param.lowpass_reference,
+		m_new_param.lowpass_gain,
+		0.75f);
+	if(nRet != ERROR_SUPERSOUND_SUCCESS)
+		return nRet;
+
+	// One-sample delay
+	nRet = m_modulation.set_param(m_new_param.fs,
+		m_new_param.modulation_time,
+		m_new_param.modulation_depth);
+	if(nRet != ERROR_SUPERSOUND_SUCCESS)
+		return nRet;
+
+	// Early delay
+	nRet = m_early_delay.set_delay_len(int32_t(float(m_new_param.fs * m_new_param.early_delay)));
+	if(nRet != ERROR_SUPERSOUND_SUCCESS)
+		return nRet;
+
+	// Late delay
+	nRet = m_late_delay.set_delay_len(int32_t(m_new_param.fs * m_new_param.late_delay));
+	if(nRet != ERROR_SUPERSOUND_SUCCESS)
+		return nRet;
+
+	// Early reflections
+	nRet = m_early_reflection.set_param(m_new_param.fs,
+		m_new_param.reverbGain,
+		m_new_param.early_gain,
+		m_new_param.late_delay);
+	if(nRet != ERROR_SUPERSOUND_SUCCESS)
+		return nRet;
+
+	// Compute hfRatio
+	float hfRatio = m_new_param.highpass_ratio;
+	if(m_new_param.b_decay_lowpass_limit && (m_new_param.lowpass_air_absorption_gain < 1))
+	{
+		hfRatio = calc_limited_hf_ratio(hfRatio, m_new_param.lowpass_air_absorption_gain, m_new_param.decay_time);
+	}
+
+	// Late reverb
+	nRet = m_late_reverb.set_param(m_new_param.fs,
+		m_new_param.reverbGain,
+		m_new_param.late_gain,
+		m_new_param.density,
+		m_new_param.decay_time,
+		m_new_param.diffusion,
+		hfRatio,
+		m_new_param.highpass_reference);
+	if(nRet != ERROR_SUPERSOUND_SUCCESS)
+		return nRet;
+
+	// Echo
+	nRet = m_echo.set_param(m_new_param.fs,
+		m_new_param.reverbGain,
+		m_new_param.late_gain,
+		m_new_param.echo_time,
+		m_new_param.decay_time,
+		m_new_param.diffusion,
+		m_new_param.echo_depth,
+		hfRatio,
+		m_new_param.lowpass_reference);
+	if(nRet != ERROR_SUPERSOUND_SUCCESS)
+		return nRet;
+
+	// Decorrelation taps
+	for(int32_t i = 0; i < 3; i++)
+	{
+		float length = (0.15f * pow(2, (float)i)) * (1 + m_new_param.density * 4) * 0.0211f;
+		nRet = m_decorrelator[i].set_delay_len(int32_t(length * m_new_param.fs));
+		if(nRet != ERROR_SUPERSOUND_SUCCESS)
+			return nRet;
+	}
+
+	m_late_desity_gain = m_late_reverb.get_density_gain();
+
+	m_current_param = m_new_param;
+
+	return nRet;
+}
+
+/**
+ * Switch the effect id & reload its parameters
+ * @param n_type_id
+ * @return
+ */
+int32_t AlReverb::reset_effect_id(int n_type_id, bool is_init)
+{
+	// Reject out-of-range effect ids
+	if(n_type_id >= ARP_EFFECT_ID_MAX || n_type_id < 0)
+	{
+		return ERROR_SUPERSOUND_PARAM;
+	}
+
+	if(m_current_id != n_type_id || is_init)
+	{
+		// Load the preset parameters
+		m_new_param.density = 
aEaxPreset[n_type_id].fl_density; + m_new_param.diffusion = aEaxPreset[n_type_id].fl_diffusion; + m_new_param.reverbGain = aEaxPreset[n_type_id].fl_gain; + m_new_param.highpass_gain = aEaxPreset[n_type_id].fl_gain_hf; + m_new_param.lowpass_gain = aEaxPreset[n_type_id].fl_gain_lf; + m_new_param.decay_time = aEaxPreset[n_type_id].fl_decay_time; + m_new_param.highpass_ratio = aEaxPreset[n_type_id].fl_decayhf_ratio; + m_new_param.lowpass_ratio = aEaxPreset[n_type_id].fl_decay_lf_ratio; + m_new_param.early_gain = aEaxPreset[n_type_id].fl_reflections_gain; + m_new_param.early_delay = aEaxPreset[n_type_id].fl_reflections_delay; + m_new_param.late_gain = aEaxPreset[n_type_id].fl_late_reverb_gain; + m_new_param.late_delay = aEaxPreset[n_type_id].fl_late_reverb_delay; + m_new_param.echo_time = aEaxPreset[n_type_id].fl_echo_time; + m_new_param.echo_depth = aEaxPreset[n_type_id].fl_echo_depth; + m_new_param.modulation_time = aEaxPreset[n_type_id].fl_modulation_time; + m_new_param.modulation_depth = aEaxPreset[n_type_id].fl_modulation_depth; + m_new_param.lowpass_air_absorption_gain = aEaxPreset[n_type_id].fl_air_absorption_gain_hf; + m_new_param.highpass_reference = aEaxPreset[n_type_id].fl_hf_reference; + m_new_param.lowpass_reference = aEaxPreset[n_type_id].fl_lf_reference; + m_new_param.b_decay_lowpass_limit = aEaxPreset[n_type_id].i_decay_hf_limit; + m_new_param.in_channels = m_channels; + m_new_param.out_channels = m_channels; + m_new_param.fs = m_freq; + m_current_id = n_type_id; + control_update(); + } + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t AlReverb::init(int32_t channels, int32_t freq, int32_t n_type_id) +{ + m_channels = channels; + m_freq = freq; + reset_effect_id(n_type_id, true); + m_current_param = m_new_param; // 保证代码一致 + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t AlReverb::uninit() +{ + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t AlReverb::process(float *data, int len) +{ + std::vector buf_vector; + int nStep = SUPERSOUND_CHANNEL_PROC_LEN; + int nRet = ERROR_SUPERSOUND_SUCCESS; + for(int i=0;i_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//实现 openal 中的 reverb 效果 +//这里的早起反射和后期延迟和正常的想法有点出入,主要是这里将声道进行了合并,导致了需要多声道输出 +//但是实际应该每个通道,单独处理的 + +#ifndef __AL_REVERB_H__ +#define __AL_REVERB_H__ + +#include "AlReverbBiquad.h" +#include "AlReverbDefs.h" +#include "AlReverbEarlyReflection.h" +#include "AlReverbLateReverb.h" +#include "AlReverbEcho.h" +#include "AlReverbModulation.h" +#include + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + +#define ALR_MAX_PROCESS_BLOCK 4096 +class AlReverb +{ +public: + AlReverb(); + ~AlReverb(); + +public: + void flush(); + int32_t get_latecy(); + void control_update(); + int32_t set_param(AE_PARAMS_AL_REVERB *param); + int32_t process(std::vector &buf_vector, int32_t in_num); + + // 线上接口 + int32_t init(int32_t channels, int32_t freq, int32_t n_type_id); + void reset(); + int32_t reset_effect_id(int n_type_id, bool is_init = false); + int32_t process(float *data, int len); + int32_t uninit(); + +private: + int32_t update(); +private: + AE_PARAMS_AL_REVERB m_current_param; + AE_PARAMS_AL_REVERB m_new_param; + + //高低切 + AlReverbBiquad m_lowpass; + AlReverbBiquad m_highpass; + //调制过程 + AlReverbModulation m_modulation; + //前期反射和后期混响延迟 + SuperSoundFastDelay m_early_delay; + SuperSoundFastDelay m_late_delay; + //前期反射和后期混响 + AlReverbEarlyReflection m_early_reflection; + AlReverbLateReverb m_late_reverb; + //回声效果 + AlReverbEcho m_echo; + //解相关 + SuperSoundFastDelay m_decorrelator[3]; + + float m_late_desity_gain; + bool m_b_update; + + //左右声道 + float m_left[ALR_MAX_PROCESS_BLOCK]; + float m_right[ALR_MAX_PROCESS_BLOCK]; + + // 基础参数 + int32_t m_channels; + int32_t m_freq; + int32_t m_current_id; +}; + + +} +} + +#endif /* __AL_REVERB_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_biquad/AlReverbBiquad.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_biquad/AlReverbBiquad.cpp new file mode 100755 index 0000000..5901c8f --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_biquad/AlReverbBiquad.cpp @@ -0,0 +1,89 @@ + +#include "AlReverbBiquad.h" +#include "AlReverbDefs.h" +#include + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +AlReverbBiquad::AlReverbBiquad() +{ + m_a1 = 0; + m_a2 = 0; + //输入什么输出什么 + m_b0 = 1; + m_b1 = 0; + m_b2 = 0; + + flush(); +} + +AlReverbBiquad::~AlReverbBiquad() +{ + +} + +void AlReverbBiquad::flush() +{ + m_x1 = 0; + m_x2 = 0; + m_y1 = 0; + m_y2 = 0; +} + +int32_t AlReverbBiquad::get_latecy() +{ + return 0; +} + +int32_t AlReverbBiquad::set_high_shelf_param(int32_t fs, float f0, float A, float S) +{ + float w0 = float(2 * M_PI * f0 / fs); + A = MAX(A, 0.00001f); + float alpha = sin(w0) / 2 * sqrt((A + 1 / A)*(1 / S - 1) + 2); + float a0 = (A + 1) - (A - 1) * cos(w0) + 2 * sqrt(A) * alpha; + + m_b0 = (A * ((A + 1) + (A - 1) * cos(w0) + 2 * sqrt(A) * alpha)) / a0; + m_b1 = (-2 * A * ((A - 1) + (A + 1) * cos(w0))) / a0; + m_b2 = (A * ((A + 1) + (A - 1) * cos(w0) - 2 * sqrt(A) * alpha)) / a0; + m_a1 = (2 * ((A - 1) - (A + 1) * cos(w0))) / a0; + m_a2 = ((A + 1) - (A - 1) * cos(w0) - 2 * sqrt(A) * alpha) / a0; + + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t AlReverbBiquad::set_low_shelf_param(int32_t fs, float f0, float A, float S) +{ + float w0 = float(2 * M_PI * f0 / fs); + A = MAX(A, 
0.00001f); + float alpha = sin(w0) / 2 * sqrt((A + 1 / A) * (1 / S - 1) + 2); + float a0 = (A + 1) + (A - 1) * cos(w0) + 2 * sqrt(A) * alpha; + + m_b0 = (A * ((A + 1) - (A - 1) * cos(w0) + 2 * sqrt(A) * alpha)) / a0; + m_b1 = (2 * A * ((A - 1) - (A + 1) * cos(w0))) / a0; + m_b2 = (A * ((A + 1) - (A - 1) * cos(w0) - 2 * sqrt(A) * alpha)) / a0; + m_a1 = (-2 * ((A - 1) + (A + 1) * cos(w0))) / a0; + m_a2 = ((A + 1) + (A - 1) * cos(w0) - 2 * sqrt(A) * alpha) / a0; + + return ERROR_SUPERSOUND_SUCCESS; +} + +float AlReverbBiquad::filtef(float in) +{ + float out = m_b0 * in + m_b1 * m_x1 + m_b2 * m_x2 - + m_a1 * m_y1 - m_a2 * m_y2; + + m_x2 = m_x1; + m_x1 = in; + m_y2 = m_y1; + m_y1 = out; + + return out; +} + + +} +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_biquad/AlReverbBiquad.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_biquad/AlReverbBiquad.h new file mode 100755 index 0000000..6890ac6 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_biquad/AlReverbBiquad.h @@ -0,0 +1,75 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//这里和 AuidoEQCookBook 相同,主要使用了 Shelf 滤波器 + +#ifndef __AL_REVERB_BIQUAD_H__ +#define __AL_REVERB_BIQUAD_H__ + +#include +#include "AudioEffectsConf.h" + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +class AlReverbBiquad +{ +public: + AlReverbBiquad(); + ~AlReverbBiquad(); + +public: + void flush(); + int32_t get_latecy(); + int32_t set_high_shelf_param(int32_t fs, float f0, float A, float S); + int32_t set_low_shelf_param(int32_t fs, float f0, float A, float S); + float filtef(float in); + +private: + float m_x1; + float m_x2; + float m_y1; + float m_y2; + float m_a1; + float m_a2; + float m_b0; + float m_b1; + float m_b2; +}; + + +} +} + +#endif /* __AL_REVERB_BIQUAD_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_common/AlReverbCommon.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_common/AlReverbCommon.cpp new file mode 100755 index 0000000..3239e82 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_common/AlReverbCommon.cpp @@ -0,0 +1,112 @@ + +#include "AlReverbCommon.h" +#include "AlReverbDefs.h" +#include + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +float calc_decay_coeff(float length, float decay_time) +{ + // -60dB 就是 0.001 + return pow(0.001f, length / decay_time); +} + +// Calculate a decay length from a coefficient and the time until the decay +// reaches -60 dB. 
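+// Inverse of calc_decay_coeff: since coeff = 0.001^(length / decay_time),
+// solving for length gives length = log(coeff) * decay_time / log(0.001).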
+float calc_decay_length(float coeff, float decay_time)
+{
+	return log(coeff) * decay_time / log(0.001f)/*-60 dB*/;
+}
+
+float calc_damping_coeff(int32_t fs, float hf_ratio, float length, float decay_time, float f0)
+{
+	float coeff = 0;
+	float gain;
+	float cw;
+
+	// A ratio greater than 1 would turn the damping into a boost
+	if(hf_ratio < 1)
+	{
+		gain = calc_decay_coeff(length, decay_time * hf_ratio) / calc_decay_coeff(length, decay_time);
+		cw = (float)cos(2 * M_PI * f0 / fs);
+
+		// The filter is single-pole, so the gain has to be squared
+		gain *= gain;
+
+		// Compare against 1 approximately (using 1 - epsilon)
+		if(gain < 0.9999f)
+		{
+			// A gain below -60 dB would push the coefficient towards 1 and
+			// end up flattening the signal, so floor it
+			gain = MAX(gain, 0.001f);
+			coeff = (1 - gain * cw - sqrt(2 * gain * (1 - cw) - gain * gain * (1 - cw * cw))) / (1 - gain);
+		}
+
+		// Very small decay times would otherwise produce almost no output,
+		// so cap the coefficient at 0.98
+		coeff = MIN(coeff, 0.98f);
+	}
+
+	return coeff;
+}
+
+float lerp( float val1, float val2, float mu )
+{
+	return val1 + (val2 - val1) * mu;
+}
+
+float calc_density_gain(float a)
+{
+	/* The energy of a signal can be obtained by finding the area under the
+	 * squared signal. This takes the form of Sum(x_n^2), where x is the
+	 * amplitude for the sample n.
+	 *
+	 * Decaying feedback matches exponential decay of the form Sum(a^n),
+	 * where a is the attenuation coefficient, and n is the sample. The area
+	 * under this decay curve can be calculated as: 1 / (1 - a).
+	 *
+	 * Modifying the above equation to find the squared area under the curve
+	 * (for energy) yields: 1 / (1 - a^2). Input attenuation can then be
+	 * calculated by inverting the square root of this approximation,
+	 * yielding: 1 / sqrt(1 / (1 - a^2)), simplified to: sqrt(1 - a^2).
+	 */
+	return sqrt(1 - (a * a));
+}
+
+// Calculate the mixing matrix coefficients given a diffusion factor.
+void calc_matrix_coeffs(float diffusion, float *x, float *y)
+{
+	float n, t;
+
+	// The matrix is of order 4, so n is sqrt(4 - 1).
+	n = sqrt(3.0f);
+	t = diffusion * atan(n);
+
+	// Calculate the first mixing matrix coefficient.
+	*x = cos(t);
+	// Calculate the second mixing matrix coefficient.
+	*y = sin(t) / n;
+}
+
+// Calculate the limited HF ratio for use with the late reverb low-pass
+// filters.
+float calc_limited_hf_ratio(float hf_ratio, float air_absorption_gain_hf, float decay_time)
+{
+	float limitRatio;
+
+	/* Find the attenuation due to air absorption in dB (converting delay
+	 * time to meters using the speed of sound). Then reversing the decay
+	 * equation, solve for HF ratio. The delay length is cancelled out of
+	 * the equation, so it can be calculated once for all lines.
+	 */
+	limitRatio = 1.0f / (calc_decay_length(air_absorption_gain_hf, decay_time) *
+		343.3f/*SPEEDOFSOUNDMETRESPERSEC*/);
+	/* Using the limit calculated above, apply the upper bound to the HF
+	 * ratio. Also need to limit the result to a minimum of 0.1, just like the
+	 * HF ratio parameter.
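+ * (Assuming the MIDDLE macro returns the median of its three arguments,
+ * this is hf_ratio clamped to the range [0.1, limitRatio].)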
*/ + return MIDDLE(limitRatio, 0.1f, hf_ratio); +} +} +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_common/AlReverbCommon.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_common/AlReverbCommon.h new file mode 100755 index 0000000..27af5f9 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_common/AlReverbCommon.h @@ -0,0 +1,68 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//包含 openal reverb 库中需要处理的一些基础函数 + +#ifndef __AL_REVERB_COMMON_H__ +#define __AL_REVERB_COMMON_H__ + + + +#include + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +//第一个参数是延迟长度,也就是多久算一次,第二个是 T60 衰减时间 +float calc_decay_coeff(float length, float decay_time); + +float calc_decay_length(float coeff, float decay_time); + +//计算高通(甚至低通)的衰减系数 +float calc_damping_coeff(int32_t fs, float hf_ratio, float length, float decay_time, float f0); + +//线性插值的 lerp 函数 +float lerp(float val1, float val2, float mu); + +//计算密度衰减增益 +float calc_density_gain(float a); + +//计算混合矩阵系数 +void calc_matrix_coeffs(float diffusion, float *x, float *y); + +float calc_limited_hf_ratio(float hf_ratio, float air_absorption_gain_hf, float decay_time); +} +} + +#endif /* __AL_REVERB_COMMON_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_early_reflection/AlReverbEarlyReflection.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_early_reflection/AlReverbEarlyReflection.cpp new file mode 100755 index 0000000..fcfcb18 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_early_reflection/AlReverbEarlyReflection.cpp @@ -0,0 +1,90 @@ + +#include "AlReverbEarlyReflection.h" +#include "AlReverbCommon.h" + +#include "AlReverbDefs.h" +#include + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +// 4 个早期反射的延迟量 +const static float gs_early_delay[4] = { + 0.0015f, 0.0045f, 0.0135f, 0.0405f +}; + +AlReverbEarlyReflection::AlReverbEarlyReflection() +{ + memset(m_decay, 0, sizeof(m_decay)); + m_gain = 1; +} + +AlReverbEarlyReflection::~AlReverbEarlyReflection() +{ + +} + +void AlReverbEarlyReflection::flush() +{ + for(int32_t i = 0; i < 4; i++) + { + m_delay[i].flush(); + } +} + +int32_t AlReverbEarlyReflection::get_latency() +{ + int32_t latecy = m_delay[0].get_latecy(); + + for(int32_t i = 0; i < 4; i++) + { + latecy = MIN(latecy, m_delay[i].get_latecy()); + } + + return latecy; +} + +int32_t AlReverbEarlyReflection::set_param(int32_t fs, float reverb_gain, float early_gain, float late_delay) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + m_gain = 
0.5f * reverb_gain * early_gain; + + for(int32_t i = 0; i < 4; i++) + { + nRet = m_delay[i].set_delay_len(int32_t(fs * gs_early_delay[i])); + if(nRet != ERROR_SUPERSOUND_SUCCESS) + return nRet; + + m_decay[i] = calc_decay_coeff(gs_early_delay[i], late_delay); + } + + return nRet; +} + +void AlReverbEarlyReflection::filter(float in, float (&out)[4]) +{ + float sum = 0; + + for(int32_t i = 0; i < 4; i++) + { + out[i] = m_delay[i].get_now() * m_decay[i]; + sum += out[i]; + } + + sum = sum * 0.5f + in; + + for(int32_t i = 0; i < 4; i++) + { + out[i] = sum - out[i]; + m_delay[i].put_now(out[i]); + out[i] *= m_gain; + } +} + + +} +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_early_reflection/AlReverbEarlyReflection.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_early_reflection/AlReverbEarlyReflection.h new file mode 100755 index 0000000..756e826 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_early_reflection/AlReverbEarlyReflection.h @@ -0,0 +1,72 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//实现早期反射模型的类,本来应该是单纯的梳状滤波器组的,但是这里将梳状滤波叠加延迟,增加了混响强度 + +#ifndef __AL_REVERB_EARLY_REFLECTION_H__ +#define __AL_REVERB_EARLY_REFLECTION_H__ + +#include "fast_delay/SupersoundFastDelay.h" + +namespace SUPERSOUND +{ + +namespace ALREVERB +{ + + +class AlReverbEarlyReflection +{ +public: + AlReverbEarlyReflection(); + ~AlReverbEarlyReflection(); + +public: + void flush(); + int32_t get_latency(); + int32_t set_param(int32_t fs, float reverb_gain, float early_gain, float late_delay); + //这个有点蛋碎,和正常的滤波器不是那么一致 + void filter(float in, float (&out)[4]); + +private: + //利用无损衍射的waveguide理论创建最大的漫反射效果,也就是延迟反馈网络 + SuperSoundFastDelay m_delay[4]; + //每个延迟的衰减系数,也就是计算到衰减-60dB,每次衰减需要衰减多少 + float m_decay[4]; + //最后的输出增益 + float m_gain; +}; + + +} +} + +#endif /* __AL_REVERB_EARLY_REFLECTION_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_echo/AlReverbEcho.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_echo/AlReverbEcho.cpp new file mode 100755 index 0000000..983f2f0 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_echo/AlReverbEcho.cpp @@ -0,0 +1,94 @@ + +#include "AlReverbEcho.h" +#include "AlReverbDefs.h" +#include "AlReverbCommon.h" +#include + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +// When diffusion is above 0, an all-pass filter is used to take the edge off +// the echo effect. It uses the following line length (in seconds). 
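+// The all-pass decays with the same T60 as the echo itself; see how
+// set_param below passes decay_time to both. (0.0133 s is roughly 587
+// samples at 44.1 kHz.)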
+#define ECHO_ALLPASS_LENGTH 0.0133f + +AlReverbEcho::AlReverbEcho() +{ + m_coeff = 0; + m_densityGain = 0; + memset(m_mixCoeff, 0, sizeof(m_mixCoeff)); +} + +AlReverbEcho::~AlReverbEcho() +{ + +} + +void AlReverbEcho::flush() +{ + m_delay.flush(); + m_lowpass.flush(); + m_allpass.flush(); +} + +int32_t AlReverbEcho::get_latecy() +{ + return m_delay.get_latecy() + m_lowpass.get_latecy() + m_allpass.get_latecy(); +} + +int32_t AlReverbEcho::set_param(int32_t fs, float reverb_gain, float late_gain, float echo_time, + float decay_time, float diffusion, float echo_depth, float hf_ratio, float f0) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + m_coeff = calc_decay_coeff(echo_time, decay_time); + + m_densityGain = calc_density_gain(m_coeff); + + nRet = m_delay.set_delay_len(int32_t(fs * echo_time)); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + nRet = m_allpass.set_param(fs, diffusion, ECHO_ALLPASS_LENGTH, decay_time); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + nRet = m_lowpass.set_param(fs, hf_ratio, echo_time, decay_time, f0); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + /* Calculate the echo mixing coefficients. The first is applied to the + * echo itself. The second is used to attenuate the late reverb when + * echo depth is high and diffusion is low, so the echo is slightly + * stronger than the decorrelated echos in the reverb tail. + */ + m_mixCoeff[0] = reverb_gain * late_gain * echo_depth; + m_mixCoeff[1] = 1 - (echo_depth * 0.5f * (1 - diffusion)); + + return nRet; +} + +void AlReverbEcho::Filter( float in, float (&in_out)[4] ) +{ + float feed = m_delay.get_now() * m_coeff; + float out = m_mixCoeff[0] * feed; + + for(int32_t i = 0; i < 4; i++) + { + in_out[i] = m_mixCoeff[1] * in_out[i] + out; + } + + feed += m_densityGain * in; + + feed = m_lowpass.filter(feed); + + feed = m_allpass.filter(feed); + + m_delay.put_now(feed); +} + + +} +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_echo/AlReverbEcho.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_echo/AlReverbEcho.h new file mode 100755 index 0000000..136bf34 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_echo/AlReverbEcho.h @@ -0,0 +1,73 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//实现一个回声效果 + +#ifndef __AL_REVERB_ECHO_H__ +#define __AL_REVERB_ECHO_H__ + +#include "fast_delay/SupersoundFastDelay.h" +#include "AlReverbLateAllpass.h" +#include "AlReverbLateLowpass.h" + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +class AlReverbEcho +{ +public: + AlReverbEcho(); + ~AlReverbEcho(); + +public: + void flush(); + int32_t get_latecy(); + int32_t set_param(int32_t fs, float reverb_gain, float late_gain, float echo_time, + float decay_time, float diffusion, float echo_depth, float hf_ratio, float f0); + void Filter(float in, float (&in_out)[4]); + +private: + SuperSoundFastDelay m_delay; + AlReverbLateLowpass m_lowpass; + AlReverbLateAllpass m_allpass; + float m_coeff; + float m_densityGain; + float m_mixCoeff[2]; +}; + + +} +} + +#endif /* __AL_REVERB_ECHO_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_allpass/AlReverbLateAllpass.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_allpass/AlReverbLateAllpass.cpp new file mode 100755 index 0000000..56afbcb --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_allpass/AlReverbLateAllpass.cpp @@ -0,0 +1,54 @@ + +#include "AlReverbLateAllpass.h" +#include "AlReverbCommon.h" +#include + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +AlReverbLateAllpass::AlReverbLateAllpass() +{ + m_coeff = 0; + m_feedCoeff = 0; +} + +AlReverbLateAllpass::~AlReverbLateAllpass() +{ + +} + +void AlReverbLateAllpass::flush() +{ + m_delay.flush(); +} + +int32_t AlReverbLateAllpass::get_latecy() +{ + return m_delay.get_latecy(); +} + +int32_t AlReverbLateAllpass::set_param(int32_t fs, float diffusion, float length, float decay_time) +{ + m_feedCoeff = 0.5f * pow(diffusion, 2); + + m_coeff = calc_decay_coeff(length, decay_time); + + return m_delay.set_delay_len(int32_t(float(fs * length))); +} + +float AlReverbLateAllpass::filter(float in) +{ + float out = m_delay.get_now(); + float feed = m_feedCoeff * in; + + m_delay.put_now(m_feedCoeff * (out - feed) + in); + + return m_coeff * out - feed; +} + + +} +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_allpass/AlReverbLateAllpass.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_allpass/AlReverbLateAllpass.h new file mode 100755 index 0000000..a9ebee6 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_allpass/AlReverbLateAllpass.h @@ -0,0 +1,67 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//实现后延迟中的全通滤波器,特么和延迟器混叠在一起的,醉了 + +#ifndef __AL_REVERB_LATE_ALLPASS_H__ +#define __AL_REVERB_LATE_ALLPASS_H__ + +#include "fast_delay/SupersoundFastDelay.h" + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +class AlReverbLateAllpass +{ +public: + AlReverbLateAllpass(); + ~AlReverbLateAllpass(); + +public: + void flush(); + int32_t get_latecy(); + int32_t set_param(int32_t fs, float diffusion, float length, float decay_time); + float filter(float in); + +private: + SuperSoundFastDelay m_delay; + float m_coeff; + float m_feedCoeff; +}; + + +} +} + +#endif /* __AL_REVERB_LATE_ALLPASS_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_lowpass/AlReverbLateLowpass.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_lowpass/AlReverbLateLowpass.cpp new file mode 100755 index 0000000..8e7f458 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_lowpass/AlReverbLateLowpass.cpp @@ -0,0 +1,52 @@ + +#include "AlReverbLateLowpass.h" +#include "AlReverbCommon.h" +#include "AlReverbDefs.h" + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +AlReverbLateLowpass::AlReverbLateLowpass() +{ + m_coeff = 1; + + flush(); +} + +AlReverbLateLowpass::~AlReverbLateLowpass() +{ + +} + +void AlReverbLateLowpass::flush() +{ + m_y1 = 0; +} + +int32_t AlReverbLateLowpass::get_latecy() +{ + return 0; +} + +int32_t AlReverbLateLowpass::set_param(int32_t fs, float hf_ratio, float length, float decay_time, float f0) +{ + m_coeff = calc_damping_coeff(fs, hf_ratio, length, decay_time, f0); + + return ERROR_SUPERSOUND_SUCCESS; +} + +float AlReverbLateLowpass::filter(float in) +{ + float out = lerp(in, m_y1, m_coeff); + + m_y1 = out; + + return out; +} + + +} +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_lowpass/AlReverbLateLowpass.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_lowpass/AlReverbLateLowpass.h new file mode 100755 index 0000000..4e38a85 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_lowpass/AlReverbLateLowpass.h @@ -0,0 +1,66 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / /
+//+ =====`-.____`.___ \_____/___.-`___.-'=====
+//+ `=---='
+//+
+//+
+//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+//+
+//+ 佛祖保佑 永无BUG
+//+ ----------------------------------------------------+
+
+// Implements the low-pass inside the late reverb path; it uses the same
+// formula as an ordinary one-pole low-pass
+
+#ifndef __AL_REVERB_LATE_LOWPASS_H__
+#define __AL_REVERB_LATE_LOWPASS_H__
+
+#include <stdint.h>	// int32_t
+#include "AudioEffectsConf.h"
+namespace SUPERSOUND
+{
+namespace ALREVERB
+{
+
+
+class AlReverbLateLowpass
+{
+public:
+	AlReverbLateLowpass();
+	~AlReverbLateLowpass();
+
+public:
+	void flush();
+	int32_t get_latecy();
+	int32_t set_param(int32_t fs, float hf_ratio, float length, float decay_time, float f0);
+	float filter(float in);
+
+private:
+	float m_coeff;
+	float m_y1;
+};
+
+
+}
+}
+
+#endif /* __AL_REVERB_LATE_LOWPASS_H__ */
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_reverb/AlReverbLateReverb.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_reverb/AlReverbLateReverb.cpp
new file mode 100755
index 0000000..7428593
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_reverb/AlReverbLateReverb.cpp
@@ -0,0 +1,143 @@
+
+#include "AlReverbLateReverb.h"
+#include "AlReverbDefs.h"
+#include "AlReverbCommon.h"
+#include <string.h>	// memset
+
+namespace SUPERSOUND
+{
+namespace ALREVERB
+{
+
+
+// Delay lengths (in seconds) of the 4 late reverb lines
+const static float gs_late_delay[4] = {
+	0.0211f, 0.0311f, 0.0461f, 0.0680f
+};
+
+const static float gs_allpass_length[4] = {
+	0.0151f, 0.0167f, 0.0183f, 0.0200f,
+};
+
+#define LATE_LINE_MULTIPLIER 4
+
+AlReverbLateReverb::AlReverbLateReverb()
+{
+	m_mixCoeff = 1;
+
+	memset(m_coeff, 0, sizeof(m_coeff));
+
+	m_gain = 1;
+
+	m_density_gain = 1;
+}
+
+AlReverbLateReverb::~AlReverbLateReverb()
+{
+
+}
+
+void AlReverbLateReverb::flush()
+{
+	for(int32_t i = 0; i < 4; i++)
+	{
+		m_delay[i].flush();
+		m_lowpass[i].flush();
+		m_allpass[i].flush();
+	}
+}
+
+int32_t AlReverbLateReverb::get_latecy()
+{
+	int32_t latecy = m_delay[0].get_latecy() +
+		m_lowpass[0].get_latecy() +
+		m_allpass[0].get_latecy();
+
+	// Report the smallest end-to-end latency across the four lines
+	for(int32_t i = 1; i < 4; i++)
+	{
+		int32_t len = 0;
+		len += m_delay[i].get_latecy();
+		len += m_lowpass[i].get_latecy();
+		len += m_allpass[i].get_latecy();
+		latecy = MIN(latecy, len);
+	}
+
+	return latecy;
+}
+
+int32_t AlReverbLateReverb::set_param(int32_t fs, float reverb_gain, float late_gain,
+	float density, float decay_time, float diffusion, float hf_ratio, float hfcutoff)
+{
+	int32_t nRet = ERROR_SUPERSOUND_SUCCESS;
+	float length;
+	float x, y;
+
+	calc_matrix_coeffs(diffusion, &x, &y);
+	m_mixCoeff = y / x;
+
+	m_gain = reverb_gain * late_gain * x;
+
+	length = (gs_late_delay[0] + gs_late_delay[1] + gs_late_delay[2] + gs_late_delay[3]) / 4;
+	length *= 1 + (density * LATE_LINE_MULTIPLIER);
+	m_density_gain = calc_density_gain(calc_decay_coeff(length, decay_time));
+
+	for(int32_t i = 0; i < 4; i++)
+	{
+		length = gs_late_delay[i] * (1 + density * LATE_LINE_MULTIPLIER);
+
+		nRet = m_delay[i].set_delay_len(int32_t(length * fs));
+		if(ERROR_SUPERSOUND_SUCCESS != nRet)
+			return nRet;
+
+		nRet = m_lowpass[i].set_param(fs, hf_ratio, length, decay_time, hfcutoff);
+		if(ERROR_SUPERSOUND_SUCCESS != nRet)
+			return nRet;
+
+		nRet = m_allpass[i].set_param(fs, diffusion, gs_allpass_length[i], decay_time);
+		if(ERROR_SUPERSOUND_SUCCESS != nRet)
+			return nRet;
+
+		m_coeff[i] = calc_decay_coeff(length, decay_time) * x;
+	}
+
+	return nRet;
+}
+
+void 
AlReverbLateReverb::filter(float (&in)[4], float (&out)[4]) +{ + float d[4]; + + d[0] = m_lowpass[2].filter(in[2] + m_delay[2].get_now() * m_coeff[2]); + d[1] = m_lowpass[0].filter(in[0] + m_delay[0].get_now() * m_coeff[0]); + d[2] = m_lowpass[3].filter(in[3] + m_delay[3].get_now() * m_coeff[3]); + d[3] = m_lowpass[1].filter(in[1] + m_delay[1].get_now() * m_coeff[1]); + + d[0] = m_allpass[0].filter(d[0]); + d[1] = m_allpass[1].filter(d[1]); + d[2] = m_allpass[2].filter(d[2]); + d[3] = m_allpass[3].filter(d[3]); + + out[0] = d[0] + (m_mixCoeff * ( d[1] + -d[2] + d[3])); + out[1] = d[1] + (m_mixCoeff * (-d[0] + d[2] + d[3])); + out[2] = d[2] + (m_mixCoeff * ( d[0] + -d[1] + d[3])); + out[3] = d[3] + (m_mixCoeff * (-d[0] + -d[1] + -d[2] )); + + m_delay[0].put_now(out[0]); + m_delay[1].put_now(out[1]); + m_delay[2].put_now(out[2]); + m_delay[3].put_now(out[3]); + + out[0] = m_gain * out[0]; + out[1] = m_gain * out[1]; + out[2] = m_gain * out[2]; + out[3] = m_gain * out[3]; +} + +float AlReverbLateReverb::get_density_gain() +{ + return m_density_gain; +} + + +} +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_reverb/AlReverbLateReverb.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_reverb/AlReverbLateReverb.h new file mode 100755 index 0000000..abc381d --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_late_reverb/AlReverbLateReverb.h @@ -0,0 +1,103 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//实现后期反射 +/* Late reverb is done with a modified feed-back delay network (FDN) +* topology. Four input lines are each fed through their own all-pass +* filter and then into the mixing matrix. The four outputs of the +* mixing matrix are then cycled back to the inputs. Each output feeds +* a different input to form a circlular feed cycle. +* +* The mixing matrix used is a 4D skew-symmetric rotation matrix derived +* using a single unitary rotational parameter: +* +* [ d, a, b, c ] 1 = a^2 + b^2 + c^2 + d^2 +* [ -a, d, c, -b ] +* [ -b, -c, d, a ] +* [ -c, b, -a, d ] +* +* The rotation is constructed from the effect's diffusion parameter, +* yielding: 1 = x^2 + 3 y^2; where a, b, and c are the coefficient y +* with differing signs, and d is the coefficient x. The matrix is thus: +* +* [ x, y, -y, y ] n = sqrt(matrix_order - 1) +* [ -y, x, y, y ] t = diffusion_parameter * atan(n) +* [ y, -y, x, y ] x = cos(t) +* [ -y, -y, -y, x ] y = sin(t) / n +* +* To reduce the number of multiplies, the x coefficient is applied with +* the cyclical delay line coefficients. 
Thus only the y coefficient is +* applied when mixing, and is modified to be: y / x. +*/ + +#ifndef __AL_REVERB_LATE_REVERB_H__ +#define __AL_REVERB_LATE_REVERB_H__ + +#include "fast_delay/SupersoundFastDelay.h" +#include "AlReverbLateAllpass.h" +#include "AlReverbLateLowpass.h" + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +class AlReverbLateReverb +{ +public: + AlReverbLateReverb(); + ~AlReverbLateReverb(); + +public: + void flush(); + int32_t get_latecy(); + int32_t set_param(int32_t fs, float reverb_gain, float late_gain, + float density, float decay_time, float diffusion, float hf_ratio, float hfcutoff); + void filter(float (&in)[4], float (&out)[4]); + float get_density_gain(); + +private: + SuperSoundFastDelay m_delay[4]; + AlReverbLateLowpass m_lowpass[4]; + AlReverbLateAllpass m_allpass[4]; + + float m_mixCoeff; + float m_coeff[4]; + float m_gain; + float m_density_gain; +}; + + +} +} + +#endif /* __AL_REVERB_LATE_REVERB_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_modulation/AlReverbModulation.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_modulation/AlReverbModulation.cpp new file mode 100755 index 0000000..7cffac1 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_modulation/AlReverbModulation.cpp @@ -0,0 +1,44 @@ + +#include "AlReverbModulation.h" +#include "AlReverbDefs.h" + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +AlReverbModulation::AlReverbModulation() +{ + +} + +AlReverbModulation::~AlReverbModulation() +{ + +} + +void AlReverbModulation::flush() +{ + m_delay.flush(); +} + +int32_t AlReverbModulation::get_latecy() +{ + return m_delay.get_latecy(); +} + +int32_t AlReverbModulation::set_param(int32_t fs, float time, float depth) +{ + m_delay.set_delay_len(1); + return ERROR_SUPERSOUND_SUCCESS; +} + +float AlReverbModulation::filter(float in) +{ + return m_delay.filter(in); +} + + +} +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_modulation/AlReverbModulation.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_modulation/AlReverbModulation.h new file mode 100755 index 0000000..2bf57dc --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/al_reverb_modulation/AlReverbModulation.h @@ -0,0 +1,66 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//主要是实现一个调制器,本来调制器应该实现一个类似于重采样的功能,解决 +//因为因为采样率等变化带来的瞬态颤音问题,这里直接都略过了 + +#ifndef __AL_REVERB_MODULATION_H__ +#define __AL_REVERB_MODULATION_H__ + +#include "fast_delay/SupersoundFastDelay.h" + +namespace SUPERSOUND +{ +namespace ALREVERB +{ + + +class AlReverbModulation +{ +public: + AlReverbModulation(); + ~AlReverbModulation(); + +public: + void flush(); + int32_t get_latecy(); + int32_t set_param(int32_t fs, float time, float depth); + float filter(float in); + +private: + SuperSoundFastDelay m_delay; +}; + + +} +} + +#endif /* __AL_REVERB_MODULATION_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/fast_delay/SupersoundFastDelay.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/fast_delay/SupersoundFastDelay.cpp new file mode 100755 index 0000000..fb59aa2 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/fast_delay/SupersoundFastDelay.cpp @@ -0,0 +1,155 @@ + +#include "SupersoundFastDelay.h" +#include "AlReverbDefs.h" +#include "AlReverbCommon.h" +#include +#include +#include + +namespace SUPERSOUND +{ + + +SuperSoundFastDelay::SuperSoundFastDelay() +{ + m_len = -1; + m_idx = 0; + + m_cache = NULL; + m_mask = -1; +} + +SuperSoundFastDelay::~SuperSoundFastDelay() +{ + SAFE_DELETE_PTR(m_cache); + m_mask = -1; +} + +void SuperSoundFastDelay::flush() +{ + for(int32_t i = 0; i < m_len; i++) + { + put_now(0); + } +} + +int32_t SuperSoundFastDelay::get_latecy() +{ + return m_len; +} + +int32_t SuperSoundFastDelay::set_delay_len(int32_t len) +{ + //相等就直接返回 + if(len == m_len) + return ERROR_SUPERSOUND_SUCCESS; + + //重新申请一段新的内存空间 + int32_t delay = len; + len = supersound_next_power_2(delay + 1); + float * cache = new(std::nothrow) float[len]; + if(NULL == cache) + { + return ERROR_SUPERSOUND_MEMORY; + } + memset(cache, 0, sizeof(float) * len); + + //如果原始有数据的话,采用线性插值的方式来减少破音 + if(m_cache != NULL) + { + resample(cache, delay); + } + + //重新更新各个参量 + m_cache = cache; + m_len = delay; + m_mask = len - 1; + m_idx = 0; + + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t SuperSoundFastDelay::set_param(int32_t fs, float ms) +{ + return set_delay_len(int32_t(fs * ms / 1000)); +} + +float SuperSoundFastDelay::filter(float in) +{ + int32_t idx = (m_idx + m_len) & m_mask; + + float out = m_cache[m_idx]; + m_cache[idx] = in; + + + m_idx = (m_idx + 1) & m_mask; + + return out; +} + +float SuperSoundFastDelay::get_now() +{ + return m_cache[m_idx]; +} + +void SuperSoundFastDelay::put_now(float in) +{ + int32_t idx = (m_idx + m_len) & m_mask; + + m_cache[idx] = in; + + m_idx = (m_idx + 1) & m_mask; +} + +float SuperSoundFastDelay::get_data(int32_t pos) +{ + int32_t idx = (pos + m_idx) & m_mask; + return m_cache[idx]; +} + +void SuperSoundFastDelay::resample(float *cache, int32_t delay) +{ + //数据较少的话就使用拷贝,否则使用重采样的方式 + if((m_len <= 2) || (delay <= 2)) + { + for(int32_t i = 0; i < (delay / 2); i++) + { + cache[i] = get_data(m_len / 2); + } + for(int32_t i = delay / 2; i < delay; i++) + { + cache[i] = get_data(MAX(0, m_len - 1)); + } + } + else + { + int32_t isample; + float istep = (float(m_len - 2)) / (delay - 2); + float s1, s2; + float alpha; + for(int32_t i = 0; i < (delay - 1); i++) + { + isample = int32_t(i * istep); + alpha = i * istep - isample; + s1 = 
get_data(isample); + s2 = get_data(isample + 1); + cache[i] = s1 * alpha + s2 * (1 - alpha); + } + cache[delay - 1] = get_data(m_len - 1); + } +} + +int32_t SuperSoundFastDelay::supersound_next_power_2(int32_t x) +{ + if(x > 0) +{ + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; +} +return x + 1; +} +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/fast_delay/SupersoundFastDelay.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/fast_delay/SupersoundFastDelay.h new file mode 100755 index 0000000..410d376 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/fast_delay/SupersoundFastDelay.h @@ -0,0 +1,76 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//开发一个快速延迟器 + +#ifndef __AL_REVERB_FAST_DELAY_H__ +#define __AL_REVERB_FAST_DELAY_H__ + +#include +#include "AudioEffectsConf.h" + +namespace SUPERSOUND +{ + + +class SuperSoundFastDelay +{ +public: + SuperSoundFastDelay(); + ~SuperSoundFastDelay(); + +public: + void flush(); + int32_t get_latecy(); + int32_t set_delay_len(int32_t len); + int32_t set_param(int32_t fs, float ms); + float filter(float in); + //这两个接口主要是为梳状滤波等需要取和放不同步的问题 + float get_now(); + void put_now(float in); + +private: + float get_data(int32_t pos); + void resample(float *cache, int32_t delay); + //用来将一个数转为大于它的最小的2的n次方数 + int32_t supersound_next_power_2(int32_t x); +private: + int32_t m_len; + int32_t m_idx; + float * m_cache; + //缓存的长度,为 2 的 n 次幂减 1 + int32_t m_mask; +}; + + +} + +#endif /* __AL_REVERB_FAST_DELAY_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/filter/CFilters.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/filter/CFilters.cpp new file mode 100644 index 0000000..7404089 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/filter/CFilters.cpp @@ -0,0 +1,403 @@ +/************************************************************************/ +/* Phonograph Eimulator */ +/* written by evieng, 7.21,2013 */ +/* last modifiey by evieng, 6.6,2013 */ +/* copy right reserved */ +/************************************************************************/ + +#include "math.h" +#include "stdlib.h" +#include "stdio.h" +#include "memory.h" +#include "AlReverbDefs.h" +#include "filter/CFilters.h" +//#include "audio_score/Score/KTYPED.h" +#include "biquad_filters/BiquadFilter.h" + +// This is a trick. 
When enabled, all channels refer to the 1st channel +// #define PSEUDO_MULTICHANNELS 1 // defined in KTYPED.h +#ifdef _MSC_VER +// MSVC build for Windows, and it's (expected to be) able to handle true stereo in real time +#define PSEUDO_MULTICHANNELS 0 +#else +#define PSEUDO_MULTICHANNELS 1 +#endif +#define VERSION_ID_NUMBER 100 /* 7.21,2013 */ + +using std::vector; +using namespace BiquadFilter; + + +#define HPF_FREQ (2000.0f / 44100.0f) +#define LPF_FREQ (6000.0f / 44100.0f) +#define HPFQ 0.5f +#define LPFQ 0.5f + +#define BPF_LOW_EDGE (600.0f / 44100.0f) +#define BPF_HIGH_EDGE (8000.0f / 44100.0f) + +class CBaseFilters +{ +public: + CBaseFilters(); + virtual ~CBaseFilters(); + void reset(); + + bool isAllocated() const; + int setFilterType(int ftype) + { + if (ftype < LOW_PASS_FILTER) + { + m_fType = LOW_PASS_FILTER; + } + else if (ftype > BAND_PASS_FILTER) + { + m_fType = BAND_PASS_FILTER; + } + else + m_fType = ftype; + return 0; + } + + void filtering(std::vector *x); + +private: + LPFilter* lpf; + HPFilter* hpf; + BPFilter* bpf; + int m_fType; +}; + +bool CBaseFilters::isAllocated() const +{ + return lpf != NULL && hpf != NULL && bpf != NULL; +} + + + +void CBaseFilters::filtering(std::vector *x) +{ + if (m_fType == LOW_PASS_FILTER) + { + lpf->filtering(x); + } + else if (m_fType == HIGH_PASS_FILTER) + { + hpf->filtering(x); + } + else if (m_fType == BAND_PASS_FILTER) + { + bpf->filtering(x); + } + else + lpf->filtering(x); + //hpf->filtering(x); +} + + + +CBaseFilters::CBaseFilters() +{ + m_fType = LOW_PASS_FILTER; + hpf = new HPFilter(HPF_FREQ, HPFQ); + lpf = new LPFilter(LPF_FREQ, LPFQ); + bpf = new BPFilter(BPF_LOW_EDGE, BPF_HIGH_EDGE); + + if (!hpf || !lpf || !bpf) + { + if (hpf) + { + delete hpf; + hpf = NULL; + } + if (lpf) + { + delete lpf; + lpf = NULL; + } + if (bpf) + { + delete bpf; + bpf = NULL; + } + } +} + +void CBaseFilters::reset() +{ + if (hpf) + { + hpf->reset(); + } + if (lpf) + { + lpf->reset(); + } + if (bpf) + { + bpf->reset(); + } +} + +CBaseFilters::~CBaseFilters() +{ + delete lpf; + delete hpf; + delete bpf; + lpf = NULL; + hpf = NULL; + bpf = NULL; +} + +CFilters::CFilters() +{ + m_samplerate = 0; + m_channels = 0; + handles = NULL; +} + +CFilters::~CFilters() +{ + Uninit(); +} + +int CFilters::Init(int inSampleRate, int inChannel) +{ + m_samplerate = inSampleRate; + m_channels = inChannel; + + + handles = new CBaseFilters*[m_channels]; + + if (!handles) + { + return ALRB_ERR_BASE_H_MALLOC_NULL; + } + + for (int chn = 0; chn < m_channels; chn++) + { + CBaseFilters* filters = new CBaseFilters(); + + ((CBaseFilters**)handles)[chn] = filters; + + if (!filters->isAllocated()) + { + do{ + CBaseFilters* filters = ((CBaseFilters**)handles)[chn]; + delete filters; + + } while (chn--); + + delete [] (CBaseFilters**)handles; + handles = NULL; + + return ALRB_ERR_BASE_H_MALLOC_NULL; + } + + } + + return 0; +} + +void CFilters::Reset() +{ + if (handles) + { + for (int chn = 0; chn < m_channels; chn++) + { + CBaseFilters* filters = ((CBaseFilters**)handles)[chn]; + + if (filters) + { + filters->reset(); + } + + } + } +} + +void CFilters::Uninit() +{ + if (handles) + { + for (int chn = 0; chn < m_channels; chn++) + { + CBaseFilters* filters = ((CBaseFilters**)handles)[chn]; + + if (filters) + { + delete filters; + } + + } + + delete [] (CBaseFilters**)handles; + handles = NULL; + } +} +int CFilters::setFilterType(int fType) +{ + if (handles) + { + for (int chn = 0; chn < m_channels; chn++) + { + CBaseFilters* filters = ((CBaseFilters**)handles)[chn]; + + if (filters) + 
{ + filters->setFilterType(fType); + } + + } + return 0; + } + else + return ALRB_ERR_BASE_H_MALLOC_NULL; +} + +int CFilters::ProcessLRIndependent(float * inLeft, float * inRight, int inOutSize) +{ + if (2 != m_channels) + { + return ALRB_ERR_PARAM; + } + data.reserve(inOutSize); + data.resize(inOutSize); + if (handles) { + data.assign(inLeft, inLeft + inOutSize); + CBaseFilters* filters = ((CBaseFilters**)handles)[0]; + filters->filtering(&data); + for (unsigned int i = 0; i < data.size(); i++) { + inLeft[i] = data[i]; + } + + data.assign(inRight, inRight + inOutSize); + filters = ((CBaseFilters**)handles)[1]; + filters->filtering(&data); + for (unsigned int i = 0; i < data.size(); i++) { + inRight[i] = data[i]; + } + } + return inOutSize; +} + +int CFilters::Process(float* inBuffer, int inSize) +{ + if (0 != (inSize % m_channels)) + { + return ALRB_ERR_PARAM; + } + + if (handles) + { + + data.reserve(inSize / m_channels); + data.resize(inSize / m_channels); + + for (int chn = 0; chn < m_channels; chn++) + { + CBaseFilters* filters = ((CBaseFilters**)handles)[chn]; + float* audio = inBuffer + chn; + + if (filters) + { + if (PSEUDO_MULTICHANNELS && chn > 0) + { + float* audioref = inBuffer; + + for (unsigned int i = 0; i < data.size(); i++){ + audio[0] = audioref[0]; + audio += m_channels; + audioref += m_channels; + } + } + else + { + + for (unsigned int i = 0; i < data.size(); i++){ + // data[i]= audio[i * m_channels + chn] / 32768.0; + data[i] = audio[0]; + audio += m_channels; + } + + filters->filtering(&data); + + float* audio = inBuffer + chn; + + for (unsigned int i = 0; i < data.size(); i++){ + *audio = data[i]; + audio += m_channels; + } + } // if (PSEUDO_MULTICHANNELS && chn>0) + + } + + } + } + return ALRB_ERR_SUCCESS; +} + +int CFilters::Process(char* inBuffer, int inSize) +{ + if (0 != (inSize % (sizeof(short) * m_channels))) + { + return ALRB_ERR_PARAM; + } + + if (handles) + { + + data.reserve(inSize / sizeof(short) / m_channels); + data.resize(inSize / sizeof(short) / m_channels); + + for (int chn = 0; chn < m_channels; chn++) + { + CBaseFilters* filters = ((CBaseFilters**)handles)[chn]; + short* audio = chn + (short*)inBuffer; + + if (filters) + { + if (PSEUDO_MULTICHANNELS && chn>0) + { + short* audioref = (short*)inBuffer; + + for (unsigned int i = 0; i < data.size(); i++){ + audio[0] = audioref[0]; + audio += m_channels; + audioref += m_channels; + } + } + else + { + + for (unsigned int i = 0; i < data.size(); i++){ + // data[i]= audio[i * m_channels + chn] / 32768.0; + data[i]= audio[0] / 32768.0f; + audio += m_channels; + } + + filters->filtering(&data); + + short* audio = chn + (short*)inBuffer; + + for (unsigned int i = 0; i < data.size(); i++){ + int sample = (int)(32767.0f * data[i]); + + if (sample>32767) + sample = 32767; + else if (sample<-32768) + sample = -32768; + + // audio[i * m_channels + chn] = (short)sample; + audio[0] = (short)sample; + audio += m_channels; + } + } // if (PSEUDO_MULTICHANNELS && chn>0) + + } + + } + } + + return inSize; +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/filter/CFilters.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/filter/CFilters.h new file mode 100644 index 0000000..be681db --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/al_reverb/src/filter/CFilters.h @@ -0,0 +1,35 @@ +#ifndef KALA_AUDIOBASE_FILTERS_H +#define KALA_AUDIOBASE_FILTERS_H + +#include +#include "AudioEffectsConf.h" + +#define LOW_PASS_FILTER 0 
+#define HIGH_PASS_FILTER 1 +#define BAND_PASS_FILTER 2 + + +class CFilters +{ +public: + CFilters(); + virtual ~CFilters(); + + int Init(int inSampleRate, int inChannel); // set sample rate, channel and filter type; + void Reset(); + void Uninit(); // uninit + int setFilterType(int fType); + + // process input buffer and output size. + int Process(char* inBuffer, int inSize); + int Process(float* inBuffer, int inSize); + + int ProcessLRIndependent(float * inLeft, float * inRight, int inOutSize); + +private: + void* handles; + int m_samplerate; + int m_channels; + std::vector data; +}; +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/DecoderCommon.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/DecoderCommon.h new file mode 100644 index 0000000..25d5395 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/DecoderCommon.h @@ -0,0 +1,220 @@ +// +// Created by yangjianli on 2021/1/7. +// + +#ifndef AUDIO_CODEC_DECODERCOMMON_H +#define AUDIO_CODEC_DECODERCOMMON_H +#include +#include +#include "ac_defs.h" + +#ifdef ST_DEBUG +#include +#define ASSERT(e) assert(e) +#else +#define ASSERT(e) +#endif + +//安全关闭文件 +#ifndef SAFE_CLOSE_FILE +#define SAFE_CLOSE_FILE(file) \ +{ \ + if(file) \ + { \ + fclose(file); \ + file = NULL; \ + } \ +} +#endif //SAFE_CLOSE_FILE + +//安全释放内存 +#ifndef SAFE_FREE +#define SAFE_FREE(ptr) \ +{ \ + if(ptr) \ + { \ + free(ptr); \ + ptr = NULL; \ + } \ +} +#endif //SAFE_FREE + +//安全删除对象 +#ifndef SAFE_DELETE_OBJ +#define SAFE_DELETE_OBJ(obj) \ +{ \ + if(obj) \ + { \ + delete obj; \ + obj = NULL; \ + } \ +} +#endif //SAFE_DELETE_OBJ + +//安全逆初始化并删除对象 +#ifndef SAFE_UNINIT_DELETE_OBJ +#define SAFE_UNINIT_DELETE_OBJ(obj) \ +{ \ + if(obj) \ + { \ + obj->uninit(); \ + delete obj; \ + obj = NULL; \ + } \ +} +#endif //SAFE_UNINIT_DELETE_OBJ + +#ifndef SAFE_CLOSE_DELETE_OBJ +#define SAFE_CLOSE_DELETE_OBJ(obj) \ +{ \ + if(obj) \ + { \ + obj->close(); \ + delete obj; \ + obj = NULL; \ + } \ +} +#endif //SAFE_CLOSE_DELETE_OBJ + +//安全删除数组 +#ifndef SAFE_DELETE_ARRAY +#define SAFE_DELETE_ARRAY(array) \ +{ \ + if(array) \ + { \ + delete [] array; \ + array = NULL; \ + } \ +} +#endif //SAFE_DELETE_ARRAY + +//取大值 +#ifndef GLOBAL_MAX +#define GLOBAL_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +//取小值 +#ifndef GLOBAL_MIN +#define GLOBAL_MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +//取中间 +#ifndef GLOBAL_MID +#define GLOBAL_MID(a, b, c) (GLOBAL_MAX(a, GLOBAL_MIN(b, c))) +#endif + +//取绝对值 +#ifndef GLOBAL_ABS +#define GLOBAL_ABS(a) ((a) < 0 ? 
(-(a)) : (a)) +#endif + + +#ifndef CHECK_FLOAT_EQUAL +#define CHECK_FLOAT_EQUAL(a, b) (fabs(a - b) < 0.001f) +#endif + +// 错误码 +enum +{ + E_NATIVE_DECODER_SUCCESS, + + //继续,说明可以继续往下调用 + E_NATIVE_DECODER_COMMON_CONTINUE = 10000, + E_NATIVE_DECODER_MEMORY, + + E_NATIVE_DECODER_NO_MEMORY = 30000, + E_NATIVE_DECODER_FORMAT, + E_NATIVE_DECODER_STREAM, + E_NATIVE_DECODER_NO_DECODER, + E_NATIVE_DECODER_DECODER_OPEN, + E_NATIVE_DECODER_ALREADY_INIT, + E_NATIVE_DECODER_RESAMPLE, + E_NATIVE_DECODER_CONTINUE, + E_NATIVE_DECODER_SEEK, + E_NATIVE_DECODER_END, + + //协议部分产生的错误码 + E_NATIVE_PROTOCOL_PATH_OPEN = 40000, + E_NATIVE_NO_PROTOCOL, + E_NATIVE_NO_CONTEXT, + E_NATIVE_NO_DECODER, +}; + +// 类型 +//底层获取的音频文件的参数,文件内部的参数,如采样率之类的 +ST_AC_LIB_API typedef struct _MediaInfo +{ + //音频的时长 + double duration; + //音频的采样率 + int sample_rate; + //音频的通道数 + int channels; + //码率 + int bit_rate; +}MediaInfo, *pMediaInfo; + +//上层设置的音频文件的参数,也就是当成的参数,不是文件内部的参数 +ST_AC_LIB_API typedef struct _MediaParam +{ + //音频文件的位置 + const char * path; + //开始播放的时间,单位是ms + double start_time; + //截止播放的时间,单位是ms,默认和 duration 一致 + double end_time; + //播放的长度,单位是ms,如果小于等于0,则认为播放全曲 + double duration; + //前奏时间,方便跳过前奏处理 + double prelude_time; + //是否需要解密,因为导唱就不需要解密操作 + bool need_decrypt; + //多人同框时加入的时间点ms + double multi_join_time; + // 音频响度,单位:db + double loudness; + //是否是hook模式 + bool is_seek_delay_record; +}MediaParam, *pMediaParam; + + +#ifndef DEF_AUDIO_FRAME_BUFFER +#define DEF_AUDIO_FRAME_BUFFER +//自定义的一帧音频数据 +template +class ST_AC_LIB_API AudioFrameBuffer +{ +public: + AudioFrameBuffer() + : m_buffer(NULL) + , m_size(0) + , m_postion(0) + , m_duration(0) { } + + ~AudioFrameBuffer() { uninit(); } + +public: + inline int init(int size) { m_buffer = new T [size]; m_size = size; return E_NATIVE_DECODER_SUCCESS; } + inline T * get_buffer() { return m_buffer; } + inline int get_size() { return m_size; } + inline void set_postion(double postion) { m_postion = postion; } + inline double get_postion() { return m_postion; } + inline void set_duration(double duration) { m_duration = duration; } + inline double get_duration() { return m_duration; } + inline void uninit() { SAFE_DELETE_ARRAY(m_buffer); m_size = 0; m_postion = 0; m_duration = 0; } + +private: + //音频数据 + T * m_buffer; + //音频数据长度 + int m_size; + //音频数据的开始时间点,ms + double m_postion; + //音频的持续时间,ms + double m_duration; +}; +#endif + + + +#endif //AUDIO_CODEC_DECODERCOMMON_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/DecoderWrapper.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/DecoderWrapper.h new file mode 100644 index 0000000..6842415 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/DecoderWrapper.h @@ -0,0 +1,59 @@ +// +// Created by 杨将 on 2017/8/14. +// + +#ifndef __DECODERWRAPPER_H__ +#define __DECODERWRAPPER_H__ + +#include "DecoderCommon.h" +#include "IProtocol.h" +#include "IContext.h" +#include "IDecoder.h" +#include +#include "ac_defs.h" +//协议、解密、解码、重采样的一个控制封装 + +class IEnDeCrypt; +ST_AC_LIB_API class CDecoderWrapper +{ +public: + CDecoderWrapper(); + ~CDecoderWrapper(); + +public: + int init(pMediaParam param, int samplerate, int channel, + int context_type, + int decoder_type, + int protocol_type); + int init(pMediaParam param, int samplerate, int channel, + int context_type=CONTEXT_FFMPEG, + int decoder_type=DECODER_FFMPEG) { + + int protocol_type = strncmp(param->path, "stmedia:", 8) == 0 ? ANDROID_PROTOCOL_TYPE_ST_MEDIA : (param->need_decrypt ? 
PROTOCOL_TYPE_FILE : PROTOCOL_ACCOMPANY_TYPE_FILE); + return init(param, samplerate, channel, context_type, decoder_type, protocol_type); + } + void get_media_info(pMediaInfo media_info); + //如果解码结束,则会将frame中的数据置0 + int decode(AudioFrameBuffer * frame); + int seek(double pos); + int set_start_end_time(double start_time, double end_time); + void uninit(); + +private: + IEnDeCrypt * m_crypt; + IProtocol * m_protocol; + IContext * m_context; + IDecoder * m_decoder; + + double m_start_time; + double m_end_time; + + //下一次解码的开始时间 + double m_next_time; + //decode出来的采样率 + int m_samplerate; + //decode出来的通道数 + int m_channel; +}; + +#endif //__DECODERWRAPPER_H__ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/IContext.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/IContext.h new file mode 100644 index 0000000..fd71278 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/IContext.h @@ -0,0 +1,37 @@ +// +// Created by 杨将 on 2017/6/28. +// + +#ifndef __ICONTEXT_H__ +#define __ICONTEXT_H__ + +//上下文,相当于c中的void*,我们这里统一用这个来代替 +#include "InstanceFactory.h" +class IContext +{ +public: + IContext() { }; + virtual ~IContext() { }; +public: + virtual void set_protocol(IProtocol * protocol) = 0; + virtual IProtocol * get_protocol() = 0; + virtual void set_samplerate(int samplerate) = 0; + virtual int get_samplerate() = 0; + virtual void set_channels(int channels) = 0; + virtual int get_channels() = 0; +}; + +ST_AC_LIB_API class IContextCreator +{ +public: + IContextCreator() = delete; + explicit IContextCreator(int type) + { + registered_context(type, this); + }; + +public: + virtual IContext* get_inst() = 0; +}; + +#endif //__ICONTEXT_H__ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/IDecoder.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/IDecoder.h new file mode 100644 index 0000000..2901c18 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/IDecoder.h @@ -0,0 +1,45 @@ +// +// Created by 杨将 on 2017/7/25. +// + +#ifndef __IDECODER_H__ +#define __IDECODER_H__ + +#include "DecoderCommon.h" +#include "InstanceFactory.h" +//外部需要考虑这些函数不能多线程的问题 + +class IContext; + +class IDecoder +{ +public: + IDecoder() { } + virtual ~IDecoder() { } + +public: + //传入解码所需的上下文,包含协议等 + virtual int init(IContext * context) = 0; + //获取音频文件的内部信息 + virtual void get_media_info(pMediaInfo media_info) = 0; + //获取一段固定长度的数据,给frame中的数据填充数据,这里的长度不一定是编码里面的帧 + virtual int decode(AudioFrameBuffer * frame) = 0; + //跳转到某个精确的位置,这里需要在内部转换到固定的点上,ms数 + virtual int seek(double pos) = 0; + //释放掉所有在初始化中申请的资源,回到未初始化的状态 + virtual void uninit() = 0; +}; + +ST_AC_LIB_API class IDecoderCreator +{ +public: + IDecoderCreator() = delete; + explicit IDecoderCreator(int type) + { + registered_decoder(type, this); + }; + +public: + virtual IDecoder* get_inst() = 0; +}; +#endif //__IDECODER_H__ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/IProtocol.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/IProtocol.h new file mode 100644 index 0000000..a2d891c --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/IProtocol.h @@ -0,0 +1,41 @@ +// +// Created by 杨将 on 2017/7/25. 
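+// Editor's note: IProtocol, declared below, hides the byte source (plain
+// file, encrypted file, platform media stream) behind open/read/write/seek/
+// close so decoders never touch storage directly. A minimal plain-file
+// implementation might look like this (illustrative sketch, not part of
+// this library):
+#if 0
+class CPlainFileProtocol : public IProtocol
+{
+public:
+    CPlainFileProtocol() : m_fp(NULL) { }
+    virtual ~CPlainFileProtocol() { close(); }
+    virtual int open(const char * url, IEnDeCrypt * /* crypt unused */)
+    {
+        m_fp = fopen(url, "rb");
+        return m_fp ? 0 : -1;
+    }
+    virtual int read(unsigned char * buf, int size)
+    {
+        // bytes read; 0 on error or end of stream, per the interface contract
+        return m_fp ? (int)fread(buf, 1, (size_t)size, m_fp) : 0;
+    }
+    virtual int write(const unsigned char *, int) { return -1; } // read-only
+    virtual int seek(int offset, int whence)
+    {
+        if (m_fp && 0 == fseek(m_fp, offset, whence))
+            return (int)ftell(m_fp);
+        return -1;
+    }
+    virtual void close() { if (m_fp) { fclose(m_fp); m_fp = NULL; } }
+private:
+    FILE * m_fp;
+};
+#endif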
+// + +#ifndef __IPROTOCOL_H__ +#define __IPROTOCOL_H__ +#include "InstanceFactory.h" +class IEnDeCrypt; +class IProtocol +{ +public: + IProtocol() { } + virtual ~IProtocol() { } + +public: + //打开一个url地址,获取数据,返回错误码 + virtual int open(const char * url, IEnDeCrypt * crypt) = 0; + //读取数据,返回读取到多少数据,如果出错或者读到结尾则返回0 + virtual int read(unsigned char * buf, int size) = 0; + //写数据,返回还有多少数据没写,如果出错则返回-1 + virtual int write(const unsigned char * buf, int size) = 0; + //跳转,返回跳转后的位置,如果失败则返回-1 + virtual int seek(int offset, int whence) = 0; + //关闭url链接 + virtual void close() = 0; +}; + +ST_AC_LIB_API class IProtocolCreator +{ +public: + IProtocolCreator() = delete; + explicit IProtocolCreator(int type) + { + registered_protocol(type, this); + }; + +public: + virtual IProtocol* get_inst() = 0; +}; + +#endif //__IPROTOCOL_H__ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/InstanceFactory.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/InstanceFactory.h new file mode 100644 index 0000000..4911bb2 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/InstanceFactory.h @@ -0,0 +1,60 @@ +// +// Created by yangjianli on 2021/1/7. +// + +#ifndef AUDIO_CODEC_INSTANCEFACTORY_H +#define AUDIO_CODEC_INSTANCEFACTORY_H + +#include "ac_defs.h" + +class IProtocol; +class IProtocolCreator; +class IContext; +class IContextCreator; +class IDecoder; +class IDecoderCreator; + + +ST_AC_LIB_API void registered_protocol(int type, IProtocolCreator* creator); +ST_AC_LIB_API IProtocol* get_protocol_inst(int type); + +ST_AC_LIB_API void registered_context(int type, IContextCreator* creator); +ST_AC_LIB_API IContext* get_context_inst(int type); + +ST_AC_LIB_API void registered_decoder(int type, IDecoderCreator* creator); +ST_AC_LIB_API IDecoder* get_decoder_inst(int type); + + +#define REGISTER_CREATOR(BaseCreator,BaseClass, ClassName, type) \ + class ClassName##Creator : public BaseCreator { \ + public: \ + ClassName##Creator(int type) : BaseCreator(type) {}; \ + public: \ + BaseClass * get_inst() { \ + return new ClassName(); \ + } \ + }; \ + static ClassName##Creator ClassName##type##Creator(type); + +// 公有的类型 +enum PROTOCOL_TYPE +{ + PROTOCOL_TYPE_FILE, + PROTOCOL_IOS, + ANDROID_PROTOCOL_TYPE_ST_MEDIA, + PROTOCOL_ACCOMPANY_TYPE_FILE +}; + +enum CONTEXT_TYPE +{ + CONTEXT_FFMPEG, + CONTEXT_IOS +}; + +enum DECODER_TYPE +{ + DECODER_FFMPEG, + DECODER_IOS +}; + +#endif //AUDIO_CODEC_INSTANCEFACTORY_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/ac_defs.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/ac_defs.h new file mode 100644 index 0000000..7518327 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_codec/inc/ac_defs.h @@ -0,0 +1,11 @@ +/** + * Author: AlanWang4523. + * Date: 2021/12/29 11:38. 
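+ * Editor's note (appended): the REGISTER_CREATOR macro in InstanceFactory.h
+ * above works by defining a creator class plus one static instance of it;
+ * the static object's constructor runs at load time and self-registers with
+ * the factory. For a hypothetical FFmpeg decoder class CFfmpegDecoder, the
+ * expansion of
+ *     REGISTER_CREATOR(IDecoderCreator, IDecoder, CFfmpegDecoder, DECODER_FFMPEG)
+ * is roughly:
+ *     class CFfmpegDecoderCreator : public IDecoderCreator {
+ *     public:
+ *         CFfmpegDecoderCreator(int type) : IDecoderCreator(type) {};
+ *         IDecoder * get_inst() { return new CFfmpegDecoder(); }
+ *     };
+ *     static CFfmpegDecoderCreator CFfmpegDecoderDECODER_FFMPEGCreator(DECODER_FFMPEG);
+ * after which get_decoder_inst(DECODER_FFMPEG) can hand back new instances.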
+ * Mail: alanwang4523@gmail.com + */ + +#ifndef AVAUDIO_CODEC_LIBS_DEFINES_H +#define AVAUDIO_CODEC_LIBS_DEFINES_H + +#define ST_AC_LIB_API __attribute__ ((visibility("default"))) +#endif //AVAUDIO_CODEC_LIBS_DEFINES_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/CMakeLists.txt new file mode 100644 index 0000000..7a7e712 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 2.8) +project(audio_resampler) +set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) +include_directories(./) +include_directories(inc src) + +# ffmpeg-mac +#include_directories(/Users/yangjianli/starMaker/ffmpeg_lib/ffmpeg-4.3.1/mac/include) +#set(FFMPEG_LIB /Users/yangjianli/starMaker/ffmpeg_lib/ffmpeg-4.3.1/mac/lib) +add_library(audio_resample ${RESAMPLE_SRC_CPP}) \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/inc/FfmpegResampler.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/inc/FfmpegResampler.h new file mode 100644 index 0000000..c39e185 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/inc/FfmpegResampler.h @@ -0,0 +1,35 @@ +// +// Created by 杨将 on 2017/9/4. +// + +#ifndef __FFMPEGRESAMPLER_H__ +#define __FFMPEGRESAMPLER_H__ + +#include "IResampler.h" + +class SwrContext; + +class CFfmpegResampler : public IResampler +{ +public: + CFfmpegResampler(); + virtual ~CFfmpegResampler(); + +public: + virtual int init(int in_samplerate, int out_samplerate, int in_channel=1, int out_channel=1); + virtual int get_out_samples(int num); + virtual int get_latency(); + virtual void reset(); + virtual int resample(float * in_buf, int in_num, float * out_buf, int & out_num); + virtual void uninit(); + +private: + SwrContext * m_swr_context; + //重采样的buffer和长度 + unsigned char * m_swr_buffer; + int m_swr_bufsize; + int m_in_channel; + int m_out_channel; +}; + +#endif //__FFMPEGRESAMPLER_H__ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/inc/IResampler.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/inc/IResampler.h new file mode 100644 index 0000000..93baf9b --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/inc/IResampler.h @@ -0,0 +1,67 @@ +// +// Created by 杨将 on 2017/9/4. 
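+// Editor's note: a minimal driving loop for the IResampler interface
+// declared below, using the FFmpeg-backed CFfmpegResampler declared above
+// (rates, channel counts and the in_buf/in_num variables are illustrative;
+// error handling abbreviated):
+#if 0
+CFfmpegResampler rs;
+if (E_RESAMPLER_SUCCESS == rs.init(32000, 44100, 1, 1))
+{
+    int out_num = rs.get_out_samples(in_num);   // worst-case output length
+    std::vector<float> out_buf((size_t)out_num);
+    // on input out_num is the capacity, on output the samples written
+    if (E_RESAMPLER_SUCCESS == rs.resample(in_buf, in_num, out_buf.data(), out_num))
+    {
+        // consume out_num samples per channel from out_buf
+    }
+    rs.uninit();
+}
+#endif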
+// +/** + * 基于ffmpeg的重采样模块 + */ +#ifndef __IRESAMPLER_H__ +#define __IRESAMPLER_H__ + +enum +{ + E_RESAMPLER_SUCCESS = 0, + E_RESAMPLER_NO_MEMORY = 10000, + E_RESAMPLER_NUM_ZERO, +}; +class IResampler +{ +public: + IResampler() { } + virtual ~IResampler() { } + +public: + /** + * 初始化函数 + * @param in_samplerate 输入数据的采样率 + * @param out_samplerate 输出数据的采样率 + * @param in_channel + * @param out_channel + * @return 0 表示正常 + */ + virtual int init(int in_samplerate, int out_samplerate, int in_channel=1, int out_channel=1) = 0; + + /** + * 当输入每个通道采样点数为num长度的数据时,从resample可以获取到的输出数据的最大长度 + * @param num 本次将要输入的数据长度[单通道采样点数量] + * @return 单通道采样点数量,负数表示异常 + */ + virtual int get_out_samples(int num) = 0; + + /** + * 获取延迟延迟时间,采样点级别 + * @return + */ + virtual int get_latency() = 0; + + /** + * 重设,清空内部缓存数据[当输入数据源切换时,需要进行设置] + */ + virtual void reset() = 0; + + /** + * 重采样函数 + * @param in_buf 输入数据[多通道时,交错方式的存储][in] + * @param in_num 输入数据单个通道的采样点数量[in] + * @param out_buf 输出数据[多通道时,交错方式的存储][in][注意:需要外部开辟好空间,可以使用get_out_samples获取需要开辟的最大长度] + * @param out_num 输出数据单个通道的采样点数量[out][注意: out_buf真实被写入的单通道采样点数量] + * @return 0表示正常 + */ + virtual int resample(float * in_buf, int in_num, float * out_buf, int & out_num) = 0; + + /** + * 销毁函数 + */ + virtual void uninit() = 0; +}; + +#endif //__IRESAMPLER_H__ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/src/FfmpegResampler.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/src/FfmpegResampler.cpp new file mode 100644 index 0000000..3fb8d98 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/audio_resample/src/FfmpegResampler.cpp @@ -0,0 +1,138 @@ +// +// Created by 杨将 on 2017/9/4. +// + +#include "FfmpegResampler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#include "libswresample/swresample.h" +#include "libavutil/channel_layout.h" + +#ifdef __cplusplus +}; +#endif + +CFfmpegResampler::CFfmpegResampler() +{ + m_swr_context = NULL; + m_swr_buffer = NULL; + m_swr_bufsize = 0; +} + +CFfmpegResampler::~CFfmpegResampler() +{ + uninit(); +} + +int CFfmpegResampler::init(int in_samplerate, int out_samplerate, int in_channel, int out_channel) +{ + if(in_samplerate != out_samplerate) + { + m_swr_context = swr_alloc_set_opts( + NULL, + av_get_default_channel_layout(out_channel), + AV_SAMPLE_FMT_FLT, + out_samplerate, + av_get_default_channel_layout(in_channel), + AV_SAMPLE_FMT_FLT, + in_samplerate, + 0, + NULL + ); + + if((NULL == m_swr_context) || swr_init(m_swr_context)) + { + return E_RESAMPLER_NO_MEMORY; + } + } + m_in_channel = in_channel; + m_out_channel = out_channel; + return E_RESAMPLER_SUCCESS; +} + +int CFfmpegResampler::get_out_samples(int num) +{ + if(m_swr_context) + { + return swr_get_out_samples(m_swr_context, num); + } + else + { + return num; + } +} + +int CFfmpegResampler::get_latency() +{ + return 0; +} + +void CFfmpegResampler::reset() +{ + if(m_swr_context && m_swr_buffer) + { + swr_convert(m_swr_context, &m_swr_buffer, m_swr_bufsize, 0, 0); + } +} + +int CFfmpegResampler::resample(float * in_buf, int in_num, float * out_buf, int & out_num) +{ + if(m_swr_context) + { + //看之前申请的内存空间是否足够,不足的话,就进行申请空间 + if(out_num > m_swr_bufsize) + { + unsigned char * buffer = NULL; + if(av_samples_alloc(&buffer, NULL, m_out_channel, out_num, AV_SAMPLE_FMT_FLT, 0) < 0) + { + return E_RESAMPLER_NO_MEMORY; + } + m_swr_bufsize = out_num; + if(m_swr_buffer) + { + av_freep(&m_swr_buffer); + } + m_swr_buffer = buffer; + } + + out_num = swr_convert( + m_swr_context, + &m_swr_buffer, + out_num, + (const 
uint8_t **)(&in_buf), + in_num + ); + if(out_num < 0) + { + out_num = 0; + return E_RESAMPLER_NUM_ZERO; + } + + memcpy(out_buf, m_swr_buffer, out_num * sizeof(float) * m_out_channel); + } + else if(in_buf == out_buf) + { + out_num = in_num; + } + else + { + memcpy(out_buf, in_buf, out_num * sizeof(float) * m_out_channel); + } + return E_RESAMPLER_SUCCESS; +} + +void CFfmpegResampler::uninit() +{ + if(m_swr_context) + { + swr_free(&m_swr_context); + } + if(m_swr_buffer) + { + av_freep(&m_swr_buffer); + } + m_swr_bufsize = 0; +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/CMakeLists.txt new file mode 100644 index 0000000..2a32277 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/CMakeLists.txt @@ -0,0 +1,9 @@ +include_directories(./) +include_directories(inc) +include_directories(src) + +file(GLOB_RECURSE AUTOTUNE_SRC_CPP_FILES src/*cpp) +file(GLOB_RECURSE AUTOTUNE_SRC_C_FILES src/*c) + +add_library(autotune ${AUTOTUNE_SRC_CPP_FILES} ${AUTOTUNE_SRC_C_FILES}) +#set_target_properties(autotune PROPERTIES CXX_VISIBILITY_PRESET hidden) diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/inc/ATndkWrapper.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/inc/ATndkWrapper.h new file mode 100644 index 0000000..6f6ade5 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/inc/ATndkWrapper.h @@ -0,0 +1,54 @@ +// +// Created by 杨将 on 2017/7/12. +// + +#ifndef __ATNDKWRAPPER_H__ +#define __ATNDKWRAPPER_H__ +#include "AudioEffectsConf.h" +#include + +//keychange的参数 +typedef struct _KeyChangeParam +{ + //更新的开始时间点,单位ms + float * times; + //更新的key值 + int * keys; + //更新的scale值 + int * scales; + //总共的个数 + int count; +}KeyChangeParam, *pKeyChangeParam; + +class CAutoTuneWrapper; + +class CATndkWrapper +{ +public: + CATndkWrapper(); + ~CATndkWrapper(); + +public: + int init(int samplerate, int channels); + int set_key_change(pKeyChangeParam param); + void reset(); + int process(short * input, short * output, int samples, double ms); + int process(float * input, float * output, int samples, double ms); + int get_latency_time_ms(); + void uninit(); + +private: + void quick_set_key_change(double ms); + void destroy_key_change(); + +private: + CAutoTuneWrapper * m_autotune; + KeyChangeParam m_keychange; + int m_current_idx; + int m_channels; + float * m_float_buffer; + int m_last_sample_num_of_float; + +}; + +#endif //__ATNDKWRAPPER_H__ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/inc/AutoTuneDef.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/inc/AutoTuneDef.h new file mode 100644 index 0000000..c5a4b68 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/inc/AutoTuneDef.h @@ -0,0 +1,17 @@ +// +// Created by yangjianli on 2020-01-13. 
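+// Editor's note: a sketch of how the CATndkWrapper declared in
+// ATndkWrapper.h above is driven (all values illustrative; a real caller
+// fills KeyChangeParam from the song's key/scale timeline):
+#if 0
+CATndkWrapper at;
+if (AT_ERR_SUCCESS == at.init(44100, 1))
+{
+    float times[1] = { 0.0f };                 // change points, ms
+    int keys[1] = { 0 }, scales[1] = { 0 };
+    KeyChangeParam kc = { times, keys, scales, 1 };
+    at.set_key_change(&kc);   // a second call returns AT_ERR_HAS_SET_PARAM
+    // per audio block: samples mono samples starting at position ms
+    at.process(in, out, samples, ms);
+    at.uninit();
+}
+#endif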
+// + +#ifndef AUDIO_EFFECTS_LIB_AOTOTUNEDEF_H +#define AUDIO_EFFECTS_LIB_AOTOTUNEDEF_H +#include "AudioEffectsConf.h" +enum AT_ERR { + AT_ERR_SUCCESS = 0, + AT_ERR_NO_MEMORY = -1, + AT_ERR_PARAM = -2, + AT_ERR_HAS_SET_PARAM = -3, + AT_ERR_AUTOTUNE_INIT = -4, + AT_ERR_BASE_H_NULL = -5, + AT_ERR_BASE_H_MALLOC_NULL = -6, +}; +#endif //AUDIO_EFFECTS_LIB_AOTOTUNEDEF_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ATndkWrapper.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ATndkWrapper.cpp new file mode 100644 index 0000000..ca1563c --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ATndkWrapper.cpp @@ -0,0 +1,243 @@ +// +// Created by 杨将 on 2017/7/12. +// + +#include +#include +#include +#include "common/util/util.h" +#include "ATndkWrapper.h" +#include "AutoTuneDef.h" +#include "common/common.h" +#include "autotune/CAutoTuneWrapper.h" + +CATndkWrapper::CATndkWrapper() +{ + m_autotune = NULL; + + m_keychange.times = NULL; + m_keychange.keys = NULL; + m_keychange.scales = NULL; + m_keychange.count = 0; + + m_current_idx = 0; + m_channels = 1; + + m_float_buffer = NULL; + m_last_sample_num_of_float = 0; +} + +CATndkWrapper::~CATndkWrapper() +{ + uninit(); +} + +int CATndkWrapper::init(int samplerate, int channels) +{ + int nRet = AT_ERR_SUCCESS; + + m_channels = channels; + + m_autotune = new(std::nothrow) CAutoTuneWrapper(); + if(NULL == m_autotune) + { + nRet = AT_ERR_NO_MEMORY; + goto exit; + } + + nRet = m_autotune->init(samplerate, channels); + if(AT_ERR_SUCCESS != nRet) + { + nRet = AT_ERR_AUTOTUNE_INIT; + goto exit; + } +exit: + if(AT_ERR_SUCCESS != nRet) + { + uninit(); + } + return nRet; +} + +int CATndkWrapper::set_key_change(pKeyChangeParam param) +{ + //已经设置了参数直接返回,这里会有两个出口,切记 + if(m_keychange.count > 0) + { + return AT_ERR_HAS_SET_PARAM; + } + + //必须在init之后才能调用set参数接口 + int nRet = AT_ERR_SUCCESS; + + m_keychange.count = param->count; + + m_keychange.times = new(std::nothrow) float[m_keychange.count]; + if(NULL == m_keychange.times) + { + nRet = AT_ERR_NO_MEMORY; + goto exit; + } + memcpy(m_keychange.times, param->times, m_keychange.count * sizeof(float)); + + m_keychange.keys = new(std::nothrow) int[m_keychange.count]; + if(NULL == m_keychange.keys) + { + nRet = AT_ERR_NO_MEMORY; + goto exit; + } + memcpy(m_keychange.keys, param->keys, m_keychange.count * sizeof(int)); + + m_keychange.scales = new(std::nothrow) int[m_keychange.count]; + if(NULL == m_keychange.scales) + { + nRet = AT_ERR_NO_MEMORY; + goto exit; + } + memcpy(m_keychange.scales, param->scales, m_keychange.count * sizeof(int)); + +exit: + if(AT_ERR_SUCCESS != nRet) + { + destroy_key_change(); + } + + return nRet; +} + +void CATndkWrapper::reset() +{ + m_autotune->reset(); +} + +int CATndkWrapper::process(short * input, short * output, int samples, double ms) +{ + //如果当前的m_float_buffer空间不足则重新申请 + if (m_last_sample_num_of_float < samples) + { + float * last_buffer = m_float_buffer; + + m_float_buffer = (float *) malloc(samples * sizeof(float)); + if (NULL == m_float_buffer) + { + return AT_ERR_NO_MEMORY; + } + + //释放之前分配的内存 + SAFE_FREE(last_buffer); + m_last_sample_num_of_float = samples; + } + + //查找当前的key 和 scale + quick_set_key_change(ms); + + short_to_float(input, m_float_buffer, samples); + + m_autotune->process(m_float_buffer, m_float_buffer, samples); + + //返回输出的数据 + float_to_short(m_float_buffer, output, samples); + + return AT_ERR_SUCCESS; +} + +int CATndkWrapper::process(float *input, float * output, int samples, double ms) 
+{ + + //如果当前的m_float_buffer空间不足则重新申请 + if (m_last_sample_num_of_float < samples) + { + float * last_buffer = m_float_buffer; + + m_float_buffer = (float *) malloc(samples * sizeof(float)); + if (NULL == m_float_buffer) + { + return AT_ERR_NO_MEMORY; + } + + //释放之前分配的内存 + SAFE_FREE(last_buffer); + m_last_sample_num_of_float = samples; + } + + //查找当前的key 和 scale + quick_set_key_change(ms); + + memcpy(m_float_buffer, input, samples * sizeof(float)); + + m_autotune->process(m_float_buffer, output, samples); + + return AT_ERR_SUCCESS; +} + +int CATndkWrapper::get_latency_time_ms() +{ + return m_autotune->get_latency(); +} + +void CATndkWrapper::uninit() +{ + SAFE_DELETE_OBJ(m_autotune); + + SAFE_FREE(m_float_buffer); + m_last_sample_num_of_float = 0; + + destroy_key_change(); + + m_current_idx = 0; +} + +void CATndkWrapper::quick_set_key_change(double ms) +{ + int idx = m_current_idx; + float * times = m_keychange.times; + int count = m_keychange.count - 1; + + //如果没有设置过keychange这个时候要返回 + if(NULL == times) + { + return ; + } + + //本来应该如果重置过了,说明跳转了或者切换过了,因此进行二分查找 + //否则进行直接往后查找 + //简单点,直接当前位置前后查找 + if(times[idx] <= ms) + { + while(idx < count) + { + if((times[idx + 1] > ms) && (times[idx] <= ms)) + { + break; + } + ++idx; + } + } + else + { + while(idx > 0) + { + if((times[idx - 1] <= ms) && (times[idx] > ms)) + { + --idx; + break; + } + --idx; + } + } + + if(idx != m_current_idx) + { + bool notes[12]; + m_current_idx = idx; +// m_autotune->Set_notes_from_key_and_scale(notes, m_keychange.keys[m_current_idx], m_keychange.scales[m_current_idx]); + } +} + +void CATndkWrapper::destroy_key_change() +{ + SAFE_DELETE_ARRAY(m_keychange.times); + SAFE_DELETE_ARRAY(m_keychange.keys); + SAFE_DELETE_ARRAY(m_keychange.scales); + m_keychange.count = 0; +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTune.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTune.cpp new file mode 100644 index 0000000..cd9394a --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTune.cpp @@ -0,0 +1,509 @@ + +#include +#include +#include +#include +#include "ref/CircularBuffer.h" +#include "pitch/PitchDetector.h" +#include "formant_corrector/FormantCorrector.h" +#include "pitch/PitchShifter.h" +#include "autotune/CAutoTune.h" +#include "AutoTuneDef.h" + +extern "C" +{ +#include "ref/fftwrap.h" +} + +#if 0 +//ndef WIN32 + +#include +#include +#include "logutil.h" + +//void androidLog(int, const char*, ...); + +#define LOG_TAG "CAUTOTUNE_NATIVE" +#define LOGD(...) androidLog(ANDROID_LOG_DEBUG,LOG_TAG,__VA_ARGS__) +#define LOGI(...) androidLog(ANDROID_LOG_INFO,LOG_TAG,__VA_ARGS__) +#define LOGW(...) androidLog(ANDROID_LOG_WARN,LOG_TAG,__VA_ARGS__) +#define LOGE(...) androidLog(ANDROID_LOG_ERROR,LOG_TAG,__VA_ARGS__) + +#else + +#define LOGE(...) 
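+// Editor's note: L2SC below is 1 / log10(2), so the detector's semitone
+// conversion  -12 * log10(aref * pperiod) * L2SC  is just
+// -12 * log2(aref * pperiod): aref * pperiod expresses the detected pitch
+// period in A-reference cycles, and -12*log2 of that is the pitch in
+// semitones relative to the reference (e.g. pperiod = 1/220 s with
+// aref = 440 Hz gives -12, one octave below A440).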
+ +#endif + + +#define PI (float)3.14159265358979323846 +#define L2SC (float)3.32192809488736218171 + + +typedef struct { + + float m_fTune; + float m_fFixed; + float m_fPull; + float m_fAmount; + float m_fSmooth; + float m_fShift; + int m_iScwarp; + int m_iLfoquant; + float m_fMix; + float m_fLatency; + fft_vars* fmembvars; // member variables for fft routine + + unsigned long fs; // Sample rate + + + int noverlap; + + + // VARIABLES FOR LOW-RATE SECTION + float aref; // A tuning reference (Hz) + + + float lfophase; + + CircularBuffer buffer; + PitchDetector pdetector; + FormantCorrector fcorrector; + PitchShifter pshifter; + +} Autotalent; + +int CAutoTune::init(unsigned long samplerate, const int* suggestion) +{ + + Autotalent* membvars = (Autotalent*)malloc(sizeof(Autotalent)); + if (membvars == NULL) + { + return AT_ERR_BASE_H_MALLOC_NULL; + } + int ti; + membvars->aref = 440.0f; + membvars->fs = samplerate; + int ret = 0; + ret = instantiate_circular_buffer(&membvars->buffer, samplerate); + if (ret != 0) + { + if (membvars != NULL) + { + free(membvars); + membvars = NULL; + } + return ret; + } + membvars->fmembvars = fft_con((int)membvars->buffer.cbsize); + if (membvars->fmembvars == NULL) + { + if (membvars != NULL) + { + free(membvars); + membvars = NULL; + } + return AT_ERR_BASE_H_MALLOC_NULL; + } + ret = instantiate_pitch_detector(&membvars->pdetector, membvars->fmembvars, membvars->buffer.cbsize, + (int) samplerate); + if (ret != 0) + { + if (membvars != NULL) + { + free(membvars); + membvars = NULL; + } + return ret; + } + + ret = formant_corrector_init(&membvars->fcorrector, samplerate, (int) membvars->buffer.cbsize); + if (ret != 0) + { + if (membvars != NULL) + { + free(membvars); + membvars = NULL; + } + return ret; + } + membvars->noverlap = 4; + + membvars->lfophase = 0; + + ret = pitch_shifter_init(&membvars->pshifter, samplerate, membvars->buffer.cbsize); + if (ret != 0) + { + if (membvars != NULL) + { + free(membvars); + membvars = NULL; + } + return ret; + } + membvars->m_fTune = 440.0f; + membvars->m_fFixed = 0.0f; + membvars->m_fPull = 0.0f; + + for (ti = 0; ti < 12; ti++) + { + m_originalSeq[ti] = m_iNotes[ti] = suggestion[(ti - 3 + 12)%12];//g_ScalePara[scale][ti]; + //m_originalSeq[ti] = suggestion[ti]; + + LOGE("sugg:%02d/%d", ti, m_originalSeq[ti]); + } + + membvars->m_fAmount = 1.0f; + membvars->m_fSmooth = 0.5f; + membvars->m_fShift = 0.0f; + membvars->m_iScwarp = 0; + membvars->m_iLfoquant = 1; + membvars->fcorrector.iFcorr = 0; + membvars->fcorrector.fFwarp = 0.0f; + membvars->m_fMix = 1.0f; + + + + membvars->pdetector.confidence = 0.0f; + + membvars->m_fLatency = membvars->buffer.cbsize - 1; + + update_formant_warp(&membvars->fcorrector); + m_membvars = membvars; + + scaleSemitoneConversion(); + + inpitch = 0.0f; + outpitch = 0.0f; +// m_scale = 0;//scale; + + return 0; +} + +int CAutoTune::updateScale(int scale) +{ + + int ti; + for (ti = 0; ti < 12; ti++) + { + m_iNotes[ti] = m_originalSeq[(-scale+ti+12)%12]; +// m_originalSeq[ti] = suggestion[ti]; + } + + return scaleSemitoneConversion(); + +} +// Called every time we get a new chunk of audio +void CAutoTune::process(float *indata, float *outdata, unsigned long SampleCount) +{ + Autotalent* psAutotalent = (Autotalent *)m_membvars; + + for (unsigned long lSampleIndex = 0; lSampleIndex < SampleCount; lSampleIndex++) + { + + // load data into circular buffer + tf = (float)*(indata++); + ti4 = psAutotalent->buffer.cbiwr; + psAutotalent->buffer.cbi[ti4] = tf; + + if (psAutotalent->fcorrector.iFcorr >= 1) + { 
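+            // Formant correction enabled: the adaptive pre-filter strips the
+            // formant envelope before pitch manipulation; add_formants() on
+            // the output path below re-applies it. (Editor's note: init()
+            // sets fcorrector.iFcorr = 0, so this branch is disabled by
+            // default in this build.)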
+ remove_formants(&psAutotalent->fcorrector, &psAutotalent->buffer, tf); + } + else + { + psAutotalent->buffer.cbf[ti4] = tf; + } + // Input write pointer logic + psAutotalent->buffer.cbiwr++; + if (psAutotalent->buffer.cbiwr >= psAutotalent->buffer.cbsize) + { + psAutotalent->buffer.cbiwr = 0; + } + + // Every N/noverlap samples, run pitch estimation / manipulation code + if ((psAutotalent->buffer.cbiwr) % (psAutotalent->buffer.cbsize / psAutotalent->noverlap) == 0) + { + + // ---- Obtain autocovariance ---- + bbtain_autocovariance(&psAutotalent->pdetector, psAutotalent->fmembvars, &psAutotalent->buffer, + psAutotalent->buffer.cbsize, psAutotalent->buffer.corrsize); + + get_pitch_conf(&psAutotalent->pdetector, psAutotalent->fmembvars, psAutotalent->buffer.corrsize, + psAutotalent->fs, psAutotalent->aref, inpitch); + + outpitch = inpitch; + + // Pull to fixed pitch + outpitch = (1 - psAutotalent->m_fPull)*outpitch + psAutotalent->m_fPull*psAutotalent->m_fFixed; + + // -- Convert from semitones to scale notes -- + ti = (int)(outpitch / 12 + 32) - 32; // octave + tf = outpitch - ti * 12; // semitone in octave + ti2 = (int)tf; + ti3 = ti2 + 1; + // a little bit of pitch correction logic, since it's a convenient place for it + if (m_iNotes[ti2 % 12]<0 || m_iNotes[ti3 % 12]<0) + { // if between 2 notes that are more than a semitone apart + lowersnap = 1; + uppersnap = 1; + } + else + { + lowersnap = 0; + uppersnap = 0; + if (m_iNotes[ti2 % 12] == 1) + { // if specified by user + lowersnap = 1; + } + if (m_iNotes[ti3 % 12] == 1) + { // if specified by user + uppersnap = 1; + } + } + // (back to the semitone->scale conversion) + // finding next lower pitch in scale + while (m_iNotes[(ti2 + 12) % 12]<0) + { + ti2 = ti2 - 1; + } + // finding next higher pitch in scale + while (m_iNotes[ti3 % 12]<0) + { + ti3 = ti3 + 1; + } + tf = (tf - ti2) / (ti3 - ti2) + m_iPitch2Note[(ti2 + 12) % 12]; + if (ti2<0) + { + tf = tf - m_numNotes; + } + outpitch = tf + m_numNotes*ti; + // -- Done converting to scale notes -- + + // The actual pitch correction + ti = (int)(outpitch + 128) - 128; + tf = outpitch - ti - 0.5; + ti2 = ti3 - ti2; + if (ti2>2) + { // if more than 2 semitones apart, put a 2-semitone-like transition halfway between + tf2 = (float)ti2 / 2; + } + else + { + tf2 = (float)1; + } + + if ((psAutotalent->m_fSmooth * 0.8)<0.001) + { + tf2 = tf*tf2 / 0.001; + } + else + { + tf2 = tf*tf2 / (psAutotalent->m_fSmooth * 0.8); + } + if (tf2<-0.5) tf2 = -0.5; + if (tf2>0.5) tf2 = 0.5; + tf2 = 0.5*sin(PI*tf2) + 0.5; // jumping between notes using horizontally-scaled sine segment + tf2 = tf2 + ti; + if ((tf<0.5 && lowersnap) || (tf >= 0.5 && uppersnap)) + { + outpitch = psAutotalent->m_fAmount*tf2 + ((float)1 - psAutotalent->m_fAmount)*outpitch; + } + + // Add in pitch shift + outpitch = outpitch + psAutotalent->m_fShift; + + // LFO logic + //tf = psAutotalent->m_fLforate*psAutotalent->buffer.cbsize / (psAutotalent->noverlap*psAutotalent->fs); + //if (tf>1) + // tf = 1; + //psAutotalent->lfophase = psAutotalent->lfophase + tf; + //if (psAutotalent->lfophase > 1) + // psAutotalent->lfophase = psAutotalent->lfophase - 1; + //float lfoval = psAutotalent->lfophase; + //tf = (psAutotalent->m_fLfosymm + 1) / 2; + //if (tf <= 0 || tf >= 1) + //{ + // if (tf <= 0) + // lfoval = 1 - lfoval; + //} + //else + //{ + // if (lfoval <= tf) + // { + // lfoval = lfoval / tf; + // } + // else + // { + // lfoval = 1 - (lfoval - tf) / (1 - tf); + // } + //} + //if (psAutotalent->m_fLfoshape >= 0) + //{ + // // linear combination 
of cos and line + // lfoval = (0.5 - 0.5*cos(lfoval*PI))*psAutotalent->m_fLfoshape + lfoval*(1 - psAutotalent->m_fLfoshape); + // lfoval = psAutotalent->m_fLfoamp*(lfoval * 2 - 1); + //} + //else + //{ + // // smoosh the sine horizontally until it's squarish + // tf = 1 + psAutotalent->m_fLfoshape; + // if (tf<0.001) + // { + // lfoval = (lfoval - 0.5) * 2 / 0.001; + // } + // else + // { + // lfoval = (lfoval - 0.5) * 2 / tf; + // } + // if (lfoval>1) lfoval = 1; + // if (lfoval < -1) lfoval = -1; + // lfoval = psAutotalent->m_fLfoamp*sin(lfoval*PI*0.5); + //} + // add in quantized LFO + if (psAutotalent->m_iLfoquant >= 1) + { + outpitch = outpitch + (int)(/*m_numNotes*lfoval +*/ m_numNotes + 0.5) - m_numNotes; + } + + + // Convert back from scale notes to semitones + outpitch = outpitch + m_iScwarp; // output scale rotate implemented here + ti = (int)(outpitch / m_numNotes + 32) - 32; + tf = outpitch - ti*m_numNotes; + ti2 = (int)tf; + ti3 = ti2 + 1; + outpitch = m_iNote2Pitch[ti3%m_numNotes] - m_iNote2Pitch[ti2]; + if (ti3 >= m_numNotes) + { + outpitch = outpitch + 12; + } + outpitch = outpitch*(tf - ti2) + m_iNote2Pitch[ti2]; + outpitch = outpitch + 12 * ti; + outpitch = outpitch - (m_iNote2Pitch[m_iScwarp] - m_iNote2Pitch[0]); //more scale rotation here + + // add in unquantized LFO + if (psAutotalent->m_iLfoquant <= 0) + { + //outpitch = outpitch;// + lfoval * 2; + } + + + if (outpitch<-36) outpitch = -48; + if (outpitch>24) outpitch = 24; + + + // ---- END Modify pitch in all kinds of ways! ---- + + // Compute variables for pitch shifter that depend on pitch + psAutotalent->pshifter.inphinc = psAutotalent->aref*pow(2, inpitch / 12) / psAutotalent->fs; + psAutotalent->pshifter.outphinc = psAutotalent->aref*pow(2, outpitch / 12) / psAutotalent->fs; + psAutotalent->pshifter.phincfact = psAutotalent->pshifter.outphinc / psAutotalent->pshifter.inphinc; + } + + tf = shift_pitch(&psAutotalent->pshifter, &psAutotalent->buffer, psAutotalent->buffer.cbsize); + + + ti4 = (psAutotalent->buffer.cbiwr + 2) % psAutotalent->buffer.cbsize; + if (psAutotalent->fcorrector.iFcorr >= 1) + { + tf = add_formants(&psAutotalent->fcorrector, tf, ti4); + } + else + { + psAutotalent->fcorrector.fmute = 0; + } + + + *(outdata++) = (float)psAutotalent->m_fMix*tf + (1 - psAutotalent->m_fMix)*psAutotalent->buffer.cbi[ti4]; + + } + // Tell the host the algorithm latency + psAutotalent->m_fLatency = (float)(psAutotalent->buffer.cbsize - 1); +} + +int CAutoTune::getLatency() +{ + Autotalent* psAutotalent = (Autotalent *)m_membvars; + return (int)(1000.0f * psAutotalent->m_fLatency / psAutotalent->fs); +} + +void CAutoTune::reset() +{ + Autotalent* psAutotalent = (Autotalent *)m_membvars; + + // circlebuffer reset + memset(psAutotalent->buffer.cbi, 0, sizeof(float) * psAutotalent->buffer.cbsize); + memset(psAutotalent->buffer.cbf, 0, sizeof(float) * psAutotalent->buffer.cbsize); + psAutotalent->buffer.cbiwr = 0; + + //formant_corrector_init reset + // Initialize formant corrector + int i; + for (i=0; ifcorrector.ford; i++) + { + memset(psAutotalent->fcorrector.flevels[i].buff, 0, sizeof(float) * psAutotalent->buffer.cbsize); + } + memset(psAutotalent->fcorrector.ftvec, 0, sizeof(float) * psAutotalent->fcorrector.ford); + + //pitch_shifter + memset(psAutotalent->pshifter.frag, 0, sizeof(float) * psAutotalent->buffer.cbsize); + psAutotalent->pshifter.fragsize = 0; + + memset(psAutotalent->pshifter.cbo, 0, sizeof(float) * psAutotalent->buffer.cbsize); + psAutotalent->pshifter.cbord = 0; +} + +void CAutoTune::unInit() +{ 
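+    // Editor's note: teardown mirrors init(): the FFT state, circular
+    // buffers, window/autocorrelation tables and formant corrector buffers
+    // are C-allocated (malloc/calloc) by their instantiate_*/init helpers,
+    // which is why they are released here with free() rather than delete.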
+ fft_des(((Autotalent*)m_membvars)->fmembvars); + free(((Autotalent*)m_membvars)->buffer.cbi); + free(((Autotalent*)m_membvars)->buffer.cbf); + free(((Autotalent*)m_membvars)->pshifter.cbo); + free(((Autotalent*)m_membvars)->pdetector.cbwindow); + free(((Autotalent*)m_membvars)->pshifter.hannwindow); + free(((Autotalent*)m_membvars)->pdetector.acwinv); + free(((Autotalent*)m_membvars)->pshifter.frag); + + cleanup_formant_corrector(&((Autotalent *) m_membvars)->fcorrector); + free((Autotalent*)m_membvars); +} + +int CAutoTune::scaleSemitoneConversion() +{ + // Some logic for the semitone->scale and scale->semitone conversion + // If no notes are selected as being in the scale, instead snap to all notes + int ti2 = 0; + for (ti = 0; ti<12; ti++) + { + if (m_iNotes[ti] >= 0) + { + m_iPitch2Note[ti] = ti2; + m_iNote2Pitch[ti2] = ti; + ti2 = ti2 + 1; + } + else + { + m_iPitch2Note[ti] = -1; + } + } + m_numNotes = ti2; + while (ti2<12) + { + m_iNote2Pitch[ti2] = -1; + ti2 = ti2 + 1; + } + if (m_numNotes == 0) + { + for (ti = 0; ti<12; ti++) + { + m_iNotes[ti] = 1; + m_iPitch2Note[ti] = ti; + m_iNote2Pitch[ti] = ti; + } + m_numNotes = 12; + } + m_iScwarp = (((Autotalent*)m_membvars)->m_iScwarp + m_numNotes * 5) % m_numNotes; + return 0; +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTune.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTune.h new file mode 100644 index 0000000..ebc5c23 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTune.h @@ -0,0 +1,50 @@ +#ifndef __AUTO_TUNE_H_ +#define __AUTO_TUNE_H_ +#include "AudioEffectsConf.h" + + +class CAutoTune +{ +public: + int init(unsigned long samplerate, const int* suggestion); + + void + process(float *indata, float *outdata, + unsigned long SampleCount); + void + unInit(); + void reset(); + int updateScale(int scale); // Key Shift + int getLatency(); + +private: + int scaleSemitoneConversion(); + +private: + void* m_membvars; + int m_iNotes[12]; + int m_iPitch2Note[12]; + int m_iNote2Pitch[12]; + int m_originalSeq[12]; + int m_iScwarp; + int m_numNotes; + //int m_scale; + + long int ti; + long int ti2; + long int ti3; + long int ti4; + float tf; + float tf2; + + // Variables for cubic spline interpolator + + int lowersnap; + int uppersnap; + float inpitch; + float outpitch; + +}; + + +#endif // !__AUTO_TUNE_H_ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTuneWrapper.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTuneWrapper.cpp new file mode 100644 index 0000000..2c2c041 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTuneWrapper.cpp @@ -0,0 +1,95 @@ +// +// Created by wangjianjun on 18/8/21. 
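+// Editor's note on the stereo path of process() below: for 2-channel input
+// the wrapper downmixes the interleaved frames to mono in place ((L+R)/2),
+// runs the mono autotune once, then copies the corrected signal to both
+// output channels, so stereo imaging is intentionally discarded during
+// pitch correction and the input buffer is clobbered as scratch space.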
+// + +#include +#include +#include "CAutoTuneWrapper.h" +#include "AutoTuneDef.h" + +CAutoTuneWrapper::CAutoTuneWrapper() +{ + m_autotune = NULL; + m_channels = 1; +} + +CAutoTuneWrapper::~CAutoTuneWrapper() +{ + uninit(); +} + +void CAutoTuneWrapper::uninit() +{ + if (m_autotune) + { + m_autotune->unInit(); + delete m_autotune; + m_autotune = NULL; + } + m_channels = 1; +} + +int CAutoTuneWrapper::init(int samplerate, int channels) +{ + int res = 0; + m_channels = channels; + + m_autotune = new CAutoTune(); + if (m_autotune == NULL) + { + return AT_ERR_BASE_H_MALLOC_NULL; + } + + int suggestion[12] = {1,-1,1,-1,1,1,-1,1,-1,1,-1,1};//默认C大调 + res = m_autotune->init(samplerate, suggestion); + if (res != 0) + { + goto exit; + } +exit: + if (res != 0) + { + uninit(); + } + return res; +} + +int CAutoTuneWrapper::update_scale(int scale) { + if (m_autotune == NULL) { + return AT_ERR_BASE_H_NULL; + } + return m_autotune->updateScale(scale); +} + +int CAutoTuneWrapper::process(float *inData, float *outData, unsigned int count) +{ + if (m_channels == 1) + { + m_autotune->process(inData, outData, count); + } + else if (m_channels == 2) + { + for (size_t i = 0; i < count / 2; i++) + { + inData[i] = (inData[2 * i] + inData[2 * i + 1]) / 2.0f; + } + m_autotune->process(inData, outData + count / 2, count / 2); + + for (size_t i = 0; i < count / 2; i++) + { + outData[2 * i] = outData[count / 2 + i]; + outData[2 * i + 1] = outData[count / 2 + i]; + } + } + return count; +} + + +int CAutoTuneWrapper::get_latency() { + return m_autotune->getLatency(); +} + +void CAutoTuneWrapper::reset() +{ + m_autotune->reset(); +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTuneWrapper.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTuneWrapper.h new file mode 100644 index 0000000..5042a11 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/autotune/CAutoTuneWrapper.h @@ -0,0 +1,29 @@ +// +// Created by wangjianjun on 18/8/21. +// + +#ifndef CAUDIODECODER_CAUTOTUNEWRAPPER_H +#define CAUDIODECODER_CAUTOTUNEWRAPPER_H + +#include "CAutoTune.h" + +class CAutoTuneWrapper { +public: + CAutoTuneWrapper(); + ~CAutoTuneWrapper(); + +public: + int init(int samplerate, int channels); + int process(float *inData, float *outData, unsigned int count); + int update_scale(int scale); + int get_latency(); + void reset(); +private: + void uninit(); +private: + CAutoTune * m_autotune; + int m_channels; +}; + + +#endif //CAUDIODECODER_CAUTOTUNEWRAPPER_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/common/common.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/common/common.h new file mode 100644 index 0000000..779f0fc --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/common/common.h @@ -0,0 +1,150 @@ +// +// Created by 杨将 on 2017/6/27. 
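+// Editor's note: GLOBAL_MID(a, b, c) below clamps b into [a, c] via
+// GLOBAL_MAX(a, GLOBAL_MIN(b, c)); e.g. float_to_short() in util.cpp
+// saturates with GLOBAL_MID(-32768, in[i] * 32767, 32767) before casting
+// to short.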
+// + +#ifndef __COMMON_H__ +#define __COMMON_H__ +// +//#ifdef __APPLE__ +//#include +//#else +//#include "malloc.h" +//#endif + +#include +#include + +#ifdef ST_DEBUG +#include +#define ASSERT(e) assert(e) +#else +#define ASSERT(e) +#endif + + +//定义一些常用的宏或者常量等 + +//录制、播放、解码的缓存buffer的时间长度,单位ms +#define RECORDER_CIRCLE_BUFFER_TIME ((int)200) + +//默认写入文件的人声采样率 +#define FILE_VOCAL_SAMPLERATE ((int)44100) + +//跳转在多少ms以内不需要进行跳转 +#define SEEK_NO_OPRATE (1.01) + +//文件读取的基本长度 +#define FILE_OPERATE_LEN ((int)2048) +//人声文件中进行fade的最大长度 +#define FILE_FADE_LEN ((int)240) +//最大声道数,这个值不能改动,可以改成1 +#define MAX_CHANNEL ((int)2) + +//底层音量的中值 +#define DEFAULT_VOLUME ((int)50) + +//伴奏音量的基准值(分贝) +#define DEFAULT_BASELINE_DB ((float)-14.57f) + +//安全关闭文件 +#ifndef SAFE_CLOSE_FILE +#define SAFE_CLOSE_FILE(file) \ +{ \ + if(file) \ + { \ + fclose(file); \ + file = NULL; \ + } \ +} +#endif //SAFE_CLOSE_FILE + +//安全释放内存 +#ifndef SAFE_FREE +#define SAFE_FREE(ptr) \ +{ \ + if(ptr) \ + { \ + free(ptr); \ + ptr = NULL; \ + } \ +} +#endif //SAFE_FREE + +//安全删除对象 +#ifndef SAFE_DELETE_OBJ +#define SAFE_DELETE_OBJ(obj) \ +{ \ + if(obj) \ + { \ + delete obj; \ + obj = NULL; \ + } \ +} +#endif //SAFE_DELETE_OBJ + +//安全逆初始化并删除对象 +#ifndef SAFE_UNINIT_DELETE_OBJ +#define SAFE_UNINIT_DELETE_OBJ(obj) \ +{ \ + if(obj) \ + { \ + obj->uninit(); \ + delete obj; \ + obj = NULL; \ + } \ +} +#endif //SAFE_UNINIT_DELETE_OBJ + +#ifndef SAFE_CLOSE_DELETE_OBJ +#define SAFE_CLOSE_DELETE_OBJ(obj) \ +{ \ + if(obj) \ + { \ + obj->close(); \ + delete obj; \ + obj = NULL; \ + } \ +} +#endif //SAFE_CLOSE_DELETE_OBJ + +//安全删除数组 +#ifndef SAFE_DELETE_ARRAY +#define SAFE_DELETE_ARRAY(array) \ +{ \ + if(array) \ + { \ + delete [] array; \ + array = NULL; \ + } \ +} +#endif //SAFE_DELETE_ARRAY + +//取大值 +#ifndef GLOBAL_MAX +#define GLOBAL_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +//取小值 +#ifndef GLOBAL_MIN +#define GLOBAL_MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +//取中间 +#ifndef GLOBAL_MID +#define GLOBAL_MID(a, b, c) (GLOBAL_MAX(a, GLOBAL_MIN(b, c))) +#endif + +//取绝对值 +#ifndef GLOBAL_ABS +#define GLOBAL_ABS(a) ((a) < 0 ? (-(a)) : (a)) +#endif + + +#ifndef CHECK_FLOAT_EQUAL +#define CHECK_FLOAT_EQUAL(a, b) (fabs(a - b) < 0.001f) +#endif + +#define TYPE_PLAY_ORIGIN 1 +#define TYPE_PLAY_CORRECTION 2 + +#endif //__COMMON_H__ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/common/util/util.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/common/util/util.cpp new file mode 100644 index 0000000..b69e696 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/common/util/util.cpp @@ -0,0 +1,206 @@ +// +// Created by 杨将 on 2017/6/27. 
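+// Editor's note: a quick round-trip reference for the PCM conversion
+// helpers defined below (buffer contents illustrative):
+#if 0
+short pcm[4] = { 0, 16384, -16384, 32767 };
+float f[4];
+short_to_float(pcm, f, 4);   // divides by 32768 -> 0.0, 0.5, -0.5, ~0.99997
+float_to_short(f, pcm, 4);   // scales by 32767 and clamps to [-32768, 32767]
+#endif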
+// + +#include "util.h" +#include "common/common.h" +#include +#include +#ifdef __ANDROID__ +#include +#else +#include +#endif +#include + +void short_fade_in(short * buffer, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + buffer[i] = (short)(buffer[i] * i / size); + } + } + else + { + for(int i = 0; i < size; i += 2) + { + buffer[i] = (short)(buffer[i] * i / size); + buffer[i + 1] = (short)(buffer[i + 1] * i / size); + } + } +} + +void float_fade_in(float * buffer, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + buffer[i] = buffer[i] * i / size; + } + } + else + { + for(int i = 0; i < size; i += 2) + { + buffer[i] = buffer[i] * i / size; + buffer[i + 1] = buffer[i + 1] * i / size; + } + } +} + +void short_fade_out(short * buffer, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + buffer[i] = (short)(buffer[i] * (size - i) / size); + } + } + else + { + for(int i = 0; i < size; i += 2) + { + buffer[i] = (short)(buffer[i] * (size - i) / size); + buffer[i + 1] = (short)(buffer[i + 1] * (size - i) / size); + } + } +} + +void float_fade_out(float * buffer, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + buffer[i] = buffer[i] * (size - i) / size; + } + } + else + { + for(int i = 0; i < size; i += 2) + { + buffer[i] = buffer[i] * (size - i) / size; + buffer[i + 1] = buffer[i + 1] * (size - i) / size; + } + } +} + +void float_to_short(float * in, short * out, int num) +{ + for(int i = 0; i < num; i++) + { + out[i] = (short)GLOBAL_MID(-32768, in[i] * 32767, 32767); + } +} + +void short_to_float(short * in, float * out, int num) +{ + for(int i = 0; i < num; i++) + { + out[i] = in[i] / 32768.0f; + } +} +//立体声转单声道 +void short_stereo_to_mono(short * in, short * out, int num) +{ + for(int i = 0; i < num; i++) + { + out[i] = in[i*2]; + } +} +//双声道转单声道 +void float_stereo_to_mono(float * in, float * out, int num) +{ + for(int i = 0; i < num; i++) + { + out[i] = in[i*2]; + } +} + +void float_crossfade(float * fadein_buf, float * fadeout_buf, float * out, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + out[i] = fadein_buf[i] + fadeout_buf[i] * (size - i) / size; + } + } + else + { + for(int i = 0; i < size; i += 2) + { + out[i] = fadein_buf[i] + fadeout_buf[i] * (size - i) / size; + out[i + 1] = fadein_buf[i + 1] + fadeout_buf[i + 1] * (size - i) / size; + } + } +} + +double get_current_time_ms() +{ + struct timeval now; + if(0 == gettimeofday(&now, NULL)) + { + return 1000.0 * now.tv_sec + now.tv_usec / 1000.0; + } + else + { + return 0; + } +} + +long get_file_size(const char * path) { + int32_t file_size = -1; + + struct stat statbuf; + if (stat(path, &statbuf) >= 0) + { + file_size = statbuf.st_size; + } + return file_size; +} + +int getSdkVersion() { +#ifdef __ANDROID__ + static int sCachedSdkVersion = -1; + if (sCachedSdkVersion == -1) { + char sdk[PROP_VALUE_MAX] = {0}; + if (__system_property_get("ro.build.version.sdk", sdk) != 0) { + sCachedSdkVersion = atoi(sdk); + } + } + return sCachedSdkVersion; +#endif + return -1; +} + +float calc_rms(float *in, int len, int channel) +{ + float rms = 0; + // 只取用第一个声道数据 + for(int i=0;idst_gain增益平滑增长 +void float_gain_crossfade(float src_gain, float dst_gain, float* in, int size, int channel); + +#endif //__UTIL_H__ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/formant_corrector/FormantCorrector.cpp 
b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/formant_corrector/FormantCorrector.cpp new file mode 100644 index 0000000..1795ce4 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/formant_corrector/FormantCorrector.cpp @@ -0,0 +1,192 @@ +#include "FormantCorrector.h" +#include "AutoTuneDef.h" + +int formant_corrector_init(FormantCorrector *fcorrector, unsigned long sample_rate, int cbsize) +{ + // Initialize formant corrector + fcorrector->ford = 7; // should be sufficient to capture formants + fcorrector->falph = pow(0.001f, (float) 80 / (sample_rate)); + fcorrector->flamb = -(0.8517*sqrt(atan(0.06583*sample_rate))-0.1916); // or about -0.88 @ 44.1kHz + fcorrector->flevels = (FormantLevel*)calloc(fcorrector->ford, sizeof(FormantLevel)); + if (fcorrector->flevels == NULL) + { + return AT_ERR_BASE_H_MALLOC_NULL; + } + fcorrector->fhp = 0; + fcorrector->flp = 0; + fcorrector->flpa = pow(0.001f, (float) 10 / (sample_rate)); + fcorrector->ftvec = (float*)calloc(fcorrector->ford, sizeof(float)); + if (fcorrector->ftvec == NULL) + { + if (fcorrector->flevels != NULL) + { + free(fcorrector->flevels); + fcorrector->flevels = NULL; + } + return AT_ERR_BASE_H_MALLOC_NULL; + } + int i; + for (i=0; iford; i++) + { + fcorrector->flevels[i].buff = (float*)calloc(cbsize, sizeof(float)); + if (fcorrector->flevels[i].buff == NULL) + { + + i--; + for (; i >= 0; i--) + { + if (fcorrector->flevels[i].buff != NULL) + { + free(fcorrector->flevels[i].buff); + fcorrector->flevels[i].buff = NULL; + } + } + if (fcorrector->flevels != NULL) + { + free(fcorrector->flevels); + fcorrector->flevels = NULL; + } + if (fcorrector->ftvec != NULL) + { + free(fcorrector->ftvec); + fcorrector->ftvec = NULL; + } + return AT_ERR_BASE_H_MALLOC_NULL; + } + } + fcorrector->fmute = 1; + fcorrector->fmutealph = powf(0.001f, (float)1 / (sample_rate)); + return 0; +} + + +float FormantRemovalIteration(FormantLevel* level, float falph, float flamb, float *fa, float* fb) +{ + float foma=(1-falph); + level->fsig = (*fa)*(*fa)*foma + level->fsig*falph; + float fc = (*fb-(level->fc))*(flamb) + level->fb; + + level->fc=fc; + level->fb=*fb; + float fk_tmp = (*fa)*fc*foma + level->fk*falph; + level->fk = fk_tmp; + float result = fk_tmp/(level->fsig + 0.000001); + result = result*foma + level->fsmooth*falph; + level->fsmooth = result; + *fb = fc - result*(*fa); + *fa = *fa - result*fc; + return result; +} +// tf is signal input +void remove_formants(FormantCorrector *fcorrector, CircularBuffer *buffer, float tf) +{ + // Somewhat experimental formant corrector + // formants are removed using an adaptive pre-filter and + // re-introduced after pitch manipulation using post-filter + + float fa = tf - fcorrector->fhp; // highpass pre-emphasis filter + fcorrector->fhp = tf; + float fb = fa; + + int i; + for (i=0; i<(fcorrector->ford); i++) + { + FormantLevel* level=&fcorrector->flevels[i]; + level->buff[buffer->cbiwr]=FormantRemovalIteration(level,fcorrector->falph, fcorrector->flamb,&fa,&fb); + } + buffer->cbf[buffer->cbiwr] = fa; + // Now hopefully the formants are reduced + // More formant correction code at the end of the DSP loops +} +float FormantCorrectorIteration(FormantCorrector* fcorrector, float fa, long int writepoint) +{ + float fb=fa; + for (int i=0; iford; i++) + { + FormantLevel level=fcorrector->flevels[i]; + float fc = (fb-level.frc)*fcorrector->frlamb + level.frb; + float tf = level.buff[writepoint]; + fb = fc - tf*fa; + fcorrector->ftvec[i] = tf*fc; + fa = fa - 
fcorrector->ftvec[i]; + } + + float tf = -fa; + for (int i = fcorrector->ford - 1; i >= 0; i--) + { + tf = tf + fcorrector->ftvec[i]; + } + return tf; +} +float add_formants(FormantCorrector *fcorrector, float in, long int writepoint) +{ + // The second part of the formant corrector + // This is a post-filter that re-applies the formants, designed + // to result in the exact original signal when no pitch + // manipulation is performed. + // tf is signal input + // gotta run it 3 times because of a pesky delay free loop + // first time: compute 0-response + float f0resp = FormantCorrectorIteration(fcorrector,0,writepoint); + // second time: compute 1-response + float f1resp = FormantCorrectorIteration(fcorrector,1,writepoint); + // now solve equations for output, based on 0-response and 1-response + float tf = (float)2 * in; + float tf2 = tf; + tf = ((float)1 - f1resp + f0resp); + if (tf != 0) + { + tf2 = (tf2 + f0resp) / tf; + } + else + { + tf2 = 0; + } + // third time: update delay registers + float fa = tf2; + float fb = fa; + for (int ti = 0; tiford; ti++) + { + float fc = (fb - fcorrector->flevels[ti].frc)*fcorrector->frlamb + fcorrector->flevels[ti].frb; + fcorrector->flevels[ti].frc = fc; + fcorrector->flevels[ti].frb = fb; + tf = fcorrector->flevels[ti].buff[writepoint]; + fb = fc - tf*fa; + fa = fa - tf*fc; + } + tf = tf2; + tf = tf + fcorrector->flpa * fcorrector->flp; // lowpass post-emphasis filter + fcorrector->flp = tf; + // Bring up the gain slowly when formant correction goes from disabled + // to enabled, while things stabilize. + if (fcorrector->fmute>0.5) + { + tf = tf*(fcorrector->fmute - 0.5) * 2; + } + else + { + tf = 0; + } + tf2 = fcorrector->fmutealph; + fcorrector->fmute = (1 - tf2) + tf2*fcorrector->fmute; + // now tf is signal output + // ...and we're done messing with formants + return tf; +} + +void update_formant_warp(FormantCorrector *fcorrector) +{ + float f = pow((float)2,(fcorrector->fFwarp)/2)*(1+fcorrector->flamb)/(1-fcorrector->flamb); + fcorrector->frlamb = (f - 1)/(f + 1); +} + +void cleanup_formant_corrector(FormantCorrector *fcorrector) +{ + int i; + for (i=0; iford; i++) + { + free(fcorrector->flevels[i].buff); + } + free(fcorrector->flevels); + free(fcorrector->ftvec); +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/formant_corrector/FormantCorrector.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/formant_corrector/FormantCorrector.h new file mode 100644 index 0000000..faea0f7 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/formant_corrector/FormantCorrector.h @@ -0,0 +1,44 @@ +#ifndef __FORMANT_CORRECTOR_H_ +#define __FORMANT_CORRECTOR_H_ + +#include +#include +#include "ref/CircularBuffer.h" + // VARIABLES FOR FORMANT CORRECTOR + +typedef struct { + float fk; + float fb; + float fc; + float frb; + float frc; + float fsig; + float fsmooth; + float* buff; +} FormantLevel; + + typedef struct { + int iFcorr; + float fFwarp; + + int ford; + float falph; + float flamb; + float frlamb; + FormantLevel* flevels; + float fhp; + float flp; + float flpa; + float fmute; + float fmutealph; + float *ftvec; +} FormantCorrector; + + +int formant_corrector_init(FormantCorrector *fcorrector, unsigned long sample_rate, int cbsize); +void remove_formants(FormantCorrector *fcorrector, CircularBuffer *buffer, float tf); +void update_formant_warp(FormantCorrector *fcorrector); +float add_formants(FormantCorrector *fcorrector, float in, long int 
writepoint); +void cleanup_formant_corrector(FormantCorrector *fcorrector); + +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchDetector.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchDetector.cpp new file mode 100644 index 0000000..937bde3 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchDetector.cpp @@ -0,0 +1,173 @@ +#include "PitchDetector.h" +#include "AutoTuneDef.h" +#include + +#define L2SC (float)3.32192809488736218171 +void bbtain_autocovariance(PitchDetector *pdetector, fft_vars *fftvars, CircularBuffer *buffer, long int N, long int Nf) { + + // Window and fill FFT buffer + unsigned long ti2 = buffer->cbiwr; + long ti = 0; + for (long ti = 0; ti < N; ti++) + { + fftvars->ffttime[ti] = (float)(buffer->cbi[(ti2 - ti + N) % N] * pdetector->cbwindow[ti]); + } + + // Calculate FFT + fft_forward(fftvars, fftvars->ffttime, fftvars->fftfreqre, fftvars->fftfreqim); + + // Remove DC + fftvars->fftfreqre[0] = 0; + fftvars->fftfreqim[0] = 0; + + // Take magnitude squared + for (ti = 1; ti < Nf; ti++) + { + fftvars->fftfreqre[ti] = (fftvars->fftfreqre[ti])*(fftvars->fftfreqre[ti]) + (fftvars->fftfreqim[ti])*(fftvars->fftfreqim[ti]); + fftvars->fftfreqim[ti] = 0; + } + + // Calculate IFFT + fft_inverse(fftvars, fftvars->fftfreqre, fftvars->fftfreqim, fftvars->ffttime); + + // Normalize + float tf = fabs(fftvars->ffttime[0]) < FLT_EPSILON ? 0.0f : (float)1 / fftvars->ffttime[0]; + for (ti = 1; ti < N; ti++) + { + fftvars->ffttime[ti] = fftvars->ffttime[ti] * tf; + } + fftvars->ffttime[0] = 1; + +} + +void get_pitch_conf(PitchDetector *pdetector, fft_vars *fftvars, unsigned long Nf, float fs, float aref, float &inpitch) +{ + // Calculate pitch period + + //MPM Algorithm, thanks to Philip McLeod, and Geoff Wyvill, adapted from their GPL Tartini program + // Calculate pitch period + // Pitch period is determined by the location of the max (biased) + // peak within a given range + // Confidence is determined by the corresponding unbiased height + long ti2 = 0; + long ti3 = 0; + long ti4 = 0; + long ti = 0; + float tf = 0.0f; + float tf2 = 0.0f; + float pperiod = pdetector->pmin; + for (ti = pdetector->nmin; ti < pdetector->nmax; ti++) //here is 63~630 + { + ti2 = ti - 1; + ti3 = ti + 1; + if (ti2<0) //check + { + ti2 = 0; + } + if (ti3>Nf) //check + { + ti3 = Nf; + } + tf = fftvars->ffttime[ti]; + + if (tf > fftvars->ffttime[ti2] && tf >= fftvars->ffttime[ti3] && tf > tf2) + { + tf2 = tf; + ti4 = ti; + } + } + float conf = 0.0f; + if (tf2 > 0) + { + conf = tf2*pdetector->acwinv[ti4]; + if (ti4 > 0 && ti4 < Nf) + { + // Find the center of mass in the vicinity of the detected peak + tf = fftvars->ffttime[ti4 - 1] * (ti4 - 1); + tf = tf + fftvars->ffttime[ti4] * (ti4); + tf = tf + fftvars->ffttime[ti4 + 1] * (ti4 + 1); + tf = fabs(fftvars->ffttime[ti4 - 1] + fftvars->ffttime[ti4] + fftvars->ffttime[ti4 + 1]) < FLT_EPSILON ? 
(float)ti4 : tf / (fftvars->ffttime[ti4 - 1] + fftvars->ffttime[ti4] + fftvars->ffttime[ti4 + 1]); + pperiod = tf / fs; + } + else + { + pperiod = (float)ti4 / fs; + } + } + + // Convert to semitones + tf = (float)-12 * log10((float)aref*pperiod)*L2SC; + if (conf >= pdetector->vthresh) + { + inpitch = tf; + pdetector->inpitch = tf; // update pitch only if voiced + } + pdetector->confidence = conf; + + // ---- END Calculate pitch and confidence ---- +} + +int instantiate_pitch_detector(PitchDetector *pdetector, fft_vars *fftvars, unsigned long cbsize, int samplerate) { + //pdetector->ppickthresh=0.9;//I have no idea what this should be, except the MPM paper suggested between 0.8 and 1, so I am taking the average :P + unsigned long corrsize=cbsize/2+1; + + pdetector->pmax = 1/(float)70; // max and min periods (s) + pdetector->pmin = 1/(float)700; // eventually may want to bring these out as sliders + + pdetector->nmax = (unsigned long)(samplerate * pdetector->pmax); + if (pdetector->nmax > corrsize) { + pdetector->nmax = corrsize; + } + pdetector->nmin = (unsigned long)(samplerate * pdetector->pmin); + pdetector->vthresh = 0.7; // The voiced confidence (unbiased peak) threshold level + // Generate a window with a single raised cosine from N/4 to 3N/4 + pdetector->cbwindow = (float*)calloc(cbsize, sizeof(float)); + if (pdetector->cbwindow == NULL) + { + return AT_ERR_BASE_H_MALLOC_NULL; + } + unsigned long ti = 0; + for (ti = 0; ti < (cbsize / 2); ti++) { + pdetector->cbwindow[ti + cbsize / 4] = -0.5*cos(4 * PI*ti / (cbsize - 1)) + 0.5; + } + + + + //fftvars = fft_con(cbsize); + + + + // ---- Calculate autocorrelation of window ---- + pdetector->acwinv = (float*)calloc(cbsize, sizeof(float)); + if (pdetector->acwinv == NULL) + { + if (pdetector->cbwindow != NULL) + { + free(pdetector->cbwindow); + pdetector->cbwindow = NULL; + } + return AT_ERR_BASE_H_MALLOC_NULL; + } + for (ti = 0; ti < cbsize; ti++) { + fftvars->ffttime[ti] = pdetector->cbwindow[ti]; + } + fft_forward(fftvars, pdetector->cbwindow, fftvars->fftfreqre, fftvars->fftfreqim); + for (ti = 0; ti < corrsize; ti++) { + fftvars->fftfreqre[ti] = (fftvars->fftfreqre[ti])*(fftvars->fftfreqre[ti]) + (fftvars->fftfreqim[ti])*(fftvars->fftfreqim[ti]); + fftvars->fftfreqim[ti] = 0; + } + fft_inverse(fftvars, fftvars->fftfreqre, fftvars->fftfreqim, fftvars->ffttime); + for (ti = 1; ti<corrsize; ti++) { + pdetector->acwinv[ti] = fftvars->ffttime[ti] / fftvars->ffttime[0]; + if (pdetector->acwinv[ti] > 0.000001) { + pdetector->acwinv[ti] = (float)1 / pdetector->acwinv[ti]; + } + else { + pdetector->acwinv[ti] = 0; + } + } + pdetector->acwinv[0] = 1; + return 0; + // ---- END Calculate autocorrelation of window ---- + +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchDetector.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchDetector.h new file mode 100644 index 0000000..76008b2 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchDetector.h @@ -0,0 +1,38 @@ +#ifndef __PITCH_DETECTOR_H_ +#define __PITCH_DETECTOR_H_ + +#include <stdio.h> +#include "ref/CircularBuffer.h" +extern "C" +{ +#include "ref/fftwrap.h" +} + +#include <stdlib.h> +#include <math.h> + + + +#define PI (float)3.14159265358979323846 + +typedef struct +{ + float pmax; // Maximum allowable pitch period (seconds) + float pmin; // Minimum allowable pitch period (seconds) + unsigned long nmax; // Maximum period index for pitch prd est + unsigned long nmin; // Minimum period index for pitch prd est + float inpitch; + float 
confidence; + float* cbwindow; //cosine window; + float* acwinv; // inverse of autocorrelation of window + + float vthresh; // Voiced speech threshold + //float ppickthresh; +} PitchDetector; + +void bbtain_autocovariance(PitchDetector *pdetector, fft_vars *fftvars, CircularBuffer *buffer, long int N, long int Nf); + +void get_pitch_conf(PitchDetector *pdetector, fft_vars *fftvars, unsigned long Nf, float fs, float aref, float &inpitch); + +int instantiate_pitch_detector(PitchDetector *pdetector, fft_vars *fftvars, unsigned long cbsize, int samplerate); +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchShifter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchShifter.cpp new file mode 100644 index 0000000..0c08db3 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchShifter.cpp @@ -0,0 +1,124 @@ +#include "pitch/PitchShifter.h" +#include "AutoTuneDef.h" +#include <math.h> +int pitch_shifter_init(PitchShifter *pshifter, unsigned long sampleRate, unsigned long cbsize) { + // Pitch shifter initialization + pshifter->phprdd = 0.01; // Default period + pshifter->inphinc = (float)1/(pshifter->phprdd * sampleRate); + pshifter->phincfact = 1; + pshifter->phasein = 0; + pshifter->phaseout = 0; + pshifter->frag = (float*)calloc(cbsize, sizeof(float)); + if (pshifter->frag == NULL) + { + return AT_ERR_BASE_H_MALLOC_NULL; + } + pshifter->fragsize = 0; + + // Standard raised cosine window, max height at N/2 + pshifter->hannwindow = (float*)calloc(cbsize, sizeof(float)); + if (pshifter->hannwindow == NULL) + { + if (pshifter->frag != NULL) + { + free(pshifter->frag); + pshifter->frag = NULL; + } + return AT_ERR_BASE_H_MALLOC_NULL; + } + long int i; + for (i=0; i<cbsize; i++) + { + pshifter->hannwindow[i] = -0.5*cos(2*PI*i/cbsize) + 0.5; + } + + pshifter->cbo = (float*)calloc(cbsize, sizeof(float)); + pshifter->cbord = 0; + pshifter->active = 0; + pshifter->outphinc = 0; + return 0; +} + +void compute_pitch_shifter_variables(PitchShifter *pshifter, float inpperiod, float outpperiod, float fs) { + float invinphinc=inpperiod*fs; + pshifter->inphinc = 1/invinphinc; //This is like the fraction of a period every sample is. + pshifter->outphinc = 1/(outpperiod*fs); + pshifter->phincfact = pshifter->outphinc*invinphinc; + +} + +float shift_pitch(PitchShifter *pshifter, CircularBuffer *buffer, long int N) +{ + // Pitch shifter (kind of like a pitch-synchronous version of Fairbanks' technique) + // Note: pitch estimate is naturally N/2 samples old + pshifter->phasein = pshifter->phasein + pshifter->inphinc; //This is like the total amount of the period we've been through. 
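// A worked numeric example of the phase bookkeeping here (illustrative values,
// not taken from this codebase): compute_pitch_shifter_variables() above sets
// inphinc = 1/(inpperiod*fs) and outphinc = 1/(outpperiod*fs), so
// phincfact = outphinc/inphinc = inpperiod/outpperiod is the pitch ratio.
//   fs = 44100 Hz, detected period 1/220 s (A3):  inphinc  = 220/44100    ~ 0.0049886
//   target one semitone up, period 1/233.08 s:    outphinc = 233.08/44100 ~ 0.0052853
//   phincfact ~ 1.0595 = 2^(1/12)
// phasein therefore wraps roughly every 200 samples, and each wrap below pulls
// a fresh period-long fragment out of the input ring buffer.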
+ + pshifter->phaseout = pshifter->phaseout + pshifter->outphinc; //likewise for output + + // When input phase resets, take a snippet from N/2 samples in the past + if (pshifter->phasein >= 1) { + pshifter->phasein = pshifter->phasein - 1; + long fragment_beginning= buffer->cbiwr - N/2; + +// copy one window-length snippet out of the ring buffer into frag[] +#define FRAGCOPYLOOP(lower,upper,index) {\ + for (long i=lower; i<upper; i++) {\ + pshifter->frag[index] = buffer->cbf[(i + fragment_beginning + N)%N];\ + }\ +} + + FRAGCOPYLOOP(-N/2,0,i+N) + FRAGCOPYLOOP(0,N/2,i) + } + + // When output phase resets, put a snippet N/2 samples in the future + if (pshifter->phaseout >= 1) { + pshifter->fragsize = pshifter->fragsize*2; + if (pshifter->fragsize > N) { + pshifter->fragsize = N; + } + pshifter->phaseout = pshifter->phaseout - 1; + long int ti3 = (long int)(((float)pshifter->fragsize) / pshifter->phincfact); + + //Interpolator + interpolate(pshifter, ti3, N); + pshifter->fragsize = 0; + } + pshifter->fragsize++; + + // Get output signal from buffer + float tf = pshifter->cbo[pshifter->cbord]; // read buffer + + pshifter->cbo[pshifter->cbord] = 0; // erase for next cycle + pshifter->cbord++; // increment read pointer + if (pshifter->cbord >= N) { + pshifter->cbord = 0; + } + return tf; +} + +void interpolate(PitchShifter *pshifter, long int bounds, long int N) +{ + if (bounds>=N/2) { + bounds = N/2 - 1; + } + long int i; + for (i=-bounds/2; i<(bounds/2); i++) + { + float tf = pshifter->hannwindow[(long int)N/2 + i*(long int)N/bounds]; + // 3rd degree polynomial interpolator - based on eqns from Hal Chamberlin's book + float indd = pshifter->phincfact*i; + int ind1 = (int)indd; + int ind2 = ind1+1; + int ind3 = ind1+2; + int ind0 = ind1-1; + float val0 = pshifter->frag[(ind0+N)%N]; + float val1 = pshifter->frag[(ind1+N)%N]; + float val2 = pshifter->frag[(ind2+N)%N]; + float val3 = pshifter->frag[(ind3+N)%N]; + float vald = 0; + vald = vald - (float)0.166666666667 * val0 * (indd - ind1) * (indd - ind2) * (indd - ind3); + vald = vald + (float)0.5 * val1 * (indd - ind0) * (indd - ind2) * (indd - ind3); + vald = vald - (float)0.5 * val2 * (indd - ind0) * (indd - ind1) * (indd - ind3); + vald = vald + (float)0.166666666667 * val3 * (indd - ind0) * (indd - ind1) * (indd - ind2); + pshifter->cbo[(i + pshifter->cbord + 3*N/2)%N] = pshifter->cbo[(i + pshifter->cbord + 3*N/2)%N] + vald*tf; + } +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchShifter.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchShifter.h new file mode 100644 index 0000000..f276e66 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/pitch/PitchShifter.h @@ -0,0 +1,39 @@ +#ifndef __PITCH_SHIFTER_H_ +#define __PITCH_SHIFTER_H_ + + +#include <stdlib.h> +#include <math.h> +#include "PitchDetector.h" +#include "ref/CircularBuffer.h" + typedef struct + { +// VARIABLES FOR PITCH SHIFTER + float phprdd; // default (unvoiced) phase period + double inphinc; // input phase increment + double outphinc; // output phase increment + double phincfact; // factor determining output phase increment + + double phasein; + double phaseout; + + float* frag; // windowed fragment of speech + unsigned long fragsize; // size of fragment in samples + + float* hannwindow; // length-N hann + + float* cbo; // circular output buffer + unsigned long cbord; //read index for circular buffer; + + int active; +} PitchShifter; + + +int pitch_shifter_init(PitchShifter *pshifter, unsigned long sampleRate, unsigned long cbsize); + +void compute_pitch_shifter_variables(PitchShifter 
*pshifter, float inpperiod, float outpperiod, float fs); + +float shift_pitch(PitchShifter *pshifter, CircularBuffer *buffer, long int N); + +void interpolate(PitchShifter *pshifter, long int bounds, long int N); +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/CircularBuffer.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/CircularBuffer.cpp new file mode 100644 index 0000000..8008681 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/CircularBuffer.cpp @@ -0,0 +1,30 @@ +#include "ref/CircularBuffer.h" +#include "AutoTuneDef.h" + +int instantiate_circular_buffer(CircularBuffer *buffer, unsigned long samplerate) { + if (samplerate>=88200) { + buffer->cbsize = 4096; + } + else { + buffer->cbsize = 2048; + } + buffer->corrsize = buffer->cbsize / 2 + 1; + + buffer->cbi = (float*)calloc(buffer->cbsize, sizeof(float)); + if (buffer->cbi == NULL) + { + return AT_ERR_BASE_H_MALLOC_NULL; + } + buffer->cbf = (float*)calloc(buffer->cbsize, sizeof(float)); + if (buffer->cbf == NULL) + { + if (buffer->cbi != NULL) + { + free(buffer->cbi); + buffer->cbi = NULL; + } + return AT_ERR_BASE_H_MALLOC_NULL; + } + buffer->cbiwr = 0; + return 0; +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/CircularBuffer.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/CircularBuffer.h new file mode 100644 index 0000000..47826cc --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/CircularBuffer.h @@ -0,0 +1,14 @@ +#ifndef __CIRCULAR_BUFFER_H_ +#define __CIRCULAR_BUFFER_H_ +#include + +typedef struct { + unsigned long cbsize; // size of circular buffer + unsigned long corrsize; // cbsize/2 + 1 + unsigned long cbiwr; //write pointer; + float* cbi; + float* cbf; // circular formant correction buffer +} CircularBuffer; + +int instantiate_circular_buffer(CircularBuffer *buffer, unsigned long samplerate); +#endif \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/fftwrap.c b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/fftwrap.c new file mode 100644 index 0000000..dbbd653 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/fftwrap.c @@ -0,0 +1,99 @@ + +#include + +#include "ref/fftwrap.h" +#include "ref/mayer_fft.h" + +fft_vars* fft_con(int nfft) +{ + fft_vars* membvars = (fft_vars*)malloc(sizeof(fft_vars)); + if (membvars == NULL) + { + return NULL; + } + + membvars->nfft = nfft; + membvars->numfreqs = nfft / 2 + 1; + + membvars->fft_data = (float*)calloc(nfft, sizeof(float)); + membvars->ffttime = (float*)calloc(nfft, sizeof(float)); + membvars->fftfreqre = (float*)calloc(nfft, sizeof(float)); + membvars->fftfreqim = (float*)calloc(nfft, sizeof(float)); + return membvars; +} + +// Destructor for FFT routine +void fft_des(fft_vars* membvars) +{ + free(membvars->fft_data); + free(membvars->ffttime); + free(membvars->fftfreqre); + free(membvars->fftfreqim); + free(membvars); +} + +// Perform forward FFT of real data +// Accepts: +// membvars - pointer to struct of FFT variables +// input - pointer to an array of (real) input values, size nfft +// output_re - pointer to an array of the real part of the output, +// size nfft/2 + 1 +// output_im - pointer to an array of the imaginary part of the output, +// size nfft/2 + 1 +void fft_forward(fft_vars* membvars, float* input, float* output_re, float* output_im) +{ + int 
ti; + int nfft; + int hnfft; + int numfreqs; + + nfft = membvars->nfft; + hnfft = nfft / 2; + numfreqs = membvars->numfreqs; + + for (ti = 0; ti < nfft; ti++) { + membvars->fft_data[ti] = input[ti]; + } + + mayer_realfft(nfft, membvars->fft_data); + + output_im[0] = 0; + for (ti = 0; ti < hnfft; ti++) { + output_re[ti] = membvars->fft_data[ti]; + output_im[ti + 1] = membvars->fft_data[nfft - 1 - ti]; + } + output_re[hnfft] = membvars->fft_data[hnfft]; + output_im[hnfft] = 0; +} + +// Perform inverse FFT, returning real data +// Accepts: +// membvars - pointer to struct of FFT variables +// input_re - pointer to an array of the real part of the output, +// size nfft/2 + 1 +// input_im - pointer to an array of the imaginary part of the output, +// size nfft/2 + 1 +// output - pointer to an array of (real) input values, size nfft +void fft_inverse(fft_vars* membvars, float* input_re, float* input_im, float* output) +{ + int ti; + int nfft; + int hnfft; + int numfreqs; + + nfft = membvars->nfft; + hnfft = nfft / 2; + numfreqs = membvars->numfreqs; + + for (ti = 0; ti < hnfft; ti++) { + membvars->fft_data[ti] = input_re[ti]; + membvars->fft_data[nfft - 1 - ti] = input_im[ti + 1]; + } + membvars->fft_data[hnfft] = input_re[hnfft]; + + mayer_realifft(nfft, membvars->fft_data); + + for (ti = 0; ti < nfft; ti++) { + output[ti] = membvars->fft_data[ti]; + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/fftwrap.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/fftwrap.h new file mode 100644 index 0000000..528157e --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/fftwrap.h @@ -0,0 +1,41 @@ +#ifndef __FFT_WRAP_H_ +#define __FFT_WRAP_H_ + +// Variables for FFT routine +typedef struct +{ + int nfft; // size of FFT + int numfreqs; // number of frequencies represented (nfft/2 + 1) + float* ffttime; + float* fftfreqre; + float* fftfreqim; + float* fft_data; // array for writing/reading to/from FFT function +} fft_vars; + +// Constructor for FFT routine +fft_vars* fft_con(int nfft); + +// Destructor for FFT routine +void fft_des(fft_vars* membvars); + +// Perform forward FFT of real data +// Accepts: +// membvars - pointer to struct of FFT variables +// input - pointer to an array of (real) input values, size nfft +// output_re - pointer to an array of the real part of the output, +// size nfft/2 + 1 +// output_im - pointer to an array of the imaginary part of the output, +// size nfft/2 + 1 +void fft_forward(fft_vars* membvars, float* input, float* output_re, float* output_im); + +// Perform inverse FFT, returning real data +// Accepts: +// membvars - pointer to struct of FFT variables +// input_re - pointer to an array of the real part of the output, +// size nfft/2 + 1 +// input_im - pointer to an array of the imaginary part of the output, +// size nfft/2 + 1 +// output - pointer to an array of (real) input values, size nfft +void fft_inverse(fft_vars* membvars, float* input_re, float* input_im, float* output); +#endif + diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/mayer_fft.c b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/mayer_fft.c new file mode 100644 index 0000000..a93698e --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/autotune/src/ref/mayer_fft.c @@ -0,0 +1,419 @@ +/* This is the FFT routine taken from PureData, a great piece of +software by Miller S. Puckette. 
+http://crca.ucsd.edu/~msp/software.html */ + +/* +** FFT and FHT routines +** Copyright 1988, 1993; Ron Mayer +** +** mayer_fht(fz,n); +** Does a hartley transform of "n" points in the array "fz". +** mayer_fft(n,real,imag) +** Does a fourier transform of "n" points of the "real" and +** "imag" arrays. +** mayer_ifft(n,real,imag) +** Does an inverse fourier transform of "n" points of the "real" +** and "imag" arrays. +** mayer_realfft(n,real) +** Does a real-valued fourier transform of "n" points of the +** "real" array. The real part of the transform ends +** up in the first half of the array and the imaginary part of the +** transform ends up in the second half of the array. +** mayer_realifft(n,real) +** The inverse of the realfft() routine above. +** +** +** NOTE: This routine uses at least 2 patented algorithms, and may be +** under the restrictions of a bunch of different organizations. +** Although I wrote it completely myself, it is kind of a derivative +** of a routine I once authored and released under the GPL, so it +** may fall under the free software foundation's restrictions; +** it was worked on as a Stanford Univ project, so they claim +** some rights to it; it was further optimized at work here, so +** I think this company claims parts of it. The patents are +** held by R. Bracewell (the FHT algorithm) and O. Buneman (the +** trig generator), both at Stanford Univ. +** If it were up to me, I'd say go do whatever you want with it; +** but it would be polite to give credit to the following people +** if you use this anywhere: +** Euler - probable inventor of the fourier transform. +** Gauss - probable inventor of the FFT. +** Hartley - probable inventor of the hartley transform. +** Buneman - for a really cool trig generator +** Mayer(me) - for authoring this particular version and +** including all the optimizations in one package. +** Thanks, +** Ron Mayer; mayer@acuson.com +** +*/ + +/* This is a slightly modified version of Mayer's contribution; write +* msp@ucsd.edu for the original code. Kudos to Mayer for a fine piece +* of work. 
-msp +*/ + +#define REAL float +#define GOOD_TRIG + +#ifdef GOOD_TRIG +#else +#define FAST_TRIG +#endif + +#if defined(GOOD_TRIG) +#define FHT_SWAP(a,b,t) {(t)=(a);(a)=(b);(b)=(t);} +#define TRIG_VARS \ + int t_lam=0; +#define TRIG_INIT(k,c,s) \ + { \ + int i; \ + for (i=2 ; i<=k ; i++) \ + {coswrk[i]=costab[i];sinwrk[i]=sintab[i];} \ + t_lam = 0; \ + c = 1; \ + s = 0; \ + } +#define TRIG_NEXT(k,c,s) \ + { \ + int i,j; \ + (t_lam)++; \ + for (i=0 ; !((1<1) \ + { \ + for (j=k-i+2 ; (1<>1; (!((k2^=k)&k)); k>>=1); + if (k1>k2) + { + aa=fz[k1];fz[k1]=fz[k2];fz[k2]=aa; + } + } + for ( k=0 ; (1<> 1; + fi = fz; + gi = fi + kx; + fn = fz + n; + do + { + REAL g0,f0,f1,g1,f2,g2,f3,g3; + f1 = fi[0 ] - fi[k1]; + f0 = fi[0 ] + fi[k1]; + f3 = fi[k2] - fi[k3]; + f2 = fi[k2] + fi[k3]; + fi[k2] = f0 - f2; + fi[0 ] = f0 + f2; + fi[k3] = f1 - f3; + fi[k1] = f1 + f3; + g1 = gi[0 ] - gi[k1]; + g0 = gi[0 ] + gi[k1]; + g3 = SQRT2 * gi[k3]; + g2 = SQRT2 * gi[k2]; + gi[k2] = g0 - g2; + gi[0 ] = g0 + g2; + gi[k3] = g1 - g3; + gi[k1] = g1 + g3; + gi += k4; + fi += k4; + } while (fi + +using std::vector; + +namespace BiquadFilter { + APFilter::APFilter(float cutoff, float Q){ + this->cutoff = cutoff; + this->Q = Q; + + alloc(); + + //init filter coefficient + float omega = 2.0 * M_PI* cutoff; + float alpha = sin(omega) / 2.0 * Q; + + a[0] = 1.0 + alpha; + a[1] = -2.0 * cos(omega); + a[2] = 1.0 - alpha; + b[0] = 1.0 - alpha; + b[1] = -2.0 * cos(omega); + b[2] = 1.0 + alpha; + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/BPFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/BPFilter.cpp new file mode 100644 index 0000000..8994f32 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/BPFilter.cpp @@ -0,0 +1,40 @@ +//BPFilter.cpp +#define _USE_MATH_DEFINES + +#include "biquad_filters/BiquadFilter.h" +#include + +using std::vector; + +/** +#ifndef _MSC_VER + +static float log2(float x) +{ + return log(x) / log(2.0); +} + +#endif // _MSC_VER +**/ + +namespace BiquadFilter{ + BPFilter::BPFilter(float low_edge, float high_edge){ + this->low_edge = low_edge; + this->high_edge = high_edge; + + alloc(); + + //init filter coefficient + float bw = log2(high_edge / low_edge); + float cutoff = low_edge * pow(2, bw/2); + float omega = 2.0 * M_PI* cutoff; + float alpha = sin(omega) * sinh(log(2.0)) / 2.0 * bw * omega / sin(omega); + + a[0] = 1.0 + alpha; + a[1] = -2.0 * cos(omega); + a[2] = 1.0 - alpha; + b[0] = alpha; + b[1] = 0.0; + b[2] = -alpha; + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/BiquadFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/BiquadFilter.cpp new file mode 100644 index 0000000..942b6b6 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/BiquadFilter.cpp @@ -0,0 +1,59 @@ +//BiquadFilter.cpp + +#include "biquad_filters/BiquadFilter.h" + +//using +using std::vector; + + +namespace BiquadFilter{ + + CBaseFilter::CBaseFilter() + { + reset(); + } + + CBaseFilter::~CBaseFilter(){} + + void CBaseFilter::filtering(vector *x){ + vector &in = *x; + vector out(in.size()); + + float b00 = b[0] / a[0]; + float b10 = b[1] / a[0]; + float b20 = b[2] / a[0]; + float a10 = a[1] / a[0]; + float a20 = a[2] / a[0]; + + for (unsigned int i = 0; i < out.size(); i++){ + out[i] = b00 * in[i] + b10 * bin1 + b20 * bin2 - a10 * bout1 - 
a20 * bout2; +// out[i] = (b[0] / a[0]) * in[i] + (b[1] / a[0]) * bin1 + (b[2] / a[0]) * bin2 - (a[1] / a[0]) * bout1 - (a[2] / a[0]) * bout2; + //update input buf + bin2 = bin1; + bin1 = in[i]; + //update output buf + bout2 = bout1; + bout1 = out[i]; + } + + //copy + for (unsigned int i = 0; i < in.size(); i++){ + in[i] = out[i]; + } + + } + + void CBaseFilter::alloc(){ + a.resize(3); + b.resize(3); + } + + void CBaseFilter::reset() + { + bin1 = 0; //= 0.0, bin2 = 0.0; + bout1 = 0;// = 0.0, bout2 = 0.0; + bin2 = 0; //= 0.0, bin2 = 0.0; + bout2 = 0;// = 0.0, bout2 = 0.0; + } + +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/BiquadFilter.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/BiquadFilter.h new file mode 100644 index 0000000..c1c53df --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/BiquadFilter.h @@ -0,0 +1,103 @@ +#ifndef __BIQUADFILTER_H__ +#define __BIQUADFILTER_H__ + +#include +#include "AudioEffectsConf.h" + +namespace BiquadFilter +{ + class CBaseFilter{ + public: + CBaseFilter(); + virtual ~CBaseFilter(); + void filtering(std::vector *x); + void reset(); + protected: + void alloc(); + + protected: + + std::vector a; + std::vector b; + private: + float bin1; //= 0.0, bin2 = 0.0; + float bout1;// = 0.0, bout2 = 0.0; + float bin2; //= 0.0, bin2 = 0.0; + float bout2;// = 0.0, bout2 = 0.0; + }; + + + class LPFilter : public CBaseFilter{ + public: + LPFilter(float cutoff, float Q); + private: + float cutoff; + + float Q; + }; + + + class HPFilter : public CBaseFilter{ + public: + HPFilter(float cutoff, float Q); + private: + float cutoff; + + float Q; + }; + + class BPFilter : public CBaseFilter{ + public: + BPFilter(float low_edge, float high_edge); + private: + float low_edge; + float high_edge; + }; + + class NTFilter : public CBaseFilter{ + public: + NTFilter(float low_edge, float high_edge); + private: + float low_edge; + float high_edge; + }; + + + class LSFilter : public CBaseFilter{ + public: + LSFilter(float cutoff, float Q, float gain); + private: + float cutoff; + float Q; + float gain; + }; + + + class HSFilter : public CBaseFilter{ + public: + HSFilter(float cutoff, float Q, float gain); + private: + float cutoff; + float Q; + float gain; + }; + + class PKFilter : public CBaseFilter{ + public: + PKFilter(float low_edge, float high_edge, float gain); + private: + float low_edge; + float high_edge; + float gain; + }; + + class APFilter : public CBaseFilter{ + public: + APFilter(float cutoff, float Q); + private: + float cutoff; + float Q; + }; +} + +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/HPFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/HPFilter.cpp new file mode 100644 index 0000000..cfb8103 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/HPFilter.cpp @@ -0,0 +1,27 @@ +//LPFilter.cpp +#define _USE_MATH_DEFINES + +#include "biquad_filters/BiquadFilter.h" +#include + +using std::vector; + +namespace BiquadFilter{ + HPFilter::HPFilter(float cutoff, float Q){ + this->cutoff = cutoff; + this->Q = Q; + + alloc(); + + //init filter coefficient + float omega = 2.0 * M_PI* cutoff; + float alpha = sin(omega) / (2.0*Q); + + a[0] = 1.0 + alpha; + a[1] = -2.0 * cos(omega); + a[2] = 1.0 - alpha; + b[0] = (1.0 + cos(omega)) / 2.0; + b[1] = -(1.0 + cos(omega)); + b[2] = (1.0 + cos(omega)) / 2.0; + } +} \ 
No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/HSFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/HSFilter.cpp new file mode 100644 index 0000000..57de803 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/HSFilter.cpp @@ -0,0 +1,29 @@ +//HSFilter.cpp +#define _USE_MATH_DEFINES + +#include "biquad_filters/BiquadFilter.h" +#include + +using std::vector; + +namespace BiquadFilter{ + HSFilter::HSFilter(float cutoff, float Q, float gain){ + this->cutoff = cutoff; + this->Q = Q; + this->gain = gain; + + alloc(); + + //init filter coefficient + float omega = 2.0 * M_PI* cutoff; + float A = pow(10.0, (gain / 40.0)); + float beta = sqrt(A) / Q; + + a[0] = (A + 1.0) - (A - 1.0) * cos(omega) + beta * sin(omega); + a[1] = 2.0 * ((A - 1.0) - (A + 1.0) * cos(omega)); + a[2] = (A + 1.0) - (A - 1.0) * cos(omega) - beta * sin(omega); + b[0] = A * ((A + 1.0) + (A - 1.0) * cos(omega) + beta * sin(omega)); + b[1] = -2.0 * A * ((A - 1.0) + (A + 1.0) * cos(omega)); + b[2] = A * ((A + 1.0) + (A - 1.0) * cos(omega) - beta * sin(omega)); + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/LPFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/LPFilter.cpp new file mode 100644 index 0000000..6315592 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/LPFilter.cpp @@ -0,0 +1,27 @@ +//LPFilter.cpp +#define _USE_MATH_DEFINES + +#include "biquad_filters/BiquadFilter.h" +#include + +using std::vector; + +namespace BiquadFilter{ + LPFilter::LPFilter(float cutoff, float Q){ + this->cutoff = cutoff; + this->Q = Q; + + alloc(); + + //init filter coefficient + float omega = 2.0 * M_PI* cutoff; + float alpha = sin(omega) / (2.0*Q); + + a[0] = 1.0 + alpha; + a[1] = -2.0 * cos(omega); + a[2] = 1.0 - alpha; + b[0] = (1.0 - cos(omega)) / 2.0; + b[1] = 1.0 - cos(omega); + b[2] = (1.0 - cos(omega)) / 2.0; + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/LSFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/LSFilter.cpp new file mode 100644 index 0000000..a0e85ae --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/LSFilter.cpp @@ -0,0 +1,29 @@ +//LSFilter.cpp +#define _USE_MATH_DEFINES + +#include "biquad_filters/BiquadFilter.h" +#include + +using std::vector; + +namespace BiquadFilter{ + LSFilter::LSFilter(float cutoff, float Q, float gain){ + this->cutoff = cutoff; + this->Q = Q; + this->gain = gain; + + alloc(); + + //init filter coefficient + float omega = 2.0 * M_PI* cutoff; + float A = pow(10.0, (gain / 40.0)); + float beta = sqrt(A) / Q; + + a[0] = (A + 1.0) + (A - 1.0) * cos(omega) + beta * sin(omega); + a[1] = -2.0 * ((A - 1.0) + (A + 1.0) * cos(omega)); + a[2] = (A + 1.0) + (A - 1.0) * cos(omega) - beta * sin(omega); + b[0] = A * ((A + 1.0) - (A - 1.0) * cos(omega) + beta * sin(omega)); + b[1] = 2.0 * A * ((A - 1.0) - (A + 1.0) * cos(omega)); + b[2] = A * ((A + 1.0) - (A - 1.0) * cos(omega) - beta * sin(omega)); + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/NTFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/NTFilter.cpp new file mode 100644 index 0000000..d385ddd --- 
/dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/NTFilter.cpp @@ -0,0 +1,40 @@ +//NotchFilter.cpp +#define _USE_MATH_DEFINES + +#include "biquad_filters/BiquadFilter.h" +#include + +/** +#ifndef _MSC_VER + +static float log2(float x) +{ + return log(x) / log(2.0); +} + +#endif // _MSC_VER +*/ + +using std::vector; + +namespace BiquadFilter{ + NTFilter::NTFilter(float low_edge, float high_edge){ + this->low_edge = low_edge; + this->high_edge = high_edge; + + alloc(); + + //init filter coefficient + float bw = log2(high_edge / low_edge); + float cutoff = low_edge * pow(2, bw / 2); + float omega = 2.0 * M_PI* cutoff; + float alpha = sin(omega) * sinh(log(2.0)) / 2.0 * bw * omega / sin(omega); + + a[0] = 1.0 + alpha; + a[1] = -2.0 * cos(omega); + a[2] = 1.0 - alpha; + b[0] = 1.0; + b[1] = -2.0 * cos(omega); + b[2] = 1.0; + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/PKFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/PKFilter.cpp new file mode 100644 index 0000000..4f0345d --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/common/biquad_filters/PKFilter.cpp @@ -0,0 +1,42 @@ +//PKFilter.cpp +#define _USE_MATH_DEFINES + +#include "biquad_filters/BiquadFilter.h" +#include + +using std::vector; + +/** +#ifndef _MSC_VER + +static float log2(float x) +{ + return log(x) / log(2.0); +} + +#endif // _MSC_VER +**/ + +namespace BiquadFilter{ + PKFilter::PKFilter(float low_edge, float high_edge, float gain){ + this->low_edge = low_edge; + this->high_edge = high_edge; + this->gain = gain; + + alloc(); + + //init filter coefficient + float bw = log2(high_edge / low_edge); + float cutoff = low_edge * pow(2, bw / 2); + float omega = 2.0 * M_PI* cutoff; + float alpha = sin(omega) * sinh(log(2.0)) / 2.0 * bw * omega / sin(omega); + float A = pow(10.0, (gain/40.0)); + + a[0] = 1.0 + alpha / A; + a[1] = -2.0 * cos(omega); + a[2] = 1.0 - alpha / A; + b[0] = 1.0 + alpha * A; + b[1] = -2.0 * cos(omega); + b[2] = 1.0 - alpha * A; + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/CMakeLists.txt new file mode 100644 index 0000000..47a9c3d --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/CMakeLists.txt @@ -0,0 +1,15 @@ +#cmake_minimum_required(VERSION 2.8) +#project(iir_eq) +#set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) +set(CMAKE_CXX_STANDARD 11) + +include_directories(./) +include_directories(inc) +include_directories(src) +include_directories(src/audacious_arma) +include_directories(src/audacious_eq) + +file(GLOB_RECURSE SRC_IIR_EQ_SRC src/*cpp) + +add_library(iir_eq ${SRC_IIR_EQ_SRC}) +#set_target_properties(iir_eq PROPERTIES CXX_VISIBILITY_PRESET hidden) diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/build_bash.sh b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/build_bash.sh new file mode 100755 index 0000000..1cc5d56 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/build_bash.sh @@ -0,0 +1,62 @@ +#!/bin/sh + +# @Time : 2019-06-18 17:50 +# @Author : AlanWang +# @FileName: build_android.sh + +# MY_NDK 和 MY_CMAKE 需要改成自己对应的 ndk 中的目录 +# MY_NDK="/Users/wangjianjun/AndroidDev/sdk/ndk-bundle" +# MY_NDK="/Users/yangjianli/Library/Android/sdk/ndk-bundle" +MY_NDK="/Users/yangjianli/Library/Android/sdk/ndk-bundle" 
+MY_CMAKE="/Users/yangjianli/Library/Android/sdk/cmake/3.6.4111459/bin/cmake" + +if [ -z "$MY_NDK" ]; then + echo "Please set MY_NDK to the Android NDK folder" + exit 1 +fi + +if [ -z "$MY_CMAKE" ]; then + echo "Please set MY_CMAKE to the Android CMake folder" + exit 1 +fi + +OUTPUT_LIBS="./build/libs/android" +ANDROID_NATIVE_API_LEVEL="android-16" + +# arme_abis=(armeabi armeabi-v7a arm64-v8a x86 x86_64 mips mips64) +arme_abis=(armeabi-v7a arm64-v8a x86 x86_64) + +function build_with_armeabi() { + ARME_ABI=$1 + echo ${ARME_ABI} + + BUILD_DIR="./build/android/${ARME_ABI}" + PRE_EXE_DIR=$(pwd) + echo ${PRE_EXE_DIR} + + ${MY_CMAKE} \ + -H"./" \ + -B"${BUILD_DIR}" \ + -DANDROID_ABI="${ARME_ABI}" \ + -DANDROID_NDK="${MY_NDK}" \ + -DCMAKE_LIBRARY_OUTPUT_DIRECTORY="./build/android/libs/${ARME_ABI}" \ + -DCMAKE_BUILD_TYPE="Release" \ + -DCMAKE_TOOLCHAIN_FILE="${MY_NDK}/build/cmake/android.toolchain.cmake" \ + -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} \ + -DANDROID_TOOLCHAIN="clang" \ + -DCMAKE_C_FLAGS="-fpic -fexceptions -frtti -Wno-narrowing" \ + -DCMAKE_CXX_FLAGS="-fpic -fexceptions -frtti -Wno-narrowing" \ + -DANDROID_STL="c++_static" \ + + cd ${BUILD_DIR} + make + + cd ${PRE_EXE_DIR} + mkdir -p ${OUTPUT_LIBS}/${ARME_ABI}/ + mv ${PRE_EXE_DIR}/lib/* ${OUTPUT_LIBS}/${ARME_ABI}/ + rm -r ./build/android +} + +for i in ${arme_abis[@]}; do + build_with_armeabi $i +done \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/inc/CAudaciousEqApi.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/inc/CAudaciousEqApi.h new file mode 100644 index 0000000..61a519c --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/inc/CAudaciousEqApi.h @@ -0,0 +1,42 @@ +// +// Created by yangjianli on 2019-10-17. +// +/*** + * 封装eq,支持多个声道的音频 + */ +#ifndef IIR_EQ_ALL_AUDACIOUS_EQ_API_H +#define IIR_EQ_ALL_AUDACIOUS_EQ_API_H + +#include "stdio.h" +#include "CAudaciousEqDef.h" + +namespace SUPERSOUND +{ + class CAudaciousEq; +} + +class CAudaciousEqApi +{ + +public: + CAudaciousEqApi(); + ~CAudaciousEqApi(); +public: + int init(int sample_rate, int channel); + int uninit(); + int32_t set_param(const float *bands); // length is 10 + int32_t set_param(int nType); + int32_t reset(); + int32_t process(float *in, float *out, int length); + int32_t get_latency_ms(); // 返回第一个声道的延迟 + +private: + bool m_b_need_eq; + int m_sample_rate; + int m_channel; + SUPERSOUND::CAudaciousEq* m_eqs; + +}; + + +#endif //IIR_EQ_ALL_AUDACIOUS_EQ_API_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/inc/CAudaciousEqDef.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/inc/CAudaciousEqDef.h new file mode 100644 index 0000000..2244062 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/inc/CAudaciousEqDef.h @@ -0,0 +1,15 @@ +// +// Created by yangjianli on 2020-01-10. 
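// A minimal usage sketch for the CAudaciousEqApi declared above (sample rate,
// channel count, buffer names/length and the preset index are illustrative;
// in/out are assumed to be interleaved float PCM):
//
//   CAudaciousEqApi eq;
//   if (eq.init(44100, 2) == AUDACIOUS_EQ_ERROR_CODE_SUCCESS)
//   {
//       eq.set_param(0);                    // preset index into the STYLES table (0 is commented as "pop")
//       eq.process(in_buf, out_buf, len);   // len = frames * channels
//       eq.uninit();
//   }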
+// + +#ifndef AUDIO_EFFECTS_LIB_CAUDACIOUSEQDEF_H +#define AUDIO_EFFECTS_LIB_CAUDACIOUSEQDEF_H +#include "AudioEffectsDef.h" +// 错误码 +enum AUDACIOUS_EQ_ERROR_CODE { + AUDACIOUS_EQ_ERROR_CODE_SUCCESS = 0, + AUDACIOUS_EQ_ERROR_CODE_INPUT_ERROR = -1, + AUDACIOUSEQAPI_ERROR_INIT_ERROR = -2, +}; + +#endif //AUDIO_EFFECTS_LIB_CAUDACIOUSEQDEF_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/CAudaciousEqApi.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/CAudaciousEqApi.cpp new file mode 100644 index 0000000..62521ae --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/CAudaciousEqApi.cpp @@ -0,0 +1,148 @@ +// +// Created by yangjianli on 2019-10-17. +// + +#include "CAudaciousEq.h" +#include "CAudaciousEqApi.h" +#include "cstring" +#include "iostream" + +CAudaciousEqApi::CAudaciousEqApi() +{ + m_eqs = NULL; +} + +CAudaciousEqApi::~CAudaciousEqApi() +{ + uninit(); +} + +/** + * 初始化,每个channel一个10段均衡器 + * @param sample_rate + * @param channel + * @return + */ +int CAudaciousEqApi::init(int sample_rate, int channel) +{ + m_sample_rate = sample_rate; + m_channel = channel; + m_b_need_eq = false; + if(channel <= 0) + { + return AUDACIOUS_EQ_ERROR_CODE_INPUT_ERROR; + } + m_eqs = new (std::nothrow) SUPERSOUND::CAudaciousEq[channel]; + if(NULL == m_eqs ) + { + return AUDACIOUSEQAPI_ERROR_INIT_ERROR; + } + return AUDACIOUS_EQ_ERROR_CODE_SUCCESS; +} + +/** + * 释放空间 + */ +int CAudaciousEqApi::uninit() +{ + if(m_eqs != NULL) + { + delete [] m_eqs; + m_eqs = NULL; + } + return AUDACIOUS_EQ_ERROR_CODE_SUCCESS; +} + +/** + * 清空缓存数据 + * @return + */ +int CAudaciousEqApi::reset() +{ + for(int i=0;i0 ? m_eqs[0].get_latency_ms() : 0; +} + +/** + * 设置参数 + * @param nType + * @return + */ +int CAudaciousEqApi::set_param(int nType) +{ + for(int i=0;i + +namespace SUPERSOUND +{ + + +CAudaciousArma::CAudaciousArma() +{ + m_a0 = 0; + m_a1 = 0; + m_b0 = 0; + m_b1 = 0; + + reset(); +} + +CAudaciousArma::~CAudaciousArma() +{ + +} + +void CAudaciousArma::reset() +{ + m_y0 = 0; + m_y1 = 0; +} + +int32_t CAudaciousArma::get_latency_ms() +{ + return 0; +} + +int32_t CAudaciousArma::set_param(int32_t fs, float f0, float Q, float gain) +{ + m_gain = pow(10, gain / 20) - 1; + + float th = float(2 * M_PI * f0 / fs); + float C = (1 - tan(th * Q / 2)) / (1 + tan(th * Q / 2)); + + m_a0 = (1 + C) * cosf (th); + m_a1 = -C; + m_b0 = (1 - C) / 2; + m_b1 = -1.005f; + + return AUDACIOUS_EQ_ERROR_CODE_SUCCESS; +} + +float CAudaciousArma::process(float in) +{ + float y0 = in * m_b0 + m_y0 * m_a0 + m_y1 * m_a1; + + in += (y0 + m_y1 * m_b1) * m_gain; + + m_y1 = m_y0; + m_y0 = y0; + + return in; +} + + +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/audacious_arma/CAudaciousArma.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/audacious_arma/CAudaciousArma.h new file mode 100755 index 0000000..7b7c4a0 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/audacious_arma/CAudaciousArma.h @@ -0,0 +1,68 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. 
+ +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//封装开源库 audacious 中的 IIR 的双二阶滤波器带通部分 +//这里只是使用了 ARMA 模型来做,其实也可以改成 AudioCookBook + +#ifndef __AUDACIOUS_ARMA_H__ +#define __AUDACIOUS_ARMA_H__ + +#include +#include "CAudaciousEqDef.h" +namespace SUPERSOUND +{ + + +class CAudaciousArma +{ +public: + CAudaciousArma(); + ~CAudaciousArma(); + +public: + void reset(); + int32_t get_latency_ms(); + int32_t set_param(int32_t fs, float f0, float Q, float gain); + float process(float in); + +private: + float m_a0; + float m_a1; + float m_b0; + float m_b1; + float m_y0; + float m_y1; + float m_gain; +}; + +} + +#endif /* __AUDACIOUS_ARMA_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/audacious_eq/CAudaciousEq.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/audacious_eq/CAudaciousEq.cpp new file mode 100755 index 0000000..b94d713 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/audacious_eq/CAudaciousEq.cpp @@ -0,0 +1,107 @@ + +#include "CAudaciousEq.h" + +namespace SUPERSOUND +{ + + +static const float CF[10] = { + 31.25, 62.5, 125, 250, 500, + 1000, 2000, 4000, 8000, 16000 +}; +static const int STYLES_LEN = 25; +// 线上应用 +// 0 2 3 4 7 8 10 13 17 22 +static const float STYLES[STYLES_LEN][10] = { + {4,2,0,-3,-6,-6,-3,0,1,3}, // pop 0 + {7,6,3,0,0,-4,-6,-6,0,0}, // dance 1 + {3,6,8,3,-2,0,4,7,9,10}, // blues 2 + {0,0,0,0,0,0,-6,-6,-6,-8}, // classic 3 + {0,0,1,4,4,4,0,1,3,3}, // jazz 4 + {5,4,2,0,-2,0,3,6,7,8}, // slow 5 + {6,5,0,-5,-4,0,6,8,8,7}, // electronica 6 + {7,4,-4,7,-2,1,5,7,9,9}, // rock 7 + {5,6,2,-5,1,1,-5,3,8,5}, // country 8 + {-2,-1,-1,0,3,4,3,0,0,1},// voice 9 + {5.8,5.8,3,0,-1.5,-1.5,0,0,0,0},//低音 10 + {13.8,12.6,6.3,0,-1.5,-1.5,0,0,0,0},//超重低音 11 + {5.8,5.8,3,0,-1.5,-1.5,0,1.5,5.8,5.8},//低音&高音 12 + {-3,-3,-3,-3,-1.5,-1.5,0,6.3,9.6,12.3},//高音 13 + {0,0,0,0,0,0,-3,-3,-3,-4.5},//经典 14 + {5.8,3.2,2.1,0,0,-2.7,-2,-2.2,-0.6,-0.1},//舞曲 15 + {5.8,3.2,1.3,-3,-2.3,2.2,3.6,5.8,5.8,5.8},//摇滚 16 + {5.8,5.8,0,-2.9,-2.1,0,3.6,7.4,7.7,7.5},//电子 17 + {-2.9,-2.9,2.5,3.6,4.8,4.7,6,6,3,3},//扬声器(响亮) 18 + {-4.5,-3.8,2.2,2.2,2.2,2.2,2.1,1.5,1.5,1.5},//现场 19 + {-4.5,-4.5,-1.5,-1.5,4.5,4.5,1.5,0,-4.5,-6},//中音 20 + {1.5,4.5,5.8,3,1.5,0,0,0,1.5,3},//流行 21 + {5.5,2.5,0.9,-2.6,-5,-10.6,-12.4,-13.3,-10.6,-6.7},//柔和 22 + {3,3,3,0,-3,-3,0,0,0,0},//柔和低音 23 + {0,0,-4.5,-4.5,-4.5,-4.5,-3,0,4.5,4.5},//柔和高音 24 +}; + +CAudaciousEq::CAudaciousEq() +{ + +} + +CAudaciousEq::~CAudaciousEq() +{ + +} + +void CAudaciousEq::reset() +{ + for(int32_t i = 0; i < 10; i++) + { + m_arma[i].reset(); + } +} + +int32_t CAudaciousEq::get_latency_ms() +{ + int32_t latecy = 0; + + for(int32_t i = 0; i < 10; i++) + { + latecy += m_arma[i].get_latency_ms(); + } + + return latecy; +} + +int32_t CAudaciousEq::set_param(int32_t fs, const float *fBands) +{ + int32_t nRet = AUDACIOUS_EQ_ERROR_CODE_SUCCESS; + + for(int32_t i = 0; i < 10; i++) + { + nRet = m_arma[i].set_param(fs, CF[i], 1.2247449f, fBands[i]); + if(nRet != AUDACIOUS_EQ_ERROR_CODE_SUCCESS) + return nRet; + } + + return 
nRet; +} + +int32_t CAudaciousEq::set_param(int32_t fs, int nType) { + int32_t nRet = AUDACIOUS_EQ_ERROR_CODE_SUCCESS; + if(nType >= STYLES_LEN || nType < 0) + { + return AUDACIOUSEQAPI_ERROR_INIT_ERROR; + } + nRet = set_param(fs, STYLES[nType]); + return nRet; +} + +float CAudaciousEq::process(float in) +{ + for(int32_t i = 0; i < 10; i++) + { + in = m_arma[i].process(in); + } + return in; +} + + +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/audacious_eq/CAudaciousEq.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/audacious_eq/CAudaciousEq.h new file mode 100755 index 0000000..1cf3f01 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/iir_eq/src/audacious_eq/CAudaciousEq.h @@ -0,0 +1,63 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//封装开源库 audacious 中的 IIR 的 10 段 Eq 部分 + +#ifndef __AUDACIOUS_EQ_H__ +#define __AUDACIOUS_EQ_H__ + +#include "CAudaciousArma.h" + +namespace SUPERSOUND +{ + +class CAudaciousEq +{ +public: + CAudaciousEq(); + ~CAudaciousEq(); + +public: + void reset(); + int32_t get_latency_ms(); + //外围保证带长为 10 + int32_t set_param(int32_t fs, const float *fBands); + int32_t set_param(int32_t fs, int nType); // 使用预先设置好的类型 + float process(float in); + +private: + // 10 段均衡器 + CAudaciousArma m_arma[10]; +}; + +} + +#endif /* __AUDACIOUS_EQ_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/CMakeLists.txt new file mode 100644 index 0000000..2fc7827 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/CMakeLists.txt @@ -0,0 +1,4 @@ +include_directories(./ inc src) +file(GLOB_RECURSE PHONOGRAPH_SRC_FILES src/*cpp) +add_library(phonograph ${PHONOGRAPH_SRC_FILES}) +#set_target_properties(phonograph PROPERTIES CXX_VISIBILITY_PRESET hidden) \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/inc/CPhonograph.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/inc/CPhonograph.h new file mode 100644 index 0000000..5316aba --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/inc/CPhonograph.h @@ -0,0 +1,30 @@ +#ifndef KALA_AUDIOBASE_PHONOGRAPH_H +#define KALA_AUDIOBASE_PHONOGRAPH_H + +#include +#include "CPhonographDef.h" + +class CPhonograph +{ +public: + CPhonograph(); + virtual ~CPhonograph(); + void reset(); + + int init(int sample_rate, int channel); // set sample rate and channel; + void uninit(); // uninit + + // process input buffer and output size. 
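// A minimal usage sketch (rate, channel count and buffer length are
// illustrative; the short overload filters in place and clips to
// [-32768, 32767], see CPhonograph.cpp):
//
//   CPhonograph pg;
//   if (pg.init(44100, 2) == 0)
//   {
//       pg.process(pcm, n_frames * 2);  // band-limit to the ~1.7-2.2 kHz "phonograph" band
//       pg.uninit();
//   }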
+ int process(short* in_buffer, int in_size); + + int process(float * in_buffer, int in_size); + + int process_lr_independent(float *in_left, float *in_right, int in_out_size); + +private: + void* handles; + int m_samplerate; + int m_channels; + std::vector data; +}; +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/inc/CPhonographDef.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/inc/CPhonographDef.h new file mode 100644 index 0000000..8acf064 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/inc/CPhonographDef.h @@ -0,0 +1,14 @@ +// +// Created by yangjianli on 2020-01-14. +// + +#ifndef AUDIO_EFFECTS_LIB_PHONOGRAPHDEF_H +#define AUDIO_EFFECTS_LIB_PHONOGRAPHDEF_H +#include "AudioEffectsDef.h" +enum PG_ERR { + PG_ERR_SUCCESS = 0, + PG_ERR_PARAM = -1, + PG_ERR_BASE_H_MALLOC_NULL = -2, +}; + +#endif //AUDIO_EFFECTS_LIB_PHONOGRAPHDEF_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/src/CPhonograph.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/src/CPhonograph.cpp new file mode 100644 index 0000000..b0da905 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/phonograph/src/CPhonograph.cpp @@ -0,0 +1,309 @@ +/************************************************************************/ +/* Phonograph Eimulator */ +/* copy right reserved */ +/************************************************************************/ + +#include "math.h" +#include "stdlib.h" +#include "stdio.h" +#include "memory.h" +#include "CPhonograph.h" +#include "biquad_filters/BiquadFilter.h" + +// This is a trick. When enabled, all channels refer to the 1st channel +#define PSEUDO_MULTICHANNELS 0 // defined in KTYPED.h + +#define VERSION_ID_NUMBER 100 /* 7.21,2013 */ + +using std::vector; +using namespace BiquadFilter; + + +#define HPF_FREQ 1712 / 44100.0f +#define LPF_FREQ 2222 / 44100.0f +#define HPFQ 2.1f +#define LPFQ 2.1f + + +class CPhonographFilters +{ +public: + CPhonographFilters(); + virtual ~CPhonographFilters(); + void reset(); + + bool isAllocated() const; + + void filtering(std::vector *x); + +private: + LPFilter* lpf; + HPFilter* hpf; +}; + +bool CPhonographFilters::isAllocated() const +{ + return lpf != NULL && hpf != NULL; +} + + + +void CPhonographFilters::filtering(std::vector *x) +{ + hpf->filtering(x); + lpf->filtering(x); +} + + + +CPhonographFilters::CPhonographFilters() +{ + hpf = new HPFilter(HPF_FREQ, HPFQ); + lpf = new LPFilter(LPF_FREQ, LPFQ); + + if (!hpf || !lpf) + { + if (hpf) + { + delete hpf; + hpf = NULL; + } + if (lpf) + { + delete lpf; + lpf = NULL; + } + } +} + +void CPhonographFilters::reset() +{ + if (hpf) + { + hpf->reset(); + } + if (lpf) + { + lpf->reset(); + } +} + +CPhonographFilters::~CPhonographFilters() +{ + delete lpf; + delete hpf; + + lpf = NULL; + hpf = NULL; +} + +CPhonograph::CPhonograph() +{ + m_samplerate = 0; + m_channels = 0; + handles = NULL; +} + +void CPhonograph::reset() +{ + if (handles) + { + for (int chn = 0; chn < m_channels; chn++) + { + CPhonographFilters* filters = ((CPhonographFilters**)handles)[chn]; + + if (filters) + { + filters->reset(); + } + + } + } +} + +CPhonograph::~CPhonograph() +{ + uninit(); +} + +int CPhonograph::init(int sample_rate, int channel) +{ + m_samplerate = sample_rate; + m_channels = channel; + + + handles = new CPhonographFilters*[m_channels]; + + if (!handles) + { + return PG_ERR_BASE_H_MALLOC_NULL; + } + + for (int chn = 0; chn < m_channels; chn++) + { + CPhonographFilters* 
filters = new CPhonographFilters(); + + ((CPhonographFilters**)handles)[chn] = filters; + + if (!filters->isAllocated()) + { + do{ + CPhonographFilters* filters = ((CPhonographFilters**)handles)[chn]; + delete filters; + + } while (chn--); + + delete [] (CPhonographFilters**)handles; + handles = NULL; + + return PG_ERR_BASE_H_MALLOC_NULL; + } + + } + + return 0; +} + +void CPhonograph::uninit() +{ + if (handles) + { + for (int chn = 0; chn < m_channels; chn++) + { + CPhonographFilters* filters = ((CPhonographFilters**)handles)[chn]; + + if (filters) + { + delete filters; + } + + } + + delete [] (CPhonographFilters**)handles; + handles = NULL; + } +} + +int CPhonograph::process(short* inBuffer, int inSize) +{ + if (0 != (inSize % m_channels)) + { + return PG_ERR_PARAM; + } + + if (handles) + { + + data.reserve(inSize / m_channels); + data.resize(inSize / m_channels); + + for (int chn = 0; chn < m_channels; chn++) + { + CPhonographFilters* filters = ((CPhonographFilters**)handles)[chn]; + short* audio = chn + inBuffer; + + if (filters) + { + if (PSEUDO_MULTICHANNELS && chn>0) + { + short* audioref = (short*)inBuffer; + + for (unsigned int i = 0; i < data.size(); i++){ + audio[0] = audioref[0]; + audio += m_channels; + audioref += m_channels; + } + } + else + { + + for (unsigned int i = 0; i < data.size(); i++){ + // data[i]= audio[i * m_channels + chn] / 32768.0; + data[i]= audio[0] / 32768.0f; + audio += m_channels; + } + + filters->filtering(&data); + + short* audio = chn + inBuffer; + + for (unsigned int i = 0; i < data.size(); i++){ + int sample = (int)(32767.0f * data[i]); +// int sample = (int)(20767.0f * data[i]); +#if 1 + if (sample>32767) + sample = 32767; + else if (sample<-32768) + sample = -32768; +#endif + // audio[i * m_channels + chn] = (short)sample; + audio[0] = (short)sample; + audio += m_channels; + } + } // if (PSEUDO_MULTICHANNELS && chn>0) + + } + + } + } + + return inSize; +} + +int CPhonograph::process_lr_independent(float *in_left, float *in_right, int in_out_size) +{ + if (2 != m_channels) + { + return PG_ERR_PARAM; + } + data.reserve(in_out_size); + data.resize(in_out_size); + if (handles) { + data.assign(in_left, in_left + in_out_size); + CPhonographFilters* filters = ((CPhonographFilters**)handles)[0]; + filters->filtering(&data); + for (unsigned int i = 0; i < data.size(); i++) { + in_left[i] = data[i]; + } + + data.assign(in_right, in_right + in_out_size); + filters = ((CPhonographFilters**)handles)[1]; + filters->filtering(&data); + for (unsigned int i = 0; i < data.size(); i++) { + in_right[i] = data[i]; + } + } + return in_out_size; +} + +int CPhonograph::process(float* inBuffer, int inSize) { + if (handles) { + data.reserve(inSize / m_channels); + data.resize(inSize / m_channels); + for (int chn = 0; chn < m_channels; chn++) { + CPhonographFilters* filters = ((CPhonographFilters**)handles)[chn]; + float* audio = chn + inBuffer; + if (filters) { + for (unsigned int i = 0; i < data.size(); i++) { + data[i]= audio[0]; + audio += m_channels; + } + filters->filtering(&data); + float* audio = chn + inBuffer; + for (unsigned int i = 0; i < data.size(); i++) { + float sample = data[i]; +// float sample = (20767.0f / 32768) * data[i]; +#if 1 + if (sample > (32767.0f / 32768)) + sample = 32767.0f / 32768; + else if (sample < -1.0f) + sample = -1.0f; +#endif + audio[0] = sample; + audio += m_channels; + } + } + } + } + return inSize; +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/CMakeLists.txt 
b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/CMakeLists.txt new file mode 100644 index 0000000..57010ef --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/CMakeLists.txt @@ -0,0 +1,4 @@ +include_directories(./ inc) +file(GLOB_RECURSE REVERB_SRC_FILES src/*cpp) +add_library(reverb ${REVERB_SRC_FILES}) +#set_target_properties(reverb PROPERTIES CXX_VISIBILITY_PRESET hidden) \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/inc/CReverb.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/inc/CReverb.h new file mode 100644 index 0000000..caa6446 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/inc/CReverb.h @@ -0,0 +1,81 @@ +#ifndef C_REVERB_RATION_H +#define C_REVERB_RATION_H +/************************************************************************/ +/* reverberation */ +/************************************************************************/ + +//#include "KalaInterfaces.h" +#include +#include "CReverbDef.h" +/* version before kala v2.6, only 0\4\5\6 used. kala 1.0 use id [0,3]*/ +#define KALA_VB_RECORD_STUDIO 0 // lu yin peng +#define KALA_VB_KTV 1 // KTV +#define KALA_VB_CONCERT 2 // yan chang hui +#define KALA_VB_THEATER 3 // ju chang +#define KALA_VB_NEW_KTV 4 // new ktv +#define KALA_VB_NEW_CONCERT 5 // new concert +#define KALA_VB_NEW_THEATER 6 // new theater + +/* id not used */ +#define KALA_VB_ID_7 7 // lu yin pen +#define KALA_VB_ID_8 8 // lu yin pen +#define KALA_VB_ID_9 9 // lu yin pen + +/* id used for kala v3.0's quick version */ +#define KALA_VB_ID_10 10 // +#define KALA_VB_ID_11 11 // +#define KALA_VB_ID_12 12 // +#define KALA_VB_ID_13 13 // +#define KALA_VB_ID_14 14 // +#define KALA_VB_ID_15 15 // +#define KALA_VB_ID_16 16 // +#define KALA_VB_ID_17 17 // +#define KALA_VB_ID_18 18 // + +#include "CReverbParams.h" + +class CReverb +{ +public: + CReverb(); + ~CReverb(); + +public: + int init(int sample_rate, int channel); + void reset(); + void uninit(); + + void get_id_range(int *max_val, int *min_val); + int get_id_default(); + + int set_type_id(int type_id); + int set_params(AE_PARAMS_REVERB* param); + int get_type_id(); + char * get_name_by_id(int type_id); + + int set_room_size(float room_size); + int set_wet(float wet); + + float get_room_size(); + float get_wet(); + + int get_latency(); + + int process(short *in_buffer, int in_size, short *out_buffer, int out_size); + int process(float *in_buffer, int in_size, float *out_buffer, int out_size); + + int process_lr_independent(float *in_left, float *in_right, float *out_left, float *out_right, int in_out_size); + +private: + int m_id; + int m_sampleRate; + int m_channel; + bool m_need_process; + void* m_pvb; +}; + + + +#endif + + diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/inc/CReverbDef.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/inc/CReverbDef.h new file mode 100644 index 0000000..7b55dc5 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/inc/CReverbDef.h @@ -0,0 +1,24 @@ +// +// Created by yangjianli on 2020-01-13. 
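// A minimal usage sketch for the CReverb wrapper declared above (sample rate,
// buffer length and the preset choice are illustrative; ids are the KALA_VB_*
// defines from CReverb.h):
//
//   CReverb rb;
//   if (rb.init(44100, 2) == 0)
//   {
//       rb.set_type_id(KALA_VB_NEW_KTV);       // pick a preset
//       rb.process(in_pcm, n, out_pcm, n);     // n interleaved samples in/out
//       rb.uninit();
//   }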
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CREVERBDEF_H
+#define AUDIO_EFFECTS_LIB_CREVERBDEF_H
+
+#include "stdlib.h"
+#include "AudioEffectsConf.h"
+enum RB_ERR {
+    RB_ERR_SUCCESS = 0,
+    RB_ERR_NO_MEMORY = -1,
+    RB_ERR_PARAM = -2,
+    RB_ERR_HAS_SET_PARAM = -3,
+    RB_ERR_AUTOTUNE_INIT = -4,
+    RB_ERR_BASE_H_NULL = -5,
+    RB_ERR_BASE_H_MALLOC_NULL = -6,
+};
+
+#ifndef safe_free
+#define safe_free(p) { if(p) { free(p); (p)=NULL; } }
+#endif
+
+#endif //AUDIO_EFFECTS_LIB_CREVERBDEF_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/CReverb.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/CReverb.cpp
new file mode 100644
index 0000000..b8a6f44
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/CReverb.cpp
@@ -0,0 +1,663 @@
+#include "CReverbDef.h"
+#include "CReverb.h"
+#include "verb/revmodel.h"
+
+#define MAX_VERB_VALUE 19
+#define MIN_VERB_VALUE 0
+#define MAX_NUMBER_OF_VERB (MAX_VERB_VALUE + 1)
+const char g_p_verb_id_names[][64] = {
+    { "studio" },
+    { "KTV" },
+    { "concert" },
+    { "theater" },
+    { "ktv 2" },
+    { "concert 2" },
+    { "theater 2" },
+    { "7" },
+    { "8" },
+    { "9" },
+    { "10" },
+    { "11" },
+    { "12" },
+    { "13" },
+    { "14" },
+    { "15" },
+    { "16" },
+    { "17" },
+    { "distant" },
+    { "custom" }
+};
+
+#ifndef SHORTMAX
+#define SHORTMAX 32767
+#endif
+#ifndef SHORTMIN
+#define SHORTMIN -32768
+#endif
+
+#ifndef Clip_short
+#define Clip_short(x) (short)((x)>SHORTMAX?SHORTMAX:((x)<SHORTMIN?SHORTMIN:(x)))
+#endif
+
+CReverb::CReverb()
+{
+    m_id = KALA_VB_RECORD_STUDIO;
+    m_sampleRate = 0;
+    m_channel = 0;
+    m_need_process = false;
+    m_pvb = NULL;
+}
+
+CReverb::~CReverb()
+{
+    uninit();
+}
+
+int CReverb::init(int sample_rate, int channel)
+{
+    revmodel* pmd = new revmodel();
+    if (pmd == NULL)
+    {
+        return RB_ERR_NO_MEMORY;
+    }
+    pmd->mute();
+
+    m_channel = channel;
+    m_sampleRate = sample_rate;
+
+    m_pvb = (void*)pmd;
+    m_need_process = false;
+    return 0;
+}
+
+void CReverb::reset()
+{
+    revmodel* pmv = (revmodel*)m_pvb;
+    if (pmv != NULL)
+    {
+        pmv->mute();
+    }
+    return;
+}
+
+void CReverb::uninit()
+{
+    revmodel* pmv = (revmodel*)m_pvb;
+    if (pmv != NULL)
+    {
+        delete pmv;
+        pmv = NULL;
+
+        m_pvb = NULL;
+        m_need_process = false;
+    }
+}
+
+int CReverb::get_latency()
+{
+    return 0;
+}
+
+void CReverb::get_id_range(int *max_val, int *min_val)
+{
+    *max_val = MAX_VERB_VALUE;
+    *min_val = MIN_VERB_VALUE;
+}
+
+int CReverb::get_id_default()
+{
+    return KALA_VB_RECORD_STUDIO;
+}
+
+int CReverb::set_room_size(float room_size)
+{
+    if (room_size > 1.0f || room_size < 0.0f)
+        return RB_ERR_PARAM;
+
+    revmodel* pmd = (revmodel*)m_pvb;
+    if (pmd == NULL)
+    {
+        return RB_ERR_BASE_H_NULL;
+    }
+    pmd->setroomsize(room_size);
+    return 0;
+}
+
+float CReverb::get_room_size()
+{
+    revmodel* pmd = (revmodel*)m_pvb;
+    float roomSize = pmd->getroomsize();
+    return roomSize;
+}
+
+int CReverb::set_wet(float wet)
+{
+    if (wet > 1.0f || wet < 0.0f)
+        return RB_ERR_PARAM;
+    revmodel* pmd = (revmodel*)m_pvb;
+    if (pmd == NULL)
+    {
+        return RB_ERR_BASE_H_NULL;
+    }
+    pmd->setwet(wet/3.0f);
+    return 0;
+}
+
+float CReverb::get_wet()
+{
+    revmodel* pmd = (revmodel*)m_pvb;
+    float wet = pmd->getwet();
+    wet *= 3.0f;
+    return wet;
+}
+
+int CReverb::set_type_id(int type_id)
+{
+    m_need_process = true;
+    revmodel* pmd = (revmodel*)m_pvb;
+    if (pmd == NULL)
+    {
+        return RB_ERR_BASE_H_NULL;
+    }
+
+    // check input: clamp into [MIN_VERB_VALUE, MAX_VERB_VALUE]
+    if (type_id < MIN_VERB_VALUE)
+    {
+        type_id = MIN_VERB_VALUE;
+    }
+    else if (type_id > MAX_VERB_VALUE)
+    {
+        type_id = MAX_VERB_VALUE;
+    }
+
+    m_id = type_id;
+
+    /* an id-indexed parameter table would be cleaner than this switch; left as a later optimization */
+    switch (m_id)
+    {
+    /***************** v1.0, id 0~3, add here ************************************/
+    case KALA_VB_RECORD_STUDIO:
+    {
+        pmd->setmode(0.2f);
+        pmd->setroomsize(0);
+        pmd->setdamp(0);
+        pmd->setwet(0);
+        pmd->setdry(0.8f);
+        pmd->setwidth(0.5f);
+        break;
+    }
+    case KALA_VB_KTV:
+    {
+        // for ktv
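+        // All of the presets in this switch differ only in these six
+        // freeverb-style controls, each normalized to [0, 1]: mode
+        // (values >= freezemode, i.e. 0.5, freeze the reverb tail --
+        // see tuning.h), room size, damping, wet level, dry level and
+        // stereo width.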
pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + + case KALA_VB_CONCERT: + { + // for concert ok + pmd->setmode(0.3f); + pmd->setroomsize(0.50f); + pmd->setdamp(0.6f); + pmd->setwet(0.3f); + pmd->setdry(0.6f); + pmd->setwidth(0.56f); + break; + } + + case KALA_VB_THEATER: + { + // for theater + pmd->setmode(0.0); + pmd->setroomsize(0.6f); + pmd->setdamp(0.2f); + pmd->setwet(0.4f); + pmd->setdry(0.5f); + pmd->setwidth(0.20f); + break; + } + /***************** kala v2.6, id 0\4\5\6, add here ************************************/ + case KALA_VB_NEW_KTV: + { + + // v2.2 + pmd->setmode(0.2f); + pmd->setroomsize(0.7f); + pmd->setdamp(0.9f); + pmd->setwet(0.11f); + pmd->setdry(0.44f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_NEW_CONCERT: + { + // v2.2 + pmd->setmode(0.3f); + pmd->setroomsize(0.60f); + pmd->setdamp(0.6f); + pmd->setwet(0.18f); + pmd->setdry(0.37f); + pmd->setwidth(0.56f); + + break; + } + case KALA_VB_NEW_THEATER: + { + + //old version + //pmd->setmode(0.0); + //pmd->setroomsize(0.6f); + //pmd->setdamp(0.2f); + //pmd->setwet(0.4f); + //pmd->setdry(0.5f); + //pmd->setwidth(0.20f); + + // new 1.0 + //pmd->setmode(0.0); + //pmd->setroomsize(0.8f); + //pmd->setdamp(0.8f); + //pmd->setwet(0.3f); + //pmd->setdry(0.4f); + //pmd->setwidth(0.50f); + + // v2.0 + //pmd->setmode(0.0); + //pmd->setroomsize(0.8f); + //pmd->setdamp(0.8f); + //pmd->setwet(0.22f); + //pmd->setdry(0.14f); + //pmd->setwidth(0.50f); + + //v2.2 + pmd->setmode(0.0); + pmd->setroomsize(0.8f); + pmd->setdamp(0.8f); + pmd->setwet(0.248f); + pmd->setdry(0.182f); + pmd->setwidth(0.50f); + + break; + } + /***************** kala v2.6, id 0\4\5\6, add here ************************************/ + case KALA_VB_ID_7: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_8: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_9: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_10: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_11: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.35f); + pmd->setdamp(0.5f); + pmd->setwet(0.20f); + pmd->setdry(0.5f); + pmd->setwidth(0.5f); + + break; + } + case KALA_VB_ID_12: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.4f); + pmd->setdamp(0.69f); + pmd->setwet(0.14f); + pmd->setdry(0.22f); + pmd->setwidth(0.82f); + + break; + } + case KALA_VB_ID_13: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.41f); + pmd->setdamp(0.18f); + pmd->setwet(0.16f); + pmd->setdry(0.24f); + pmd->setwidth(0.68f); + + break; + } + case KALA_VB_ID_14: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.79f); + pmd->setdamp(0.63f); + pmd->setwet(0.14f); + pmd->setdry(0.26f); + pmd->setwidth(0.92f); + + break; + } + case KALA_VB_ID_15: + { + // for ktv + pmd->setmode(0.2f); + pmd->setroomsize(0.84f); + pmd->setdamp(0.25f); + pmd->setwet(0.12f); + pmd->setdry(0.22f); + pmd->setwidth(0.76f); + + break; + } + case KALA_VB_ID_16: + { + 
// for ktv
+        pmd->setmode(0.2f);
+        pmd->setroomsize(0.88f);
+        pmd->setdamp(0.42f);
+        pmd->setwet(0.12f);
+        pmd->setdry(0.17f);
+        pmd->setwidth(0.84f);
+
+        break;
+    }
+    case KALA_VB_ID_17:
+    {
+        // for ktv
+        pmd->setmode(0.2f);
+        pmd->setroomsize(0.35f);
+        pmd->setdamp(0.5f);
+        pmd->setwet(0.20f);
+        pmd->setdry(0.5f);
+        pmd->setwidth(0.5f);
+
+        break;
+    }
+    case KALA_VB_ID_18:
+    {
+        // for ktv
+        pmd->setmode(0.2f);
+        pmd->setroomsize(0.65f);
+        pmd->setdamp(0.25f);
+        pmd->setwet(0.25f);
+        pmd->setdry(0.6f);
+        pmd->setwidth(0.95f);
+
+        break;
+    }
+    default:
+    {
+        pmd->setmode(1);
+        break;
+    }
+    }
+
+    return 0;
+}
+
+int CReverb::get_type_id()
+{
+    return m_id;
+}
+
+char* CReverb::get_name_by_id(int type_id)
+{
+    if (type_id < 0)
+    {
+        type_id = 0;
+    }
+    else if (type_id > MAX_VERB_VALUE - 1)
+    {
+        type_id = MAX_VERB_VALUE - 1;
+    }
+
+    return (char*)g_p_verb_id_names[type_id];
+}
+
+int CReverb::process(short *in_buffer, int in_size, short *out_buffer, int out_size)
+{
+    if(!m_need_process)
+    {
+        if(in_buffer != out_buffer)
+        {
+            // bypass: copy the input through unchanged
+            memcpy(out_buffer, in_buffer, sizeof(short) * out_size);
+        }
+        return RB_ERR_SUCCESS;
+    }
+
+    /* check input */
+    revmodel* pmd = (revmodel*)m_pvb;
+    if (pmd == NULL)
+    {
+        return RB_ERR_BASE_H_NULL;
+    }
+
+    if (in_size != out_size)
+    {
+        return RB_ERR_PARAM;
+    }
+
+    in_size /= m_channel;
+    out_size /= m_channel;
+
+    /* for mono */
+    if (m_channel == 1)
+    {
+        int j;
+        short* pInSample = in_buffer;
+        short* pOutSample = out_buffer;
+
+        for (j = 0; j < in_size; j++)
+        {
+            float saminL;
+            float samoutL;
+
+            saminL = pInSample[j]/32768.0f;
+            pmd->processMono(&saminL, &samoutL, 1, 0);
+
+            pOutSample[j] = (short)Clip_short((int)(samoutL*32768.0f));
+        }
+    }
+    else if (m_channel == 2)
+    {
+        int j;
+        short* pInSample = in_buffer;
+        short* pOutSample = out_buffer;
+
+        for (j = 0; j < in_size; j++)
+        {
+            float saminL,saminR;
+            float samoutL,samoutR;
+
+            saminL = pInSample[j+j]/32768.0f;
+            saminR = pInSample[j+j+1]/32768.0f;
+
+            pmd->processreplace(&saminL,&saminR,&samoutL,&samoutR, 1,0);
+
+            pOutSample[j+j] = (short)Clip_short((int)(samoutL*32768.0f));
+            pOutSample[j+j+1] = (short)Clip_short((int)(samoutR*32768.0f));
+        }
+    }
+
+    return out_size;
+}
+
+int CReverb::process_lr_independent(float *in_left, float *in_right, float *out_left, float *out_right, int in_out_size)
+{
+    if(!m_need_process)
+    {
+        if(in_left != out_left)
+        {
+            memcpy(out_left, in_left, sizeof(float) * in_out_size);
+        }
+        if(in_right != out_right)
+        {
+            memcpy(out_right, in_right, sizeof(float) * in_out_size);
+        }
+        return RB_ERR_SUCCESS;
+    }
+
+    if (2 != m_channel)
+    {
+        return RB_ERR_PARAM;
+    }
+    /* check input */
+    revmodel* pmd = (revmodel*)m_pvb;
+    if (pmd == NULL)
+    {
+        return RB_ERR_BASE_H_NULL;
+    }
+
+    int j;
+    for (j = 0; j < in_out_size; j++)
+    {
+        pmd->processreplace(in_left + j, in_right + j, out_left + j, out_right + j, 1, 0);
+    }
+    return in_out_size;
+}
+
+int CReverb::process(float *in_buffer, int in_size, float *out_buffer, int out_size)
+{
+    if(!m_need_process)
+    {
+        if(in_buffer != out_buffer)
+        {
+            // bypass: copy the input through unchanged
+            memcpy(out_buffer, in_buffer, sizeof(float) * out_size);
+        }
+        return RB_ERR_SUCCESS;
+    }
+
+    /* check input */
+    revmodel* pmd = (revmodel*)m_pvb;
+    if (pmd == NULL)
+    {
+        return RB_ERR_BASE_H_NULL;
+    }
+
+    if (in_size != out_size)
+    {
+        return RB_ERR_PARAM;
+    }
+
+    in_size /= m_channel;
+    out_size /= m_channel;
+
+    /* for mono */
+    if (m_channel == 1)
+    {
+        int j;
+        float* pInSample = in_buffer;
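+        // Mono path: one sample per call through processMono()
+        // (numsamples = 1, skip = 0, i.e. no interleave stride).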
+        float* pOutSample = out_buffer;
+
+        for (j = 0; j < in_size; j++)
+        {
+            float saminL;
+            float samoutL;
+
+            saminL = pInSample[j];
+            pmd->processMono(&saminL, &samoutL, 1, 0);
+
+            pOutSample[j] = samoutL;
+        }
+    }
+    else if (m_channel == 2)
+    {
+        int j;
+        float* pInSample = in_buffer;
+        float* pOutSample = out_buffer;
+
+        for (j = 0; j < in_size; j++)
+        {
+            float saminL,saminR;
+            float samoutL,samoutR;
+
+            saminL = pInSample[j+j];
+            saminR = pInSample[j+j+1];
+
+            pmd->processreplace(&saminL,&saminR,&samoutL,&samoutR, 1,0);
+
+            pOutSample[j+j] = samoutL;
+            pOutSample[j+j+1] = samoutR;
+        }
+    }
+
+    return out_size;
+}
+
+int CReverb::set_params(AE_PARAMS_REVERB *param)
+{
+    revmodel* pmd = (revmodel*)m_pvb;
+    if (pmd == NULL)
+    {
+        return RB_ERR_BASE_H_NULL;
+    }
+    if(NULL == param)
+    {
+        pmd->setmode(1);
+        m_need_process = false;
+        return RB_ERR_SUCCESS;
+    }
+
+    pmd->setmode(param->mode);
+    pmd->setroomsize(param->room_size);
+    pmd->setdamp(param->damp);
+    pmd->setwet(param->wet);
+    pmd->setdry(param->dry);
+    pmd->setwidth(param->width);
+
+    m_need_process = true;
+    return RB_ERR_SUCCESS;
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/allpass.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/allpass.cpp
new file mode 100644
index 0000000..850337e
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/allpass.cpp
@@ -0,0 +1,36 @@
+// Allpass filter implementation
+//
+// Written by Jezar at Dreampoint, June 2000
+// http://www.dreampoint.co.uk
+// This code is public domain
+
+#include "allpass.h"
+
+allpass::allpass()
+{
+    bufidx = 0;
+}
+
+void allpass::setbuffer(float *buf, int size)
+{
+    buffer = buf;
+    bufsize = size;
+}
+
+void allpass::mute()
+{
+    for (int i=0; i<bufsize; i++)
+        buffer[i] = 0;
+}
+
+void allpass::setfeedback(float val)
+{
+    feedback = val;
+}
+
+float allpass::getfeedback()
+{
+    return feedback;
+}
+
+//ends
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/allpass.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/allpass.h
new file mode 100644
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/allpass.h
+// Allpass filter declaration
+//
+// Written by Jezar at Dreampoint, June 2000
+// http://www.dreampoint.co.uk
+// This code is public domain
+
+#ifndef _allpass_
+#define _allpass_
+
+class allpass
+{
+public:
+    allpass();
+    void setbuffer(float *buf, int size);
+    inline float process(float inp);
+    void mute();
+    void setfeedback(float val);
+    float getfeedback();
+private:
+    float feedback;
+    float *buffer;
+    int bufsize;
+    int bufidx;
+};
+
+// Big to inline - but crucial for speed
+inline float allpass::process(float input)
+{
+    float output;
+    float bufout;
+
+    bufout = buffer[bufidx];
+
+    output = -input + bufout;
+    buffer[bufidx] = input + (bufout*feedback);
+
+    if(++bufidx>=bufsize) bufidx = 0;
+
+    return output;
+}
+
+#endif//_allpass
+
+//ends
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/comb.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/comb.cpp
new file mode 100644
index 0000000..62be706
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/comb.cpp
@@ -0,0 +1,48 @@
+// Comb filter implementation
+//
+// Written by Jezar at Dreampoint, June 2000
+// http://www.dreampoint.co.uk
+// This code is public domain
+
+#include "comb.h"
+
+comb::comb()
+{
+    filterstore = 0;
+    bufidx = 0;
+}
+
+void comb::setbuffer(float *buf, int size)
+{
+    buffer = buf;
+    bufsize = size;
+}
+
+void comb::mute()
+{
+    for (int i=0; i<bufsize; i++)
+        buffer[i] = 0;
+}
+
+void comb::setdamp(float val)
+{
+    damp1 = val;
+    damp2 = 1-val;
+}
+
+float comb::getdamp()
+{
+    return damp1;
+}
+
+void comb::setfeedback(float val)
+{
+    feedback = val;
+}
+
+float comb::getfeedback()
+{
+    return feedback;
+}
+
+//ends
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/comb.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/comb.h
new file mode 100644
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/comb.h
+// Comb filter declaration
+//
+// Written by Jezar at Dreampoint, June 2000
+// http://www.dreampoint.co.uk
+// This code is public domain
+
+#ifndef _comb_
+#define _comb_
+
+class comb
+{
+public:
+    comb();
+    void setbuffer(float *buf, int size);
+    inline float process(float inp);
+    void mute();
+    void setdamp(float val);
+    float getdamp();
+    void setfeedback(float val);
+    float getfeedback();
+private:
+    float feedback;
+    float filterstore;
+    float damp1;
+    float damp2;
+    float *buffer;
+    int bufsize;
+    int bufidx;
+};
+
+// Big to inline - but crucial for speed
+inline float comb::process(float input)
+{
+    float output;
+
+    output = buffer[bufidx];
+
+    filterstore = (output*damp2) + (filterstore*damp1);
+
+    buffer[bufidx] = input + (filterstore*feedback);
+
+    if(++bufidx>=bufsize) bufidx = 0;
+
+    return output;
+}
+
+#endif //_comb_
+
+//ends
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/revmodel.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/revmodel.cpp
new file mode 100644
index 0000000..c2d0ea6
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/revmodel.cpp
@@ -0,0 +1,493 @@
+// Reverb model implementation
+//
+// Written by Jezar at Dreampoint, June 2000
+// http://www.dreampoint.co.uk
+// This code is public domain
+
+#include "revmodel.h"
+//#include "MSdcommon.h"
+#include "CReverbDef.h"
+#include <string.h>
+
+revmodel::revmodel()
+{
+    //
+    mallocBuffers();
+
+    // Tie the components to their buffers
+    combL[0].setbuffer(bufcombL1,combtuningL1);
+    combR[0].setbuffer(bufcombR1,combtuningR1);
+    combL[1].setbuffer(bufcombL2,combtuningL2);
+    combR[1].setbuffer(bufcombR2,combtuningR2);
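+    // (wiring continues below) Each channel owns numcombs (8) parallel
+    // combs and numallpasses (4) series allpasses; every right-channel
+    // buffer is stereospread (23) samples longer than its left twin
+    // (see tuning.h) so the two channels decorrelate.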
+ combL[2].setbuffer(bufcombL3,combtuningL3); + combR[2].setbuffer(bufcombR3,combtuningR3); + combL[3].setbuffer(bufcombL4,combtuningL4); + combR[3].setbuffer(bufcombR4,combtuningR4); + combL[4].setbuffer(bufcombL5,combtuningL5); + combR[4].setbuffer(bufcombR5,combtuningR5); + combL[5].setbuffer(bufcombL6,combtuningL6); + combR[5].setbuffer(bufcombR6,combtuningR6); + combL[6].setbuffer(bufcombL7,combtuningL7); + combR[6].setbuffer(bufcombR7,combtuningR7); + combL[7].setbuffer(bufcombL8,combtuningL8); + combR[7].setbuffer(bufcombR8,combtuningR8); + allpassL[0].setbuffer(bufallpassL1,allpasstuningL1); + allpassR[0].setbuffer(bufallpassR1,allpasstuningR1); + allpassL[1].setbuffer(bufallpassL2,allpasstuningL2); + allpassR[1].setbuffer(bufallpassR2,allpasstuningR2); + allpassL[2].setbuffer(bufallpassL3,allpasstuningL3); + allpassR[2].setbuffer(bufallpassR3,allpasstuningR3); + allpassL[3].setbuffer(bufallpassL4,allpasstuningL4); + allpassR[3].setbuffer(bufallpassR4,allpasstuningR4); + + // Set default values + allpassL[0].setfeedback(0.5f); + allpassR[0].setfeedback(0.5f); + allpassL[1].setfeedback(0.5f); + allpassR[1].setfeedback(0.5f); + allpassL[2].setfeedback(0.5f); + allpassR[2].setfeedback(0.5f); + allpassL[3].setfeedback(0.5f); + allpassR[3].setfeedback(0.5f); + setwet(initialwet); + setroomsize(initialroom); + setdry(initialdry); + setdamp(initialdamp); + setwidth(initialwidth); + setmode(initialmode); + + // Buffer will be full of rubbish - so we MUST mute them + mute(); +} + +revmodel::~revmodel() +{ + safe_free(bufallpassL1); + safe_free(bufallpassL2); + safe_free(bufallpassL3); + safe_free(bufallpassL4); + safe_free(bufallpassR1); + safe_free(bufallpassR2); + safe_free(bufallpassR3); + safe_free(bufallpassR4); + + safe_free(bufcombL1); + safe_free(bufcombL2); + safe_free(bufcombL3); + safe_free(bufcombL4); + safe_free(bufcombL5); + safe_free(bufcombL6); + safe_free(bufcombL7); + safe_free(bufcombL8); + + safe_free(bufcombR1); + safe_free(bufcombR2); + safe_free(bufcombR3); + safe_free(bufcombR4); + safe_free(bufcombR5); + safe_free(bufcombR6); + safe_free(bufcombR7); + safe_free(bufcombR8); + +} + +int revmodel::mallocBuffers() +{ + //int ires; + + bufallpassL1 = NULL; + bufallpassL2 = NULL; + bufallpassL3 = NULL; + bufallpassL4 = NULL; + bufallpassR1 = NULL; + bufallpassR2 = NULL; + bufallpassR3 = NULL; + bufallpassR4 = NULL; + + bufcombL1 = NULL; + bufcombL2 = NULL; + bufcombL3 = NULL; + bufcombL4 = NULL; + bufcombL5 = NULL; + bufcombL6 = NULL; + bufcombL7 = NULL; + bufcombL8 = NULL; + + bufcombR1 = NULL; + bufcombR2 = NULL; + bufcombR3 = NULL; + bufcombR4 = NULL; + bufcombR5 = NULL; + bufcombR6 = NULL; + bufcombR7 = NULL; + bufcombR8 = NULL; + + bufcombL1 = (float*)malloc(combtuningL1*sizeof(float)); + bufcombL2 = (float*)malloc(combtuningL2*sizeof(float)); + bufcombL3 = (float*)malloc(combtuningL3*sizeof(float)); + bufcombL4 = (float*)malloc(combtuningL4*sizeof(float)); + bufcombL5 = (float*)malloc(combtuningL5*sizeof(float)); + bufcombL6 = (float*)malloc(combtuningL6*sizeof(float)); + bufcombL7 = (float*)malloc(combtuningL7*sizeof(float)); + bufcombL8 = (float*)malloc(combtuningL8*sizeof(float)); + + bufcombR1 = (float*)malloc(combtuningR1*sizeof(float)); + bufcombR2 = (float*)malloc(combtuningR2*sizeof(float)); + bufcombR3 = (float*)malloc(combtuningR3*sizeof(float)); + bufcombR4 = (float*)malloc(combtuningR4*sizeof(float)); + bufcombR5 = (float*)malloc(combtuningR5*sizeof(float)); + bufcombR6 = (float*)malloc(combtuningR6*sizeof(float)); + bufcombR7 = 
(float*)malloc(combtuningR7*sizeof(float));
+    bufcombR8 = (float*)malloc(combtuningR8*sizeof(float));
+
+    bufallpassL1 = (float*)malloc(allpasstuningL1*sizeof(float));
+    bufallpassL2 = (float*)malloc(allpasstuningL2*sizeof(float));
+    bufallpassL3 = (float*)malloc(allpasstuningL3*sizeof(float));
+    bufallpassL4 = (float*)malloc(allpasstuningL4*sizeof(float));
+    bufallpassR1 = (float*)malloc(allpasstuningR1*sizeof(float));
+    bufallpassR2 = (float*)malloc(allpasstuningR2*sizeof(float));
+    bufallpassR3 = (float*)malloc(allpasstuningR3*sizeof(float));
+    bufallpassR4 = (float*)malloc(allpasstuningR4*sizeof(float));
+
+    memset(bufcombL1,0,combtuningL1*sizeof(float));
+    memset(bufcombL2,0,combtuningL2*sizeof(float));
+    memset(bufcombL3,0,combtuningL3*sizeof(float));
+    memset(bufcombL4,0,combtuningL4*sizeof(float));
+    memset(bufcombL5,0,combtuningL5*sizeof(float));
+    memset(bufcombL6,0,combtuningL6*sizeof(float));
+    memset(bufcombL7,0,combtuningL7*sizeof(float));
+    memset(bufcombL8,0,combtuningL8*sizeof(float));
+    memset(bufcombR1,0,combtuningR1*sizeof(float));
+    memset(bufcombR2,0,combtuningR2*sizeof(float));
+    memset(bufcombR3,0,combtuningR3*sizeof(float));
+    memset(bufcombR4,0,combtuningR4*sizeof(float));
+    memset(bufcombR5,0,combtuningR5*sizeof(float));
+    memset(bufcombR6,0,combtuningR6*sizeof(float));
+    memset(bufcombR7,0,combtuningR7*sizeof(float));
+    memset(bufcombR8,0,combtuningR8*sizeof(float));
+
+    memset(bufallpassL1,0,allpasstuningL1*sizeof(float));
+    memset(bufallpassL2,0,allpasstuningL2*sizeof(float));
+    memset(bufallpassL3,0,allpasstuningL3*sizeof(float));
+    memset(bufallpassL4,0,allpasstuningL4*sizeof(float));
+    memset(bufallpassR1,0,allpasstuningR1*sizeof(float));
+    memset(bufallpassR2,0,allpasstuningR2*sizeof(float));
+    memset(bufallpassR3,0,allpasstuningR3*sizeof(float));
+    memset(bufallpassR4,0,allpasstuningR4*sizeof(float));
+
+    return 0;
+
+    //float bufcombL1[combtuningL1];
+    //float bufcombR1[combtuningR1];
+    //float bufcombL2[combtuningL2];
+    //float bufcombR2[combtuningR2];
+    //float bufcombL3[combtuningL3];
+    //float bufcombR3[combtuningR3];
+    //float bufcombL4[combtuningL4];
+    //float bufcombR4[combtuningR4];
+    //float bufcombL5[combtuningL5];
+    //float bufcombR5[combtuningR5];
+    //float bufcombL6[combtuningL6];
+    //float bufcombR6[combtuningR6];
+    //float bufcombL7[combtuningL7];
+    //float bufcombR7[combtuningR7];
+    //float bufcombL8[combtuningL8];
+    //float bufcombR8[combtuningR8];
+
+    //// Buffers for the allpasses
+    //float bufallpassL1[allpasstuningL1];
+    //float bufallpassR1[allpasstuningR1];
+    //float bufallpassL2[allpasstuningL2];
+    //float bufallpassR2[allpasstuningR2];
+    //float bufallpassL3[allpasstuningL3];
+    //float bufallpassR3[allpasstuningR3];
+    //float bufallpassL4[allpasstuningL4];
+    //float bufallpassR4[allpasstuningR4];
+}
+
+void revmodel::mute()
+{
+    int i;
+    if (getmode() >= freezemode)
+        return;
+
+    for (i=0;i<numcombs;i++)
+    {
+        combL[i].mute();
+        combR[i].mute();
+    }
+    for (i=0;i<numallpasses;i++)
+    {
+        allpassL[i].mute();
+        allpassR[i].mute();
+    }
+}
+
+void revmodel::processMono(float *inputL, float *outputL, long numsamples, int skip)
+{
+    float outL,input;
+
+    while(numsamples-- > 0)
+    {
+        int i;
+        outL = 0;
+        input = (*inputL + *inputL) * gain;
+
+        for(i=0; i<numcombs; i++)
+        {
+            outL += combL[i].process(input);
+        }
+
+        for(i=0; i<numallpasses; i++)
+        {
+            outL = allpassL[i].process(outL);
+        }
+
+        *outputL = outL*wet1 + *inputL*dry;
+
+        inputL += skip;
+        outputL += skip;
+    }
+}
+
+void revmodel::processmix(float *inputL, float *inputR, float *outputL, float *outputR, long numsamples, int skip)
+{
+    float outL,outR,input;
+
+    while(numsamples-- > 0)
+    {
+        int i;
+        outL = outR = 0;
+        input = (*inputL + *inputR) * gain;
+
+        // Accumulate comb filters in parallel
+        for(i=0; i<numcombs; i++)
+        {
+            outL += combL[i].process(input);
+            outR += combR[i].process(input);
+        }
+
+        // Feed through allpasses in series
+        for(i=0; i<numallpasses; i++)
+        {
+            outL = allpassL[i].process(outL);
+            outR = allpassR[i].process(outR);
+        }
+
+        // Calculate output MIXING with anything already there
+        *outputL += outL*wet1 + outR*wet2 + *inputL*dry;
+        *outputR += outR*wet1 + outL*wet2 + *inputR*dry;
+
+        // Increment sample pointers, allowing for interleave (if any)
+        inputL += skip;
+        inputR += skip;
+        outputL += skip;
+        outputR += skip;
+    }
+}
+
+void revmodel::processreplace(float *inputL, float *inputR, float *outputL, float *outputR, long numsamples, int skip)
+{
+    float outL,outR,input;
+
+    while(numsamples-- > 0)
+    {
+        int i;
+        outL = outR = 0;
+        input = (*inputL + *inputR) * gain;
+
+        // Accumulate comb filters in parallel
+        for(i=0; i<numcombs; i++)
+        {
+            outL += combL[i].process(input);
+            outR += combR[i].process(input);
+        }
+
+        // Feed through allpasses in series
+        for(i=0; i<numallpasses; i++)
+        {
+            outL = allpassL[i].process(outL);
+            outR = allpassR[i].process(outR);
+        }
+
+        // Calculate output REPLACING anything already there
+        *outputL = outL*wet1 + outR*wet2 + *inputL*dry;
+        *outputR = outR*wet1 + outL*wet2 + *inputR*dry;
+
+        // Increment sample pointers, allowing for interleave (if any)
+        inputL += skip;
+        inputR += skip;
+        outputL += skip;
+        outputR += skip;
+    }
+}
+
+void revmodel::update()
+{
+// Recalculate internal values after parameter change
+
+    int i;
+
+    wet1 = wet*(width/2 + 0.5f);
+    wet2 = wet*((1-width)/2);
+
+    if (mode >= freezemode)
+    {
+        roomsize1 = 1;
+        damp1 = 0;
+        gain = muted;
+    }
+    else
+    {
+        roomsize1 = roomsize;
+        damp1 = damp;
+        gain = fixedgain;
+    }
+
+    for(i=0; i<numcombs; i++)
+    {
+        combL[i].setfeedback(roomsize1);
+        combR[i].setfeedback(roomsize1);
+    }
+
+    for(i=0; i<numcombs; i++)
+    {
+        combL[i].setdamp(damp1);
+        combR[i].setdamp(damp1);
+    }
+}
+
+// The following get/set functions are not inlined, because
+// speed is never an issue when calling them.
+
+void revmodel::setroomsize(float value)
+{
+    roomsize = (value*scaleroom) + offsetroom;
+    update();
+}
+
+float revmodel::getroomsize()
+{
+    return (roomsize-offsetroom)/scaleroom;
+}
+
+void revmodel::setdamp(float value)
+{
+    damp = value*scaledamp;
+    update();
+}
+
+float revmodel::getdamp()
+{
+    return damp/scaledamp;
+}
+
+void revmodel::setwet(float value)
+{
+    wet = value*scalewet;
+    update();
+}
+
+float revmodel::getwet()
+{
+    return wet/scalewet;
+}
+
+void revmodel::setdry(float value)
+{
+    dry = value*scaledry;
+}
+
+float revmodel::getdry()
+{
+    return dry/scaledry;
+}
+
+void revmodel::setwidth(float value)
+{
+    width = value;
+    update();
+}
+
+float revmodel::getwidth()
+{
+    return width;
+}
+
+void revmodel::setmode(float value)
+{
+    mode = value;
+    update();
+}
+
+float revmodel::getmode()
+{
+    if (mode >= freezemode)
+        return 1;
+    else
+        return 0;
+}
+
+int revmodel::setverbID(int iID)
+{
+    int i;
+    int j;
+    float fval;
+    float pParam[MAX_VERB_ID_NUMBER][6] =
+    {
+        //{0.0f, 0.846f, 0.36f, 0.23f, 3.225f, 0.618f}, // 0,big room
+        //{0.2f, 0.721f, 0.20f, 0.16f, 2.337f, 0.618f}, // 1,medium room
+        //{0.1f, 0.720f, 0.06f, 0.0f,  1.786f, 0.618f}, // 2,small room
+        //{0.0f, 0.944f, 0.81f, 0.93f, -2.55f, 0.618f}, // 3,church
+        //{0.0f, 0.929f, 0.83f, 0.92f, -5.62f, 0.618f}, // 4,theater
+        //{0.4f, 0.942f, 0.13f, 0.32f, 0.886f, 0.618f}, // 5,tunnel
+        //{0.4f, 0.912f, 0.66f, 0.36f, 3.225f, 0.618f}, // 6,music hall
+        //{0.4f, 0.882f, 0.42f, 0.22f, -0.479f,0.618f}, // 7,cinema
+        //{0.2f, 0.925f, 1.00f, 1.00f, -5.621f,0.618f}  // 8,bathroom
+
+        {0.0f, 0.846f, 0.36f, 0.23f, 0.0f,   1.0f  }, // 0,big room
+        {0.0f, 0.846f, 0.36f, 0.23f, 2.425f, 0.418f}, // 1,big room
+        {0.2f, 0.721f, 0.20f, 0.16f, 2.107f, 0.418f}, // 2,medium room
+        {0.1f, 0.720f, 0.06f, 0.0f,  1.786f, 0.418f}, // 3,small room
+        {0.0f, 0.944f, 0.81f, 0.93f, 1.05f,  0.118f}, // 4,church
+        {0.0f, 0.929f, 0.83f, 0.92f, 0.82f,  0.218f}, // 5,theater
+        {0.4f, 0.942f, 0.13f, 0.32f, 0.886f, 0.618f}, // 6,tunnel
+        {0.4f, 0.912f, 0.66f, 0.36f, 1.225f, 0.318f}, // 7,music hall
+        {0.4f, 0.882f, 0.42f, 0.22f, -0.479f,0.618f}, // 8,cinema
+        {0.2f, 0.925f, 1.00f, 1.00f, 0.901f, 0.018f}  // 9,bathroom
+    };
+
+    if ((iID < 0) || (iID >= MAX_VERB_ID_NUMBER))
+    {
+        return -2;
+    }
+
+    i = iID;
+    j = 0;
+    //setwet(pParam[i][j++]);
+    //setroomsize(pParam[i][j++]);
+    //setdry(pParam[i][j++]);
+    //setdamp(pParam[i][j++]);
+    //setwidth(pParam[i][j++]);
+    //setmode(pParam[i][j++]);
+
+    setmode(pParam[i][j++]);
+    fval = (pParam[i][j++] - 0.7f)/0.28f;
+    setroomsize(fval);
+    //setroomsize(pParam[i][j++]);
+    setdamp(pParam[i][j++]);
+    setwidth(pParam[i][j++]);
+
+    fval = (pParam[i][j++]/3);
+    setwet(fval);
+
+    fval = (pParam[i][j++]/2);
+    setdry(fval);
+
+    return 0;
+}
+
+//ends
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/revmodel.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/revmodel.h
new file mode 100644
index 0000000..56c6c46
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/revmodel.h
@@ -0,0 +1,120 @@
+// Reverb model declaration
+//
+// Written by Jezar at Dreampoint, June 2000
+// http://www.dreampoint.co.uk
+// This code is public domain
+
+#ifndef _revmodel_
+#define _revmodel_
+
+#include "comb.h"
+#include "allpass.h"
+#include "tuning.h"
+
+#define MAX_VERB_ID_NUMBER 10 // fixed verb id number
+
+class revmodel
+{
+public:
+    revmodel();
+    ~revmodel();
+    void mute();
+    void processmix(float *inputL, float *inputR, float *outputL, float *outputR, long numsamples, int skip);
+    void processreplace(float *inputL, float *inputR, float *outputL, float *outputR, long numsamples, int skip);
+    void processMono(float* inputL, float* outputL, long numsamples, int skip);
+    void setroomsize(float value);
+    float getroomsize();
+    void setdamp(float value);
+    float getdamp();
+    void setwet(float value);
+    float getwet();
+    void setdry(float value);
+    float getdry();
+    void setwidth(float value);
+    float getwidth();
+    void setmode(float value);
+    float getmode();
+    int setverbID(int iID);
+private:
+    void update();
+    int mallocBuffers();
+private:
+    float gain;
+    float roomsize,roomsize1;
+    float damp,damp1;
+    float wet,wet1,wet2;
+    float dry;
+    float width;
+    float mode;
+
+    // The following are all declared inline
+    // to remove the need for dynamic allocation
+    // with its subsequent error-checking messiness
+
+    // Comb filters
+    comb combL[numcombs];
+    comb combR[numcombs];
+
+    // Allpass filters
+    allpass allpassL[numallpasses];
+    allpass allpassR[numallpasses];
+
+    //
Buffers for the combs + //float bufcombL1[combtuningL1]; + //float bufcombR1[combtuningR1]; + //float bufcombL2[combtuningL2]; + //float bufcombR2[combtuningR2]; + //float bufcombL3[combtuningL3]; + //float bufcombR3[combtuningR3]; + //float bufcombL4[combtuningL4]; + //float bufcombR4[combtuningR4]; + //float bufcombL5[combtuningL5]; + //float bufcombR5[combtuningR5]; + //float bufcombL6[combtuningL6]; + //float bufcombR6[combtuningR6]; + //float bufcombL7[combtuningL7]; + //float bufcombR7[combtuningR7]; + //float bufcombL8[combtuningL8]; + //float bufcombR8[combtuningR8]; + + //// Buffers for the allpasses + //float bufallpassL1[allpasstuningL1]; + //float bufallpassR1[allpasstuningR1]; + //float bufallpassL2[allpasstuningL2]; + //float bufallpassR2[allpasstuningR2]; + //float bufallpassL3[allpasstuningL3]; + //float bufallpassR3[allpasstuningR3]; + //float bufallpassL4[allpasstuningL4]; + //float bufallpassR4[allpasstuningR4]; + + float* bufcombL1; + float* bufcombR1; + float* bufcombL2; + float* bufcombR2; + float* bufcombL3; + float* bufcombR3; + float* bufcombL4; + float* bufcombR4; + float* bufcombL5; + float* bufcombR5; + float* bufcombL6; + float* bufcombR6; + float* bufcombL7; + float* bufcombR7; + float* bufcombL8; + float* bufcombR8; + + // Buffers for the all passes + float* bufallpassL1; + float* bufallpassR1; + float* bufallpassL2; + float* bufallpassR2; + float* bufallpassL3; + float* bufallpassR3; + float* bufallpassL4; + float* bufallpassR4; +}; + +#endif//_revmodel_ + +//ends diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/tuning.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/tuning.h new file mode 100644 index 0000000..e8a4cc9 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/reverb/src/verb/tuning.h @@ -0,0 +1,62 @@ +// Reverb model tuning values +// +// Written by Jezar at Dreampoint, June 2000 +// http://www.dreampoint.co.uk +// This code is public domain + +#ifndef _tuning_ +#define _tuning_ +#include "AudioEffectsConf.h" + +const int numcombs = 8; +const int numallpasses = 4; +const float muted = 0; +const float fixedgain = 0.015f; +const float scalewet = 3; +const float scaledry = 2; +const float scaledamp = 0.4f; +const float scaleroom = 0.28f; +const float offsetroom = 0.7f; +const float initialroom = 0.5f; +const float initialdamp = 0.5f; +const float initialwet = 1/scalewet; +const float initialdry = 0; +const float initialwidth = 1; +const float initialmode = 0; +const float freezemode = 0.5f; +const int stereospread = 23; + +// These values assume 44.1KHz sample rate +// they will probably be OK for 48KHz sample rate +// but would need scaling for 96KHz (or other) sample rates. +// The values were obtained by listening tests. 
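+//
+// A minimal sketch of such a rescale (scale_tuning() is a hypothetical
+// helper, not part of this library), assuming the delay lengths scale
+// linearly with the sample rate:
+//
+//   static inline int scale_tuning(int samples_at_44k1, float fs)
+//   {
+//       return (int)(samples_at_44k1 * (fs / 44100.0f) + 0.5f);
+//   }
+//
+//   e.g. combtuningL1 at 96 kHz: scale_tuning(1116, 96000.0f) == 2429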
+const int combtuningL1 = 1116;
+const int combtuningR1 = 1116+stereospread;
+const int combtuningL2 = 1188;
+const int combtuningR2 = 1188+stereospread;
+const int combtuningL3 = 1277;
+const int combtuningR3 = 1277+stereospread;
+const int combtuningL4 = 1356;
+const int combtuningR4 = 1356+stereospread;
+const int combtuningL5 = 1422;
+const int combtuningR5 = 1422+stereospread;
+const int combtuningL6 = 1491;
+const int combtuningR6 = 1491+stereospread;
+const int combtuningL7 = 1557;
+const int combtuningR7 = 1557+stereospread;
+const int combtuningL8 = 1617;
+const int combtuningR8 = 1617+stereospread;
+const int allpasstuningL1 = 556;
+const int allpasstuningR1 = 556+stereospread;
+const int allpasstuningL2 = 441;
+const int allpasstuningR2 = 441+stereospread;
+const int allpasstuningL3 = 341;
+const int allpasstuningR3 = 341+stereospread;
+const int allpasstuningL4 = 225;
+const int allpasstuningR4 = 225+stereospread;
+
+#endif//_tuning_
+
+//ends
+
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/CMakeLists.txt
new file mode 100644
index 0000000..34490cf
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/CMakeLists.txt
@@ -0,0 +1,13 @@
+#cmake_minimum_required(VERSION 2.8)
+#project(smule_eq)
+#set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)
+
+include_directories(./ inc)
+include_directories(src/all_plat src/audio_effect src/biquad
+        src/buffer src/damper src/delay src/delayi src/envelope_follower
+        src/equalizer src/reverb src/simple_delay_effect src/simple_reverb_effect)
+
+file(GLOB_RECURSE SAUDIO_EFFECTS_SRC_FILES src/*cpp)
+
+add_library(saudio_effects ${SAUDIO_EFFECTS_SRC_FILES})
+#set_target_properties(saudio_effects PROPERTIES CXX_VISIBILITY_PRESET hidden)
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/build_anrdroid.sh b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/build_anrdroid.sh
new file mode 100755
index 0000000..2abd1de
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/build_anrdroid.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+# @Time    : 2019-06-18 17:50
+# @Author  : AlanWang
+# @FileName: build_android.sh
+
+# MY_NDK and MY_CMAKE must be changed to your own NDK and CMake install paths
+MY_NDK="/Users/wangjianjun/AndroidDev/sdk/ndk-bundle"
+MY_CMAKE="/Users/wangjianjun/AndroidDev/sdk/cmake/3.6.4111459/bin/cmake"
+MY_NDK="/Users/yangjianli/Library/Android/sdk/ndk-bundle"
+MY_CMAKE="/Users/yangjianli/Library/Android/sdk/cmake/3.6.4111459/bin/cmake"
+if [ -z "$MY_NDK" ]; then
+    echo "Please set MY_NDK to the Android NDK folder"
+    exit 1
+fi
+
+if [ -z "$MY_CMAKE" ]; then
+    echo "Please set MY_CMAKE to the Android CMake folder"
+    exit 1
+fi
+
+OUTPUT_LIBS="./build/libs/android"
+ANDROID_NATIVE_API_LEVEL="android-16"
+
+# arme_abis=(armeabi armeabi-v7a arm64-v8a x86 x86_64 mips mips64)
+arme_abis=(armeabi-v7a arm64-v8a x86 x86_64)
+
+function build_with_armeabi() {
+    ARME_ABI=$1
+    echo ${ARME_ABI}
+
+    BUILD_DIR="./build/android/${ARME_ABI}"
+    PRE_EXE_DIR=$(pwd)
+    echo ${PRE_EXE_DIR}
+
+    ${MY_CMAKE} \
+        -H"./" \
+        -B"${BUILD_DIR}" \
+        -DANDROID_ABI="${ARME_ABI}" \
+        -DANDROID_NDK="${MY_NDK}" \
+        -DCMAKE_LIBRARY_OUTPUT_DIRECTORY="./build/android/libs/${ARME_ABI}" \
+        -DCMAKE_BUILD_TYPE="Release" \
+        -DCMAKE_TOOLCHAIN_FILE="${MY_NDK}/build/cmake/android.toolchain.cmake" \
+        -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} \
+        -DANDROID_TOOLCHAIN="clang" \
+        -DCMAKE_C_FLAGS="-fpic -fexceptions -frtti -Wno-narrowing" \
+        -DCMAKE_CXX_FLAGS="-fpic -fexceptions -frtti -Wno-narrowing" \
+        -DANDROID_STL="c++_static" \
+
+    cd ${BUILD_DIR}
+    make
+    make install
+
+    cd ${PRE_EXE_DIR}
+    mkdir -p ${OUTPUT_LIBS}/${ARME_ABI}/
+    mv ${PRE_EXE_DIR}/lib/* ${OUTPUT_LIBS}/${ARME_ABI}/
+    rm -r ./build/android
+    rm -r ${PRE_EXE_DIR}/lib
+}
+
+for i in ${arme_abis[@]}; do
+    build_with_armeabi $i
+done
+
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/inc/SAudioEffectsApi.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/inc/SAudioEffectsApi.h
new file mode 100644
index 0000000..9df2389
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/inc/SAudioEffectsApi.h
@@ -0,0 +1,63 @@
+//
+// Created by yangjianli on 2019-11-21.
+//
+
+#ifndef SMULE_EQ_SAUDIOEFFECTSAPI_H
+#define SMULE_EQ_SAUDIOEFFECTSAPI_H
+
+// Public interface of the audio-effects class.
+// Stereo input is downmixed and processed as mono internally.
+#include "AudioEffectsDef.h"
+
+#define ERROR_CODE_SUCCESS 0
+#define ERROR_CODE_NO_MEMORY -1
+#define ERROR_CODE_EFFECT_ERROR -2 // effect instance is NULL
+
+#define STEREO 2
+#define MONO 1
+
+class Equalizer;
+class SimpleDelayEffect;
+class SimpleReverbEffect;
+
+class SAudioEffectsApi {
+public:
+    SAudioEffectsApi();
+    ~SAudioEffectsApi();
+
+public:
+    int init(int sample_rate, int channel);
+    int set_audio_effect(AE_PARAMS* param);
+    void reset();
+
+    // The output is always stereo, so mind the size of the output buffer.
+    // number_of_samples is the length of the given buffer.
+    int process(float* input, float* output, int number_of_samples);
+    int get_latency();
+    void uninit();
+
+private:
+    int allocate(int sample_rate, int channel);
+    void update();
+
+private:
+    Equalizer * m_eq;
+    SimpleDelayEffect * m_delay;
+    SimpleReverbEffect * m_reverb;
+
+private:
+    // scratch buffer
+    float* m_tmp_buf;
+    int m_tmp_buf_len;
+
+    int m_sample_rate;
+    int m_channel;
+
+    bool m_need_eq;
+    bool m_need_delay;
+    bool m_need_reverb;
+};
+
+#endif //SMULE_EQ_SAUDIOEFFECTSAPI_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/inc/SAudioEffectsConf.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/inc/SAudioEffectsConf.h
new file mode 100644
index 0000000..2f47b15
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/inc/SAudioEffectsConf.h
@@ -0,0 +1,8 @@
+//
+// Created by yangjianli on 2020/10/29.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_SAUDIOEFFECTSCONF_H
+#define AUDIO_EFFECTS_LIB_SAUDIOEFFECTSCONF_H
+#include "AudioEffectsConf.h"
+#endif //AUDIO_EFFECTS_LIB_SAUDIOEFFECTSCONF_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/SAudioEffectsApi.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/SAudioEffectsApi.cpp
new file mode 100644
index 0000000..bfec714
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/SAudioEffectsApi.cpp
@@ -0,0 +1,228 @@
+//
+// Created by yangjianli on 2019-11-21.
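+//
+// Signal flow in process() below: stereo input is downmixed to mono,
+// then run through EQ -> delay -> reverb in series; the reverb stage
+// uses a stereo-sized scratch buffer even for mono data, and the result
+// is re-expanded to interleaved stereo at the end.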
+//
+#include "SAudioEffectsApi.h"
+
+#include "Equalizer.h"
+#include "SimpleDelayEffect.h"
+#include "SimpleReverbEffect.h"
+
+SAudioEffectsApi::SAudioEffectsApi()
+{
+    m_eq = NULL;
+    m_delay = NULL;
+    m_reverb = NULL;
+    m_tmp_buf = NULL;
+    m_tmp_buf_len = 0;
+
+    m_need_eq = false;
+    m_need_delay = false;
+    m_need_reverb = false;
+}
+
+SAudioEffectsApi::~SAudioEffectsApi() {}
+
+int SAudioEffectsApi::init(int sample_rate, int channel)
+{
+    m_sample_rate = sample_rate;
+    m_channel = channel;
+    m_tmp_buf_len = 0;
+
+    m_need_eq = false;
+    m_need_delay = false;
+    m_need_reverb = false;
+    int nRet = allocate(sample_rate, channel);
+    if (ERROR_CODE_SUCCESS != nRet)
+    {
+        uninit();
+        return nRet;
+    }
+    return ERROR_CODE_SUCCESS;
+}
+
+// Allocate the effect instances
+int SAudioEffectsApi::allocate(int sample_rate, int channel)
+{
+    m_eq = new(std::nothrow) Equalizer(sample_rate);
+    if (NULL == m_eq)
+    {
+        return ERROR_CODE_NO_MEMORY;
+    }
+    m_delay = new(std::nothrow) SimpleDelayEffect(sample_rate);
+    if (NULL == m_delay)
+    {
+        return ERROR_CODE_NO_MEMORY;
+    }
+
+    // The reverb gets no buffer at init time; process() sizes it from the input
+    m_reverb = new(std::nothrow) SimpleReverbEffect(MONO, 0);
+    if (NULL == m_reverb)
+    {
+        return ERROR_CODE_NO_MEMORY;
+    }
+    return ERROR_CODE_SUCCESS;
+}
+
+void SAudioEffectsApi::uninit()
+{
+    if(NULL != m_eq)
+    {
+        delete m_eq;
+        m_eq = NULL;
+    }
+
+    if(NULL != m_delay)
+    {
+        delete m_delay;
+        m_delay = NULL;
+    }
+
+    if(NULL != m_reverb)
+    {
+        delete m_reverb;
+        m_reverb = NULL;
+    }
+
+    if(NULL != m_tmp_buf)
+    {
+        delete[] m_tmp_buf;
+        m_tmp_buf = NULL;
+    }
+}
+
+int SAudioEffectsApi::set_audio_effect(AE_PARAMS *param)
+{
+    m_need_eq = false;
+    m_need_delay = false;
+    m_need_reverb = false;
+    if(NULL == param)
+    {
+        return ERROR_CODE_SUCCESS;
+    }
+    AE_PARAMS_SAE* params = (AE_PARAMS_SAE*) param;
+    for(int i = 0; i < params->params_list.size(); i++)
+    {
+        AE_PARAMS_SAE_CONTENT tp = params->params_list[i];
+        if(SAE_CLASS_EQ == tp.class_name)
+        {
+            m_need_eq = true;
+            m_eq->set_parameter_value(tp.function_name, tp.idx, tp.f_value);
+        }else if(SAE_CLASS_DELAY == tp.class_name)
+        {
+            m_need_delay = true;
+            m_delay->set_parameter_value(tp.function_name, tp.f_value);
+        }else if(SAE_CLASS_REVERB == tp.class_name)
+        {
+            m_need_reverb = true;
+            if(SAE_FUNC_IN_CH == tp.function_name || SAE_FUNC_OUT_CH == tp.function_name)
+            {
+                m_reverb->set_parameter_value(tp.function_name, tp.b_value);
+            }else
+            {
+                m_reverb->set_parameter_value(tp.function_name, tp.f_value);
+            }
+        }
+    }
+    return ERROR_CODE_SUCCESS;
+}
+
+void SAudioEffectsApi::reset()
+{
+    m_eq->reset();
+    m_delay->reset();
+    m_reverb->reset();
+}
+
+int SAudioEffectsApi::process(float *input, float *output, int number_of_samples)
+{
+    if(NULL == m_eq || NULL == m_delay || NULL == m_reverb || 0 == m_channel)
+    {
+        return ERROR_CODE_EFFECT_ERROR;
+    }
+
+    // Nothing to do
+    if(!m_need_eq && !m_need_delay && !m_need_reverb)
+    {
+        if(output != input)
+        {
+            memcpy(output, input, sizeof(float) * number_of_samples);
+        }
+        return ERROR_CODE_SUCCESS;
+    }
+
+    number_of_samples /= m_channel; // keep the per-channel convention consistent with the caller
+    bool need_trans = false;
+
+    // Stereo: downmix to mono in place
+    if(STEREO == m_channel)
+    {
+        need_trans = true;
+        for(int i = 0; i < number_of_samples; i++)
+        {
+            input[i] = 0.5f * (input[2 * i] + input[2 * i + 1]);
+        }
+    }
+
+    if(m_need_eq)
+    {
+        m_eq->process_internal(input, output, number_of_samples);
+    }
+    else if(output != input)
+    {
+        // no EQ stage: pass the dry signal through so the later stages
+        // never read uninitialized output samples
+        memcpy(output, input, sizeof(float) * number_of_samples);
+    }
+    if(m_need_delay)
+    {
+        m_delay->process_internal(output, output, number_of_samples);
+    }
+    if(m_need_reverb)
+    {
+        if(MONO == m_channel)
+        {
+            // Mono input: the reverb needs a stereo-sized scratch buffer
+            // internally; the result is put back into the mono buffer afterwards
+            if(m_tmp_buf_len != number_of_samples * STEREO)
+            {
+                if(NULL != m_tmp_buf)
+                {
+                    delete [] m_tmp_buf;
+                    m_tmp_buf = NULL;
+                }
+                m_tmp_buf = new (std::nothrow) float[number_of_samples * STEREO];
+                if(NULL == m_tmp_buf)
+                {
+                    return ERROR_CODE_NO_MEMORY;
+                }
+                m_tmp_buf_len = number_of_samples * STEREO;
+            }
+            m_reverb->process_internal(output, m_tmp_buf, number_of_samples);
+            for(int i = 0; i < number_of_samples; i++)
+            {
+                output[i] = 0.5f * (m_tmp_buf[2 * i] + m_tmp_buf[2 * i + 1]);
+            }
+        }else
+        {
+            m_reverb->process_internal(output, output, number_of_samples);
+        }
+        need_trans = false;
+    }
+
+    if(need_trans)
+    {
+        need_trans = false;
+        for(int i = number_of_samples - 1; i >= 0; i--)
+        {
+            output[2*i] = output[i];
+            output[2*i+1] = output[i];
+        }
+    }
+    return ERROR_CODE_SUCCESS;
+}
+
+int SAudioEffectsApi::get_latency()
+{
+    return 0;
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/all_plat/Allplat.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/all_plat/Allplat.cpp
new file mode 100644
index 0000000..1c364e4
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/all_plat/Allplat.cpp
@@ -0,0 +1,124 @@
+
+/************************************************************
+* author: yangjiang                                         *
+*************************************************************/
+
+//+ ----------------------------------------------------+
+//+                     _oo0oo_                          +
+//+                    o8888888o                         +
+//+                    88" . "88                         +
+//+                    (| -_- |)                         +
+//+                    0\  =  /0                         +
+//+                  ___/`---'\___                       +
+//+                .' \\|     |// '.                     +
+//+               / \\|||  :  |||// \                    +
+//+              / _||||| -:- |||||- \                   +
+//+             |   | \\\  -  /// |   |                  +
+//+             | \_|  ''\---/''  |_/ |                  +
+//+             \  .-\__  '-'  ___/-. /                  +
+//+           ___'. .'  /--.--\  `. .'___                +
+//+        ."" '<  `.___\_<|>_/___.' >' "".              +
+//+       | | :  `- \`.;`\ _ /`;.`/ - ` : | |            +
+//+       \  \ `_.   \_ __\ /__ _/   .-` /  /            +
+//+   =====`-.____`.___ \_____/___.-`___.-'=====         +
+//+                     `=---='                          +
+//+                                                      +
+//+   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~        +
+//+                                                      +
+//+           佛祖保佑    永无BUG                         +
+//+ ----------------------------------------------------+
+
+#include "Allplat.h"
+#include <string.h>
+
+Allplat::Allplat()
+{
+    m_cache = nullptr;
+}
+
+Allplat::~Allplat()
+{
+    allplat_destroy();
+}
+
+void Allplat::allplat_create(int len, float feedback)
+{
+    m_cache = new float[len];
+    if(len)
+    {
+        memset(m_cache, 0, sizeof(float) * len);
+    }
+    m_current_ptr = m_cache;
+    m_end_ptr = m_cache + len;
+    m_len = len;
+    m_feedback = feedback;
+}
+
+void Allplat::allplat_reset()
+{
+    if(m_len)
+    {
+        memset(m_cache, 0, sizeof(float) * m_len);
+    }
+
+    m_current_ptr = m_cache;
+}
+
+void Allplat::allplat_process(float * data, int len)
+{
+    if(len == 0)
+    {
+        return ;
+    }
+
+    for(int i = 0; i < len; ++i)
+    {
+        data[i] = allplat_tick(data[i]);
+    }
+}
+
+void Allplat::allplat_tap(float * data, int len, int tap, float wet)
+{
+    tap = std::min(tap, m_len - 1);
+    float * ptr = m_current_ptr - tap - len;
+    while(ptr < m_cache)
+    {
+        ptr += m_len;
+    }
+
+    for(int i = 0; i < len; ++i)
+    {
+        data[i] = data[i] + *ptr * wet;
+
+        ++ptr;
+        if(ptr >= m_end_ptr)
+        {
+            ptr = m_cache;
+        }
+    }
+}
+
+float Allplat::allplat_tick(float in)
+{
+    in = in - m_feedback * *m_current_ptr;
+    float out = *m_current_ptr + m_feedback * in;
+
+    *m_current_ptr = in;
+    ++m_current_ptr;
+    if(m_current_ptr >= m_end_ptr)
+    {
+        m_current_ptr = m_cache;
+    }
+
+    return out;
+}
+
+void Allplat::allplat_destroy()
+{
+    if(m_cache)
+    {
+        delete [] m_cache;
+        m_cache = nullptr;
+    }
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/all_plat/Allplat.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/all_plat/Allplat.h
new file mode 100644
index 0000000..bda8596
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/all_plat/Allplat.h
@@ -0,0 +1,60 @@
+
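+// allplat_tick() in Allplat.cpp realizes the classic delay-line allpass
+//   w[n] = x[n] - f * w[n-D],   y[n] = w[n-D] + f * w[n]
+// i.e. H(z) = (f + z^-D) / (1 + f * z^-D): unit magnitude at every
+// frequency, only the phase is smeared.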
+/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_ALLPLAT_H__ +#define __SMULE_ALLPLAT_H__ + +//实现一个简单的全通滤波器,参考了freeverb3代码 +#include "SAudioEffectsConf.h" + +class Allplat +{ +public: + Allplat(); + ~Allplat(); + +public: + void allplat_create(int len, float feedback); + void allplat_reset(); + void allplat_process(float * data, int len); + void allplat_tap(float * data, int len, int tap, float wet); + float allplat_tick(float in); + void allplat_destroy(); + +private: + float * m_cache;//0 + float * m_current_ptr;//4 + float * m_end_ptr;//8 + int m_len;//12 + float m_feedback;//16 +}; + +#endif /* __SMULE_ALLPLAT_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/audio_effect/AudioEffect.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/audio_effect/AudioEffect.cpp new file mode 100644 index 0000000..cce72be --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/audio_effect/AudioEffect.cpp @@ -0,0 +1,60 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#include "AudioEffect.h" + +//void AudioEffect::reset() +//{ +// +//} +// +//void AudioEffect::set_by_pass(bool bypass) +//{ +// m_bypass = bypass; +//} +// +//int AudioEffect::get_latency_frames() +//{ +// return 0; +//} +// +//void AudioEffect::set_parameter_value(const std::string & key, int idx, float value) +//{ +// +//} +// +//void AudioEffect::set_parameter_value(const std::string & key, bool value) +//{ +// if(key == "Bypass") +// { +// m_bypass = value; +// } +//} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/audio_effect/AudioEffect.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/audio_effect/AudioEffect.h new file mode 100644 index 0000000..001f866 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/audio_effect/AudioEffect.h @@ -0,0 +1,80 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_AUDIOEFFECT_H__ +#define __SMULE_AUDIOEFFECT_H__ + +#include +#include +#include "SAudioEffectsConf.h" + +class Parameter; + +class MidiContext; +class FXConfigInfo; + +//效果器的基类 +class AudioEffect +{ +public: + AudioEffect(unsigned int in_channels, unsigned int out_channels) {m_in_channels = in_channels;m_out_channels=out_channels;}; + virtual ~AudioEffect() {}; + +public: + //重置缓存 + virtual void reset() {}; + virtual void set_by_pass(bool bypass) {}; + virtual void input_channels() {}; + virtual void output_channels() {}; + //返回延迟多少点 + virtual int get_latency_frames() {}; + virtual void set_parameter(const std::shared_ptr &) {}; + virtual void set_render_context(std::shared_ptr) {}; + virtual void set_fX_config(std::shared_ptr) {}; + virtual void process_internal(const float *input, float *output, unsigned int len) {}; + + //下面为设置参数模块 + virtual void set_parameter_value(const std::string &key, float value) {}; + virtual void set_parameter_value(const std::string &key, int idx, float value) {}; + virtual void set_parameter_value(const std::string &, const std::string &, float) {}; + virtual void set_parameter_value(const std::string &, const std::string &, bool) {}; + virtual void set_parameter_value(const std::string &, std::string) {}; + virtual void set_parameter_value(const std::string &key, bool value) {}; + +protected: + bool m_bypass;//4 + int m_in_channels;//8 + int m_out_channels;//12 + int m_16[12];//16 + int m_64;//64 +}; + +#endif /* __SMULE_AUDIOEFFECT_H__ */ diff --git 
a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/biquad/Biquad.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/biquad/Biquad.cpp new file mode 100644 index 0000000..64d481a --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/biquad/Biquad.cpp @@ -0,0 +1,334 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#include "Biquad.h" +#include +#include + +Biquad::Biquad() +{ + m_param = nullptr; + m_type = BIQUAD_LOP; + m_channels = 0; + m_fc = 0; + m_Q = 0; + m_gain = 0; + m_fs = 0; +} + +Biquad::~Biquad() +{ + biquad_destroy(); +} + +void Biquad::biquad_create_lop(float fc, float Q, float fs, int channels) +{ + biquad_create(BIQUAD_LOP, fc, Q, 0, fs, channels); +} + +void Biquad::biquad_create_hip(float fc, float Q, float fs, int channels) +{ + biquad_create(BIQUAD_HIP, fc, Q, 0, fs, channels); +} + +void Biquad::biquad_create_bp(float fc, float Q, float fs, int channels) +{ + biquad_create(BIQUAD_BP, fc, Q, 0, fs, channels); +} + +void Biquad::biquad_create_notch(float fc, float Q, float fs, int channels) +{ + biquad_create(BIQUAD_NOTCH, fc, Q, 0, fs, channels); +} + +void Biquad::biquad_create_peak(float fc, float Q, float db, float fs, int channels) +{ + biquad_create(BIQUAD_PEAK, fc, Q, 0, fs, channels); +} + +void Biquad::biquad_create_loshelf(float fc, float Q, float db, float fs, int channels) +{ + biquad_create(BIQUAD_LOSHELF, fc, Q, 0, fs, channels); +} + +void Biquad::biquad_create_hishelf(float fc, float Q, float db, float fs, int channels) +{ + biquad_create(BIQUAD_HISHELF, fc, Q, 0, fs, channels); +} + +void Biquad::biquad_create(int type, float fc, float Q, float db, float fs, int channels) +{ + if(type > BIQUAD_MAX) + { + return; + } + + m_param = new BiquadParam[channels]; + m_type = type; + m_channels = channels; + m_fc = fc; + m_Q = Q; + m_gain = db; + m_fs = fs; + + biquad_setcoefs(type, fc, Q, db); +} + +int Biquad::biquad_setFreq(float fc) +{ + return biquad_setcoefs(m_type, fc, m_Q, m_gain); +} + +int Biquad::biquad_setGain(float db) +{ + return biquad_setcoefs(m_type, m_fc, m_Q, db); +} + +int Biquad::biquad_setQ(float Q) +{ + return biquad_setcoefs(m_type, m_fc, Q, m_gain); +} + +int Biquad::biquad_setcoefs(int type, float fc, float Q, float db) +{ + int err_code = 0; + + //检查有没有初始化 + if(m_param == nullptr || m_fs == 0) + { + return 1; + } + + //检查参数合法性 + Q = std::max(Q, 0.01f); + fc = std::max(0.0f, std::min(fc, m_fs / 2)); + + m_type = type; + m_fc = fc; + m_Q = Q; + m_gain = db; + + float w0 = (m_fc / m_fs) * 2 * M_PI; + float sin0 = sin(w0); + float cos0 = cos(w0); + float alpha = sin0 / (2 * m_Q); + float A = pow(10, m_gain / 40); + + float a0, a1, a2, b0, b1, b2; + + switch(m_type) + { + case 
BIQUAD_LOP: + b0 = (1 - cos0) / 2; + b1 = 1 - cos0; + b2 = (1 - cos0) / 2; + a0 = 1 + alpha; + a1 = -2 * cos0; + a2 = 1 - alpha; + break; + case BIQUAD_HIP: + b0 = (1 + cos0) / 2; + b1 = -(1 + cos0); + b2 = (1 + cos0) / 2; + a0 = 1 + alpha; + a1 = -2 * cos0; + a2 = 1 - alpha; + break; + case BIQUAD_BP: + b0 = alpha; + b1 = 0; + b2 = -alpha; + a0 = 1 + alpha; + a1 = -2 * cos0; + a2 = 1 - alpha; + break; + case BIQUAD_NOTCH: + b0 = 1; + b1 = -2 * cos0; + b2 = 1; + a0 = 1 + alpha; + a1 = -2 * cos0; + a2 = 1 - alpha; + break; + case BIQUAD_PEAK: + b0 = 1 + alpha * A; + b1 = -2 * cos0; + b2 = 1 - alpha * A; + a0 = 1 + alpha / A; + a1 = -2 * cos0; + a2 = 1 - alpha / A; + break; + case BIQUAD_LOSHELF: + b0 = A * ((A + 1) - (A - 1) * cos0 + 2 * sqrt(A) * alpha); + b1 = 2 * A * ((A - 1) - (A + 1) * cos0); + b2 = A * ((A + 1) - (A - 1) * cos0 - 2 * sqrt(A) * alpha); + a0 = (A + 1) + (A - 1) * cos0 + 2 * sqrt(A) * alpha; + a1 = -2 * ((A - 1) + (A + 1) * cos0); + a2 = (A + 1) + (A - 1) * cos0 - 2 * sqrt(A) * alpha; + break; + case BIQUAD_HISHELF: + b0 = A * ((A + 1) + (A - 1) * cos0 + 2 * sqrt(A) * alpha); + b1 = -2 * A * ((A - 1) + (A + 1) * cos0); + b2 = A * ((A + 1) + (A - 1) * cos0 - 2 * sqrt(A) * alpha); + a0 = (A + 1) - (A - 1) * cos0 + 2 * sqrt(A) * alpha; + a1 = 2 * ((A - 1) - (A + 1) * cos0); + a2 = (A + 1) - (A - 1) * cos0 - 2 * sqrt(A) * alpha; + break; + default: + b0 = 0; + b1 = 0; + b2 = 0; + a0 = 1; + a1 = 0; + a2 = 0; + err_code = 1; + } + + //防止出现除 0 错误 + if(a0 == 0) + { + a0 = 1e-20; + } + + //每个通道需要一组参数 + if(m_channels) + { + b0 /= a0; + b1 /= a0; + b2 /= a0; + a1 /= a0; + a2 /= a0; + a0 = 1; + + for(int i = 0; i < m_channels; ++i) + { + m_param[i].b0 = b0; + m_param[i].b1 = b1; + m_param[i].b2 = b2; + m_param[i].a1 = a1; + m_param[i].a2 = a2; + } + } + + return err_code; +} + +void Biquad::biquad_reset() +{ + if(m_param && m_channels) + { + //将保存的延迟参数进行清空 + for(int i = 0; i < m_channels; ++i) + { + m_param[i].x1 = 0; + m_param[i].x2 = 0; + m_param[i].y1 = 0; + m_param[i].y2 = 0; + } + } +} + +void Biquad::biquad_process1(float * data, int len) +{ + if(m_channels == 0) + { + return ; + } + + for(int i = 0; i < m_channels; ++i) + { + float b0 = m_param[i].b0; + float b1 = m_param[i].b1; + float b2 = m_param[i].b2; + float a1 = m_param[i].a1; + float a2 = m_param[i].a2; + float x1 = m_param[i].x1; + float x2 = m_param[i].x2; + float y1 = m_param[i].y1; + float y2 = m_param[i].y2; + + for(int j = i; j < len; j += m_channels) + { + float x = data[j]; + data[j] = b0 * x + b1 * x1 + b2 * x2 - a1 * y1 - a2 * y2; + + x2 = x1; + x1 = x; + y2 = y1; + y1 = data[j]; + } + + m_param[i].x1 = x1; + m_param[i].x2 = x2; + m_param[i].y1 = y1; + m_param[i].y2 = y2; + } +} + +void Biquad::biquad_process2(float * data, int len) +{ + if(m_channels == 0) + { + return ; + } + + for(int i = 0; i < m_channels; ++i) + { + float b0 = m_param[i].b0; + float b1 = m_param[i].b1; + float b2 = m_param[i].b2; + float a1 = m_param[i].a1; + float a2 = m_param[i].a2; + float cache1 = m_param[i].x1; + float cache2 = m_param[i].x2; + + for(int j = i; j < len; j += m_channels) + { + float cache = data[j] - a1 * cache1 - a2 * cache2; + data[j] = b0 * cache + b1 * cache1 + b2 * cache2; + + cache2 = cache1; + cache1 = cache; + } + + m_param[i].x1 = cache1; + m_param[i].x2 = cache2; + } +} + +void Biquad::biquad_destroy() +{ + if(m_param) + { + delete [] m_param; + m_param = nullptr; + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/biquad/Biquad.h 
b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/biquad/Biquad.h new file mode 100644 index 0000000..7b51975 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/biquad/Biquad.h @@ -0,0 +1,123 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_BIQUAD_H__ +#define __SMULE_BIQUAD_H__ +#include "SAudioEffectsConf.h" +//这里实际上按照 RBJ 的 cookbook 进行设计的双二阶滤波器 + +//滤波器类型 +#define BIQUAD_LOP 0 +#define BIQUAD_HIP 1 +#define BIQUAD_BP 2 +#define BIQUAD_NOTCH 3 +#define BIQUAD_PEAK 4 +#define BIQUAD_LOSHELF 5 +#define BIQUAD_HISHELF 6 +#define BIQUAD_MAX BIQUAD_HISHELF + +typedef struct _BiquadParam +{ + float b0; + float b1; + float b2; + float a1; + float a2; + float x1; + float x2; + float y1; + float y2; + _BiquadParam() : b0(0), b1(0), b2(0), a1(0), a2(0), x1(0), x2(0), y1(0), y2(0) { } +}BiquadParam; + +class Biquad +{ +public: + Biquad(); + ~Biquad(); + +public: + //初始化低通滤波参数 + void biquad_create_lop(float fc, float Q, float fs, int channels); + //初始化高通滤波参数 + void biquad_create_hip(float fc, float Q, float fs, int channels); + //初始化带通滤波参数 + void biquad_create_bp(float fc, float Q, float fs, int channels); + //初始化陷波滤波参数 + void biquad_create_notch(float fc, float Q, float fs, int channels); + //初始化peak滤波参数 + void biquad_create_peak(float fc, float Q, float db, float fs, int channels); + //初始化低搁置滤波参数 + void biquad_create_loshelf(float fc, float Q, float db, float fs, int channels); + //初始化高搁置滤波参数 + void biquad_create_hishelf(float fc, float Q, float db, float fs, int channels); + //初始化函数 + void biquad_create(int type, float fc, float Q, float db, float fs, int channels); + + //设置中心频率或者截止频率 + int biquad_setFreq(float fc); + //设置增益 + int biquad_setGain(float db); + //设置Q值 + int biquad_setQ(float Q); + //设置参数 + int biquad_setcoefs(int type, float fc, float Q, float db); + + //重置函数 + void biquad_reset(); + + //按照直接一型进行运算,长度为一个声道的长度 + void biquad_process1(float * data, int len); + //按照直接二型进行运算,长度为一个声道的长度 + void biquad_process2(float * data, int len); + +private: + //销毁、释放内存 + void biquad_destroy(); + +private: + //运行参数 + BiquadParam * m_param;//0 + //滤波器类型 + int m_type;//4 + //声道数 + int m_channels;//8 + //截止频率或中心频率 + float m_fc;//12 + //Q值 + float m_Q;//16 + //增益 + float m_gain;//20 + //采样率 + float m_fs;//24 +}; + +#endif /* __SMULE_BIQUAD_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/buffer/Buffer.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/buffer/Buffer.h new file mode 100644 index 0000000..34b4322 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/buffer/Buffer.h @@ -0,0 +1,66 @@ + 
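+// The Buffer template below is a grow-only scratch buffer: ensure(len)
+// is meant to guarantee room for len more elements past the write index,
+// reallocating and copying the first m_write_idx elements when needed,
+// while ptr() returns the current write position.
+// FIXME: ensure() in Buffer.hpp never updates m_len after growing, so a
+// grown buffer keeps reallocating on every subsequent ensure() call.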
+/************************************************************
+* author: yangjiang *
+*************************************************************/
+
+//+ ----------------------------------------------------+
+//+ _oo0oo_ +
+//+ o8888888o +
+//+ 88" . "88 +
+//+ (| -_- |) +
+//+ 0\ = /0 +
+//+ ___/`---'\___ +
+//+ .' \\| |// '. +
+//+ / \\||| : |||// \ +
+//+ / _||||| -:- |||||- \ +
+//+ | | \\\ - /// | | +
+//+ | \_| ''\---/'' |_/ | +
+//+ \ .-\__ '-' ___/-. / +
+//+ ___'. .' /--.--\ `. .'___ +
+//+ ."" '< `.___\_<|>_/___.' >' "". +
+//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | +
+//+ \ \ `_. \_ __\ /__ _/ .-` / / +
+//+ =====`-.____`.___ \_____/___.-`___.-'===== +
+//+ `=---=' +
+//+ +
+//+ +
+//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +
+//+ +
+//+ 佛祖保佑 永无BUG +
+//+ ----------------------------------------------------+
+
+#ifndef __SMULE_BUFFER_H__
+#define __SMULE_BUFFER_H__
+#include "SAudioEffectsConf.h"
+
+namespace Smule
+{
+namespace Audio
+{
+
+//element type T; chsize scales the allocation per unit of len
+template <typename T, int chsize>
+class Buffer
+{
+public:
+    Buffer(unsigned int len);
+    ~Buffer();
+
+public:
+    void ensure(int len);
+    T * ptr();
+
+private:
+    //data pointer
+    std::shared_ptr<T> m_ptr;//0
+    //total length
+    int m_len;//8
+    //current write position
+    int m_write_idx;//12
+};
+
+}
+}
+
+#include "Buffer.hpp"
+
+#endif /* __SMULE_BUFFER_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/buffer/Buffer.hpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/buffer/Buffer.hpp
new file mode 100644
index 0000000..36c27c3
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/buffer/Buffer.hpp
@@ -0,0 +1,74 @@
+
+/************************************************************
+* author: yangjiang *
+*************************************************************/
+
+//+ ----------------------------------------------------+
+//+ _oo0oo_ +
+//+ o8888888o +
+//+ 88" . "88 +
+//+ (| -_- |) +
+//+ 0\ = /0 +
+//+ ___/`---'\___ +
+//+ .' \\| |// '. +
+//+ / \\||| : |||// \ +
+//+ / _||||| -:- |||||- \ +
+//+ | | \\\ - /// | | +
+//+ | \_| ''\---/'' |_/ | +
+//+ \ .-\__ '-' ___/-. / +
+//+ ___'. .' /--.--\ `. .'___ +
+//+ ."" '< `.___\_<|>_/___.' >' "". +
+//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | +
+//+ \ \ `_. \_ __\ /__ _/ .-` / / +
+//+ =====`-.____`.___ \_____/___.-`___.-'===== +
+//+ `=---=' +
+//+ +
+//+ +
+//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +
+//+ +
+//+ 佛祖保佑 永无BUG +
+//+ ----------------------------------------------------+
+
+#ifndef __SMULE_BUFFER_HPP__
+#define __SMULE_BUFFER_HPP__
+
+namespace Smule
+{
+namespace Audio
+{
+
+template <typename T, int chsize>
+Buffer<T, chsize>::Buffer(unsigned int len)
+    : m_ptr(new T[chsize * len], std::default_delete<T[]>())
+{
+    m_len = len;
+    m_write_idx = 0;
+}
+
+template <typename T, int chsize>
+Buffer<T, chsize>::~Buffer()
+{
+
+}
+
+template <typename T, int chsize>
+void Buffer<T, chsize>::ensure(int len)
+{
+    if(m_len - m_write_idx < len)
+    {
+        std::shared_ptr<T> ptr(new T[m_write_idx + chsize * len], std::default_delete<T[]>());
+        memcpy(ptr.get(), m_ptr.get(), sizeof(T) * m_write_idx);
+        m_ptr = ptr;
+    }
+}
+
+template <typename T, int chsize>
+T * Buffer<T, chsize>::ptr()
+{
+    return m_ptr.get() + m_write_idx;
+}
+
+}
+}
+
+#endif /* __SMULE_BUFFER_HPP__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/damper/Damper.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/damper/Damper.cpp
new file mode 100644
index 0000000..6260e83
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/damper/Damper.cpp
@@ -0,0 +1,85 @@
+
+/************************************************************
+* author: yangjiang *
+*************************************************************/
+
+//+ ----------------------------------------------------+
+//+ _oo0oo_ +
+//+ o8888888o +
+//+ 88" . "88 +
+//+ (| -_- |) +
+//+ 0\ = /0 +
+//+ ___/`---'\___ +
+//+ .' \\| |// '. +
+//+ / \\||| : |||// \ +
+//+ / _||||| -:- |||||- \ +
+//+ | | \\\ - /// | | +
+//+ | \_| ''\---/'' |_/ | +
+//+ \ .-\__ '-' ___/-. / +
+//+ ___'. .' /--.--\ `. .'___ +
+//+ ."" '< `.___\_<|>_/___.' >' "". +
+//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | +
+//+ \ \ `_. \_ __\ /__ _/ .-` / / +
+//+ =====`-.____`.___ \_____/___.-`___.-'===== +
+//+ `=---=' +
+//+ +
+//+ +
+//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +
+//+ +
+//+ 佛祖保佑 永无BUG +
+//+ ----------------------------------------------------+
+
+#include "Damper.h"
+
+Damper::Damper()
+{
+    m_coeff = 0;
+    m_y1 = 0;
+}
+
+Damper::~Damper()
+{
+    damper_destroy();
+}
+
+void Damper::damper_create(float coeff)
+{
+    m_coeff = coeff;
+    m_y1 = 0;
+}
+
+void Damper::damper_set(float coeff)
+{
+    m_coeff = coeff;
+}
+
+void Damper::damper_clear()
+{
+    m_y1 = 0;
+}
+
+void Damper::damper_process(float * data, int len)
+{
+    if(len == 0)
+    {
+        return ;
+    }
+
+    for(int i = 0; i < len; ++i)
+    {
+        data[i] = damper_tick(data[i]);
+    }
+}
+
+float Damper::damper_tick(float in)
+{
+    m_y1 = m_coeff * m_y1 + (1 - m_coeff) * in;
+
+    return m_y1;
+}
+
+void Damper::damper_destroy()
+{
+    m_coeff = 0;
+    m_y1 = 0;
+} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/damper/Damper.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/damper/Damper.h
new file mode 100644
index 0000000..07f621b
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/damper/Damper.h
@@ -0,0 +1,59 @@
+
+/************************************************************
+* author: yangjiang *
+*************************************************************/
+
+//+ ----------------------------------------------------+
+//+ _oo0oo_ +
+//+ o8888888o +
+//+ 88" . "88 +
+//+ (| -_- |) +
+//+ 0\ = /0 +
+//+ ___/`---'\___ +
+//+ .' \\| |// '.
+ +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_DAMPER_H__ +#define __SMULE_DAMPER_H__ + +//利用单极点来实现减震器,参考 freeverb3 中的 efilter +#include "SAudioEffectsConf.h" + +class Damper +{ +public: + Damper(); + ~Damper(); + +public: + void damper_create(float coeff); + void damper_set(float coeff); + void damper_clear(); + void damper_process(float * data, int len); + float damper_tick(float in); + void damper_destroy(); + +private: + //低通系数 + float m_coeff;//0 + //历史数据 + float m_y1;//4 +}; + +#endif /* __SMULE_DAMPER_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delay/Delay.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delay/Delay.cpp new file mode 100644 index 0000000..2f0b595 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delay/Delay.cpp @@ -0,0 +1,148 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
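Damper is a one-pole low-pass (exponential smoother): y[n] = coeff * y[n-1] + (1 - coeff) * x[n], so coefficients near 1 damp high frequencies harder. A sketch relating a desired cutoff to the coefficient; the exp mapping is the standard one-pole formula, not taken from this repo:

#include <cmath>

// Standard one-pole coefficient for cutoff fc (Hz) at sample rate fs.
float damper_coeff(float fc, float fs)
{
    return expf(-2.0f * 3.14159265f * fc / fs);
}

// Same recurrence as Damper::damper_tick.
float one_pole_tick(float coeff, float & y1, float x)
{
    y1 = coeff * y1 + (1 - coeff) * x;
    return y1;
}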
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#include "Delay.h" +#include + +Delay::Delay() +{ + m_start_ptr = nullptr; + m_read_ptr = nullptr; + m_write_ptr = nullptr; + m_end_ptr = nullptr; +} + +Delay::~Delay() +{ + delay_destroy(); +} + +void Delay::delay_create(float delay_time, float fs, float wet, float dry, float feedback) +{ + int frames = (int)(delay_time * fs * 0.001); + if(frames <= 0) + { + frames = 1; + delay_time = 1 / (fs * 0.001); + } + + m_delay_len = frames + 1; + m_start_ptr = new float[m_delay_len]; + memset(m_start_ptr, 0, sizeof(float) * m_delay_len); + + m_read_ptr = m_start_ptr; + m_write_ptr = m_start_ptr + frames; + m_end_ptr = m_start_ptr + m_delay_len; + + m_fs = fs; + m_delay_time = delay_time; + m_delay_frame = frames; + + delay_set_fbk(feedback); + delay_set_levels(wet, dry); +} + +void Delay::delay_set_delframes(int frames) +{ + m_delay_frame = std::max(1, std::min(frames, m_delay_len - 1)); + m_delay_time = m_delay_frame / (m_fs * 0.001); + + //TODO 感觉这里有问题 + m_write_ptr = m_read_ptr + m_delay_frame; + if(m_write_ptr >= m_end_ptr) + { + m_write_ptr = m_read_ptr; + } +} + +void Delay::delay_set_deltime(float time) +{ + int frames = (int)(time * m_fs * 0.001); + delay_set_delframes(frames); +} + +void Delay::delay_set_fbk(float feedback) +{ + m_feedback = feedback; +} + +void Delay::delay_set_levels(float wet, float dry) +{ + m_wet = wet; + m_dry = dry; +} + +float Delay::delay_get_deltime() +{ + return m_delay_time; +} + +void Delay::delay_clear() +{ + memset(m_start_ptr, 0, sizeof(float) * m_delay_frame); +} + +void Delay::delay_process(float * data, int len) +{ + if(len == 0) + { + return ; + } + + for(int i = 0; i < len; ++i) + { + float x = data[i]; + data[i] = m_dry * x + m_wet * *m_read_ptr; + + *m_write_ptr = x + m_feedback * *m_read_ptr; + + ++m_read_ptr; + if(m_read_ptr >= m_end_ptr) + { + m_read_ptr -= m_delay_len; + } + + ++m_write_ptr; + if(m_write_ptr >= m_end_ptr) + { + m_write_ptr -= m_delay_len; + } + } +} + +void Delay::delay_destroy() +{ + if(m_start_ptr) + { + delete [] m_start_ptr; + m_start_ptr = nullptr; + } +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delay/Delay.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delay/Delay.h new file mode 100644 index 0000000..f5d149a --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delay/Delay.h @@ -0,0 +1,71 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
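The TODO in delay_set_delframes above flags a real problem: when the recomputed write pointer runs past the end of the line it is snapped back onto the read pointer, which collapses the requested delay to zero instead of preserving it. A hypothetical fix, wrapping around the same m_delay_len ring that delay_process already uses:

// Hypothetical replacement for the wrap in Delay::delay_set_delframes:
m_write_ptr = m_read_ptr + m_delay_frame;
if(m_write_ptr >= m_end_ptr)
{
    m_write_ptr -= m_delay_len;   // circular wrap keeps read/write spacing = m_delay_frame
}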
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_DELAY_H__ +#define __SMULE_DELAY_H__ +#include "SAudioEffectsConf.h" +//实现简单的延迟器 + +class Delay +{ +public: + Delay(); + ~Delay(); + +public: + void delay_create(float delay_time, float fs, float wet, float dry, float feedback); + void delay_set_delframes(int frames); + void delay_set_deltime(float time); + void delay_set_fbk(float feedback); + void delay_set_levels(float wet, float dry); + float delay_get_deltime(); + void delay_clear(); + void delay_process(float * data, int len); + void delay_destroy(); + +private: + float * m_start_ptr;//0 + float * m_read_ptr;//4 + float * m_write_ptr;//8 + float * m_end_ptr;//12 + //延迟线长度 + int m_delay_len;//16 + float m_fs;//20 + //单位ms + float m_delay_time;//24 + //单位为采样点个数 + int m_delay_frame;//28 + float m_feedback;//32 + float m_dry;//36 + float m_wet;//40 +}; + +#endif /* __SMULE_DELAY_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delayi/DelayI.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delayi/DelayI.cpp new file mode 100644 index 0000000..ecd3959 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delayi/DelayI.cpp @@ -0,0 +1,239 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
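delay_create takes the delay in milliseconds and converts it to whole frames (frames = delay_time * fs / 1000, floored at one frame, with delay_time recomputed to match). A usage sketch; the parameter values are illustrative only:

Delay d;
// (delay_time ms, fs, wet, dry, feedback)
d.delay_create(100.0f, 44100.0f, 0.3f, 0.7f, 0.25f);

float block[512] = { 0 };
d.delay_process(block, 512);   // in-place: dry * x + wet * delayed, feedback into the line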
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#include "DelayI.h" +#include +#include + +#define LFO_LEN 1024 +static float gs_lfo_waves[LFO_LEN]; +static bool gs_lfo_inited = false; + +//找到比 x 小的、最大的 2 次方数 +inline int small_biggest_pow2(unsigned int x) +{ + int result; + + do + { + result = x; + x &= (x - 1); + } + while(x != 0); + + return result; +} + +DelayI::DelayI() +{ + m_cache = nullptr; +} + +DelayI::~DelayI() +{ + delayi_destroy(); +} + +void DelayI::delayi_create(int depth) +{ + depth = std::max(depth, 0); + + int total_len = std::max(1, 4 * small_biggest_pow2(depth)); + + m_cache = new float[total_len]; + memset(m_cache, 0, sizeof(float) * total_len); + m_delay_len = depth; + m_total_len = total_len; + m_mask = total_len - 1; + m_read_idx = 0; + m_write_idx = depth; + + m_amplitude = 0; + m_rate = 0; + m_inc = 0; + m_phase = 0; + m_feedback = 0; + + if(!gs_lfo_inited) + { + gs_lfo_inited = true; + + for(int i = 0; i < LFO_LEN; ++i) + { + gs_lfo_waves[i] = sinf(2 * M_PI * i / LFO_LEN); + } + } +} + +void DelayI::delayi_set_fbk(float feedback) +{ + m_feedback = feedback; +} + +void DelayI::delayi_set_lfo_amt(float amplitude) +{ + +} + +void DelayI::delayi_set_lfo_inc(float inc) +{ + m_inc = inc; +} + +void DelayI::delayi_set_lfo_phase(float phase) +{ + m_phase = phase - (int)phase; +} + +void DelayI::delayi_set_lfo_rate(float rate) +{ + m_rate = rate; +} + +void DelayI::delayi_reset() +{ + if(m_total_len) + { + memset(m_cache, 0, sizeof(float) * m_total_len); + } +} + +void DelayI::delayi_process(float * data, int len) +{ + if(len == 0) + { + return ; + } + + for(int i = 0; i < len; ++i) + { + //当前相位对应的LFO的采样点位置并更新相位 + float lfo_pos = m_phase * LFO_LEN; + m_phase += m_inc; + m_phase = m_phase - (int)m_phase; + + //存放延迟数据,只是为方便后面 0 延迟时取数据 + float x = data[i]; + m_cache[m_write_idx] = x; + + //计算需要返回的延迟数据的位置,实际为一个线性插值,并更新读取位置 + int s = ((int)lfo_pos) & (LFO_LEN - 1); + int e = (s + 1) & (LFO_LEN - 1); + float delay_pos = m_total_len + m_read_idx + + m_amplitude * (gs_lfo_waves[s] + (lfo_pos - (int)lfo_pos) * (gs_lfo_waves[e] - gs_lfo_waves[s])); + m_read_idx = (m_read_idx + 1) & m_mask; + + //计算延迟结果值,实际为一个线性插值 + s = ((int)delay_pos) & m_mask; + e = (s + 1) & m_mask; + float value = m_cache[s] + (delay_pos - (int)delay_pos) * (m_cache[e] - m_cache[s]); + + //计算输出值,应该按照干湿比进行运算 + if(m_feedback > 0) + { + data[i] = x + value; + } + else + { + data[i] = x - value; + } + + //存放延迟数据,并更新写入位置 + m_cache[m_write_idx] = x + m_feedback * value; + m_write_idx = (m_write_idx + 1) & m_mask; + } +} + +void DelayI::delayi_tap(float * data, int len, int tap, float weight) +{ + //tap位置不超过延迟长度 + tap = std::min(tap, (int)(m_delay_len - 1)); + + //计算开始读取的位置 + int read_idx = m_write_idx - len - tap; + while(read_idx < 0) + { + read_idx += m_total_len; + } + + for(int i = 0; i < len; ++i) + { + data[i] = data[i] + m_cache[read_idx] * weight; + read_idx = (read_idx + 1) & m_mask; + } +} + +float DelayI::delayi_tick(float in) +{ + //这里加不加这个分支结果是一致的 + if(m_amplitude == 0) + { + m_cache[m_write_idx] = in; + m_write_idx = (m_write_idx + 1) & m_mask; + + in = m_cache[m_read_idx]; + m_read_idx = (m_read_idx + 1) & m_mask; + } + else + { + //当前相位对应的LFO的采样点位置并更新相位 + float lfo_pos = m_phase * LFO_LEN; + m_phase += m_inc; + m_phase = m_phase - (int)m_phase; + + //存放延迟数据,并更新写入位置 + m_cache[m_write_idx] = in; + m_write_idx = 
(m_write_idx + 1) & m_mask; + + //计算需要返回的延迟数据的位置,实际为一个线性插值,并更新读取位置 + int s = ((int)lfo_pos) & (LFO_LEN - 1); + int e = (s + 1) & (LFO_LEN - 1); + float delay_pos = m_total_len + m_read_idx + + m_amplitude * (gs_lfo_waves[s] + (lfo_pos - (int)lfo_pos) * (gs_lfo_waves[e] - gs_lfo_waves[s])); + m_read_idx = (m_read_idx + 1) & m_mask; + + //计算延迟结果值,实际为一个线性插值 + s = ((int)delay_pos) & m_mask; + e = (s + 1) & m_mask; + in = m_cache[s] + (delay_pos - (int)delay_pos) * (m_cache[e] - m_cache[s]); + } + + return in; +} + +void DelayI::delayi_destroy() +{ + if(m_cache) + { + delete [] m_cache; + m_cache = nullptr; + } +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delayi/DelayI.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delayi/DelayI.h new file mode 100644 index 0000000..2cd46cc --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/delayi/DelayI.h @@ -0,0 +1,81 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_DELAYI_H__ +#define __SMULE_DELAYI_H__ +#include "SAudioEffectsConf.h" +//实现一个调制延迟效果器 + +class DelayI +{ +public: + DelayI(); + ~DelayI(); + +public: + void delayi_create(int depth); + void delayi_set_fbk(float feedback); + void delayi_set_lfo_amt(float amplitude); + void delayi_set_lfo_inc(float inc); + void delayi_set_lfo_phase(float phase); + void delayi_set_lfo_rate(float rate); + void delayi_reset(); + void delayi_process(float * data, int len); + void delayi_tap(float * data, int len, int tap, float weight); + float delayi_tick(float in); + void delayi_destroy(); + +private: + //循环延迟的缓存数据 + float * m_cache;//0 + //延迟长度 + float m_delay_len;//4 + //循环的延迟缓存长度 + int m_total_len;//8 + //将延迟器长度设置为 2 的次方,然后用与运算加速 idx 的寻址操作 + int m_mask;//12 + //当前取出的位置 + int m_read_idx;//16 + //当前存入的位置 + int m_write_idx;//20 + //LFO的幅度值 + float m_amplitude;//24 + //LFO的频率,由于和"每次对输入信号的相移"有一定重合,这里不使用 + float m_rate;//28 + //LFO对输入信号每次的相移 + float m_inc;//32 + //LFO当前的相位,取值范围 [0, 1),代表了相位偏移 [0, 360) 度 + float m_phase;//36 + //延迟器的反馈比例 + float m_feedback;//40 +}; + +#endif /* __SMULE_DELAYI_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/envelope_follower/EnvelopeFollower.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/envelope_follower/EnvelopeFollower.cpp new file mode 100644 index 0000000..2b9b3af --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/envelope_follower/EnvelopeFollower.cpp @@ -0,0 +1,143 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ 
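delayi_create sizes the ring at four times the largest power of two not exceeding depth (small_biggest_pow2 clears the lowest set bit until one remains), so every index update can be masked instead of taken modulo; reads then interpolate linearly between the two samples around the fractional, LFO-modulated position. The core fractional read as a standalone sketch:

// Linearly interpolated read from a power-of-two ring buffer,
// where mask = total_len - 1 and total_len is a power of two.
float frac_read(const float * ring, int mask, float pos)
{
    int   s    = ((int)pos) & mask;          // sample before the fractional position
    int   e    = (s + 1) & mask;             // sample after it
    float frac = pos - (float)((int)pos);    // fractional part in [0, 1)
    return ring[s] + frac * (ring[e] - ring[s]);
}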
----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#include "EnvelopeFollower.h" +#include + +#define ANALOG_RATIO 0.368 +#define DIGITAL_RATIO 0.1 + +static inline float get_ratio(EnvelopeFollower * inst, float frames) +{ + float result; + + if(frames == 0) + { + result = 0; + } + else + { + float tc = inst->getTimeConstant(); + result = expf(tc / frames); + } + + return result; +} + +EnvelopeFollower::EnvelopeFollower(float attack, float release, unsigned int in_channels) + : AudioEffect(in_channels, 1) +{ + setAttackTime_frames(attack); + setReleaseTime_frames(release); + setLevel(0); +} + +EnvelopeFollower::~EnvelopeFollower() +{ + +} + +void EnvelopeFollower::process_internal(const float *input, float *output, unsigned int len) +{ + if(m_in_channels == 1) + { + for(int i = 0; i < len; ++i) + { + float in = fabsf(input[i]); + if(m_level < in) + { + m_level = in + m_attack * (m_level - in); + } + else + { + m_level = in + m_release * (m_level - in); + } + } + } + else if(m_in_channels == 2) + { + for(int i = 0; i < len; ++i) + { + float in = std::max(fabsf(input[2 * i + 0]), fabsf(input[2 * i + 1])); + if(m_level < in) + { + m_level = in + m_attack * (m_level - in); + } + else + { + m_level = in + m_release * (m_level - in); + } + } + } +} + +void EnvelopeFollower::setAttackTime_frames(float frames) +{ + m_attack = get_ratio(this, frames); +} + +void EnvelopeFollower::setCircuitType(EnvelopeFollower::CircuitType type) +{ + + float tc = getTimeConstant(); + float attack = tc / logf(m_attack); + float release = tc / logf(m_release); + + m_type = type; + setAttackTime_frames(attack); + setReleaseTime_frames(release); +} + +void EnvelopeFollower::setLevel(float level) +{ + m_level = level; +} + +void EnvelopeFollower::setReleaseTime_frames(float frames) +{ + m_release = get_ratio(this, frames); +} + +EnvelopeFollower::CircuitType EnvelopeFollower::getCircuitType() +{ + return m_type; +} + +float EnvelopeFollower::getLevel() +{ + return m_level; +} + +float EnvelopeFollower::getTimeConstant() +{ + float ratio = m_type == EnvelopeFollower::Analog ? ANALOG_RATIO : DIGITAL_RATIO; + return logf(ratio); +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/envelope_follower/EnvelopeFollower.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/envelope_follower/EnvelopeFollower.h new file mode 100644 index 0000000..c168804 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/envelope_follower/EnvelopeFollower.h @@ -0,0 +1,69 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. 
+ +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_ENVELOPEFOLLOWER_H__ +#define __SMULE_ENVELOPEFOLLOWER_H__ +#include "SAudioEffectsConf.h" +#include "AudioEffect.h" + +class EnvelopeFollower : public AudioEffect +{ +public: + EnvelopeFollower(float attack, float release, unsigned int in_channels); + virtual ~EnvelopeFollower(); + +public: + void process_internal(const float *input, float *output, unsigned int len) override ; + +public: + enum CircuitType + { + Analog, + Digital + }; + +public: + void setAttackTime_frames(float frames); + void setCircuitType(CircuitType type); + void setLevel(float level); + void setReleaseTime_frames(float frames); + CircuitType getCircuitType(); + float getLevel(); + float getTimeConstant(); + +private: + float m_level;//68 + float m_attack;//72 + float m_release;//76 + CircuitType m_type;//80 +}; + +#endif /* __SMULE_ENVELOPEFOLLOWER_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/equalizer/Equalizer.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/equalizer/Equalizer.cpp new file mode 100644 index 0000000..05c23a9 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/equalizer/Equalizer.cpp @@ -0,0 +1,144 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
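get_ratio turns a time in samples into a smoothing coefficient via exp(ln(ratio) / frames), i.e. coeff^frames = ratio: the analog circuit decays to 36.8% (1/e) of a step within `frames` samples, the digital one to 10%. A condensed sketch of the resulting follower:

#include <cmath>

// Peak follower: fast rise, slow fall, analog (1/e) time constants.
struct Follower
{
    float attack, release, level = 0;

    Follower(float attack_frames, float release_frames)
    {
        attack  = expf(logf(0.368f) / attack_frames);
        release = expf(logf(0.368f) / release_frames);
    }

    float tick(float x)
    {
        float in = fabsf(x);
        float c  = (level < in) ? attack : release;
        level = in + c * (level - in);       // same update as process_internal
        return level;
    }
};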
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#include "Equalizer.h" + +static float gs_fc[EQ_BANDS] = { 25, 2229, 20000 }; + +Equalizer::Equalizer(float fs) + : AudioEffect(1, 1) +{ + memset(m_gain, 0, sizeof(m_gain)); + memset(m_Q, 0, sizeof(m_Q)); + memcpy(m_fc, gs_fc, sizeof(gs_fc)); + + int idx = 0; + m_biquad[idx] = std::make_shared(); + m_biquad[idx]->biquad_create(BIQUAD_LOSHELF, m_fc[idx], m_Q[idx], m_gain[idx], fs, EQ_CHANNEL); + + for(idx = 1; idx < EQ_BANDS - 1; ++idx) + { + m_biquad[idx] = std::make_shared(); + m_biquad[idx]->biquad_create(BIQUAD_PEAK, m_fc[idx], m_Q[idx], m_gain[idx], fs, EQ_CHANNEL); + } + + m_biquad[idx] = std::make_shared(); + m_biquad[idx]->biquad_create(BIQUAD_HISHELF, m_fc[idx], m_Q[idx], m_gain[idx], fs, EQ_CHANNEL); +} + +Equalizer::~Equalizer() +{ + ; +} + +void Equalizer::reset() +{ + for(auto & biquad : m_biquad) + { + biquad->biquad_reset(); + } +} + +void Equalizer::process_internal(const float *input, float *output, unsigned int len) +{ + if(input != output) + { + memcpy(output, input, EQ_CHANNEL * sizeof(float) * len); + } + + + m_biquad[1]->biquad_process2(output, len); + m_biquad[0]->biquad_process2(output, len); + m_biquad[2]->biquad_process2(output, len); +// for(auto & biquad : m_biquad) +// { +// biquad->biquad_process2(output, len); +// } +} + +void Equalizer::set_parameter_value(const std::string &key, int idx, float value) +{ + if(key == "Center Freq.") + { + setFreqForEQBand(value, idx); + } + else if(key == "Gain") + { + setGaindBForEQBand(value, idx); + } + else if(key == "Q") + { + setSlopeForEQBand(value, idx); + } +} + +void Equalizer::setFreqForEQBand(float fc, int idx) +{ + if(idx >= 0 && idx < EQ_BANDS) + { + m_fc[idx] = fc; + } + + updateParameters(); +} + +void Equalizer::setGaindBForEQBand(float db, int idx) +{ + if(idx >= 0 && idx < EQ_BANDS) + { + m_gain[idx] = db; + } + + updateParameters(); +} + +void Equalizer::setSlopeForEQBand(float Q, int idx) +{ + if(idx >= 0 && idx < EQ_BANDS) + { + m_Q[idx] = Q; + } + + updateParameters(); +} + +void Equalizer::updateParameters() +{ + int idx = 0; + m_biquad[idx]->biquad_setcoefs(BIQUAD_LOSHELF, m_fc[idx], m_Q[idx], m_gain[idx]); + + for(idx = 1; idx < EQ_BANDS - 1; ++idx) + { + m_biquad[idx]->biquad_setcoefs(BIQUAD_PEAK, m_fc[idx], m_Q[idx], m_gain[idx]); + } + + m_biquad[idx]->biquad_setcoefs(BIQUAD_HISHELF, m_fc[idx], m_Q[idx], m_gain[idx]); +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/equalizer/Equalizer.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/equalizer/Equalizer.h new file mode 100644 index 0000000..9731b54 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/equalizer/Equalizer.h @@ -0,0 +1,70 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". 
+ +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_EQUALIZER_H__ +#define __SMULE_EQUALIZER_H__ + +#include "AudioEffect.h" +#include "Biquad.h" +#include "SAudioEffectsConf.h" +//这里实现一个三段均衡器,默认处理单声道 +#define EQ_BANDS 3 +#define EQ_CHANNEL 1 + +class Equalizer : public AudioEffect +{ +public: + explicit Equalizer(float fs); + ~Equalizer() override ; + +public: + void reset() override ; + void process_internal(const float *input, float *output, unsigned int len) override ; + void set_parameter_value(const std::string &key, int idx, float value) override ; + +public: + void setFreqForEQBand(float fc, int idx); + void setGaindBForEQBand(float db, int idx); + void setSlopeForEQBand(float Q, int idx); + void updateParameters(); + +private: + //均衡器的增益数组 + float m_gain[EQ_BANDS];//68 + //均衡器的Q值数组 + float m_Q[EQ_BANDS];//80 + //均衡器的中心频率或者截止频率数组 + float m_fc[EQ_BANDS];//92 + //第一个为低搁置滤波、最后一个为高搁置滤波、中间为peak滤波 + std::shared_ptr m_biquad[EQ_BANDS];//104 +}; + +#endif /* __SMULE_EQUALIZER_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/reverb/Reverb.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/reverb/Reverb.cpp new file mode 100644 index 0000000..d17c36b --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/reverb/Reverb.cpp @@ -0,0 +1,393 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
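The equalizer chains a low shelf, a peaking band, and a high shelf (defaults 25 Hz / 2229 Hz / 20 kHz, mono) and addresses them per band through set_parameter_value; process_internal applies the bands in the order 1, 0, 2, which does not change the result for cascaded linear filters (up to rounding). A usage sketch with illustrative settings:

Equalizer eq(44100.0f);
eq.set_parameter_value("Gain", 0, 3.0f);            // +3 dB on the low shelf
eq.set_parameter_value("Center Freq.", 1, 1000.0f); // move the peak band to 1 kHz
eq.set_parameter_value("Q", 1, 1.2f);

float mono[512] = { 0 };
eq.process_internal(mono, mono, 512);               // in-place, one channel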
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#include "Reverb.h" +#include + +//论文参数对应的采样率 +#define ORIGINAL_FS 29761 +#define BANDWIDTH 0.9995f +#define INPUT_DIFFUSION1 0.75f +#define INPUT_DIFFUSION2 0.625f +#define DECAY_DIFFUSION1 0.7f +#define DECAY_DIFFUSION2 0.5f +#define DAMPING 0.0005f +#define MAX_DAMP_COMP 0.9999999 +#define MAX_DAMP 1 +#define MAX_DECAY 0.999999f +#define MIN_DECAY -MAX_DECAY +//按照论文,湿声比例还需要乘以 WET_RATIO +#define WET_RATIO 0.6 + +#define INPUT_ALLPASS_LEN1 142 +#define INPUT_ALLPASS_LEN2 107 +#define INPUT_ALLPASS_LEN3 379 +#define INPUT_ALLPASS_LEN4 277 + +#define TANK_ALLPASS_LEFT_LEN1 672 +#define TANK_ALLPASS_LEFT_LEN2 1800 +#define TANK_ALLPASS_RIGHT_LEN1 908 +#define TANK_ALLPASS_RIGHT_LEN2 2656 + +#define TANK_DELAYI_LEFT_LEN1 4453 +#define TANK_DELAYI_LEFT_LEN2 3270 +#define TANK_DELAYI_RIGHT_LEN1 4217 +#define TANK_DELAYI_RIGHT_LEN2 3163 + +#define OUTPUT_LEFT_C54_1 266 +#define OUTPUT_LEFT_C54_2 2974 +#define OUTPUT_LEFT_C55_59 1913 +#define OUTPUT_LEFT_C63 1996 +#define OUTPUT_LEFT_C30 1990 +#define OUTPUT_LEFT_C31_33 186 +#define OUTPUT_LEFT_C39 1066 + +#define OUTPUT_RIGHT_C30_1 353 +#define OUTPUT_RIGHT_C30_2 3627 +#define OUTPUT_RIGHT_C31_33 1228 +#define OUTPUT_RIGHT_C39 2673 +#define OUTPUT_RIGHT_C54 2111 +#define OUTPUT_RIGHT_C55_59 335 +#define OUTPUT_RIGHT_C63 121 + +//帧长和延迟长度有一定的制约关系,需要注意 +#define FRAME_LEN 512 + +#define DELETE_PTR(ptr) \ +{ \ + if(ptr) \ + { \ + delete ptr; \ + ptr = nullptr; \ + } \ +} + +#define DELETE_ARRAY(ptr) \ +{ \ + if(ptr) \ + { \ + delete [] ptr; \ + ptr = nullptr; \ + } \ +} + +Reverb::Reverb() +{ + m_damp_in = nullptr; + m_allplat_c_13_14 = nullptr; + m_allplat_c_19_20 = nullptr; + m_allplat_c_15_16 = nullptr; + m_allplat_c_21_22 = nullptr; + m_allplat_c_23_24 = nullptr; + m_allplat_c_31_33 = nullptr; + m_allplat_c_46_48 = nullptr; + m_allplat_c_55_59 = nullptr; + m_delayi_c_30 = nullptr; + m_delayi_c_39 = nullptr; + m_delayi_c_54 = nullptr; + m_delayi_c_63 = nullptr; + m_damp_tank_left = nullptr; + m_damp_tank_right = nullptr; + + m_decay_left_in = 0; + m_decay_left_out = 0; + m_decay_right_in = 0; + m_decay_right_out = 0; + + m_dry = 0; + m_wet = 0; + + m_frame_len = 0; + m_frame_ptr = nullptr; + m_tank_right_out = nullptr; + m_tank_left_out = nullptr; +} + +Reverb::~Reverb() +{ + reverb_destroy(); +} + +void Reverb::reverb_create(float dry, float wet, float decay) +{ + m_damp_in = new Damper(); + //因为原始论文采用的是这个参数方式,但是实现的时候有点变化,因此这里做了下变换 + m_damp_in->damper_create(1 - BANDWIDTH); + + m_allplat_c_13_14 = new Allplat(); + m_allplat_c_13_14->allplat_create(INPUT_ALLPASS_LEN1, INPUT_DIFFUSION1); + m_allplat_c_19_20 = new Allplat(); + m_allplat_c_19_20->allplat_create(INPUT_ALLPASS_LEN2, INPUT_DIFFUSION1); + m_allplat_c_15_16 = new Allplat(); + m_allplat_c_15_16->allplat_create(INPUT_ALLPASS_LEN3, INPUT_DIFFUSION2); + m_allplat_c_21_22 = new Allplat(); + m_allplat_c_21_22->allplat_create(INPUT_ALLPASS_LEN4, INPUT_DIFFUSION2); + + m_allplat_c_23_24 = new Allplat(); + m_allplat_c_23_24->allplat_create(TANK_ALLPASS_LEFT_LEN1, DECAY_DIFFUSION1); + m_allplat_c_31_33 = new Allplat(); + m_allplat_c_31_33->allplat_create(TANK_ALLPASS_LEFT_LEN2, DECAY_DIFFUSION2); + + m_allplat_c_46_48 = new Allplat(); + m_allplat_c_46_48->allplat_create(TANK_ALLPASS_RIGHT_LEN1, DECAY_DIFFUSION1); + m_allplat_c_55_59 = new Allplat(); + 
m_allplat_c_55_59->allplat_create(TANK_ALLPASS_RIGHT_LEN2, DECAY_DIFFUSION2); + + m_delayi_c_30 = new DelayI(); + m_delayi_c_30->delayi_create(TANK_DELAYI_LEFT_LEN1); + m_delayi_c_30->delayi_set_lfo_amt(11.839); + m_delayi_c_30->delayi_set_lfo_inc(0.0000249433106); + m_delayi_c_30->delayi_set_lfo_phase(0); + m_delayi_c_39 = new DelayI(); + m_delayi_c_39->delayi_create(TANK_DELAYI_LEFT_LEN2); + m_delayi_c_39->delayi_set_lfo_amt(2.2); + m_delayi_c_39->delayi_set_lfo_inc(0.0000204081625); + m_delayi_c_39->delayi_set_lfo_phase(0.2); + + m_delayi_c_54 = new DelayI(); + m_delayi_c_54->delayi_create(TANK_DELAYI_RIGHT_LEN1); + m_delayi_c_54->delayi_set_lfo_amt(10); + m_delayi_c_54->delayi_set_lfo_inc(0.0000242630394); + m_delayi_c_54->delayi_set_lfo_phase(0.4); + m_delayi_c_63 = new DelayI(); + m_delayi_c_63->delayi_create(TANK_DELAYI_RIGHT_LEN2); + m_delayi_c_63->delayi_set_lfo_amt(2.11); + m_delayi_c_63->delayi_set_lfo_inc(0.0000199546485); + m_delayi_c_63->delayi_set_lfo_phase(0.6); + + m_damp_tank_left = new Damper(); + m_damp_tank_left->damper_create(DAMPING); + + m_damp_tank_right = new Damper(); + m_damp_tank_right->damper_create(DAMPING); + + reverb_set_decay(decay); + reverb_set_dry(dry); + reverb_set_wet(wet); + + m_frame_len = FRAME_LEN; + m_frame_ptr = new float[m_frame_len * 4]; + memset(m_frame_ptr, 0, sizeof(float) * m_frame_len * 4); + m_tank_right_out = m_frame_ptr + m_frame_len; + m_tank_left_out = m_frame_ptr + 2 * m_frame_len; +} + +void Reverb::reverb_set() +{ + +} + +void Reverb::reverb_set_decay(float decay) +{ + decay = std::max(MIN_DECAY, std::min(decay, MAX_DECAY)); + m_decay_left_in = decay; + m_decay_left_out = decay; + m_decay_right_in = decay; + m_decay_right_out = decay; +} + +void Reverb::reverb_set_dry(float dry) +{ + m_dry = dry; +} + +void Reverb::reverb_set_wet(float wet) +{ + m_wet = wet; +} + +void Reverb::reverb_set_indamp(float damp) +{ + if(damp > MAX_DAMP_COMP) + { + damp = MAX_DAMP; + } + + m_damp_in->damper_set(damp); +} + +void Reverb::reverb_set_tankdamp(float damp) +{ + if(damp > MAX_DAMP_COMP) + { + damp = MAX_DAMP; + } + + m_damp_tank_left->damper_set(damp); + m_damp_tank_right->damper_set(damp); +} + +float Reverb::reverb_get_wet() +{ + return m_wet; +} + +void Reverb::reverb_reset() +{ + m_damp_in->damper_clear(); + + m_allplat_c_13_14->allplat_reset(); + m_allplat_c_19_20->allplat_reset(); + m_allplat_c_15_16->allplat_reset(); + m_allplat_c_21_22->allplat_reset(); + m_allplat_c_23_24->allplat_reset(); + m_allplat_c_31_33->allplat_reset(); + m_allplat_c_46_48->allplat_reset(); + m_allplat_c_55_59->allplat_reset(); + + m_delayi_c_30->delayi_reset(); + m_delayi_c_39->delayi_reset(); + m_delayi_c_54->delayi_reset(); + m_delayi_c_63->delayi_reset(); + + m_damp_tank_left->damper_clear(); + m_damp_tank_right->damper_clear(); + + memset(m_frame_ptr, 0, sizeof(float) * m_frame_len * 4); + m_tank_right_out = m_frame_ptr + m_frame_len; + m_tank_left_out = m_frame_ptr + 2 * m_frame_len; +} + +void Reverb::reverb_process(const float * input, int channel, float * out_l, float * out_r, int len) +{ + //这里按照每帧固定长度进行 + while(len > 0) + { + int proc_len = std::min(len, m_frame_len); + len -= proc_len; + + //只允许输入单声道或者双声道 + if(channel != 2) + { + memcpy(m_frame_ptr, input, sizeof(float) * proc_len); + } + else + { + //该混响是单声道进,双声道出,因此这里需要先对数据进行单声道处理 + for(int i = 0; i < proc_len; ++i) + { + m_frame_ptr[i] = (input[2 * i + 0] + input[2 * i + 1]) / 2; + } + } + + //input diffusion + m_damp_in->damper_process(m_frame_ptr, proc_len); + 
m_allplat_c_13_14->allplat_process(m_frame_ptr, proc_len); + m_allplat_c_19_20->allplat_process(m_frame_ptr, proc_len); + m_allplat_c_15_16->allplat_process(m_frame_ptr, proc_len); + m_allplat_c_21_22->allplat_process(m_frame_ptr, proc_len); + + //tank + for(int i = 0; i < proc_len; ++i) + { + //tank feedback input + float left_in = m_frame_ptr[i] + *m_tank_right_out; + float right_in = m_frame_ptr[i] + *m_tank_left_out; + + //left tank + left_in = m_allplat_c_23_24->allplat_tick(left_in); + left_in = m_delayi_c_30->delayi_tick(left_in); + left_in = m_damp_tank_left->damper_tick(left_in); + left_in = m_allplat_c_31_33->allplat_tick(left_in * m_decay_left_in); + *m_tank_left_out = m_delayi_c_39->delayi_tick(left_in) * m_decay_left_out; + + //right tank + right_in = m_allplat_c_46_48->allplat_tick(right_in); + right_in = m_delayi_c_54->delayi_tick(right_in); + right_in = m_damp_tank_right->damper_tick(right_in); + right_in = m_allplat_c_55_59->allplat_tick(right_in * m_decay_right_in); + *m_tank_right_out = m_delayi_c_63->delayi_tick(right_in) * m_decay_right_out; + } + + //计算干声输出比例 + for(int i = 0; i < proc_len; ++i) + { + out_l[i] = input[channel * i + 0] * m_dry; + out_r[i] = input[channel * i + channel - 1] * m_dry; + } + + //delay network + memset(m_frame_ptr, 0, sizeof(float) * proc_len); + //left reverb out + m_delayi_c_54->delayi_tap(m_frame_ptr, proc_len, OUTPUT_LEFT_C54_1, m_wet * WET_RATIO); + m_delayi_c_54->delayi_tap(m_frame_ptr, proc_len, OUTPUT_LEFT_C54_2, m_wet * WET_RATIO); + m_allplat_c_55_59->allplat_tap(m_frame_ptr, proc_len, OUTPUT_LEFT_C55_59, m_wet * -WET_RATIO); + m_delayi_c_63->delayi_tap(m_frame_ptr, proc_len, OUTPUT_LEFT_C63, m_wet * WET_RATIO); + m_delayi_c_30->delayi_tap(m_frame_ptr, proc_len, OUTPUT_LEFT_C30, m_wet * -WET_RATIO); + m_allplat_c_31_33->allplat_tap(m_frame_ptr, proc_len, OUTPUT_LEFT_C31_33, m_wet * -WET_RATIO); + m_delayi_c_39->delayi_tap(m_frame_ptr, proc_len, OUTPUT_LEFT_C39, m_wet * -WET_RATIO); + //计算左声道最终输出 + for(int i = 0; i < proc_len; ++i) + { + out_l[i] += m_frame_ptr[i]; + } + + //right reverb out + m_delayi_c_30->delayi_tap(m_frame_ptr, proc_len, OUTPUT_RIGHT_C30_1, m_wet * WET_RATIO); + m_delayi_c_30->delayi_tap(m_frame_ptr, proc_len, OUTPUT_RIGHT_C30_2, m_wet * WET_RATIO); + m_allplat_c_31_33->allplat_tap(m_frame_ptr, proc_len, OUTPUT_RIGHT_C31_33, m_wet * -WET_RATIO); + m_delayi_c_39->delayi_tap(m_frame_ptr, proc_len, OUTPUT_RIGHT_C39, m_wet * WET_RATIO); + m_delayi_c_54->delayi_tap(m_frame_ptr, proc_len, OUTPUT_RIGHT_C54, m_wet * -WET_RATIO); + m_allplat_c_55_59->allplat_tap(m_frame_ptr, proc_len, OUTPUT_RIGHT_C55_59, m_wet * -WET_RATIO); + m_delayi_c_63->delayi_tap(m_frame_ptr, proc_len, OUTPUT_RIGHT_C63, m_wet * -WET_RATIO); + //计算右声道最终输出 + for(int i = 0; i < proc_len; ++i) + { + out_r[i] += m_frame_ptr[i]; + } + + //更新指针位置信息 + out_l += proc_len; + out_r += proc_len; + input += proc_len; + } +} + +void Reverb::reverb_destroy() +{ + DELETE_PTR(m_damp_in) + DELETE_PTR(m_allplat_c_13_14) + DELETE_PTR(m_allplat_c_19_20) + DELETE_PTR(m_allplat_c_15_16) + DELETE_PTR(m_allplat_c_21_22) + DELETE_PTR(m_allplat_c_23_24) + DELETE_PTR(m_allplat_c_31_33) + DELETE_PTR(m_allplat_c_46_48) + DELETE_PTR(m_allplat_c_55_59) + DELETE_PTR(m_delayi_c_30) + DELETE_PTR(m_delayi_c_39) + DELETE_PTR(m_delayi_c_54) + DELETE_PTR(m_delayi_c_63) + DELETE_PTR(m_damp_tank_left) + DELETE_PTR(m_damp_tank_right) + + DELETE_ARRAY(m_frame_ptr) +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/reverb/Reverb.h 
b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/reverb/Reverb.h new file mode 100644 index 0000000..f2c99a3 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/reverb/Reverb.h @@ -0,0 +1,91 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_REVERB_H__ +#define __SMULE_REVERB_H__ + +//Dattorro’s Plate Reverb,代码参考了 freeverb3 中的 strev +//论文地址为:https://ccrma.stanford.edu/~dattorro/EffectDesignPart1.pdf + +#include "Allplat.h" +#include "DelayI.h" +#include "Damper.h" +#include "SAudioEffectsConf.h" +class Reverb +{ +public: + Reverb(); + ~Reverb(); + +public: + void reverb_create(float dry, float wet, float decay); + void reverb_set(); + void reverb_set_decay(float decay); + void reverb_set_dry(float dry); + void reverb_set_wet(float wet); + void reverb_set_indamp(float damp); + void reverb_set_tankdamp(float damp); + float reverb_get_wet(); + void reverb_reset(); + void reverb_process(const float * input, int channel, float * out_l, float * out_r, int len); + void reverb_destroy(); + +private: + Damper * m_damp_in;//0 + Allplat * m_allplat_c_13_14;//4 + Allplat * m_allplat_c_19_20;//8 + Allplat * m_allplat_c_15_16;//12 + Allplat * m_allplat_c_21_22;//16 + //原始论文,23_24 和 46_48 应该为调制全通,但是这里简化了 + Allplat * m_allplat_c_23_24;//20 + Allplat * m_allplat_c_31_33;//24 + Allplat * m_allplat_c_46_48;//28 + Allplat * m_allplat_c_55_59;//32 + //原始论文,这里应该都是简单的延迟器,但是这里改成了调制延迟器,单实际还是简单延迟器 + DelayI * m_delayi_c_30;//36 + DelayI * m_delayi_c_39;//40 + DelayI * m_delayi_c_54;//44 + DelayI * m_delayi_c_63;//48 + Damper * m_damp_tank_left;//52 + Damper * m_damp_tank_right;//56 + float m_decay_left_in;//60 + float m_decay_left_out;//64 + float m_decay_right_in;//68 + float m_decay_right_out;//72 + float m_dry;//76 + float m_wet;//80 + int m_frame_len;//84 + float * m_frame_ptr;//88 + float * m_tank_right_out;//92 + float * m_tank_left_out;//96 +}; + +#endif /* __SMULE_REVERB_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_delay_effect/SimpleDelayEffect.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_delay_effect/SimpleDelayEffect.cpp new file mode 100644 index 0000000..3b6fe6b --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_delay_effect/SimpleDelayEffect.cpp @@ -0,0 +1,92 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. 
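Reverb wires up Dattorro's figure-of-eight tank: the band-limited, diffused mono input feeds two cross-coupled halves, and each stereo output is a signed sum of taps off the tank delays scaled by wet * 0.6 per the paper, with work chunked into 512-sample frames internally. A usage sketch, mono in and stereo out, with illustrative parameters:

Reverb rv;
// (dry, wet, decay)
rv.reverb_create(0.7f, 0.3f, 0.5f);

float in[512] = { 0 }, left[512], right[512];
rv.reverb_process(in, 1 /* input channels */, left, right, 512);
rv.reverb_destroy();   // also runs from the destructor; guarded against double free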
+ +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#include "SimpleDelayEffect.h" + +SimpleDelayEffect::SimpleDelayEffect(unsigned int fs) + : AudioEffect(1, 1) +{ + m_delay = std::make_shared(); + m_delay->delay_create(100, fs, 28, -27, 0); +} + +SimpleDelayEffect::~SimpleDelayEffect() +{ + +} + +void SimpleDelayEffect::reset() +{ + m_delay->delay_clear(); +} + +void SimpleDelayEffect::process_internal(const float *input, float *output, unsigned int len) +{ + if(input != output) + { + memcpy(output, input, sizeof(float) * len); + } + + m_delay->delay_process(output, len); +} + +void SimpleDelayEffect::set_parameter_value(const std::string &key, float value) +{ + if(key == "Mix") + { + setMix(value); + } + else if(key == "Delay") + { + setDelayTimeInMS(value); + } + else if(key == "Feedback") + { + setFeedback(value); + } +} + +void SimpleDelayEffect::setDelayTimeInMS(float time) +{ + m_delay->delay_set_deltime(time); +} + +void SimpleDelayEffect::setFeedback(float feedback) +{ + m_delay->delay_set_fbk(feedback / 100); +} + +void SimpleDelayEffect::setMix(float mix) +{ + float wet = mix / 100; + float dry = 1 - wet; + m_delay->delay_set_levels(wet, dry); +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_delay_effect/SimpleDelayEffect.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_delay_effect/SimpleDelayEffect.h new file mode 100644 index 0000000..49d603c --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_delay_effect/SimpleDelayEffect.h @@ -0,0 +1,58 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
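SimpleDelayEffect maps UI-style percentages onto the Delay: Mix splits into wet = mix / 100 and dry = 1 - wet, and Feedback is divided by 100 before it reaches the line. A usage sketch with illustrative values:

SimpleDelayEffect fx(44100);
fx.set_parameter_value("Delay", 120.0f);     // milliseconds
fx.set_parameter_value("Mix", 35.0f);        // 35% wet / 65% dry
fx.set_parameter_value("Feedback", 20.0f);   // 0.2 into the delay line

float mono[512] = { 0 };
fx.process_internal(mono, mono, 512);        // in-place, one channel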
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_SIMPLEDELAYEFFECT_H__ +#define __SMULE_SIMPLEDELAYEFFECT_H__ + +#include "AudioEffect.h" +#include "Delay.h" +#include "SAudioEffectsConf.h" +class SimpleDelayEffect : public AudioEffect +{ +public: + explicit SimpleDelayEffect(unsigned int fs); + virtual ~SimpleDelayEffect(); + +public: + void reset() override ; + void process_internal(const float *input, float *output, unsigned int len) override ; + void set_parameter_value(const std::string &key, float value) override ; + +public: + void setDelayTimeInMS(float time); + void setFeedback(float feedback); + void setMix(float mix); + +private: + std::shared_ptr m_delay;//68 +}; + +#endif /* __SMULE_SIMPLEDELAYEFFECT_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_reverb_effect/SimpleReverbEffect.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_reverb_effect/SimpleReverbEffect.cpp new file mode 100644 index 0000000..833a2dd --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_reverb_effect/SimpleReverbEffect.cpp @@ -0,0 +1,153 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#include "SimpleReverbEffect.h" + +float SimpleReverbEffect::DEFAULT_DECAY = 0.08; +float SimpleReverbEffect::DEFAULT_DRY = 0.9; +float SimpleReverbEffect::DEFAULT_FBKDMP = 0.5; +float SimpleReverbEffect::DEFAULT_INDAMP = 0.0005; +float SimpleReverbEffect::DEFAULT_WET = 0.0005; + +SimpleReverbEffect::SimpleReverbEffect(unsigned int in_channels, unsigned int frame_len) + : AudioEffect(in_channels, 2) + , m_left(frame_len) + , m_right(frame_len) +{ + m_reverb = std::make_shared(); + m_reverb->reverb_create(DEFAULT_DRY, DEFAULT_WET, DEFAULT_DECAY); + m_reverb->reverb_set_wet(DEFAULT_WET); + m_reverb->reverb_set_dry(DEFAULT_DRY); + m_reverb->reverb_set_decay(DEFAULT_DECAY); + m_reverb->reverb_set_indamp(DEFAULT_INDAMP); + m_reverb->reverb_set_tankdamp(DEFAULT_FBKDMP); +} + +SimpleReverbEffect::~SimpleReverbEffect() +{ + +} + +void SimpleReverbEffect::reset() +{ + m_reverb->reverb_reset(); +} + +void SimpleReverbEffect::process_internal(const float *input, float *output, unsigned int len) +{ + //保证长度满足要求 + m_left.ensure(len); + m_right.ensure(len); + + auto left = m_left.ptr(); + auto right = m_right.ptr(); + + m_reverb->reverb_process(input, m_in_channels, left, right, len); + + if(output != nullptr && len != 0) + { + for(int i = 0; i < len; ++i) + { + output[2 * i + 0] = left[i]; + output[2 * i + 1] = right[i]; + } + } +} + +void SimpleReverbEffect::set_parameter_value(const std::string &key, float value) +{ + if(key == "Wet") + { + m_reverb->reverb_set_wet(value); + } + else if(key == "Dry") + { + m_reverb->reverb_set_dry(value); + } + else if(key == "Decay") + { + m_reverb->reverb_set_decay(value); + } + else if(key == "Input Damping") + { + m_reverb->reverb_set_indamp(value); + } + else if(key == "Feedback Damping") + { + m_reverb->reverb_set_tankdamp(value); + } +} + +void SimpleReverbEffect::set_parameter_value(const std::string &key, bool value) +{ + if(key == "In Ch.") + { + m_in_channels = value ? 2 : 1; + } + else if(key == "Out Ch.") + { + m_out_channels = value ? 
2 : 1; + } + else + { + AudioEffect::set_parameter_value(key, value); + } +} + +void SimpleReverbEffect::setDecay(float decay) +{ + m_reverb->reverb_set_decay(decay); +} + +void SimpleReverbEffect::setDry(float dry) +{ + m_reverb->reverb_set_dry(dry); +} + +void SimpleReverbEffect::setFeedbackDamping(float damp) +{ + m_reverb->reverb_set_tankdamp(damp); +} + +void SimpleReverbEffect::setInputDamping(float damp) +{ + m_reverb->reverb_set_indamp(damp); +} + +void SimpleReverbEffect::setWet(float wet) +{ + m_reverb->reverb_set_wet(wet); +} + +float SimpleReverbEffect::getWet() +{ + return m_reverb->reverb_get_wet(); +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_reverb_effect/SimpleReverbEffect.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_reverb_effect/SimpleReverbEffect.h new file mode 100644 index 0000000..b93291a --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/saudio_effects/src/simple_reverb_effect/SimpleReverbEffect.h @@ -0,0 +1,75 @@ + +/************************************************************ +* author: yangjiang * +*************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SMULE_SIMPLEREVERBEFFECT_H__ +#define __SMULE_SIMPLEREVERBEFFECT_H__ + +//实现简易混响器 + +#include "AudioEffect.h" +#include "Reverb.h" +#include "Buffer.h" +#include "SAudioEffectsConf.h" + +class SimpleReverbEffect : public AudioEffect +{ +public: + explicit SimpleReverbEffect(unsigned int in_channels, unsigned int frame_len); + virtual ~SimpleReverbEffect(); + +public: + void reset() override ; + void process_internal(const float *input, float *output, unsigned int len) override ; + void set_parameter_value(const std::string &key, float value) override ; + void set_parameter_value(const std::string &key, bool value) override ; + +public: + void setDecay(float decay); + void setDry(float dry); + void setFeedbackDamping(float damp); + void setInputDamping(float damp); + void setWet(float wet); + float getWet(); + +private: + static float DEFAULT_DECAY; + static float DEFAULT_DRY; + static float DEFAULT_FBKDMP; + static float DEFAULT_INDAMP; + static float DEFAULT_WET; + +private: + std::shared_ptr m_reverb;//68 + Smule::Audio::Buffer m_left;//76 + Smule::Audio::Buffer m_right;//92 +}; + +#endif /* __SMULE_SIMPLEREVERBEFFECT_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/CMakeLists.txt new file mode 100644 index 0000000..19b6920 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/CMakeLists.txt @@ -0,0 +1,4 @@ +include_directories(./ inc src) +file(GLOB_RECURSE SLOWFLANGING_SRC_FILES src/*cpp) +add_library(slow_flanging ${SLOWFLANGING_SRC_FILES}) 
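SimpleReverbEffect adapts Reverb to the AudioEffect interface: mono or stereo input, always interleaved stereo output, with the two scratch Buffers holding the per-channel tank results before interleaving. A usage sketch; len counts frames, and process_internal is public in the header above:

SimpleReverbEffect rv(2 /* input channels */, 512 /* frame length */);
rv.set_parameter_value("Wet", 0.3f);
rv.set_parameter_value("Dry", 0.7f);

float in[2 * 512] = { 0 };
float out[2 * 512];
rv.process_internal(in, out, 512);   // output interleaved L/R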
+#set_target_properties(slow_flanging PROPERTIES CXX_VISIBILITY_PRESET hidden) \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/inc/CSlowFlanging.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/inc/CSlowFlanging.h new file mode 100644 index 0000000..381dffc --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/inc/CSlowFlanging.h @@ -0,0 +1,40 @@ +#ifndef __SLOW_FLANGING_H_ +#define __SLOW_FLANGING_H_ + +#include + +#include "CSlowFlangingDef.h" +class CSlowFlanging +{ +public: + int init(int channel, int sample_rate); + void reset(); + void uninit(); + int process(const char *in_buffer, char *out_buffer, int size); + int process(const float *in_buffer, float *out_buffer, int size); + + int process_lr_independent(float *in_left, float *in_right, float *out_left, float *out_right, int in_out_size); +private: + int process_sample(const float *in_samples, float *out_samples, int sample_count, unsigned int &samples_processed, + float *circle_buffer, int &iwr, float *circle_buffer_fb, int &iwr_fb); + +private: + unsigned int m_samples_processed_left; + unsigned int m_samples_processed_right; + //float m_oscillation_period; + int m_oscillation_period; + //std::vector m_delayed_samples; + + float * m_circle_buffer_l; + float * m_circle_buffer_fb_l; + int m_iwr_l; + int m_iwr_fb_l; + + float * m_circle_buffer_r; + float * m_circle_buffer_fb_r; + int m_iwr_r; + int m_iwr_fb_r; + int m_channel; + int m_sample_rate; +}; +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/inc/CSlowFlangingDef.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/inc/CSlowFlangingDef.h new file mode 100644 index 0000000..61ae467 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/inc/CSlowFlangingDef.h @@ -0,0 +1,14 @@ +// +// Created by yangjianli on 2020-01-14. 
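CSlowFlanging sweeps a 0 to ~2.53 ms delay along a sine with a roughly 3.787 s period, the right channel offset half a period from the left. A usage sketch against the header above; for the char overload, size is in bytes of 16-bit interleaved PCM (stereo requires size % 4 == 0), per the checks in CSlowFlanging.cpp:

CSlowFlanging fl;
if(fl.init(2 /* channels */, 44100) == SF_ERR_SUCCESS)
{
    short in_pcm[2 * 512]  = { 0 };
    short out_pcm[2 * 512] = { 0 };
    fl.process((const char *)in_pcm, (char *)out_pcm, (int)sizeof(in_pcm));
    fl.uninit();
}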
+// + +#ifndef AUDIO_EFFECTS_LIB_CSLOWFLANGING_DEF_H +#define AUDIO_EFFECTS_LIB_CSLOWFLANGING_DEF_H +#include "AudioEffectsConf.h" +enum SF_ERR { + SF_ERR_SUCCESS = 0, + SF_ERR_PARAM = -1, + SF_ERR_BASE_H_MALLOC_NULL = -2, + SF_ERR_BASE_H_UNKNOWN = -3, +}; +#endif //AUDIO_EFFECTS_LIB_CSLOWFLANGING_DEF_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/src/CSlowFlanging.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/src/CSlowFlanging.cpp new file mode 100644 index 0000000..84c69fc --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/slow_flanging/src/CSlowFlanging.cpp @@ -0,0 +1,520 @@ +#include "CSlowFlangingDef.h" +#define _USE_MATH_DEFINES +#include +#include "CSlowFlanging.h" + +#define MAX_DELAY_SAMPLE (2.53) // +#define MIN_DELAY_SAMPLE (0.0) + +int CSlowFlanging::init(int channel, int sample_rate) +{ + + m_iwr_l = 0; + m_circle_buffer_l = NULL; + m_iwr_fb_l = 0; + m_circle_buffer_fb_l = NULL; + m_iwr_r = 0; + m_circle_buffer_r = NULL; + m_iwr_fb_r = 0; + m_circle_buffer_fb_r = NULL; + m_channel = channel; + if (channel > 2 || channel < 1) + { + return SF_ERR_PARAM; + } + m_sample_rate = sample_rate; + m_oscillation_period = 3787.0f*(float)m_sample_rate/1000.0f; + m_samples_processed_left = 0; + m_samples_processed_right = m_oscillation_period/2; +#define MAX_DELAY_SAMPLE_COUNT (int(MAX_DELAY_SAMPLE*(float)m_sample_rate/1000.0f)+2) + //m_delayed_samples = std::vector(MAX_DELAY_SAMPLE_COUNT,0); + m_circle_buffer_l = new(std::nothrow) float[MAX_DELAY_SAMPLE_COUNT]; + if (m_circle_buffer_l == NULL) + { + uninit(); + return SF_ERR_BASE_H_MALLOC_NULL; + } + //memset(m_circle_buffer,0,MAX_DELAY_SAMPLE_COUNT); + for(int i =0;i 1.0f ? out_samples[i]/fabs(out_samples[i])*32767.0f : out_samples[i]*32767.0f); + } + if (in_samples != NULL) + { + delete[] in_samples; + in_samples = NULL; + } + if (out_samples != NULL) + { + delete[] out_samples; + out_samples = NULL; + } + } + else if (m_channel == 2) + { + if (size % 4 != 0) + { + return SF_ERR_PARAM; + } + float *in_samples_l = new(std::nothrow) float[size/4]; + if (in_samples_l == NULL) + { + return SF_ERR_BASE_H_MALLOC_NULL; + } + float *in_samples_r = new(std::nothrow) float[size/4]; + if (in_samples_r == NULL) + { + if (in_samples_l != NULL) + { + delete[] in_samples_l; + in_samples_l = NULL; + } + return SF_ERR_BASE_H_MALLOC_NULL; + } + + + for (int i = 0; i < size/4; i++) + { + in_samples_l[i] = *(((short*)in_buffer+i*2))/32767.0f; + in_samples_r[i] = *(((short*)in_buffer+i*2+1))/32767.0f; + } + + float *out_samples_l = new(std::nothrow) float[size/4]; + if (out_samples_l == NULL) + { + if (in_samples_l != NULL) + { + delete[] in_samples_l; + in_samples_l = NULL; + } + if (in_samples_r != NULL) + { + delete[] in_samples_r; + in_samples_r = NULL; + } + return SF_ERR_BASE_H_MALLOC_NULL; + } + float *out_samples_r = new(std::nothrow) float[size/4]; + if (out_samples_r == NULL) + { + if (in_samples_l != NULL) + { + delete[] in_samples_l; + in_samples_l = NULL; + } + if (in_samples_r != NULL) + { + delete[] in_samples_r; + in_samples_r = NULL; + } + if (out_samples_l != NULL) + { + delete[] out_samples_l; + out_samples_l = NULL; + } + return SF_ERR_BASE_H_MALLOC_NULL; + } + + int sample_count_l = process_sample(in_samples_l, out_samples_l, size / 4, m_samples_processed_left, + m_circle_buffer_l, m_iwr_l, m_circle_buffer_fb_l, m_iwr_fb_l); + int sample_count_r = process_sample(in_samples_r, out_samples_r, size / 4, m_samples_processed_right, + m_circle_buffer_r, m_iwr_r, 
m_circle_buffer_fb_r, m_iwr_fb_r); + if (sample_count_l != size/4 || sample_count_r != size/4) + { + if (in_samples_l != NULL) + { + delete[] in_samples_l; + in_samples_l = NULL; + } + if (in_samples_r != NULL) + { + delete[] in_samples_r; + in_samples_r = NULL; + } + if (out_samples_l != NULL) + { + delete[] out_samples_l; + out_samples_l = NULL; + + } + if (out_samples_r != NULL) + { + delete[] out_samples_r; + out_samples_r = NULL; + } + return SF_ERR_BASE_H_UNKNOWN; + } + + for (int i = 0; i < size/4; i++) + { + *((short*)out_buffer+2*i) = (short)(fabs(out_samples_l[i]) > 1.0f ? out_samples_l[i]/fabs(out_samples_l[i])*32767.0f : out_samples_l[i]*32767.0f); + *((short*)out_buffer+2*i+1) = (short)(fabs(out_samples_r[i]) > 1.0f ? out_samples_r[i]/fabs(out_samples_r[i])*32767.0f : out_samples_r[i]*32767.0f); + } + + if (in_samples_l != NULL) + { + delete[] in_samples_l; + in_samples_l = NULL; + } + if (in_samples_r != NULL) + { + delete[] in_samples_r; + in_samples_r = NULL; + } + if (out_samples_l != NULL) + { + delete[] out_samples_l; + out_samples_l = NULL; + + } + if (out_samples_r != NULL) + { + delete[] out_samples_r; + out_samples_r = NULL; + } + } + else + ; + return size; +} + +int CSlowFlanging::process_sample(const float *in_samples, float *out_samples, int sample_count, + unsigned int &samples_processed, + float *circle_buffer, int &iwr, float *circle_buffer_fb, int &iwr_fb) +{ + //float r = (float)(MAX_DELAY_SAMPLE_COUNT - MIN_DELAY_SAMPLE_COUNT)*2.0f/(float)OSCILLATION_PERIOD; +#define MAX_DELAY_SAMPLE_COUNT (int(MAX_DELAY_SAMPLE*(float)m_sample_rate/1000.0f)+2) +#define AVE_DELAY_SAMPLE (((MAX_DELAY_SAMPLE) - (MIN_DELAY_SAMPLE))*(float)m_sample_rate/1000.0f/2) + + float gfb = 0.2f; + float gff = 1.0f; + for (int i = 0; i < sample_count; i++) + { + //int delay_index = (m_samples_processed % OSCILLATION_PERIOD) <= OSCILLATION_PERIOD / 2 ? + // (float)(m_samples_processed % OSCILLATION_PERIOD) * r + MIN_DELAY_SAMPLE_COUNT + // : - (float)(m_samples_processed % OSCILLATION_PERIOD) * r + 2 * MAX_DELAY_SAMPLE_COUNT - MIN_DELAY_SAMPLE_COUNT; + // if (m_samples_processed > OSCILLATION_PERIOD) + // { + // break; + // } + float delay_t = (float)(AVE_DELAY_SAMPLE+MIN_DELAY_SAMPLE) + (AVE_DELAY_SAMPLE*sin(((float)(samples_processed)*2*M_PI)/(float)m_oscillation_period)); + //float delay_t_1 = (float)(MAX_DELAY_SAMPLE_COUNT/2) + (MAX_DELAY_SAMPLE_COUNT/2*sin(((float)(m_samples_processed+m_start_delay+1)*M_PI)/(float)OSCILLATION_PERIOD)); + //printf("%f\n",delay_t); + + int delay_n = int(delay_t); + int delay_n_1 = delay_n + 1;//= delay_t_1 > delay_t ? delay_n+1 : delay_n-1; + + int delay_n_fb = delay_n; + int delay_n_fb_1 = delay_n_fb + 1; + float ita = delay_t - (float)delay_n; + //int delay_n_1 = delay_n+1; + + delay_n = delay_n <= iwr ? iwr - delay_n : MAX_DELAY_SAMPLE_COUNT + iwr - delay_n; + delay_n_1 = delay_n_1 <= iwr ? iwr - delay_n_1 : MAX_DELAY_SAMPLE_COUNT + iwr - delay_n_1; + delay_n_fb = delay_n_fb <= iwr_fb ? iwr_fb - delay_n_fb : MAX_DELAY_SAMPLE_COUNT + iwr_fb - delay_n_fb; + delay_n_fb_1 = delay_n_fb_1 <= iwr_fb ? iwr_fb - delay_n_fb_1 : MAX_DELAY_SAMPLE_COUNT + iwr_fb - delay_n_fb_1; + //delay_n_1 = delay_n_1 <= m_iwr ? 
m_iwr - delay_n_1 : MAX_DELAY_SAMPLE_COUNT+1 + m_iwr - delay_n_1; + //out_samples[i] = in_samples[i] + ((float)delay_n + 1.0f - delay_t)*m_circle_buffer[delay_n]+(delay_t-(float)delay_n)*m_circle_buffer[delay_n_1]; + //m_circle_buffer_fb[m_iwr_fb] = (m_circle_buffer[delay_n] + ita*(m_circle_buffer[delay_n_1]-m_circle_buffer[delay_n])) + // + gfb*(m_circle_buffer_fb[delay_n_fb]+ita*(m_circle_buffer_fb[delay_n_fb_1]-m_circle_buffer_fb[delay_n_fb])); + + //out_samples[i] = in_samples[i] + gff * m_circle_buffer_fb[m_iwr_fb]; + out_samples[i] = in_samples[i] + gff*(circle_buffer[delay_n] + ita*(circle_buffer[delay_n_1]-circle_buffer[delay_n])) + gfb*(circle_buffer_fb[delay_n_fb]+ita*(circle_buffer_fb[delay_n_fb_1]-circle_buffer_fb[delay_n_fb])); + out_samples[i] = 0.6f*out_samples[i]; + samples_processed++; + circle_buffer[iwr] = in_samples[i]; + iwr++; + circle_buffer_fb[iwr_fb] = out_samples[i]; + iwr_fb++; + iwr = iwr >= MAX_DELAY_SAMPLE_COUNT ? 0 : iwr; + iwr_fb = iwr_fb >= MAX_DELAY_SAMPLE_COUNT ? 0 : iwr_fb; + + } + return sample_count; +} + +void CSlowFlanging::uninit() +{ + //m_delayed_samples.clear(); + if (m_circle_buffer_l != NULL) + { + delete[] m_circle_buffer_l; + m_circle_buffer_l = NULL; + } + if (m_circle_buffer_fb_l != NULL) + { + delete[] m_circle_buffer_fb_l; + m_circle_buffer_fb_l = NULL; + } + if (m_circle_buffer_r != NULL) + { + delete[] m_circle_buffer_r; + m_circle_buffer_r = NULL; + } + if (m_circle_buffer_fb_r != NULL) + { + delete[] m_circle_buffer_fb_r; + m_circle_buffer_fb_r = NULL; + } +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/CMakeLists.txt new file mode 100644 index 0000000..7a39e15 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/CMakeLists.txt @@ -0,0 +1,4 @@ +include_directories(./ inc ref src src/common ref ref/kiss_fft) +file(GLOB_RECURSE SUPERSOUND_SRC_FILES src/*cpp ref/*cpp) +add_library(supersound ${SUPERSOUND_SRC_FILES}) + diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/inc/impulse_types.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/inc/impulse_types.h new file mode 100755 index 0000000..0f54d5a --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/inc/impulse_types.h @@ -0,0 +1,59 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//在 impulse 工程中使用到的常量和结构 + +#ifndef __IMPULSE_TYPES_H__ +#define __IMPULSE_TYPES_H__ + +#include +#include "CImEffectParams.h" +//typedef struct _Impulse_Param +//{ +// //采样率,单位 hz +// int32_t fs; +// //输入声道数 +// int32_t in_channels; +// //输出声道数,需和输入通道相同 +// int32_t out_channels; +// //FIR 窗 bit 长度,最小为 10 +// int32_t window_bits; +// //im 响应,外围需要保证其采样率和输入的采样率一致 +// float * im_response; +// //响应的长度 +// int32_t response_len; +// //响应的通道数,该通道数必须与输入通道数相同或为1,为1时表示输入的所有通道使用相同的响应 +// int32_t response_channels; +// // 每次处理时输入的音频的长度(单声道的长度),用于计算延迟 +// int32_t process_buffer_len; +//} Impulse_Param; + +#endif /* __IMPULSE_TYPES_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/inc/supersound_err.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/inc/supersound_err.h new file mode 100755 index 0000000..8f9de9c --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/inc/supersound_err.h @@ -0,0 +1,45 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SUPERSOUND_ERR_H__ +#define __SUPERSOUND_ERR_H__ + +//成功 +#define ERROR_SUPERSOUND_SUCCESS 0 + +//输入参数有误 +#define ERROR_SUPERSOUND_PARAM 2000 +//内存不够 +#define ERROR_SUPERSOUND_MEMORY 2001 + +#define ERROR_SUPERSOUND_INPUT_NOT_SAME_OUT 2002 + +#endif /* __SUPERSOUND_ERR_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/inc/supersound_types.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/inc/supersound_types.h new file mode 100755 index 0000000..1043f7c --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/inc/supersound_types.h @@ -0,0 +1,74 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//在 super sound 工程中使用到的常量和结构 + +#ifndef __SUPERSOUND_TYPES_H__ +#define __SUPERSOUND_TYPES_H__ + +#include + +//+ ---------------------------------------------------- +//+ 版本定义,实际含义是 N 位 2 位 2 位:3.10.01 版本 +//+ ---------------------------------------------------- +#define SUPERSOUND_VERSION 31001 + +//+ ---------------------------------------------------- +//+ 实例定义 +//+ ---------------------------------------------------- +typedef void* SUPERSOUND_INST; + +//+ ---------------------------------------------------- +//+ 音效类型定义 +//+ ---------------------------------------------------- +enum SUPERSOUND_EFFECT_TYPE +{ + SUPERSOUND_MIN_TYPE = -1, //类型最小值,限制输入 + + SUPERSOUND_IMPULSE_TYPE, //IM 效果 + + SUPERSOUND_MAX_TYPE, //类型最大值,限制输入 +}; + +//声道的宏定义,方便在数组中表示 +#define CHANNEL_FL 0 +#define CHANNEL_FC 1 +#define CHANNEL_FR 2 +#define CHANNEL_SL 3 +#define CHANNEL_SR 4 +#define CHANNEL_LFE 5 +//后环绕 +#define CHANNEL_BSUR 6 +#define CHANNEL_BL 6 +#define CHANNEL_BR 7 +#define CHANNEL_MAX 8 + +#endif /* __SUPERSOUND_TYPES_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/_kiss_fft_guts.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/_kiss_fft_guts.h new file mode 100755 index 0000000..62951be --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/_kiss_fft_guts.h @@ -0,0 +1,132 @@ +/* +Copyright (c) 2003-2004, Mark Borgerding + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* kiss_fft.h + defines kiss_fft_scalar as either short or a float type + and defines + typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ +#include "kiss_fft.h" + + +#define MAXFACTORS 32 +/* e.g. 
an fft of length 128 has 4 factors + as far as kissfft is concerned + 4*4*4*2 + */ + +struct kiss_fft_state{ + int nfft; + int inverse; + int factors[2*MAXFACTORS]; + kiss_fft_cpx twiddles[1]; +}; + +/* + Explanation of macros dealing with complex math: + + C_MUL(m,a,b) : m = a*b + C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise + C_SUB( res, a,b) : res = a - b + C_SUBFROM( res , a) : res -= a + C_ADDTO( res , a) : res += a + * */ +#ifdef FIXED_POINT + +#if defined(CHECK_OVERFLOW) +# define CHECK_OVERFLOW_OP(a,op,b) \ + if ( (long)(a) op (long)(b) > 32767 || (long)(a) op (long)(b) < -32768 ) { \ + fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(long)(a) op (long)(b) ); } +#endif + + +# define smul(a,b) ( (long)(a)*(b) ) +# define sround( x ) (short)( ( (x) + (1<<14) ) >>15 ) + +# define S_MUL(a,b) sround( smul(a,b) ) + +# define C_MUL(m,a,b) \ + do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \ + (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) + +# define DIVSCALAR(x,k) \ + (x) = sround( smul( x, 32767/k ) ) + +# define C_FIXDIV(c,div) \ + do { DIVSCALAR( (c).r , div); \ + DIVSCALAR( (c).i , div); }while (0) + +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r = sround( smul( (c).r , s ) ) ;\ + (c).i = sround( smul( (c).i , s ) ) ; }while(0) + +#else /* not FIXED_POINT*/ + +# define S_MUL(a,b) ( (a)*(b) ) +#define C_MUL(m,a,b) \ + do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ + (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) +# define C_FIXDIV(c,div) /* NOOP */ +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r *= (s);\ + (c).i *= (s); }while(0) +#endif + +#ifndef CHECK_OVERFLOW_OP +# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ +#endif + +#define C_ADD( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,+,(b).r)\ + CHECK_OVERFLOW_OP((a).i,+,(b).i)\ + (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ + }while(0) +#define C_SUB( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,-,(b).r)\ + CHECK_OVERFLOW_OP((a).i,-,(b).i)\ + (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ + }while(0) +#define C_ADDTO( res , a)\ + do { \ + CHECK_OVERFLOW_OP((res).r,+,(a).r)\ + CHECK_OVERFLOW_OP((res).i,+,(a).i)\ + (res).r += (a).r; (res).i += (a).i;\ + }while(0) + +#define C_SUBFROM( res , a)\ + do {\ + CHECK_OVERFLOW_OP((res).r,-,(a).r)\ + CHECK_OVERFLOW_OP((res).i,-,(a).i)\ + (res).r -= (a).r; (res).i -= (a).i; \ + }while(0) + + + + +static +void kf_cexp(kiss_fft_cpx * x,double phase) /* returns e ** (j*phase) */ +{ +#ifdef FIXED_POINT + x->r = (kiss_fft_scalar) (32767 * cos (phase)); + x->i = (kiss_fft_scalar) (32767 * sin (phase)); +#else + x->r = (kiss_fft_scalar) cos (phase); + x->i = (kiss_fft_scalar) sin (phase); +#endif +} + +/* a debugging function */ +#define pcpx(c)\ + fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) ) diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fft.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fft.cpp new file mode 100755 index 0000000..62c787f --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fft.cpp @@ -0,0 +1,369 @@ +/* +Copyright (c) 2003-2004, Mark Borgerding + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ +#include "_kiss_fft_guts.h" +/* The guts header contains all the multiplication and addition macros that are defined for + fixed or floating point complex numbers. It also delares the kf_ internal functions. + */ + +static kiss_fft_cpx *scratchbuf=NULL; +static size_t nscratchbuf=0; +static kiss_fft_cpx *tmpbuf=NULL; +static size_t ntmpbuf=0; + +#define CHECKBUF(buf,nbuf,n) \ + do { \ + if ( nbuf < (size_t)(n) ) {\ + buf = (kiss_fft_cpx*)realloc(buf,sizeof(kiss_fft_cpx)*(n)); \ + nbuf = (size_t)(n); \ + } \ + }while(0) + + +static void kf_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx * Fout2; + kiss_fft_cpx * tw1 = st->twiddles; + kiss_fft_cpx t; + Fout2 = Fout + m; + do{ + C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); + + C_MUL (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + }while (--m); +} + +static void kf_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + const size_t m + ) +{ + kiss_fft_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + size_t k=m; + const size_t m2=2*m; + const size_t m3=3*m; + + tw3 = tw2 = tw1 = st->twiddles; + + do { + C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); + + C_MUL(scratch[0],Fout[m] , *tw1 ); + C_MUL(scratch[1],Fout[m2] , *tw2 ); + C_MUL(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + if(st->inverse) { + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + }else{ + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + } + ++Fout; + }while(--k); +} + +static void kf_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + size_t m + ) +{ + size_t k=m; + const size_t m2 = 2*m; + kiss_fft_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_fft_cpx epi3; + epi3 = st->twiddles[fstride*m]; + + 
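/* Note on the radix-3 butterfly below: at a radix-3 stage fstride*m == nfft/3,
   so epi3 is the twiddle exp(-j*2*pi/3) and epi3.i == -sin(2*pi/3) for a
   forward transform. scratch[3] holds the sum and scratch[0] the difference
   of the two twiddled inputs; the three outputs are Fout + sum, and
   (Fout - sum/2) +/- j*(difference * epi3.i). */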
tw1=tw2=st->twiddles; + + do{ + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); + + C_MUL(scratch[1],Fout[m] , *tw1); + C_MUL(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - scratch[3].r/2; + Fout[m].i = Fout->i - scratch[3].i/2; + + C_MULBYSCALAR( scratch[0] , epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + }while(--k); +} + +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int u; + kiss_fft_cpx scratch[13]; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx *tw; + kiss_fft_cpx ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=st->twiddles; + for ( u=0; u<m; ++u ) { + C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); + scratch[0] = *Fout0; + + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } +} + +/* perform the butterfly for one stage of a mixed radix FFT */ +static void kf_bfly_generic( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int p + ) +{ + int u,k,q1,q; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx t; + int Norig = st->nfft; + + CHECKBUF(scratchbuf,nscratchbuf,p); + + for ( u=0; u<m; ++u ) { + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + scratchbuf[q1] = Fout[ k ]; + C_FIXDIV(scratchbuf[q1],p); + k += m; + } + + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + int twidx=0; + Fout[ k ] = scratchbuf[0]; + for (q=1;q<p;++q ) { + twidx += fstride * k; + if (twidx>=Norig) twidx-=Norig; + C_MUL(t,scratchbuf[q] , twiddles[twidx] ); + C_ADDTO( Fout[ k ] ,t); + } + k += m; + } + } +} + +static +void kf_work( + kiss_fft_cpx * Fout, + const kiss_fft_cpx * f, + const size_t fstride, + int in_stride, + int * factors, + const kiss_fft_cfg st + ) +{ + kiss_fft_cpx * Fout_beg=Fout; + const int p=*factors++; /* the radix */ + const int m=*factors++; /* stage's fft length/p */ + const kiss_fft_cpx * Fout_end = Fout + p*m; + + if (m==1) { + do{ + *Fout = *f; + f += fstride*in_stride; + }while(++Fout != Fout_end ); + }else{ + do{ + kf_work( Fout , f, fstride*p, in_stride, factors,st); + f += fstride*in_stride; + }while( (Fout += m) != Fout_end ); + } + + Fout=Fout_beg; + + switch (p) { + case 2: kf_bfly2(Fout,fstride,st,m); break; + case 3: kf_bfly3(Fout,fstride,st,m); break; + case 4: kf_bfly4(Fout,fstride,st,m); break; + case 5: kf_bfly5(Fout,fstride,st,m); break; + default: kf_bfly_generic(Fout,fstride,st,m,p); break; + } +}
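kf_work() above walks the factor list that kf_factor() below produces, recursing on the sub-FFTs and then applying the butterfly that matches each radix. From the caller's side all of this stays behind the small C API declared in kiss_fft.h; here is a minimal forward-transform sketch (the input signal is illustrative):

#include "kiss_fft.h"

void kiss_fft_example(void)
{
    const int nfft = 1024;
    kiss_fft_cpx in[1024];
    kiss_fft_cpx out[1024];
    kiss_fft_cfg cfg = kiss_fft_alloc(nfft, 0 /* forward */, NULL, NULL);
    if (cfg == NULL)
        return;
    for (int i = 0; i < nfft; ++i) {
        in[i].r = (i % 16 == 0) ? 1.0f : 0.0f;  // sparse impulse train
        in[i].i = 0.0f;
    }
    kiss_fft(cfg, in, out);  // out[k] now holds the complex spectrum
    kiss_fft_free(cfg);      // the cfg is a single malloc'd block
}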
+ +/* facbuf is populated by p1,m1,p2,m2, ... +where +p[i] * m[i] = m[i-1] +m0 = n */ +static +void kf_factor(int n,int * facbuf) +{ + int p=4; + double floor_sqrt; + floor_sqrt = floor( sqrt((double)n) ); + + /*factor out powers of 4, powers of 2, then any remaining primes */ + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p > floor_sqrt) + p = n; /* no more factors, skip to end */ + } + n /= p; + *facbuf++ = p; + *facbuf++ = n; + } while (n > 1); +} + +/* +* +* User-callable function to allocate all necessary storage space for the fft. +* +* The return value is a contiguous block of memory, allocated with malloc. As such, +* It can be freed with free(), rather than a kiss_fft-specific function. +* */ +kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) +{ + kiss_fft_cfg st=NULL; + size_t memneeded = sizeof(struct kiss_fft_state) + + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/ + + if ( lenmem==NULL ) { + st = ( kiss_fft_cfg)malloc( memneeded ); + }else{ + if (*lenmem >= memneeded) + st = (kiss_fft_cfg)mem; + *lenmem = memneeded; + } + if (st) { + int i; + st->nfft=nfft; + st->inverse = inverse_fft; + + for (i=0;i<nfft;++i) { + const double pi=3.141592653589793238462643383279502884197169399375105820974944; + double phase = -2*pi*i / nfft; + if (st->inverse) + phase *= -1; + kf_cexp(st->twiddles+i, phase ); + } + + kf_factor(nfft,st->factors); + } + return st; +} + + + + +void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) +{ + if (fin == fout) { + CHECKBUF(tmpbuf,ntmpbuf,st->nfft); + kf_work(tmpbuf,fin,1,in_stride, st->factors,st); + memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); + }else{ + kf_work( fout, fin, 1,in_stride, st->factors,st ); + } +} + +void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + kiss_fft_stride(cfg,fin,fout,1); +} + diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fft.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fft.h new file mode 100755 index 0000000..1e9f023 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fft.h @@ -0,0 +1,92 @@ +#ifndef KISS_FFT_H +#define KISS_FFT_H + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + ATTENTION! + If you would like a : + -- a utility that will handle the caching of fft objects + -- real-only (no imaginary time component ) FFT + -- a multi-dimensional FFT + -- a command-line utility to perform ffts + -- a command-line utility to perform fast-convolution filtering + + Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c + in the tools/ directory. +*/ + +#ifdef FIXED_POINT +# define kiss_fft_scalar short +#else +# ifndef kiss_fft_scalar +/* default is float */ +# define kiss_fft_scalar float +# endif +#endif + +typedef struct { + kiss_fft_scalar r; + kiss_fft_scalar i; +}kiss_fft_cpx; + +typedef struct kiss_fft_state* kiss_fft_cfg; + +/* + * kiss_fft_alloc + * + * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. + * + * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL); + * + * The return value from fft_alloc is a cfg buffer used internally + * by the fft routine or NULL. + * + * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc. + * The returned value should be free()d when done to avoid memory leaks.
+ * + * The state can be placed in a user supplied buffer 'mem': + * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, + * then the function places the cfg in mem and the size used in *lenmem + * and returns mem. + * + * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), + * then the function returns NULL and places the minimum cfg + * buffer size in *lenmem. + * */ + +kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); + +/* + * kiss_fft(cfg,in_out_buf) + * + * Perform an FFT on a complex input buffer. + * for a forward FFT, + * fin should be f[0] , f[1] , ... ,f[nfft-1] + * fout will be F[0] , F[1] , ... ,F[nfft-1] + * Note that each element is complex and can be accessed like + f[k].r and f[k].i + * */ +void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +/* + A more generic version of the above function. It reads its input from every Nth sample. + * */ +void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride); + +/* If kiss_fft_alloc allocated a buffer, it is one contiguous + buffer and can be simply free()d when no longer needed*/ +#define kiss_fft_free free + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftnd.c b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftnd.c new file mode 100755 index 0000000..45be848 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftnd.c @@ -0,0 +1,173 @@ + + +/* +Copyright (c) 2003-2004, Mark Borgerding + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include "kiss_fftnd.h" +#include "_kiss_fft_guts.h" + +struct kiss_fftnd_state{ + int dimprod; /* dimsum would be mighty tasty right now */ + int ndims; + int *dims; + kiss_fft_cfg *states; /* cfg states for each dimension */ + kiss_fft_cpx * tmpbuf; /*buffer capable of hold the entire buffer */ +}; + +kiss_fftnd_cfg kiss_fftnd_alloc(int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem) +{ + kiss_fftnd_cfg st = NULL; + int i; + int dimprod=1; + size_t memneeded = sizeof(struct kiss_fftnd_state); + char * ptr; + + for (i=0;istates[i] */ + dimprod *= dims[i]; + } + memneeded += sizeof(int) * ndims;/* st->dims */ + memneeded += sizeof(void*) * ndims;/* st->states */ + memneeded += sizeof(kiss_fft_cpx) * dimprod; /* st->tmpbuf */ + + if (lenmem == NULL) {/* allocate for the caller*/ + st = (kiss_fftnd_cfg) malloc (memneeded); + } else { /* initialize supplied buffer if big enough */ + if (*lenmem >= memneeded) + st = (kiss_fftnd_cfg) mem; + *lenmem = memneeded; /*tell caller how big struct is (or would be) */ + } + if (!st) + return NULL; /*malloc failed or buffer too small */ + + st->dimprod = dimprod; + st->ndims = ndims; + ptr=(char*)(st+1); + + st->states = (kiss_fft_cfg *)ptr; + ptr += sizeof(void*) * ndims; + + st->dims = (int*)ptr; + ptr += sizeof(int) * ndims; + + st->tmpbuf = (kiss_fft_cpx*)ptr; + ptr += sizeof(kiss_fft_cpx) * dimprod; + + for (i=0;idims[i] = dims[i]; + kiss_fft_alloc (st->dims[i], inverse_fft, NULL, &len); + st->states[i] = kiss_fft_alloc (st->dims[i], inverse_fft, ptr,&len); + ptr += len; + } + return st; +} + +/* + This works by tackling one dimension at a time. + + In effect, + Each stage starts out by reshaping the matrix into a DixSi 2d matrix. + A Di-sized fft is taken of each column, transposing the matrix as it goes. + +Here's a 3-d example: +Take a 2x3x4 matrix, laid out in memory as a contiguous buffer + [ [ [ a b c d ] [ e f g h ] [ i j k l ] ] + [ [ m n o p ] [ q r s t ] [ u v w x ] ] ] + +Stage 0 ( D=2): treat the buffer as a 2x12 matrix + [ [a b ... k l] + [m n ... w x] ] + + FFT each column with size 2. + Transpose the matrix at the same time using kiss_fft_stride. + + [ [ a+m a-m ] + [ b+n b-n] + ... + [ k+w k-w ] + [ l+x l-x ] ] + + Note fft([x y]) == [x+y x-y] + +Stage 1 ( D=3) treats the buffer (the output of stage D=2) as an 3x8 matrix, + [ [ a+m a-m b+n b-n c+o c-o d+p d-p ] + [ e+q e-q f+r f-r g+s g-s h+t h-t ] + [ i+u i-u j+v j-v k+w k-w l+x l-x ] ] + + And perform FFTs (size=3) on each of the columns as above, transposing + the matrix as it goes. The output of stage 1 is + (Legend: ap = [ a+m e+q i+u ] + am = [ a-m e-q i-u ] ) + + [ [ sum(ap) fft(ap)[0] fft(ap)[1] ] + [ sum(am) fft(am)[0] fft(am)[1] ] + [ sum(bp) fft(bp)[0] fft(bp)[1] ] + [ sum(bm) fft(bm)[0] fft(bm)[1] ] + [ sum(cp) fft(cp)[0] fft(cp)[1] ] + [ sum(cm) fft(cm)[0] fft(cm)[1] ] + [ sum(dp) fft(dp)[0] fft(dp)[1] ] + [ sum(dm) fft(dm)[0] fft(dm)[1] ] ] + +Stage 2 ( D=4) treats this buffer as a 4*6 matrix, + [ [ sum(ap) fft(ap)[0] fft(ap)[1] sum(am) fft(am)[0] fft(am)[1] ] + [ sum(bp) fft(bp)[0] fft(bp)[1] sum(bm) fft(bm)[0] fft(bm)[1] ] + [ sum(cp) fft(cp)[0] fft(cp)[1] sum(cm) fft(cm)[0] fft(cm)[1] ] + [ sum(dp) fft(dp)[0] fft(dp)[1] sum(dm) fft(dm)[0] fft(dm)[1] ] ] + + Then FFTs each column, transposing as it goes. + + The resulting matrix is the 3d FFT of the 2x3x4 input matrix. + + Note as a sanity check that the first element of the final + stage's output (DC term) is + sum( [ sum(ap) sum(bp) sum(cp) sum(dp) ] ) + , i.e. the summation of all 24 input elements. 
+ + */ +void kiss_fftnd(kiss_fftnd_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + int i,k; + const kiss_fft_cpx * bufin=fin; + kiss_fft_cpx * bufout; + + /*arrange it so the last bufout == fout*/ + if ( st->ndims & 1 ) { + bufout = fout; + if (fin==fout) { + memcpy( st->tmpbuf, fin, sizeof(kiss_fft_cpx) * st->dimprod ); + bufin = st->tmpbuf; + } + }else + bufout = st->tmpbuf; + + for ( k=0; k < st->ndims; ++k) { + int curdim = st->dims[k]; + int stride = st->dimprod / curdim; + + for ( i=0 ; istates[k], bufin+i , bufout+i*curdim, stride ); + + /*toggle back and forth between the two buffers*/ + if (bufout == st->tmpbuf){ + bufout = fout; + bufin = st->tmpbuf; + }else{ + bufout = st->tmpbuf; + bufin = fout; + } + } +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftnd.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftnd.h new file mode 100755 index 0000000..1404674 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftnd.h @@ -0,0 +1,18 @@ +#ifndef KISS_FFTND_H +#define KISS_FFTND_H + +#include "kiss_fft.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct kiss_fftnd_state * kiss_fftnd_cfg; + +kiss_fftnd_cfg kiss_fftnd_alloc(int *dims,int ndims,int inverse_fft,void*mem,size_t*lenmem); +void kiss_fftnd(kiss_fftnd_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftr.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftr.cpp new file mode 100755 index 0000000..416ede8 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftr.cpp @@ -0,0 +1,141 @@ +/* +Copyright (c) 2003-2004, Mark Borgerding + +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ +#include "kiss_fftr.h" +#include "_kiss_fft_guts.h" + +struct kiss_fftr_state{ + kiss_fft_cfg substate; + kiss_fft_cpx * tmpbuf; + kiss_fft_cpx * super_twiddles; +}; + +kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem) +{ + int i; + kiss_fftr_cfg st = NULL; + size_t subsize, memneeded; + + if (nfft & 1) { + // fprintf(stderr,"Real FFT optimization must be even.\n"); + return NULL; + } + nfft >>= 1; + + kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize); + memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2); + + if (lenmem == NULL) { + st = (kiss_fftr_cfg) malloc (memneeded); + } else { + if (*lenmem >= memneeded) + st = (kiss_fftr_cfg) mem; + *lenmem = memneeded; + } + if (!st) + return NULL; + + st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */ + st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); + st->super_twiddles = st->tmpbuf + nfft; + kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); + + for (i = 0; i < nfft; ++i) { + double phase = -3.14159265358979323846264338327 * ((double) i / nfft + .5); + if (inverse_fft) + phase *= -1; + kf_cexp (st->super_twiddles+i,phase); + } + return st; +} + +void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) +{ + /* input buffer timedata is stored row-wise */ + int k,N; + kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; + + if ( st->substate->inverse) { + // fprintf(stderr,"kiss fft usage error: improper alloc\n"); + return; + } + + N = st->substate->nfft; + + /*perform the parallel fft of two real signals packed in real,imag*/ + kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); + + tdc.r = st->tmpbuf[0].r; + tdc.i = st->tmpbuf[0].i; + C_FIXDIV(tdc,2); + + CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); + freqdata[0].r = tdc.r + tdc.i; + freqdata[0].i = 0; + + for (k=1;k <= N/2 ; ++k ) { + + fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[N-k].r; + fpnk.i = -st->tmpbuf[N-k].i; + C_FIXDIV(fpk,2); + C_FIXDIV(fpnk,2); + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + C_MUL( tw , f2k , st->super_twiddles[k]); + + C_ADD( freqdata[k] , f1k ,tw); + freqdata[k].r = (f1k.r + tw.r) / 2; + freqdata[k].i = (f1k.i + tw.i) / 2; + + freqdata[N-k].r = (f1k.r - tw.r)/2; + freqdata[N-k].i = - (f1k.i - tw.i)/2; + } + CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); + freqdata[N].r = tdc.r - tdc.i; + freqdata[N].i = 0; +} + +void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) +{ + /* input buffer timedata is stored row-wise */ + int k, N; + + if (st->substate->inverse == 0) { + // fprintf (stderr, "kiss fft usage error: improper alloc\n"); + return; + } + + N = st->substate->nfft; + + st->tmpbuf[0].r = freqdata[0].r + freqdata[N].r; + st->tmpbuf[0].i = freqdata[0].r - freqdata[N].r; + + for (k = 1; k <= N / 2; ++k) { + kiss_fft_cpx fk, fnkc, fek, fok, tmpbuf; + fk = freqdata[k]; + fnkc.r = freqdata[N - k].r; + fnkc.i = -freqdata[N - k].i; + C_FIXDIV( fk , 2 ); + C_FIXDIV( fnkc , 2 ); + + C_ADD (fek, fk, fnkc); + C_SUB (tmpbuf, fk, fnkc); + C_MUL (fok, tmpbuf, st->super_twiddles[k]); + C_ADD (st->tmpbuf[k], fek, fok); + C_SUB (st->tmpbuf[N - k], fek, fok); + st->tmpbuf[N - k].i *= -1; + } + kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftr.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftr.h new file mode 100755 index 
0000000..0d50858 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/ref/kiss_fft/kiss_fftr.h @@ -0,0 +1,46 @@ +#ifndef KISS_FTR_H +#define KISS_FTR_H + +#include "kiss_fft.h" +#ifdef __cplusplus +extern "C" { +#endif + + +/* + + Real optimized version can save about 45% cpu time vs. complex fft of a real seq. + + + + */ + +typedef struct kiss_fftr_state *kiss_fftr_cfg; + + +kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem); +/* + nfft must be even + + If you don't care to allocate space, use mem = lenmem = NULL +*/ + + +void kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); +/* + input timedata has nfft scalar points + output freqdata has nfft/2+1 complex points +*/ + +void kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); +/* + input freqdata has nfft/2+1 complex points + output timedata has nfft scalar points +*/ + +#define kiss_fftr_free free + +#ifdef __cplusplus +} +#endif +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/isupersound_convolution.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/isupersound_convolution.h new file mode 100755 index 0000000..45bdec1 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/isupersound_convolution.h @@ -0,0 +1,67 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//封装卷积,目前有快速卷积和定义卷积做法 + +#ifndef __I_SUPERSOUND_CONVOLUTION_H__ +#define __I_SUPERSOUND_CONVOLUTION_H__ + +#include + +namespace SUPERSOUND +{ + + +class ISuperSoundConvolution +{ +public: + ISuperSoundConvolution() { } + virtual ~ISuperSoundConvolution() { } + +public: + //清空所有缓存数据 + virtual void Flush() = 0; + //这个东西该实例只让调用一次,因为多次调用,还不如让外面重新弄个实例 + //设置处理帧长和交叠 0 的长度 + //也就是说 FFT 的长度为这两个长度的总和,FFT 的帧移为 frame_len + virtual int32_t SetFrameLenZeroLen(int32_t frame_len, int32_t zero_len) = 0; + //设置、更新脉冲响应 + virtual int32_t SetImpulseResponse(float * imres, int32_t len) = 0; + //处理,每次处理一,外面保证输入长度为一长,输出的长度放在 out_num 中 + virtual int32_t ProcessFrame(float * buf, int32_t & out_num) = 0; + //获取延迟单元长度 + virtual int32_t GetLatecy() = 0; +}; + + +} + +#endif /* __I_SUPERSOUND_CONVOLUTION_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_fast_convolution.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_fast_convolution.cpp new file mode 100755 index 0000000..47dd5e6 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_fast_convolution.cpp @@ -0,0 +1,286 @@ + +#include "supersound_fast_convolution.h" +#include "supersound_err.h" +#include "fft/supersound_kiss_fft.h" +#include "supersound_common.h" +#include +#include + +namespace SUPERSOUND +{ + + +SuperSoundFastConvolution::SuperSoundFastConvolution() +{ + m_frame_len = 0; + m_frame_step = 0; + m_zero_len = 0; + m_imres_len = 0; + m_kiss_fft = NULL; + m_filter_num = 0; + m_filter_buf_array = NULL; + m_mix_buf_array = NULL; + m_mix_buf_idx = 0; + m_history_buf = NULL; + m_process_buf = NULL; + m_b_reduce_delay = false; // 一开始不需要考虑延迟 +} + +SuperSoundFastConvolution::~SuperSoundFastConvolution() +{ + DestoryAll(); +} + +void SuperSoundFastConvolution::DestoryAll() +{ + SAFE_DELETE_PTR(m_history_buf); + SAFE_DELETE_OBJ(m_kiss_fft); + SAFE_DELETE_PTR(m_process_buf); + DestoryFilterBuf(); + DestoryMixBuf(); +} + +void SuperSoundFastConvolution::DestoryFilterBuf() +{ + if(m_filter_buf_array) + { + for(int32_t i = 0; i < m_filter_num; i++) + { + SAFE_DELETE_PTR(m_filter_buf_array[i]) + } + SAFE_DELETE_PTR(m_filter_buf_array); + } +} + +void SuperSoundFastConvolution::DestoryMixBuf() +{ + if(m_mix_buf_array) + { + for(int32_t i = 0; i < m_filter_num; i++) + { + SAFE_DELETE_PTR(m_mix_buf_array[i]); + } + SAFE_DELETE_PTR(m_mix_buf_array); + } +} + +int32_t SuperSoundFastConvolution::SetFrameLenZeroLen( int32_t frame_len, int32_t zero_len ) +{ + int nRet = ERROR_SUPERSOUND_SUCCESS; + + //更新帧长和帧移 + m_frame_step = frame_len; + m_zero_len = zero_len; + m_frame_len = frame_len + zero_len; + + m_kiss_fft = new(std::nothrow) SuperSoundKissFFT(); + if(m_kiss_fft == NULL) + return ERROR_SUPERSOUND_MEMORY; + + m_history_buf = new(std::nothrow) float[m_frame_step]; + if(m_history_buf == NULL) + return ERROR_SUPERSOUND_MEMORY; + memset(m_history_buf, 0, sizeof(float) * m_frame_step); + + m_process_buf = new(std::nothrow) float[m_frame_len]; + if(m_process_buf == NULL) + return ERROR_SUPERSOUND_MEMORY; + + nRet = m_kiss_fft->Init(m_frame_len); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + return nRet; +} + +int32_t 
SuperSoundFastConvolution::SetImpulseResponse( float * imres, int32_t len ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + //更新脉冲响应长度 + m_imres_len = len; + + nRet = AllocFilterAndMixBuf(); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + nRet = CalFilterBuf(imres); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + return nRet; +} + +int32_t SuperSoundFastConvolution::ProcessFrame( float * buf, int32_t & out_num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + memcpy(m_process_buf, buf, sizeof(float) * m_frame_step); + memset(m_process_buf + m_frame_step, 0, sizeof(float) * m_zero_len); + + //先对该帧做 FFT 变换 + nRet = m_kiss_fft->FFT(m_process_buf); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + //相乘混合 + CalMixBuf(m_process_buf); + + //最后 IFFT 变换 + nRet = m_kiss_fft->IFFT(m_mix_buf_array[m_mix_buf_idx]); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + //和上一次的输出进行交叠相加获取输出 + for(int32_t i = 0; i < m_frame_step; i++) + { + buf[i] = (m_mix_buf_array[m_mix_buf_idx][i] + m_history_buf[i]) / m_frame_len; + } + + //重置历史帧数据 + int32_t copynum = MIN(m_frame_step, m_zero_len); + memcpy(m_history_buf, m_mix_buf_array[m_mix_buf_idx] + m_frame_step, sizeof(float) * copynum); + memset(m_history_buf + copynum, 0, sizeof(float) * MAX(0, m_frame_step - copynum)); + + //清空输出混合帧 + memset(m_mix_buf_array[m_mix_buf_idx], 0, sizeof(float) * m_frame_len); + m_mix_buf_idx = (m_mix_buf_idx + 1) % m_filter_num; + + //计算应该输出的长度 + if(m_b_reduce_delay) + { + out_num = m_frame_step - m_frame_step / 2; + memmove(buf, buf + m_frame_step / 2, sizeof(float) * out_num); + m_b_reduce_delay = false; + } + else + { + out_num = m_frame_step; + } + + return nRet; +} + +void SuperSoundFastConvolution::Flush() +{ + m_b_reduce_delay = false; // 默认完整输出 + + if(m_history_buf) + { + memset(m_history_buf, 0, sizeof(float) * m_frame_step); + } + + if(m_mix_buf_array) + { + for(int32_t i = 0; i < m_filter_num; i++) + { + memset(m_mix_buf_array[i], 0, sizeof(float) * m_frame_len); + } + } + m_mix_buf_idx = 0; +} + +int32_t SuperSoundFastConvolution::GetLatecy() +{ + // 默认不需要考虑延迟 + return 0; +// return m_frame_step / 2; +} + +int32_t SuperSoundFastConvolution::AllocFilterAndMixBuf() +{ + //向上取整 + int32_t filter_num = (m_imres_len + m_frame_step - 1) / m_frame_step; + + if(filter_num == m_filter_num) + return ERROR_SUPERSOUND_SUCCESS; + + //将老的 Filter 内存释放掉 + DestoryFilterBuf(); + + //将老的 Mix 内存释放掉 + DestoryMixBuf(); + + m_filter_num = filter_num; + + m_filter_buf_array = new(std::nothrow) float *[m_filter_num]; + if(m_filter_buf_array == NULL) + return ERROR_SUPERSOUND_MEMORY; + + for(int32_t i = 0; i < m_filter_num; i++) + { + m_filter_buf_array[i] = new(std::nothrow) float[m_frame_len]; + if(m_filter_buf_array[i] == NULL) + return ERROR_SUPERSOUND_MEMORY; + } + + + + m_mix_buf_array = new(std::nothrow) float *[m_filter_num]; + if(m_mix_buf_array == NULL) + return ERROR_SUPERSOUND_MEMORY; + + for(int32_t i = 0; i < m_filter_num; i++) + { + m_mix_buf_array[i] = new(std::nothrow) float[m_frame_len]; + if(m_mix_buf_array[i] == NULL) + return ERROR_SUPERSOUND_MEMORY; + memset(m_mix_buf_array[i], 0, sizeof(float) * m_frame_len); + } + + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t SuperSoundFastConvolution::CalFilterBuf( float * imres ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + int32_t lastframe = m_filter_num - 1; + + //考虑下这里需不需要进行增益先 + + for(int32_t i = 0; i < lastframe; i++) + { + memcpy(m_filter_buf_array[i], imres + i * m_frame_step, sizeof(float) * m_frame_step); + memset(m_filter_buf_array[i] 
+ m_frame_step, 0, sizeof(float) * m_zero_len); + nRet = m_kiss_fft->FFT(m_filter_buf_array[i]); + if(nRet != ERROR_SUPERSOUND_SUCCESS) + return nRet; + } + + //最后一帧不够的话,进行补 0 操作 + int32_t copylen = m_imres_len - lastframe * m_frame_step; + memcpy(m_filter_buf_array[lastframe], imres + lastframe * m_frame_step, sizeof(float) * copylen); + memset(m_filter_buf_array[lastframe] + copylen, 0, sizeof(float) * (m_frame_len - copylen)); + nRet = m_kiss_fft->FFT(m_filter_buf_array[lastframe]); + if(nRet != ERROR_SUPERSOUND_SUCCESS) + return nRet; + + return nRet; +} + +void SuperSoundFastConvolution::CalMixBuf( float * freq ) +{ + //指示当前混合的位置 + int32_t idx; + + for(int32_t i = 0; i < m_filter_num; i++) + { + idx = (m_mix_buf_idx + i) % m_filter_num; + + //将两个特殊的点先进行计算 + m_mix_buf_array[idx][0] += freq[0] * m_filter_buf_array[i][0]; + m_mix_buf_array[idx][1] += freq[1] * m_filter_buf_array[i][1]; + + for(int32_t j = 2; j < m_frame_len; j += 2) + { + //两个复数相乘 + m_mix_buf_array[idx][j] += freq[j] * m_filter_buf_array[i][j] - + freq[j + 1] * m_filter_buf_array[i][j + 1]; + m_mix_buf_array[idx][j + 1] += freq[j + 1] * m_filter_buf_array[i][j] + + freq[j] * m_filter_buf_array[i][j + 1]; + } + } +} + + +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_fast_convolution.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_fast_convolution.h new file mode 100755 index 0000000..7532c6f --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_fast_convolution.h @@ -0,0 +1,93 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//实现快速卷积,分段卷积 + +#ifndef __SUPERSOUND_FAST_CONVOLUTION_H__ +#define __SUPERSOUND_FAST_CONVOLUTION_H__ + +#include "isupersound_convolution.h" +#include "fft/isupersound_fft.h" + +namespace SUPERSOUND +{ + + +class SuperSoundFastConvolution : public ISuperSoundConvolution +{ +public: + SuperSoundFastConvolution(); + virtual ~SuperSoundFastConvolution(); + +public: + virtual void Flush(); + virtual int32_t SetFrameLenZeroLen(int32_t frame_len, int32_t zero_len); + virtual int32_t SetImpulseResponse(float * imres, int32_t len); + virtual int32_t ProcessFrame(float * buf, int32_t & out_num); + virtual int32_t GetLatecy(); + +private: + int32_t AllocFilterAndMixBuf(); + int32_t CalFilterBuf(float * imres); + void DestoryFilterBuf(); + void DestoryMixBuf(); + void DestoryAll(); + void CalMixBuf(float * freq); + +private: + //类内部的实际帧长,为 m_frame_len = m_frame_step + m_zero_len + int32_t m_frame_len; + //类内部实际为帧移,对外部而言是帧长,每帧的数据长度 + int32_t m_frame_step; + //补 0 的长度,交叠的长度 + int32_t m_zero_len; + int32_t m_imres_len; + ISuperSoundFFT * m_kiss_fft; + //将 im 分成这么多段,降低延迟 + int32_t m_filter_num; + //每个段对应的频谱 + float ** m_filter_buf_array; + //每段的混合频谱 + float ** m_mix_buf_array; + //使用循环的方式来找到当前的输出 mix_buf 位置 + int32_t m_mix_buf_idx; + //历史交叠部分 + float * m_history_buf; + //对输入的临时处理的 buf + float * m_process_buf; + //是否需要考虑延迟 + bool m_b_reduce_delay; +}; + + +} + +#endif /* __SUPERSOUND_FAST_CONVOLUTION_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_simple_convolution.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_simple_convolution.cpp new file mode 100644 index 0000000..673de12 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_simple_convolution.cpp @@ -0,0 +1,202 @@ +// +// Created by yangjianli on 2019-11-19. 
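Both the fast convolution above and the simple (time-segmented) variant that follows implement the ISuperSoundConvolution interface shown earlier, so callers drive them identically: fix the frame/zero-padding split once, install the impulse response, then push one frame at a time. Below is a hedged sketch against the fast (FFT overlap-add) variant with illustrative sizes; for example, frame_len = 1024 and a 48000-sample response give ceil(48000/1024) = 47 cached segment spectra.

#include "supersound_fast_convolution.h"
#include "supersound_err.h"

// Hypothetical driver; all sizes are illustrative.
int32_t convolve_frame(float *ir, int32_t ir_len, float *frame /* 1024 samples */)
{
    SUPERSOUND::SuperSoundFastConvolution conv;
    int32_t ret = conv.SetFrameLenZeroLen(1024 /* frame_len */, 1024 /* zero_len */);
    if (ret != ERROR_SUPERSOUND_SUCCESS)
        return ret;
    ret = conv.SetImpulseResponse(ir, ir_len);  // segment spectra are cached here
    if (ret != ERROR_SUPERSOUND_SUCCESS)
        return ret;
    int32_t out_num = 0;
    // frame is processed in place; out_num reports how many samples are valid
    return conv.ProcessFrame(frame, out_num);
}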
+// + +#include "supersound_simple_convolution.h" +#include "string" +#include "supersound_err.h" +#include "supersound_common.h" +namespace SUPERSOUND { + SupersoundSimpleConvolution::SupersoundSimpleConvolution() { + m_kiss_fft = NULL; + // 历史数据 + m_history_buf = NULL; + // 分段数据 + m_segmentation_buf = NULL; + // 脉冲响应的频谱 + m_filter_buf_array = NULL; + // 输入数据 + m_frame_buf = NULL; + // 脉冲响应分段数量 + m_filter_num = 0; + // 一帧长度 + m_frame_len = 0; + // 输入长度 + m_input_len = 0; + //补零长度 + m_zero_len = 0; + // history当前的下标 + m_history_idx = 0; + // 历史buf长度 + m_history_len = 0; + } + + SupersoundSimpleConvolution::~SupersoundSimpleConvolution() { + DestoryAll(); + } + +// 分段卷积 + void SupersoundSimpleConvolution::CalcSegmentation(float *input) { + memset(m_segmentation_buf, 0, sizeof(float) * m_history_len); + memset(m_frame_buf, 0, sizeof(float) * m_frame_len); + memcpy(m_frame_buf, input, sizeof(float) * m_input_len); + + // 获取频谱 + m_kiss_fft->FFT(m_frame_buf); + // 分段乘法 + for (int i = 0; i < m_filter_num; i++) { + //将两个特殊的点先进行计算 + m_tmp_buf[0] = m_frame_buf[0] * m_filter_buf_array[i][0]; + m_tmp_buf[1] = m_frame_buf[1] * m_filter_buf_array[i][1]; + + for (int j = 2; j < m_frame_len; j+=2) { + //两个复数相乘 + m_tmp_buf[j] = m_frame_buf[j] * m_filter_buf_array[i][j] - + m_frame_buf[j + 1] * m_filter_buf_array[i][j + 1]; + m_tmp_buf[j + 1] = m_frame_buf[j + 1] * m_filter_buf_array[i][j] + + m_frame_buf[j] * m_filter_buf_array[i][j + 1]; + } + + // ifft + for (int j = 0; j < m_frame_len; j++) { + m_tmp_buf[j] /= m_frame_len; + } + + m_kiss_fft->IFFT(m_tmp_buf); + + // 叠加 + for (int j = 0; j < m_frame_len; j++) { + m_segmentation_buf[i * (m_zero_len + 1) + j] += m_tmp_buf[j]; + } + } + } + +// 主处理函数 + int32_t SupersoundSimpleConvolution::ProcessFrame(float *buf, int32_t &out_num) { + CalcSegmentation(buf); + //外部叠加 + for (int i = 0; i < m_history_len; i++) { + int now_index = (m_history_idx + i) % m_history_len; + m_history_buf[now_index] += m_segmentation_buf[i]; + } + // 将数据输出 + for (int i = 0; i < m_input_len; i++) { + buf[i] = m_history_buf[m_history_idx]; + + // 恢复设置 + m_history_buf[m_history_idx] = 0; + m_history_idx = (m_history_idx + 1) % m_history_len; + } + out_num = m_input_len; + return ERROR_SUPERSOUND_SUCCESS; + } + +// 设置参数 + int32_t SupersoundSimpleConvolution::SetFrameLenZeroLen(int32_t frame_len, int32_t zero_len) { + // 外部每次输入的数据长度 + m_input_len = frame_len; + // 补零长度,用来达到和线性卷积相同的长度输出 + m_zero_len = zero_len; + // 做fft变换的长度 + m_frame_len = m_input_len + m_zero_len; + + // 长度必须是偶数 + if (m_frame_len & 1) { + return ERROR_SUPERSOUND_PARAM; + } + + // 创建补零后的数据长度 + m_frame_buf = new(std::nothrow) float[m_frame_len]; + if (NULL == m_frame_buf) { + return ERROR_SUPERSOUND_MEMORY; + } + m_tmp_buf = new(std::nothrow) float[m_frame_len]; + if (NULL == m_tmp_buf) { + return ERROR_SUPERSOUND_MEMORY; + } + // 初始化fft + m_kiss_fft = new (std::nothrow) SuperSoundKissFFT(); + if (NULL == m_kiss_fft) { + return ERROR_SUPERSOUND_MEMORY; + } + m_kiss_fft->Init(m_frame_len); + return ERROR_SUPERSOUND_SUCCESS; + } + + int32_t SupersoundSimpleConvolution::SetImpulseResponse(float *imres, int32_t len) { + int n_hlen = (m_zero_len + 1); + m_history_idx = 0; + m_filter_num = len / n_hlen; + int tp = len % n_hlen; + if (tp > 0) { + m_filter_num += 1; + } + m_history_len = n_hlen * m_filter_num + m_input_len - 1; + + // 历史数据 + m_history_buf = new(std::nothrow) float[m_history_len]; + if (NULL == m_history_buf) { + return ERROR_SUPERSOUND_MEMORY; + } + memset(m_history_buf, 0, sizeof(float) * m_history_len); + + // 
Segmented convolution of the input with the impulse-response blocks
+        m_segmentation_buf = new(std::nothrow) float[m_history_len];
+        if (NULL == m_segmentation_buf) {
+            return ERROR_SUPERSOUND_MEMORY;
+        }
+        memset(m_segmentation_buf, 0, sizeof(float) * m_history_len);
+
+        // Spectra of the impulse-response segments
+        m_filter_buf_array = new(std::nothrow) float *[m_filter_num];
+        if (m_filter_buf_array == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+
+        for (int32_t i = 0; i < m_filter_num; i++) {
+            m_filter_buf_array[i] = new(std::nothrow) float[m_frame_len];
+            if (m_filter_buf_array[i] == NULL)
+                return ERROR_SUPERSOUND_MEMORY;
+        }
+
+        // Compute the spectrum of each impulse-response segment
+        int n_step = n_hlen;
+        for (int i = 0; i < m_filter_num; i++) {
+            // the last segment may be shorter than a full block
+            if (len - n_step * i < n_step)
+            {
+                n_step = len - n_hlen * i;
+            }
+            memset(m_filter_buf_array[i], 0, sizeof(float) * m_frame_len);
+            memcpy(m_filter_buf_array[i], imres + (i * n_hlen), sizeof(float) * n_step);
+
+            // compute the spectrum
+            m_kiss_fft->FFT(m_filter_buf_array[i]);
+        }
+        return ERROR_SUPERSOUND_SUCCESS;
+    }
+
+    int32_t SupersoundSimpleConvolution::GetLatecy() {
+        return 0;
+    }
+
+    // Reset internal state
+    void SupersoundSimpleConvolution::Flush() {
+        m_history_idx = 0;
+        memset(m_history_buf, 0, sizeof(float) * m_history_len);
+    }
+
+    // Free all allocations
+    void SupersoundSimpleConvolution::DestoryAll() {
+        SAFE_DELETE_PTR(m_history_buf);
+        SAFE_DELETE_PTR(m_frame_buf);
+        SAFE_DELETE_PTR(m_tmp_buf);
+        SAFE_DELETE_PTR(m_segmentation_buf);
+        SAFE_DELETE_OBJ(m_kiss_fft);
+
+        if (m_filter_buf_array) {
+            for (int32_t i = 0; i < m_filter_num; i++) {
+                SAFE_DELETE_PTR(m_filter_buf_array[i]);
+            }
+            SAFE_DELETE_PTR(m_filter_buf_array);
+        }
+    }
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_simple_convolution.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_simple_convolution.h
new file mode 100644
index 0000000..7ace608
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/convolution/supersound_simple_convolution.h
@@ -0,0 +1,62 @@
+//
+// Created by yangjianli on 2019-11-19.
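The pattern in SupersoundSimpleConvolution::ProcessFrame above is partitioned convolution with overlap-add: the impulse response is cut into m_filter_num blocks of (m_zero_len + 1) samples, each input frame's spectrum is multiplied against every block spectrum (applying the missing 1/N normalization by hand, since the kiss_fftri-based IFFT is unscaled), and the per-block results are accumulated into a circular history buffer from which exactly m_input_len finished samples are emitted per call. A stand-alone sketch of that accumulator (hypothetical names; assumes ring_len >= result_len and a zero-initialized acc):

// Circular overlap-add accumulator: add one full convolution result per call,
// emit block_len finished samples, and zero each slot as it is consumed so the
// tail of the current frame folds into the frames that follow.
struct OverlapAddRing
{
    float * acc;   // circular accumulator, ring_len floats
    int ring_len;
    int read_idx;  // next finished sample to emit

    void Process(const float * conv_result, int result_len, float * out, int block_len)
    {
        for (int i = 0; i < result_len; i++)
            acc[(read_idx + i) % ring_len] += conv_result[i];

        for (int i = 0; i < block_len; i++)
        {
            out[i] = acc[read_idx];
            acc[read_idx] = 0.0f;  // slot is free for future accumulation
            read_idx = (read_idx + 1) % ring_len;
        }
    }
};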
+// + +#ifndef IMPLUSEEFFECLIBS_SUPERSOUND_SIMPLE_CONVOLUTION_H +#define IMPLUSEEFFECLIBS_SUPERSOUND_SIMPLE_CONVOLUTION_H + +#include "fft/isupersound_fft.h" +#include "fft/supersound_kiss_fft.h" +#include "isupersound_convolution.h" + +namespace SUPERSOUND { + + class SupersoundSimpleConvolution : public ISuperSoundConvolution { + public: + SupersoundSimpleConvolution(); + + virtual ~SupersoundSimpleConvolution(); + + public: + virtual void Flush(); + + virtual int32_t SetFrameLenZeroLen(int32_t frame_len, int32_t zero_len); + + virtual int32_t SetImpulseResponse(float *imres, int32_t len); + + virtual int32_t ProcessFrame(float *buf, int32_t &out_num); + + virtual int32_t GetLatecy(); + + private: + // 历史数据 + float *m_history_buf; + // 分段数据 + float *m_segmentation_buf; + // 脉冲响应的频谱 + float ** m_filter_buf_array; + // 输入数据 + float *m_frame_buf; + // 临时数据 + float *m_tmp_buf; + // 脉冲响应分段数量 + int m_filter_num; + // 一帧长度 + int m_frame_len; + // 输入长度 + int m_input_len; + //补零长度 + int m_zero_len; + // history当前的下标 + int m_history_idx; + // 历史buf总长度 + int m_history_len; + SUPERSOUND::SuperSoundKissFFT *m_kiss_fft; + private: + void CalcSegmentation(float *input); + + void DestoryAll(); + }; + +} +#endif //IMPLUSEEFFECLIBS_SUPERSOUND_SIMPLE_CONVOLUTION_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/fft/isupersound_fft.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/fft/isupersound_fft.h new file mode 100755 index 0000000..e5b5d09 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/fft/isupersound_fft.h @@ -0,0 +1,66 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//封装 fft 接口 + +#ifndef __I_SUPERSOUND_FFT_H__ +#define __I_SUPERSOUND_FFT_H__ + +#include + +namespace SUPERSOUND +{ + + +//利用对称性将长度进行复数部分的长度缩放成实数长度 +class ISuperSoundFFT +{ +public: + ISuperSoundFFT() { }; + virtual ~ISuperSoundFFT() { }; + +public: + //初始化,传入进行FFT的长度 + virtual int32_t Init(int32_t size) = 0; + //外围保证输入输出的长度,长度为原始设置的 size + virtual int32_t FFT(float * in_out) = 0; + //外围保证输入输出的长度,长度为原始设置的 size + virtual int32_t IFFT(float * in_out) = 0; + //外围保证输入输出的长度,输入长度为原始设置的 size,输出为 2 * size + virtual int32_t FFT(float * in, float * out) = 0; + //外围保证输入输出的长度,输入长度为 2 * size,输出长度为 size + virtual int32_t IFFT(float * in, float * out) = 0; +}; + + +} + +#endif /* __I_SUPERSOUND_FFT_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/fft/supersound_kiss_fft.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/fft/supersound_kiss_fft.cpp new file mode 100755 index 0000000..f5d8647 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/fft/supersound_kiss_fft.cpp @@ -0,0 +1,122 @@ + +#include "supersound_kiss_fft.h" +#include "supersound_err.h" + +namespace SUPERSOUND +{ + + +SuperSoundKissFFT::SuperSoundKissFFT() +{ + m_fft = NULL; + m_ifft = NULL; + m_freq = NULL; + m_len = 0; +} + +SuperSoundKissFFT::~SuperSoundKissFFT() +{ + Destory(); +} + +int32_t SuperSoundKissFFT::Init( int32_t size ) +{ + m_fft = kiss_fftr_alloc(size, 0, NULL, NULL); + if(m_fft == NULL) + return ERROR_SUPERSOUND_MEMORY; + + m_ifft = kiss_fftr_alloc(size, 1, NULL, NULL); + if(m_ifft == NULL) + return ERROR_SUPERSOUND_MEMORY; + + m_freq = (kiss_fft_cpx *)malloc(sizeof(kiss_fft_cpx) * (size / 2 + 1)); + if(m_freq == NULL) + return ERROR_SUPERSOUND_MEMORY; + + m_len = size; + + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t SuperSoundKissFFT::FFT( float * in_out ) +{ + kiss_fftr(m_fft, in_out, m_freq); + + in_out[0] = m_freq[0].r; + in_out[1] = m_freq[m_len / 2].r; + + for(int32_t i = 2, j = 1; i < m_len; i += 2, j++) + { + in_out[i] = m_freq[j].r; + in_out[i + 1] = m_freq[j].i; + } + + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t SuperSoundKissFFT::FFT( float * in, float * out ) +{ + kiss_fftr(m_fft, in, m_freq); + + for(int32_t i = 0, j = 0; i < m_len; i += 2, j++) + { + out[i] = m_freq[j].r; + out[i + 1] = m_freq[j].i; + } + + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t SuperSoundKissFFT::IFFT( float * in_out ) +{ + m_freq[0].r = in_out[0]; + m_freq[0].i = 0; + m_freq[m_len / 2].r = in_out[1]; + m_freq[m_len / 2].i = 0; + + for(int32_t i = 2, j = 1; i < m_len; i += 2, j++) + { + m_freq[j].r = in_out[i]; + m_freq[j].i = in_out[i + 1]; + } + + kiss_fftri(m_ifft, m_freq, in_out); + + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t SuperSoundKissFFT::IFFT( float * in, float * out ) +{ + for(int32_t i = 0, j = 0; i < m_len; i += 2, j++) + { + m_freq[j].r = in[i]; + m_freq[j].i = in[i + 1]; + } + + kiss_fftri(m_ifft, m_freq, out); + + return ERROR_SUPERSOUND_SUCCESS; +} + +void SuperSoundKissFFT::Destory() +{ + if(m_fft) + { + kiss_fftr_free(m_fft); + m_fft = NULL; + } + if(m_ifft) + { + kiss_fftr_free(m_ifft); + m_ifft = NULL; + } + if(m_freq) + { + free(m_freq); + m_freq = NULL; + } + m_len = 0; +} + + +} \ No newline at end of file diff --git 
a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/fft/supersound_kiss_fft.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/fft/supersound_kiss_fft.h new file mode 100755 index 0000000..328a3b4 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/fft/supersound_kiss_fft.h @@ -0,0 +1,74 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//对 kissfft 进行调用封装 + +#ifndef __SUPERSOUND_KISS_FFT_H__ +#define __SUPERSOUND_KISS_FFT_H__ + +#include "kiss_fft/kiss_fftr.h" +#include "isupersound_fft.h" + +namespace SUPERSOUND +{ + + +class SuperSoundKissFFT : public ISuperSoundFFT +{ +public: + SuperSoundKissFFT(); + virtual ~SuperSoundKissFFT(); + +public: + virtual int32_t Init(int32_t size); + virtual int32_t FFT(float * in_out); + virtual int32_t IFFT(float * in_out); + virtual int32_t FFT(float * in, float * out); + virtual int32_t IFFT(float * in, float * out); + +private: + void Destory(); + +private: + //进行 fft 的实例 + kiss_fftr_cfg m_fft; + //进行 ifft 的实例 + kiss_fftr_cfg m_ifft; + //频域中间结果,包含了全结果 + kiss_fft_cpx * m_freq; + // fft 的计算长度 + int32_t m_len; +}; + + +} + +#endif /* __SUPERSOUND_KISS_FFT_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/supersound_common.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/supersound_common.h new file mode 100755 index 0000000..1e6d858 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/supersound_common.h @@ -0,0 +1,91 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __SUPERSOUND_COMMON_H__ +#define __SUPERSOUND_COMMON_H__ +#include +namespace SUPERSOUND +{ + + +//安全删除指针 +#ifndef SAFE_DELETE_PTR +#define SAFE_DELETE_PTR(a) \ +{ \ + if(a) \ + { \ + delete [] a; \ + a = NULL; \ + } \ +} +#endif /* SAFE_DELETE_PTR */ + +#ifndef SAFE_DELETE_OBJ +#define SAFE_DELETE_OBJ(a) \ +{ \ + if(a) \ + { \ + delete a; \ + a = NULL; \ + } \ +} +#endif /* SAFE_DELETE_OBJ */ + +#ifndef MIN +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif /* MIN */ + +#ifndef MAX +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) +#endif /* MAX */ + +//将 a 规范到 [b, c] 之间 +#ifndef MIDDLE +#define MIDDLE(a, b, c) (MIN(c, MAX(a, b))) +#endif /* MIDDLE */ + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif /* M_PI */ + +#ifndef FLOAT_EQUAL +#define FLOAT_EQUAL(a, b) (fabs((a) - (b)) <= 1E-5) +#endif /* FLOAT_EQUAL */ + +//必须是 2 的幂次,这样方便计算一些 +#define SUPERSOUND_WAV_BUF_STEP_LEN 1024 +#define SUPERSOUND_DEFAULT_FFT_LEN SUPERSOUND_WAV_BUF_STEP_LEN +#define SUPERSOUND_CHANNEL_PROC_LEN SUPERSOUND_DEFAULT_FFT_LEN + + +} + +#endif /* __SUPERSOUND_COMMON_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/supersound_wav_buf.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/supersound_wav_buf.cpp new file mode 100755 index 0000000..9d8a974 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/supersound_wav_buf.cpp @@ -0,0 +1,484 @@ + +#include "supersound_err.h" +#include "supersound_wav_buf.h" +#include "supersound_common.h" +#include +#include + +namespace SUPERSOUND +{ + + +SuperSoundWavBuf::SuperSoundWavBuf() +{ + m_nChannels = 0; + m_nStart = 0; + m_nEnd = 0; + m_nSize = 0; + + memset(m_ppBuf, 0, sizeof(m_ppBuf)); +} + +SuperSoundWavBuf::~SuperSoundWavBuf() +{ + Destory(); +} + +void SuperSoundWavBuf::Short2Float( short * src, float * dst, int32_t num ) +{ + for(int32_t i = 0; i < num; i++) + { + Short2Float(src[i], dst[i]); + } +} + +void SuperSoundWavBuf::Short2Float( short in, float & out ) +{ + out = in * 1.0f / 32768; +} + +void SuperSoundWavBuf::Float2Short( float * src, short * dst, int32_t num ) +{ + for(int32_t i = 0; i < num; i++) + { + Float2Short(src[i], dst[i]); + } +} + +void SuperSoundWavBuf::Float2Short( float in, short & out ) +{ + float tmp; + tmp = in * 32767; + + if(tmp > 0) + out = short(tmp + 0.5); + else + out = short(tmp - 0.5); +} + +int32_t SuperSoundWavBuf::SetChannels( int32_t channels ) +{ + if(channels > CHANNEL_MAX) + return ERROR_SUPERSOUND_PARAM; + + if(channels > m_nChannels) + { + if(0 == m_nSize) + m_nSize = SUPERSOUND_WAV_BUF_STEP_LEN; + + for(int32_t i = m_nChannels; i < channels; i++) + { + if(NULL == m_ppBuf[i]) + { + m_ppBuf[i] = new(std::nothrow) float[m_nSize]; + if(m_ppBuf[i] == NULL) + return ERROR_SUPERSOUND_MEMORY; + + memset(m_ppBuf[i], 0, sizeof(float) * m_nSize); + } + } + + m_nChannels = channels; + } + + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t SuperSoundWavBuf::GuaranteBufferSize( int32_t size ) +{ + //因为是循环队列,用一个单元来进行保护 + ++size; + + if(size > m_nSize) + { + float * buf; + int32_t new_size = size + SUPERSOUND_WAV_BUF_STEP_LEN - (size & (SUPERSOUND_WAV_BUF_STEP_LEN - 1)); + + for(int32_t i = 0; i < m_nChannels; i++) + { + buf = new(std::nothrow) float[new_size]; + if(buf == NULL) 
+ return ERROR_SUPERSOUND_MEMORY; + + memset(buf, 0, sizeof(float) * new_size); + + if(m_nEnd >= m_nStart) + { + memcpy(buf, m_ppBuf[i] + m_nStart, sizeof(float) * (m_nEnd - m_nStart)); + } + else + { + memcpy(buf, m_ppBuf[i] + m_nStart, sizeof(float) * (m_nSize - m_nStart)); + memcpy(buf + m_nSize - m_nStart, m_ppBuf[i], sizeof(float) * m_nEnd); + } + + SAFE_DELETE_PTR(m_ppBuf[i]); + + m_ppBuf[i] = buf; + } + + for(int32_t i = m_nChannels; i < CHANNEL_MAX; i++) + { + SAFE_DELETE_PTR(m_ppBuf[i]); + } + + //更新开始位置和结束位置 + if(m_nEnd >= m_nStart) + m_nEnd = m_nEnd - m_nStart; + else + m_nEnd = m_nEnd + m_nSize - m_nStart; + m_nStart = 0; + //更新长度信息 + m_nSize = new_size; + } + + return ERROR_SUPERSOUND_SUCCESS; +} + +void SuperSoundWavBuf::Destory() +{ + for(int32_t i = 0; i < CHANNEL_MAX; i++) + { + SAFE_DELETE_PTR(m_ppBuf[i]); + } +} + +int32_t SuperSoundWavBuf::PushSamples( int32_t num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if((num % m_nChannels) != 0) + return ERROR_SUPERSOUND_PARAM; + + int32_t new_len = m_nEnd - m_nStart + num / m_nChannels; + if(m_nEnd < m_nStart) + new_len += m_nSize; + nRet = GuaranteBufferSize(new_len); + if(nRet != ERROR_SUPERSOUND_SUCCESS) + return nRet; + + for(int32_t i = 0; i < num; i += m_nChannels) + { + for(int32_t j = 0; j < m_nChannels; j++) + { + m_ppBuf[j][m_nEnd] = 0; + } + ++m_nEnd; + if(m_nEnd == m_nSize) + m_nEnd = 0; + } + + return nRet; +} + +int32_t SuperSoundWavBuf::PushSamples( short * pdata, int32_t num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if((num % m_nChannels) != 0) + return ERROR_SUPERSOUND_PARAM; + + int32_t new_len = m_nEnd - m_nStart + num / m_nChannels; + if(m_nEnd < m_nStart) + new_len += m_nSize; + nRet = GuaranteBufferSize(new_len); + if(nRet != ERROR_SUPERSOUND_SUCCESS) + return nRet; + + for(int32_t i = 0; i < num; i += m_nChannels) + { + for(int32_t j = 0; j < m_nChannels; j++) + { + Short2Float(pdata[i + j], m_ppBuf[j][m_nEnd]); + } + ++m_nEnd; + if(m_nEnd == m_nSize) + m_nEnd = 0; + } + + return nRet; +} + +int32_t SuperSoundWavBuf::PushSamples( float * pdata, int32_t num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(m_nChannels == 0 || (num % m_nChannels) != 0) + return ERROR_SUPERSOUND_PARAM; + + int32_t new_len = m_nEnd - m_nStart + num / m_nChannels; + if(m_nEnd < m_nStart) + new_len += m_nSize; + nRet = GuaranteBufferSize(new_len); + if(nRet != ERROR_SUPERSOUND_SUCCESS) + return nRet; + + for(int32_t i = 0; i < num; i += m_nChannels) + { + for(int32_t j = 0; j < m_nChannels; j++) + { + m_ppBuf[j][m_nEnd] = pdata[i + j]; + } + ++m_nEnd; + if(m_nEnd == m_nSize) + m_nEnd = 0; + } + + return nRet; +} + +int32_t SuperSoundWavBuf::PushSamples( std::vector &ppdata, int32_t num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(int32_t(ppdata.size()) < m_nChannels) + return ERROR_SUPERSOUND_PARAM; + + int32_t new_len = m_nEnd - m_nStart + num; + if(m_nEnd < m_nStart) + new_len += m_nSize; + nRet = GuaranteBufferSize(new_len); + if(nRet != ERROR_SUPERSOUND_SUCCESS) + return nRet; + + for(int32_t i = 0; i < m_nChannels; i++) + { + //因为已经保证了缓存 buf 足够长,所以不会存在 end 覆盖 start 的情况 + if((m_nEnd + num) >= m_nSize) + { + Short2Float(ppdata[i], m_ppBuf[i] + m_nEnd, m_nSize - m_nEnd); + Short2Float(ppdata[i] + (m_nSize - m_nEnd), m_ppBuf[i], num + m_nEnd - m_nSize); + } + else + { + Short2Float(ppdata[i], m_ppBuf[i] + m_nEnd, num); + } + } + + //更新结束位置变量 + if((m_nEnd + num) >= m_nSize) + m_nEnd = num + m_nEnd - m_nSize; + else + m_nEnd = m_nEnd + num; + + return nRet; +} + +int32_t 
SuperSoundWavBuf::PushSamples( std::vector &ppdata, int32_t num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(int32_t(ppdata.size()) < m_nChannels) + return ERROR_SUPERSOUND_PARAM; + + int32_t new_len = m_nEnd - m_nStart + num; + if(m_nEnd < m_nStart) + new_len += m_nSize; + nRet = GuaranteBufferSize(new_len); + if(nRet != ERROR_SUPERSOUND_SUCCESS) + return nRet; + + for(int32_t i = 0; i < m_nChannels; i++) + { + //因为已经保证了缓存 buf 足够长,所以不会存在 end 覆盖 start 的情况 + if((m_nEnd + num) >= m_nSize) + { + memcpy(m_ppBuf[i] + m_nEnd, ppdata[i], sizeof(float) * (m_nSize - m_nEnd)); + memcpy(m_ppBuf[i], ppdata[i] + (m_nSize - m_nEnd), sizeof(float) * (num + m_nEnd - m_nSize)); + } + else + { + memcpy(m_ppBuf[i] + m_nEnd, ppdata[i], sizeof(float) * num); + } + } + + //更新结束位置变量 + if((m_nEnd + num) >= m_nSize) + m_nEnd = num + m_nEnd - m_nSize; + else + m_nEnd = m_nEnd + num; + + return nRet; +} + +int32_t SuperSoundWavBuf::PopSamples( int32_t num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + int32_t out_num = 0; + + if(m_nEnd >= m_nStart) + out_num = m_nEnd - m_nStart; + else + out_num = m_nEnd + m_nSize - m_nStart; + + out_num = MIN(num / m_nChannels, out_num); + + //更新开始位置 + if((m_nStart + out_num) >= m_nSize) + m_nStart = out_num + m_nStart - m_nSize; + else + m_nStart = m_nStart + out_num; + + return nRet; +} + +int32_t SuperSoundWavBuf::PopSamples( short * pdata, int32_t max_num, int32_t & out_num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(m_nEnd >= m_nStart) + out_num = m_nEnd - m_nStart; + else + out_num = m_nEnd + m_nSize - m_nStart; + + out_num = MIN(max_num / m_nChannels, out_num); + + for(int32_t i = 0; i < out_num; i++) + { + for(int32_t j = 0; j < m_nChannels; j++) + { + Float2Short(m_ppBuf[j][m_nStart], pdata[i * m_nChannels + j]); + } + ++m_nStart; + if(m_nStart == m_nSize) + m_nStart = 0; + } + + out_num = out_num * m_nChannels; + + return nRet; +} + +int32_t SuperSoundWavBuf::PopSamples( float * pdata, int32_t max_num, int32_t & out_num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(m_nEnd >= m_nStart) + out_num = m_nEnd - m_nStart; + else + out_num = m_nEnd + m_nSize - m_nStart; + + out_num = MIN(max_num / m_nChannels, out_num); + + for(int32_t i = 0; i < out_num; i++) + { + for(int32_t j = 0; j < m_nChannels; j++) + { + pdata[i * m_nChannels + j] = m_ppBuf[j][m_nStart]; + } + ++m_nStart; + if(m_nStart == m_nSize) + m_nStart = 0; + } + + out_num = out_num * m_nChannels; + + return nRet; +} + +int32_t SuperSoundWavBuf::PopSamples( std::vector &ppdata, int32_t max_num, int32_t & out_num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(int32_t(ppdata.size()) < m_nChannels) + return ERROR_SUPERSOUND_PARAM; + + if(m_nEnd >= m_nStart) + out_num = m_nEnd - m_nStart; + else + out_num = m_nEnd + m_nSize - m_nStart; + + out_num = MIN(max_num, out_num); + + for(int32_t i = 0; i < m_nChannels; i++) + { + if((m_nStart + out_num) >= m_nSize) + { + Float2Short(m_ppBuf[i] + m_nStart, ppdata[i], (m_nSize - m_nStart)); + Float2Short(m_ppBuf[i], ppdata[i] + m_nSize - m_nStart, out_num + m_nStart - m_nSize); + } + else + { + Float2Short(m_ppBuf[i] + m_nStart, ppdata[i], out_num); + } + } + + //更新开始位置 + if((m_nStart + out_num) >= m_nSize) + m_nStart = out_num + m_nStart - m_nSize; + else + m_nStart = m_nStart + out_num; + + return nRet; +} + +int32_t SuperSoundWavBuf::PopSamples( std::vector &ppdata, int32_t max_num, int32_t & out_num ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(int32_t(ppdata.size()) < m_nChannels) + return ERROR_SUPERSOUND_PARAM; + + 
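+        // Readable samples per channel = (m_nEnd - m_nStart) mod m_nSize; one ring
+        // cell is always left unused (see GuaranteBufferSize) so a full buffer is
+        // never confused with an empty one. The wrapped branch below copies in two
+        // pieces: [m_nStart, m_nSize) followed by [0, m_nEnd).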
if(m_nEnd >= m_nStart) + out_num = m_nEnd - m_nStart; + else + out_num = m_nEnd + m_nSize - m_nStart; + + out_num = MIN(max_num, out_num); + + for(int32_t i = 0; i < m_nChannels; i++) + { + if((m_nStart + out_num) >= m_nSize) + { + memcpy(ppdata[i], m_ppBuf[i] + m_nStart, sizeof(float) * (m_nSize - m_nStart)); + memcpy(ppdata[i] + m_nSize - m_nStart, m_ppBuf[i], sizeof(float) * (out_num + m_nStart - m_nSize)); + } + else + { + memcpy(ppdata[i], m_ppBuf[i] + m_nStart, sizeof(float) * out_num); + } + } + + //更新开始位置 + if((m_nStart + out_num) >= m_nSize) + m_nStart = out_num + m_nStart - m_nSize; + else + m_nStart = m_nStart + out_num; + + return nRet; +} + +int32_t SuperSoundWavBuf::ShadeSamples( int32_t num ) +{ + if(m_nEnd >= m_nStart) + { + num = MIN(m_nEnd - m_nStart, num); + m_nEnd = m_nEnd - num; + } + else + { + num = MIN(m_nSize + m_nEnd - m_nStart, num); + m_nEnd = m_nEnd - num; + if(m_nEnd < 0) + m_nEnd += m_nSize; + } + return ERROR_SUPERSOUND_SUCCESS; +} + +void SuperSoundWavBuf::Flush() +{ + m_nStart = 0; + m_nEnd = 0; +} + +int32_t SuperSoundWavBuf::DataSizeInCache() +{ + if(m_nEnd >= m_nStart) + return m_nEnd - m_nStart; + else + return (m_nEnd + m_nSize - m_nStart); +} + + +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/supersound_wav_buf.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/supersound_wav_buf.h new file mode 100755 index 0000000..f389837 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/common/supersound_wav_buf.h @@ -0,0 +1,95 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//实现一个循环 buf + +#ifndef __SUPERSOUND_WAV_BUF_H__ +#define __SUPERSOUND_WAV_BUF_H__ + +#include "supersound_types.h" +#include +#include + +namespace SUPERSOUND +{ + + +class SuperSoundWavBuf +{ +public: + SuperSoundWavBuf(); + ~SuperSoundWavBuf(); + +public: + static void Short2Float(short in, float & out); + static void Short2Float(short * src, float * dst, int32_t num); + static void Float2Short(float in, short & out); + static void Float2Short(float * src, short * dst, int32_t num); + +public: + int32_t SetChannels(int32_t channels); + + void Flush(); + + int32_t DataSizeInCache(); + + int32_t PushSamples(int32_t num); + int32_t PushSamples(short * pdata, int32_t num); + int32_t PushSamples(float * pdata, int32_t num); + int32_t PushSamples(std::vector &ppdata, int32_t num); + int32_t PushSamples(std::vector &ppdata, int32_t num); + + int32_t PopSamples(int32_t num); + int32_t PopSamples(short * pdata, int32_t max_num, int32_t & out_num); + int32_t PopSamples(float * pdata, int32_t max_num, int32_t & out_num); + int32_t PopSamples(std::vector &ppdata, int32_t max_num, int32_t & out_num); + int32_t PopSamples(std::vector &ppdata, int32_t max_num, int32_t & out_num); + + //从后面删除 + int32_t ShadeSamples(int32_t num); + +private: + int32_t GuaranteBufferSize(int32_t size); + void Destory(); + +private: + int32_t m_nChannels; + float * m_ppBuf[CHANNEL_MAX]; + int32_t m_nStart; + int32_t m_nEnd; + //当前一个声道 buf 的长度 + int32_t m_nSize; +}; + + +} + +#endif /* __SUPERSOUND_WAV_BUF_H__ */ \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse.cpp new file mode 100755 index 0000000..4b087bc --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse.cpp @@ -0,0 +1,258 @@ + +#include "impulse.h" +#include "supersound_err.h" +#include "supersound_common.h" + +namespace SUPERSOUND +{ +namespace IMPULSE +{ + +//#define SIMPLE_CONV + +Impulse::Impulse() +{ + memset(&m_current_param, 0, sizeof(m_current_param)); + m_new_param.fs = 44100; + m_new_param.channels = 2; + m_new_param.window_bits = 10; + m_new_param.frame_len = 511; + + m_b_change = true; + m_b_update = true; + + m_im_response_array = NULL; + m_im_res_len = 0; + m_response_channels = 0; + + m_convolution_array = NULL; +} + +Impulse::~Impulse() +{ + Destory(); +} + +void Impulse::Destory() +{ + if(m_convolution_array) + { + for(int32_t i = 0; i < m_current_param.channels; i++) + { + SAFE_DELETE_OBJ(m_convolution_array[i]); + } + SAFE_DELETE_PTR(m_convolution_array); + } + + if (m_im_response_array) + { + for(int32_t i = 0; i < m_response_channels; i++) + { + SAFE_DELETE_PTR(m_im_response_array[i]); + } + SAFE_DELETE_PTR(m_im_response_array); + } +} + +void Impulse::Flush() +{ + if(m_convolution_array) + { + for(int32_t i = 0; i < m_current_param.channels; i++) + { + if(m_convolution_array[i]) + { + m_convolution_array[i]->Flush(); + } + } + } +} + +int32_t Impulse::GetLatecy() +{ + int32_t latecy = 0; + + //因为每个通道的卷积延迟长度相同,所以直接返回一个 + if(m_convolution_array) + { + if(m_convolution_array[0]) + latecy += m_convolution_array[0]->GetLatecy(); + } + + return latecy; +} + +int32_t Impulse::GetFrameLen() +{ + return 
m_new_param.frame_len;
+}
+
+int32_t Impulse::SetSampleRate(int32_t fs)
+{
+    m_new_param.fs = fs;
+    m_b_change = true;
+
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+int32_t Impulse::SetChannels(int32_t channels)
+{
+    m_new_param.channels = channels;
+    m_b_change = true;
+
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+int32_t Impulse::SetWindowBit(int32_t bits, int32_t frame_len)
+{
+    m_new_param.window_bits = bits;
+    m_new_param.frame_len = (1 << (bits - 1)); // half the FFT window, so an integer number of input samples yields an integer number of output samples
+    if (frame_len > 0)
+    {
+        m_new_param.frame_len = frame_len;
+    }
+
+    m_b_change = true;
+
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+int32_t Impulse::SetImpulseResponse(float * im_response, int32_t response_len, int32_t response_channels)
+{
+    if(m_response_channels != response_channels || m_im_res_len != response_len)
+    {
+        if (m_im_response_array)
+        {
+            for(int32_t i = 0; i < m_response_channels; i++)
+            {
+                SAFE_DELETE_PTR(m_im_response_array[i]);
+            }
+            if(m_response_channels != response_channels)
+            {
+                SAFE_DELETE_PTR(m_im_response_array);
+            }
+        }
+        m_response_channels = response_channels;
+
+        if(m_im_response_array == NULL)
+        {
+            m_im_response_array = new (std::nothrow) float *[response_channels];
+            if(m_im_response_array == NULL)
+                return ERROR_SUPERSOUND_MEMORY;
+        }
+
+        for (int32_t i = 0; i < response_channels; i++)
+        {
+            m_im_response_array[i] = new(std::nothrow) float[response_len];
+            if(m_im_response_array[i] == NULL)
+                return ERROR_SUPERSOUND_MEMORY;
+        }
+        m_im_res_len = response_len;
+    }
+
+    // Copy the data: de-interleave the response into per-channel arrays
+//    int momo_response_len = response_len / m_response_channels; // needs a second look
+    int momo_response_len = response_len;
+    for(int32_t i = 0; i < m_response_channels; i++)
+    {
+        for (int32_t j = 0; j < momo_response_len; j++)
+        {
+            m_im_response_array[i][j] = im_response[i + m_response_channels * j];
+        }
+    }
+    m_b_change = true;
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+int32_t Impulse::Update()
+{
+    int32_t nRet = ERROR_SUPERSOUND_SUCCESS;
+
+    if (m_im_response_array == NULL) {
+        return ERROR_SUPERSOUND_PARAM;
+    }
+
+    // Window length handed to the FFT
+    int32_t window_len = 1 << m_new_param.window_bits;
+    // Window step (hop size)
+    int32_t window_step = m_new_param.frame_len;
+
+    // Update the channel count
+    if(m_new_param.channels != m_current_param.channels)
+    {
+        if(m_convolution_array)
+        {
+            for(int32_t i = 0; i < m_current_param.channels; i++)
+            {
+                SAFE_DELETE_OBJ(m_convolution_array[i]);
+            }
+            SAFE_DELETE_PTR(m_convolution_array);
+        }
+
+        m_convolution_array = new(std::nothrow) ISuperSoundConvolution * [m_new_param.channels];
+        if(m_convolution_array == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        for(int32_t i = 0; i < m_new_param.channels; i++)
+        {
+            #ifdef SIMPLE_CONV
+            m_convolution_array[i] = new(std::nothrow) SupersoundSimpleConvolution();
+            #else
+            m_convolution_array[i] = new(std::nothrow) SuperSoundFastConvolution();
+            #endif
+            if(m_convolution_array[i] == NULL)
+                return ERROR_SUPERSOUND_MEMORY;
+            nRet = m_convolution_array[i]->SetFrameLenZeroLen(window_step, window_len - window_step);
+            if(nRet != ERROR_SUPERSOUND_SUCCESS)
+                return nRet;
+        }
+    }
+    // In theory the case of an unchanged channel count but a changed window_step should be handled too; the caller never produces it, so it is not handled here
+
+    // If the input channel count equals the response channel count, each channel uses its own response; otherwise the response must be mono (response_channels == 1) and all channels share it
+    int32_t flag = (m_new_param.channels == m_response_channels) ?
1 : 0; + for(int32_t i = 0; i < m_new_param.channels; i++) + { + nRet = m_convolution_array[i]->SetImpulseResponse(m_im_response_array[i * flag], m_im_res_len); + if(nRet != ERROR_SUPERSOUND_SUCCESS) + return nRet; + // 清空内部缓存数据,重新计算数据 + m_convolution_array[i]->Flush(); + } + + m_current_param = m_new_param; + m_b_change = false; + + return nRet; +} + +int32_t Impulse::Process(std::vector & buf_vector, int32_t & out_num) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(m_b_change && m_b_update) + { + m_b_change = false; + nRet = Update(); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + } + //外部控制更新必须得到立马的响应 + if(m_b_update) + m_b_update = false; + + for(int32_t i = 0; i < m_current_param.channels; i++) + { + nRet = m_convolution_array[i]->ProcessFrame(buf_vector[i], out_num); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + } + return nRet; +} + +void Impulse::ControlUpdate() +{ + m_b_update = true; +} + +} +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse.h new file mode 100755 index 0000000..41a62bf --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse.h @@ -0,0 +1,114 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +#ifndef __IMPULSE_H__ +#define __IMPULSE_H__ + +#include "convolution/supersound_fast_convolution.h" +#include "convolution/supersound_simple_convolution.h" + +#include + +namespace SUPERSOUND +{ +namespace IMPULSE +{ + +class Impulse +{ +public: + Impulse(); + ~Impulse(); + +public: + //清空所有缓存 + void Flush(); + + //获取延迟长度 + int32_t GetLatecy(); + + //获取外界一次传入多少数据,也就是一帧的长度(单声道) + int32_t GetFrameLen(); + + //设置、更新采样率 + int32_t SetSampleRate(int32_t fs); + + //设置、更新声道数 + int32_t SetChannels(int32_t channels); + + //设置、更新窗的 bit 长度,也就是 FFT 运算的帧长,不是外界传入数据帧长 + int32_t SetWindowBit(int32_t bits, int32_t frame_len = 0); + + //设置冲击响应 + int32_t SetImpulseResponse(float * im_response, int32_t response_len, int32_t response_channels); + + //处理函数,每次输入一数据,同时输出一数据,这样常规的 DSP 操作 + int32_t Process(std::vector & buf_vector, int32_t & out_num); + + //控制更新的时机 + void ControlUpdate(); + + int32_t Update(); + +private: + void Destory(); + +private: + //定义参数结构体 + typedef struct _ParamInfo + { + int32_t fs; + int32_t channels; + int32_t window_bits; + int32_t frame_len; + }ParamInfo; + + ParamInfo m_new_param; + ParamInfo m_current_param; + + //参数上的改动 + bool m_b_change; + //外部开始要求要用新参数进行处理 + bool m_b_update; + + //脉冲响应数据及长度 + float ** m_im_response_array; + int32_t m_im_res_len; + int32_t m_response_channels; + + ISuperSoundConvolution ** m_convolution_array; +}; + + +} +} + +#endif /* __IMPULSE_H__ */ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse_effect.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse_effect.cpp new file mode 100755 index 0000000..d647c8f --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse_effect.cpp @@ -0,0 +1,280 @@ +#include "impulse_effect.h" +#include "supersound_err.h" +#include "supersound_common.h" + +namespace SUPERSOUND +{ +namespace IMPULSE +{ + +ImpulseEffect::ImpulseEffect() +{ + //保持和底层相同的默认值 + m_param.fs = 44100; + m_param.in_channels = 2; + m_param.out_channels = 2; + m_param.window_bits = 10; + m_param.im_response = NULL; + m_param.response_len = 0; + m_param.response_channels = 1; + + m_frame_len = 0; + + m_channel_buf = NULL; + m_max_buf_num = 0; + m_empty_buf = NULL; + m_pre_fill_num = 0; + + m_bInit = false; +} + +ImpulseEffect::~ImpulseEffect() +{ + Destory(); +} + +SUPERSOUND_EFFECT_TYPE ImpulseEffect::GetEffectId() +{ + return SUPERSOUND_IMPULSE_TYPE; +} + +ISuperSound * ImpulseEffect::GetEffectInst() +{ + return new(std::nothrow) ImpulseEffect(); +} + +int32_t ImpulseEffect::SetParam( void * param ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(param == NULL) + return ERROR_SUPERSOUND_PARAM; + + Impulse_Param * new_param = (Impulse_Param *)param; + + //输入输出的声道数必须相同 + if(new_param->in_channels != new_param->out_channels) + return ERROR_SUPERSOUND_PARAM; + + if(new_param->in_channels > CHANNEL_MAX) + return ERROR_SUPERSOUND_PARAM; + + if(new_param->im_response == NULL || new_param->response_len <= 0) + return ERROR_SUPERSOUND_PARAM; + + if((new_param->response_channels != 1) && new_param->response_channels != new_param->in_channels) + return ERROR_SUPERSOUND_PARAM; + + nRet = m_impulse.SetSampleRate(new_param->fs); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + nRet = 
m_impulse.SetChannels(new_param->response_channels); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + //更新输入输出缓冲区 + nRet = m_in_buf.SetChannels(new_param->in_channels); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + nRet = m_out_buf.SetChannels(new_param->out_channels); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + nRet = m_impulse.SetWindowBit(new_param->window_bits, new_param->process_buffer_len); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + m_param.fs = new_param->fs; + m_param.in_channels = new_param->in_channels; + m_param.out_channels = new_param->out_channels; + m_param.window_bits = new_param->window_bits; + m_param.response_channels = new_param->response_channels; + + m_param.response_len = new_param->response_len; + m_param.response_channels = new_param->response_channels; + + nRet = m_impulse.SetImpulseResponse(new_param->im_response, new_param->response_len, new_param->response_channels); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + nRet = m_impulse.Update(); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + //主要是为了防止不断压栈 + int32_t new_frame_len = m_impulse.GetFrameLen(); + int32_t new_buf_num = new_param->in_channels * new_frame_len; + if(m_max_buf_num < new_buf_num) + { + m_max_buf_num = new_buf_num; + SAFE_DELETE_PTR(m_channel_buf); + m_channel_buf = new(std::nothrow) float[m_max_buf_num]; + if(m_channel_buf == NULL) + return ERROR_SUPERSOUND_MEMORY; + } + if((m_buf_vector.size() != new_param->in_channels) || (m_frame_len != new_frame_len)) + { + m_buf_vector.clear(); + m_frame_len = new_frame_len; + for(int32_t i = 0; i < new_param->in_channels; i++) + { + m_buf_vector.push_back(m_channel_buf + i * m_frame_len); + } + } +// 不需要延迟 +// m_pre_fill_num = (m_frame_len + m_impulse.GetLatecy()) * new_param->in_channels; +// SAFE_DELETE_PTR(m_empty_buf); +// m_empty_buf = new(std::nothrow) float[m_pre_fill_num]; +// if(m_empty_buf == NULL) +// return ERROR_SUPERSOUND_MEMORY; +// memset(m_empty_buf, 0, sizeof(float) * m_pre_fill_num); +// +// m_in_buf.PushSamples(m_empty_buf, m_pre_fill_num); + + m_bInit = true; + + return nRet; +} + +void ImpulseEffect::Update() +{ + m_impulse.ControlUpdate(); +} + +int32_t ImpulseEffect::GetParam( void * param ) +{ + Impulse_Param * new_param = (Impulse_Param *)param; + new_param->fs = m_param.fs; + new_param->in_channels = m_param.in_channels; + new_param->out_channels = m_param.out_channels; + + return ERROR_SUPERSOUND_SUCCESS; +} + +int32_t ImpulseEffect::GetSampleRate() +{ + return m_param.fs; +} + +int32_t ImpulseEffect::GetInputChannels() +{ + return m_param.in_channels; +} + +int32_t ImpulseEffect::GetOutputChannels() +{ + return m_param.out_channels; +} + +int32_t ImpulseEffect::GetLatency() +{ +// int32_t latency = (int32_t)(1000.0f * m_pre_fill_num / m_param.fs + 0.5f); +// return latency; + return 0; +} + +int32_t ImpulseEffect::GetFrameLen() +{ + return m_impulse.GetFrameLen(); +} + +void ImpulseEffect::FlushOut() +{ + m_in_buf.Flush(); + m_out_buf.Flush(); + m_impulse.Flush(); + + m_in_buf.PushSamples(m_empty_buf, m_pre_fill_num); +} + +int32_t ImpulseEffect::FlushToBuf( int32_t & canOutputSamples ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + if(!m_bInit) + { + canOutputSamples = 0; + return nRet; + } + + //每个通道的延迟长度以及剩余长度 + int32_t len = m_impulse.GetLatecy() + m_in_buf.DataSizeInCache() + m_out_buf.DataSizeInCache(); + int32_t pro_len = m_param.in_channels * m_frame_len; + + while(1) + { + memset(m_channel_buf, 0, sizeof(float) * pro_len); + nRet = 
ProcessfInput(m_channel_buf, pro_len, canOutputSamples); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + if((canOutputSamples / m_param.out_channels) >= len) + { + m_out_buf.ShadeSamples(canOutputSamples / m_param.out_channels - len); + break; + } + } + + canOutputSamples = m_out_buf.DataSizeInCache() * m_param.out_channels; + + //将需要清空的清空 + m_in_buf.Flush(); + m_impulse.Flush(); + + return nRet; +} + +int32_t ImpulseEffect::ProcessfInput( float * in, int32_t nSamples, int32_t & canOutputSamples ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + int32_t out_num; + + if(!m_bInit) + { + nRet = SetParam((void *)(&m_param)); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + } + + nRet = m_in_buf.PushSamples(in, nSamples); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + while(m_in_buf.DataSizeInCache() >= m_frame_len) + { + nRet = m_in_buf.PopSamples(m_buf_vector, m_frame_len, out_num); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + nRet = m_impulse.Process(m_buf_vector, out_num); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + + nRet = m_out_buf.PushSamples(m_buf_vector, out_num); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + } + + canOutputSamples = m_out_buf.DataSizeInCache() * m_param.out_channels; + + return nRet; +} + +int32_t ImpulseEffect::ProcessfOutput( float * out, int32_t nSamples, int32_t & outSamples ) +{ + int32_t nRet = ERROR_SUPERSOUND_SUCCESS; + + nRet = m_out_buf.PopSamples(out, nSamples, outSamples); + if(ERROR_SUPERSOUND_SUCCESS != nRet) + return nRet; + return nRet; +} + +void ImpulseEffect::Destory() +{ + SAFE_DELETE_PTR(m_channel_buf); +// SAFE_DELETE_PTR(m_param.im_response) + SAFE_DELETE_PTR(m_empty_buf) + m_pre_fill_num = 0; +} + +} +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse_effect.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse_effect.h new file mode 100755 index 0000000..20534c3 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/impulse/impulse_effect.h @@ -0,0 +1,95 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. 
\_ __\ /__ _/ .-` / / +
+//+ =====`-.____`.___ \_____/___.-`___.-'===== +
+//+ `=---=' +
+//+ +
+//+ +
+//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +
+//+ +
+//+ 佛祖保佑 永无BUG +
+//+ ----------------------------------------------------+ +
+
+#ifndef __IMPULSE_EFFECT_H__
+#define __IMPULSE_EFFECT_H__
+
+#include "impulse_types.h"
+#include "impulse.h"
+#include "isupersound.h"
+#include "supersound_wav_buf.h"
+
+namespace SUPERSOUND
+{
+namespace IMPULSE
+{
+
+
+class ImpulseEffect : public ISuperSound
+{
+public:
+    ImpulseEffect();
+    virtual ~ImpulseEffect();
+
+public:
+    virtual SUPERSOUND_EFFECT_TYPE GetEffectId();
+    virtual ISuperSound * GetEffectInst();
+    virtual int32_t SetParam(void * param);
+    virtual void Update();
+    virtual int32_t GetParam(void * param);
+    virtual int32_t GetSampleRate();
+    virtual int32_t GetInputChannels();
+    virtual int32_t GetOutputChannels();
+    virtual void FlushOut();
+    virtual int32_t FlushToBuf(int32_t & canOutputSamples);
+    virtual int32_t ProcessfInput(float * in, int32_t nSamples, int32_t & canOutputSamples);
+    virtual int32_t ProcessfOutput(float * out, int32_t nSamples, int32_t & outSamples);
+    virtual int32_t GetLatency();
+    virtual int32_t GetFrameLen();
+
+private:
+    void Destory();
+
+private:
+    Impulse_Param m_param;
+
+    Impulse m_impulse;
+
+    SuperSoundWavBuf m_in_buf;
+    SuperSoundWavBuf m_out_buf;
+
+    // Frame length per call (mono samples)
+    int32_t m_frame_len;
+    // Heap scratch space, to avoid repeatedly pushing large buffers onto the stack
+    float * m_channel_buf;
+    int32_t m_max_buf_num;
+    std::vector<float *> m_buf_vector;
+    float * m_empty_buf;
+    int m_pre_fill_num;
+    // Whether initialization (SetParam) has completed
+    bool m_bInit;
+};
+
+
+}
+}
+
+#endif /* __IMPULSE_EFFECT_H__ */
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/isupersound.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/isupersound.h
new file mode 100755
index 0000000..d8251ab
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/supersound/src/isupersound.h
@@ -0,0 +1,144 @@
+
+/***************************************************************************
+* email : yijiangyang@tencent.com *
+***************************************************************************/
+
+//+ ----------------------------------------------------+
+//+ _oo0oo_ +
+//+ o8888888o +
+//+ 88" . "88 +
+//+ (| -_- |) +
+//+ 0\ = /0 +
+//+ ___/`---'\___ +
+//+ .' \\| |// '. +
+//+ / \\||| : |||// \ +
+//+ / _||||| -:- |||||- \ +
+//+ | | \\\ - /// | | +
+//+ | \_| ''\---/'' |_/ | +
+//+ \ .-\__ '-' ___/-. / +
+//+ ___'. .' /--.--\ `. .'___ +
+//+ ."" '< `.___\_<|>_/___.' >' "". +
+//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | +
+//+ \ \ `_. 
\_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//定义效果器的基类,这样统一所有接口 + +#ifndef __I_SUPERSOUND_H__ +#define __I_SUPERSOUND_H__ + +#include "supersound_types.h" +#include + +namespace SUPERSOUND +{ + + +class ISuperSound +{ +public: + ISuperSound() { }; + virtual ~ISuperSound() { }; + +public: + //+ ---------------------------------------------------- + //+ 获取效果器 Id + //+ 返回值为 SUPERSOUND_EFFECT_TYPE + //+ ---------------------------------------------------- + virtual SUPERSOUND_EFFECT_TYPE GetEffectId() = 0; + + //+ ---------------------------------------------------- + //+ 获取效果器实例 + //+ 返回值为效果器实例 + //+ ---------------------------------------------------- + virtual ISuperSound * GetEffectInst() = 0; + + //+ ---------------------------------------------------- + //+ 设置、更新参数 + //+ 返回值为errCode + //+ ---------------------------------------------------- + virtual int32_t SetParam(void * param) = 0; + + //+ ---------------------------------------------------- + //+ 更新设置,主要是为了做存在参数设置变动比较大的情况下的破音问题 + //+ 返回值为errCode + //+ ---------------------------------------------------- + virtual void Update() = 0; + + //+ ---------------------------------------------------- + //+ 获取参数 + //+ 返回值为errCode + //+ ---------------------------------------------------- + virtual int32_t GetParam(void * param) = 0; + + //+ ---------------------------------------------------- + //+ 获取采样率 + //+ 返回值为采样率 + //+ ---------------------------------------------------- + virtual int32_t GetSampleRate() = 0; + + //+ ---------------------------------------------------- + //+ 获取输入声道数 + //+ 返回值为输入声道数 + //+ ---------------------------------------------------- + virtual int32_t GetInputChannels() = 0; + + //+ ---------------------------------------------------- + //+ 获取输出声道数 + //+ 返回值为输出声道数 + //+ ---------------------------------------------------- + virtual int32_t GetOutputChannels() = 0; + + //+ ---------------------------------------------------- + //+ 清空缓存数据,在外围有进行跳转操作时要调用的操作 + //+ ---------------------------------------------------- + virtual void FlushOut() = 0; + + //+ ---------------------------------------------------- + //+ 获取最后的缓存数据到输出缓存中 + //+ 返回值为errCode + //+ ---------------------------------------------------- + virtual int32_t FlushToBuf(int32_t & canOutputSamples) = 0; + + //+ ---------------------------------------------------- + //+ 对浮点数据进行处理 + //+ in:输入音频数据指针 + //+ nSamples:输入音频数据指针长度 + //+ canOutputSamples:能够输出的数据长度 + //+ 返回值为errCode + //+ ---------------------------------------------------- + virtual int32_t ProcessfInput(float * in, int32_t nSamples, int32_t & canOutputSamples) = 0; + + //+ ---------------------------------------------------- + //+ 对浮点数据进行处理 + //+ out:输出音频数据指针 + //+ nSamples:输出音频数据指针长度 + //+ outSamples:实际获取到的数据长度 + //+ 返回值为errCode + //+ ---------------------------------------------------- + virtual int32_t ProcessfOutput(float * out, int32_t nSamples, int32_t & outSamples) = 0; + + //+ ---------------------------------------------------- + //+ 获取效果器延迟时间 + //+ 返回值为延迟时间,单位:ms + //+ ---------------------------------------------------- + virtual int32_t GetLatency() = 0; + //+ ---------------------------------------------------- + //+ 获取效果器内部一帧的长度[单声道] + //+ 返回值为单声道采样点数量 + //+ ---------------------------------------------------- + virtual int32_t GetFrameLen() = 0; + +}; + + +} + +#endif /* __I_SUPERSOUND_H__ */ 
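Taken together, ISuperSound is a push/pull pipeline: the caller keeps feeding interleaved float blocks through ProcessfInput, drains whatever is ready with ProcessfOutput, and calls FlushToBuf once at end of stream to recover the tail still buffered inside the effect (for the impulse effect, the reverb decay). A hypothetical driver loop under those assumptions (RunEffect is illustrative, not part of the library, and ERROR_SUPERSOUND_SUCCESS is assumed to be 0):

#include <cstdint>
#include <algorithm>
#include <vector>
#include "isupersound.h"

// Push one interleaved clip through an effect and collect all of its output.
int32_t RunEffect(SUPERSOUND::ISuperSound * fx, const std::vector<float> & in, std::vector<float> & out)
{
    const int32_t frame = fx->GetFrameLen() * fx->GetInputChannels();
    std::vector<float> block((size_t)frame), ready;
    int32_t canOut = 0, got = 0;

    for (size_t pos = 0; pos + (size_t)frame <= in.size(); pos += (size_t)frame)
    {
        std::copy(in.begin() + pos, in.begin() + pos + frame, block.begin());
        if (fx->ProcessfInput(block.data(), frame, canOut) != 0)  // 0 == success assumed
            return -1;
        ready.resize((size_t)canOut);
        fx->ProcessfOutput(ready.data(), canOut, got);
        out.insert(out.end(), ready.begin(), ready.begin() + got);
    }

    // End of stream: pull whatever tail the effect still holds internally.
    fx->FlushToBuf(canOut);
    ready.resize((size_t)canOut);
    fx->ProcessfOutput(ready.data(), canOut, got);
    out.insert(out.end(), ready.begin(), ready.begin() + got);
    return 0;
}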
\ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/CMakeLists.txt new file mode 100644 index 0000000..e1f7f34 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/CMakeLists.txt @@ -0,0 +1,15 @@ +include_directories(./ inc src) + +include_directories(src/aa_filter) +include_directories(src/bpm_detect) +include_directories(src/cpu_detect) +include_directories(src/fifo_sample_buffer) +include_directories(src/fir_filter) +include_directories(src/peak_finder) +include_directories(src/rate_transposer) +include_directories(src/sound_touch) +include_directories(src/td_stretch) + +file(GLOB_RECURSE TONESHIFT_SRC_FILES src/*cpp) +add_library(tone_shift ${TONESHIFT_SRC_FILES}) +#set_target_properties(tone_shift PROPERTIES CXX_VISIBILITY_PRESET hidden) \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/CSoundTouchDef.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/CSoundTouchDef.h new file mode 100644 index 0000000..c7f1455 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/CSoundTouchDef.h @@ -0,0 +1,15 @@ +// +// Created by yangjianli on 2020-01-15. +// + +#ifndef AUDIO_EFFECTS_LIB_SOUNDTOUCHDEF_H +#define AUDIO_EFFECTS_LIB_SOUNDTOUCHDEF_H +#include "AudioEffectsConf.h" +enum ST_ERR { + ST_ERR_SUCCESS = 0, + ST_ERR_PARAM = -1, + ST_ERR_BASE_H_MALLOC_NULL = -2, + ST_ERR_BASE_H_UNKNOWN = -3, +}; + +#endif //AUDIO_EFFECTS_LIB_SOUNDTOUCHDEF_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/CSpeedShift.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/CSpeedShift.h new file mode 100644 index 0000000..de269f4 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/CSpeedShift.h @@ -0,0 +1,35 @@ +// +// Created by wangjianjun on 18/11/1. +// + +#ifndef CAUDIODECODER_CSPEEDSHIFT_H +#define CAUDIODECODER_CSPEEDSHIFT_H + +#include "SpeedShiftInterface.h" + +class CSpeedShift : public SpeedShiftInterface { +public: + CSpeedShift(); + ~CSpeedShift(); + +public: + int init(int sampleRate, int channel); + void uninit(); + + int set_speed_value(float speedVal); + float get_speed_value(); + + int get_latence(); + void reset(); + + int process(float *inBuffer, int inSize, float *outBuffer, int outSize); + +private: + float m_speed_value; // speed value. + int m_channel; // sample channel, 1 for mono, 2 for stereo. 
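+    // Opaque soundtouch::SoundTouch instance; stored as void* so this header does
+    // not have to include SoundTouch.h (CSpeedShift.cpp performs the casts).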
+
+    void* pTone;
+};
+
+
+#endif //CAUDIODECODER_CSPEEDSHIFT_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/CToneShift.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/CToneShift.h
new file mode 100644
index 0000000..4acc707
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/CToneShift.h
@@ -0,0 +1,42 @@
+/************************************************************************/
+/* tone shift                                                           */
+/* written by ethanzhao, 6-16,2014                                      */
+/************************************************************************/
+#ifndef C_TONE_SHIFT_H
+#define C_TONE_SHIFT_H
+
+#include "ToneShiftInterfaces.h"
+#define MAX_TONE_SHIFT_VALUE 12     // tone shift max value
+#define MIN_TONE_SHIFT_VALUE -12    // tone shift min value
+
+/* change tone style */
+class CToneShift : public ToneShiftInterface
+{
+public:
+    CToneShift();
+    ~CToneShift();
+
+    int init(int sample_rate, int channel);
+    void uninit();
+
+    int get_shift_range(int *max_val, int *min_val);  // get shift range, [-12, +12]
+    int get_shift_default();                          // get default shift value, usually 0
+    int set_shift_value(int shift_val);               // set the current shift value
+    int get_shift_value();                            // get the current shift value
+    int get_latence();                                // latency in ms
+    void reset();
+
+    /* process; returns the real output size after processing. */
+    int process(float *in_buffer, int in_size, float *out_buffer, int out_size);
+
+private:
+    int m_shift_value;    // shift value.
+    int m_iChannel;       // sample channel, 1 for mono, 2 for stereo.
+    int m_i_sample_rate;  // sample rate.
+
+    void* pTone;
+};
+
+
+#endif
+
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/SpeedShiftInterface.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/SpeedShiftInterface.h
new file mode 100644
index 0000000..baf1890
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/SpeedShiftInterface.h
@@ -0,0 +1,30 @@
+//
+// Created by wangjianjun on 18/11/1.
+//
+
+#ifndef CAUDIODECODER_SPEEDSHIFTINTERFACE_H
+#define CAUDIODECODER_SPEEDSHIFTINTERFACE_H
+
+#define MAX_SPEED_SHIFT_VALUE 5.0f  // fastest: 5x speed-up
+#define MIN_SPEED_SHIFT_VALUE 0.2f  // slowest: 5x slow-down (1 / 0.2f)
+#include "CSoundTouchDef.h"
+class SpeedShiftInterface
+{
+public:
+    /* Create and destroy instances. */
+    static SpeedShiftInterface* CreateObject();
+    static void DestroyObject(SpeedShiftInterface** pObject);
+public:
+    virtual int init(int sample_rate, int channel) = 0;
+    virtual void uninit() = 0;
+
+    virtual int set_speed_value(float speed_val) = 0;
+    virtual float get_speed_value() = 0;
+
+    virtual int get_latence() = 0;
+    virtual void reset() = 0;
+
+    virtual int process(float *in_buffer, int in_size, float *out_buffer, int out_size) = 0;
+};
+
+#endif //CAUDIODECODER_SPEEDSHIFTINTERFACE_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/ToneShiftInterfaces.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/ToneShiftInterfaces.h
new file mode 100644
index 0000000..e311485
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/inc/ToneShiftInterfaces.h
@@ -0,0 +1,30 @@
+/************************************************************************/
+/* karaoke audio base module interface.                                 */
+/************************************************************************/
+
+#ifndef TONE_SHIFT_INTERFACE_H
+#define TONE_SHIFT_INTERFACE_H
+#include "CSoundTouchDef.h"
+/* change tone style */
+class ToneShiftInterface
+{
+public:
+    /* Create and destroy instances. */
+    static ToneShiftInterface* create_object();
+    static void destroy_object(ToneShiftInterface **pObject);
+public:
+    virtual int init(int sample_rate, int channel) = 0;
+    virtual void uninit() = 0;
+
+    virtual int get_shift_range(int *max_val, int *min_val) = 0;
+    virtual int get_shift_default() = 0;
+    virtual int set_shift_value(int shift_val) = 0;
+    virtual int get_shift_value() = 0;
+
+    virtual int get_latence() = 0;
+    virtual void reset() = 0;
+
+    virtual int process(float *in_buffer, int in_size, float *out_buffer, int out_size) = 0;
+};
+
+#endif //TONE_SHIFT_INTERFACE_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/CSpeedShift.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/CSpeedShift.cpp
new file mode 100644
index 0000000..a448817
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/CSpeedShift.cpp
@@ -0,0 +1,144 @@
+//
+// Created by wangjianjun on 18/11/1.
+//
+
+#include <math.h>      // fabsf
+#include "CSpeedShift.h"
+#include "SoundTouch.h"
+#include "CSoundTouchDef.h"
+#include <string.h>    // memcpy/memset
+
+using namespace soundtouch;
+
+// priming buffer: 3837 zero frames are pushed at init/reset to pre-fill SoundTouch's pipeline
+static float gs_buf[3837 * 2] = { 0.0f };
+
+SpeedShiftInterface* SpeedShiftInterface::CreateObject()
+{
+    CSpeedShift * pSpeedShift = new CSpeedShift();
+    return pSpeedShift;
+}
+
+void SpeedShiftInterface::DestroyObject(SpeedShiftInterface** pObject)
+{
+    delete (*pObject);
+    (*pObject) = NULL;
+}
+
+CSpeedShift::CSpeedShift()
+{
+    pTone = NULL;
+    m_speed_value = 1.0f;
+}
+
+CSpeedShift::~CSpeedShift()
+{
+    uninit();
+}
+
+void CSpeedShift::uninit()
+{
+    SoundTouch* stchTone = (SoundTouch*)pTone;
+    if (stchTone != NULL)
+    {
+        stchTone->flush();
+        delete stchTone;
+
+        pTone = NULL;
+    }
+}
+
+int CSpeedShift::init(int sampleRate, int channel) {
+    SoundTouch* stchTone = new SoundTouch;
+    pTone = stchTone;
+
+    stchTone->setChannels(channel);
+    stchTone->setSampleRate(sampleRate);
+
+    //stchTone->setSetting(SETTING_USE_AA_FILTER, 1);
+    stchTone->setSetting(SETTING_USE_AA_FILTER, 0);
+    stchTone->setSetting(SETTING_USE_QUICKSEEK, 1);
+    stchTone->setSetting(SETTING_SEQUENCE_MS, 40);
+    stchTone->setSetting(SETTING_SEEKWINDOW_MS, 15);
+    stchTone->setSetting(SETTING_OVERLAP_MS, 8);
+
+    stchTone->setTempo(1.0f);
+
+    stchTone->putSamples(gs_buf, 3837);
+
+    m_channel = channel;
+    return 0;
+}
+
+int CSpeedShift::set_speed_value(float speedVal)
+{
+    SoundTouch* stchTone = (SoundTouch*)pTone;
+    if (stchTone == NULL)
+    {
+        return ST_ERR_BASE_H_MALLOC_NULL;
+    }
+
+    if (speedVal > MAX_SPEED_SHIFT_VALUE)
+    {
+        speedVal = MAX_SPEED_SHIFT_VALUE;
+    }
+    else if (speedVal < MIN_SPEED_SHIFT_VALUE)
+    {
+        speedVal = MIN_SPEED_SHIFT_VALUE;
+    }
+
+    m_speed_value = speedVal;
+    stchTone->setTempo(m_speed_value);
+
+    return 0;
+}
+
+float CSpeedShift::get_speed_value()
+{
+    return m_speed_value;
+}
+
+void CSpeedShift::reset()
+{
+    SoundTouch* stchTone = (SoundTouch*)pTone;
+    if (stchTone == NULL)
+    {
+        return;
+    }
+    stchTone->clear();
+
+    stchTone->putSamples(gs_buf, 3837);
+}
+
+int CSpeedShift::get_latence()
+{
+    return 80;
+}
+
+int CSpeedShift::process(float *inBuffer, int inSize, float *outBuffer, int outSize)
+{
+    int inNum;
+    int outNum;
+
+    SoundTouch* stchTone = (SoundTouch*)pTone;
+    if (stchTone == NULL)
+    {
+        return ST_ERR_BASE_H_MALLOC_NULL;
+    }
+
+    if (fabsf(m_speed_value - 1.0f) <= 0.001f)
+    {
+        outSize = outSize > inSize ? inSize : outSize;
+        // speed is effectively 1.0: pass the input straight through.
+        memcpy(outBuffer, inBuffer, outSize * sizeof(float));
+        return outSize;
+    }
+
+    inNum = inSize / m_channel;
+    stchTone->putSamples((SAMPLETYPE*)inBuffer, inNum);
+
+    // clear after putSamples so the call also works when inBuffer and outBuffer alias
+    memset(outBuffer, 0, outSize * sizeof(float));
+
+    outNum = stchTone->receiveSamples((SAMPLETYPE*)outBuffer, outSize / m_channel);
+
+    return outNum * m_channel;
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/CToneShift.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/CToneShift.cpp
new file mode 100644
index 0000000..bb23c10
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/CToneShift.cpp
@@ -0,0 +1,164 @@
+#include "CToneShift.h"
+#include "SoundTouch.h"
+#include "CSoundTouchDef.h"
+#include <string.h>    // memcpy/memset
+
+using namespace soundtouch;
+
+//static SoundTouch stchTone; // for tone shift.
+static float gs_blk[3837 * 2] = { 0.0f };
+
+ToneShiftInterface* ToneShiftInterface::create_object()
+{
+    ToneShiftInterface* pObject = new CToneShift;
+    return pObject;
+}
+
+void ToneShiftInterface::destroy_object(ToneShiftInterface **pObject)
+{
+    delete (*pObject);
+    (*pObject) = NULL;
+}
+
+CToneShift::CToneShift()
+{
+}
+
+CToneShift::~CToneShift()
+{
+    uninit();
+}
+
+///////////////////////////// functions for tone shift below ///////////////////////////////////
+int CToneShift::process(float *in_buffer, int in_size, float *out_buffer, int out_size)
+{
+    int inNum;
+    int outNum;
+
+    SoundTouch* stchTone = (SoundTouch*)pTone;
+    if (stchTone == NULL)
+    {
+        return ST_ERR_BASE_H_MALLOC_NULL;
+    }
+
+    out_size = out_size > in_size ? in_size : out_size;
+    if (m_shift_value == 0)
+    {
+        // no shift requested: pass the input straight through.
+        memcpy(out_buffer, in_buffer, out_size * sizeof(float));
+        return out_size;
+    }
+
+    inNum = in_size / m_iChannel;
+    stchTone->putSamples((SAMPLETYPE*)in_buffer, inNum);
+
+    // clearing after putSamples keeps this correct even when in_buffer and out_buffer are the same buffer
+    memset(out_buffer, 0, out_size * sizeof(float));
+
+    outNum = stchTone->receiveSamples((SAMPLETYPE*)out_buffer, out_size / m_iChannel);
+    //return outNum * m_iChannel * sizeof(short);
+    return outNum * m_iChannel;
+}
+
+int CToneShift::get_latence()
+{
+    return 84;
+}
+
+void CToneShift::reset()
+{
+    SoundTouch* stchTone = (SoundTouch*)pTone;
+    if (stchTone == NULL)
+    {
+        return;
+    }
+    stchTone->clear();
+
+    stchTone->putSamples(gs_blk, 3837);
+}
+
+int CToneShift::get_shift_value()
+{
+    return m_shift_value;
+}
+
+int CToneShift::set_shift_value(int shift_val)
+{
+    SoundTouch* stchTone = (SoundTouch*)pTone;
+    if (stchTone == NULL)
+    {
+        return ST_ERR_BASE_H_MALLOC_NULL;
+    }
+
+    if (shift_val > MAX_TONE_SHIFT_VALUE)
+    {
+        shift_val = MAX_TONE_SHIFT_VALUE;
+    }
+    else if (shift_val < MIN_TONE_SHIFT_VALUE)
+    {
+        shift_val = MIN_TONE_SHIFT_VALUE;
+    }
+
+    m_shift_value = shift_val;
+    stchTone->setPitchSemiTones(m_shift_value);
+
+    return 0;
+}
+
+int CToneShift::get_shift_default()
+{
+    return 0;
+}
+
+int CToneShift::get_shift_range(int *max_val, int *min_val)
+{
+    *max_val = MAX_TONE_SHIFT_VALUE;
+    *min_val = MIN_TONE_SHIFT_VALUE;
+
+    return 0;
+}
+
+
+void CToneShift::uninit()
+{
+    SoundTouch* stchTone = (SoundTouch*)pTone;
+    if (stchTone != NULL)
+    {
+        stchTone->flush();
+
+        //delete pTone;
+        delete stchTone;
+
+        pTone = NULL;
+    }
+}
+
+int CToneShift::init(int sample_rate, int channel)
+{
+    pTone = NULL;
+
+    SoundTouch* stchTone = new SoundTouch;
+    pTone = stchTone;
+
+    stchTone->setChannels(channel);
+    stchTone->setSampleRate(sample_rate);
+
+    //stchTone->setSetting(SETTING_USE_AA_FILTER, 1);
+    stchTone->setSetting(SETTING_USE_AA_FILTER, 0);
+    stchTone->setSetting(SETTING_USE_QUICKSEEK, 1);
+    stchTone->setSetting(SETTING_SEQUENCE_MS, 40);
+    stchTone->setSetting(SETTING_SEEKWINDOW_MS, 15);
+    stchTone->setSetting(SETTING_OVERLAP_MS, 8);
+
+    stchTone->setPitchSemiTones(0);
+
+    stchTone->putSamples(gs_blk, 3837);
+
+    m_shift_value = 0;
+    m_i_sample_rate = sample_rate;
+    m_iChannel = channel;
+    return 0;
+}
+
+///////////////////////////// end of tone shift functions ///////////////////////////////////
+
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/FIFOSamplePipe.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/FIFOSamplePipe.h
new file mode 100644
index 0000000..f26c57b
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/FIFOSamplePipe.h
@@ -0,0 +1,234 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// 'FIFOSamplePipe' : An abstract base class for classes that manipulate sound
+/// samples by operating like a first-in-first-out pipe: New samples are fed
+/// into one end of the pipe with the 'putSamples' function, and the processed
+/// samples are received from the other end with the 'receiveSamples' function.
+///
+/// 'FIFOProcessor' : A base class for classes that do signal processing with
+/// the samples while operating like a first-in-first-out pipe. When samples
+/// are input with the 'putSamples' function, the class processes them
+/// and moves the processed samples to the given 'output' pipe object, which
+/// may be either another processing stage, or a fifo sample buffer object.
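//
// A minimal sketch of the pipe contract described above (hypothetical driver
// code; it uses the concrete FIFOSampleBuffer that appears later in this diff,
// and consume() is a placeholder): input and output counts need not match per
// call, so callers drain in a loop.
//
//     soundtouch::FIFOSampleBuffer fifo(2);       // 2 = stereo
//     fifo.putSamples(interleaved, nFrames);      // feed one end
//     while (!fifo.isEmpty()) {
//         uint got = fifo.receiveSamples(outBuf, kMaxFrames);  // drain the other
//         consume(outBuf, got);
//     }
//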
+/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai 'at' iki.fi +/// SoundTouch WWW: http://www.surina.net/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date: 2012-06-13 22:29:53 +0300 (Wed, 13 Jun 2012) $ +// File revision : $Revision: 4 $ +// +// $Id: FIFOSamplePipe.h 143 2012-06-13 19:29:53Z oparviai $ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef FIFOSamplePipe_H +#define FIFOSamplePipe_H + +#include +#include +#include "STTypes.h" + +namespace soundtouch +{ + +/// Abstract base class for FIFO (first-in-first-out) sample processing classes. +class FIFOSamplePipe +{ +public: + // virtual default destructor + virtual ~FIFOSamplePipe() {} + + + /// Returns a pointer to the beginning of the output samples. + /// This function is provided for accessing the output samples directly. + /// Please be careful for not to corrupt the book-keeping! + /// + /// When using this function to output samples, also remember to 'remove' the + /// output samples from the buffer by calling the + /// 'receiveSamples(numSamples)' function + virtual SAMPLETYPE *ptrBegin() = 0; + + /// Adds 'numSamples' pcs of samples from the 'samples' memory position to + /// the sample buffer. + virtual void putSamples(const SAMPLETYPE *samples, ///< Pointer to samples. + uint numSamples ///< Number of samples to insert. + ) = 0; + + + // Moves samples from the 'other' pipe instance to this instance. + void moveSamples(FIFOSamplePipe &other ///< Other pipe instance where from the receive the data. + ) + { + int oNumSamples = other.numSamples(); + + putSamples(other.ptrBegin(), oNumSamples); + other.receiveSamples(oNumSamples); + }; + + /// Output samples from beginning of the sample buffer. Copies requested samples to + /// output buffer and removes them from the sample buffer. If there are less than + /// 'numsample' samples in the buffer, returns all that available. + /// + /// \return Number of samples returned. + virtual uint receiveSamples(SAMPLETYPE *output, ///< Buffer where to copy output samples. + uint maxSamples ///< How many samples to receive at max. + ) = 0; + + /// Adjusts book-keeping so that given number of samples are removed from beginning of the + /// sample buffer without copying them anywhere. + /// + /// Used to reduce the number of samples in the buffer when accessing the sample buffer directly + /// with 'ptrBegin' function. + virtual uint receiveSamples(uint maxSamples ///< Remove this many samples from the beginning of pipe. 
+ ) = 0; + + /// Returns number of samples currently available. + virtual uint numSamples() const = 0; + + // Returns nonzero if there aren't any samples available for outputting. + virtual int isEmpty() const = 0; + + /// Clears all the samples. + virtual void clear() = 0; + + /// allow trimming (downwards) amount of samples in pipeline. + /// Returns adjusted amount of samples + virtual uint adjustAmountOfSamples(uint numSamples) = 0; + +}; + + + +/// Base-class for sound processing routines working in FIFO principle. With this base +/// class it's easy to implement sound processing stages that can be chained together, +/// so that samples that are fed into beginning of the pipe automatically go through +/// all the processing stages. +/// +/// When samples are input to this class, they're first processed and then put to +/// the FIFO pipe that's defined as output of this class. This output pipe can be +/// either other processing stage or a FIFO sample buffer. +class FIFOProcessor :public FIFOSamplePipe +{ +protected: + /// Internal pipe where processed samples are put. + FIFOSamplePipe *output; + + /// Sets output pipe. + void setOutPipe(FIFOSamplePipe *pOutput) + { + assert(output == NULL); + assert(pOutput != NULL); + output = pOutput; + } + + + /// Constructor. Doesn't define output pipe; it has to be set be + /// 'setOutPipe' function. + FIFOProcessor() + { + output = NULL; + } + + + /// Constructor. Configures output pipe. + FIFOProcessor(FIFOSamplePipe *pOutput ///< Output pipe. + ) + { + output = pOutput; + } + + + /// Destructor. + virtual ~FIFOProcessor() + { + } + + + /// Returns a pointer to the beginning of the output samples. + /// This function is provided for accessing the output samples directly. + /// Please be careful for not to corrupt the book-keeping! + /// + /// When using this function to output samples, also remember to 'remove' the + /// output samples from the buffer by calling the + /// 'receiveSamples(numSamples)' function + virtual SAMPLETYPE *ptrBegin() + { + return output->ptrBegin(); + } + +public: + + /// Output samples from beginning of the sample buffer. Copies requested samples to + /// output buffer and removes them from the sample buffer. If there are less than + /// 'numsample' samples in the buffer, returns all that available. + /// + /// \return Number of samples returned. + virtual uint receiveSamples(SAMPLETYPE *outBuffer, ///< Buffer where to copy output samples. + uint maxSamples ///< How many samples to receive at max. + ) + { + return output->receiveSamples(outBuffer, maxSamples); + } + + + /// Adjusts book-keeping so that given number of samples are removed from beginning of the + /// sample buffer without copying them anywhere. + /// + /// Used to reduce the number of samples in the buffer when accessing the sample buffer directly + /// with 'ptrBegin' function. + virtual uint receiveSamples(uint maxSamples ///< Remove this many samples from the beginning of pipe. + ) + { + return output->receiveSamples(maxSamples); + } + + + /// Returns number of samples currently available. + virtual uint numSamples() const + { + return output->numSamples(); + } + + + /// Returns nonzero if there aren't any samples available for outputting. + virtual int isEmpty() const + { + return output->isEmpty(); + } + + /// allow trimming (downwards) amount of samples in pipeline. 
+ /// Returns adjusted amount of samples + virtual uint adjustAmountOfSamples(uint numSamples) + { + return output->adjustAmountOfSamples(numSamples); + } + +}; + +} + +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/STTypes.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/STTypes.h new file mode 100644 index 0000000..7af6bb9 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/STTypes.h @@ -0,0 +1,188 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Common type definitions for SoundTouch audio processing library. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai 'at' iki.fi +/// SoundTouch WWW: http://www.surina.net/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date: 2012-12-28 16:53:56 +0200 (Fri, 28 Dec 2012) $ +// File revision : $Revision: 3 $ +// +// $Id: STTypes.h 162 2012-12-28 14:53:56Z oparviai $ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef STTypes_H +#define STTypes_H + +typedef unsigned int uint; +typedef unsigned long ulong; + +// Patch for MinGW: on Win64 long is 32-bit +#ifdef _WIN64 + typedef unsigned long long ulongptr; +#else + typedef ulong ulongptr; +#endif + +// Helper macro for aligning pointer up to next 16-byte boundary +#define SOUNDTOUCH_ALIGN_POINTER_16(x) ( ( (ulongptr)(x) + 15 ) & ~(ulongptr)15 ) + +//#if (defined(__GNUC__) && !defined(ANDROID)) +// // In GCC, include soundtouch_config.h made by config scritps. +// // Skip this in Android compilation that uses GCC but without configure scripts. 
+// #include "soundtouch_config.h" +//#endif + +#ifndef _WINDEF_ + // if these aren't defined already by Windows headers, define now + + //typedef int BOOL; +#ifndef BOOL +#define BOOL int +#endif + + #define FALSE 0 + #define TRUE 1 + +#endif // _WINDEF_ + + +namespace soundtouch +{ + /// Activate these undef's to overrule the possible sampletype + /// setting inherited from some other header file: + //#undef SOUNDTOUCH_INTEGER_SAMPLES + //#undef SOUNDTOUCH_FLOAT_SAMPLES + + #if (defined(__SOFTFP__)) + // For Android compilation: Force use of Integer samples in case that + // compilation uses soft-floating point emulation - soft-fp is way too slow + #undef SOUNDTOUCH_FLOAT_SAMPLES + #define SOUNDTOUCH_INTEGER_SAMPLES 1 + #endif + + #if !(SOUNDTOUCH_INTEGER_SAMPLES || SOUNDTOUCH_FLOAT_SAMPLES) + + /// Choose either 32bit floating point or 16bit integer sampletype + /// by choosing one of the following defines, unless this selection + /// has already been done in some other file. + //// + /// Notes: + /// - In Windows environment, choose the sample format with the + /// following defines. + /// - In GNU environment, the floating point samples are used by + /// default, but integer samples can be chosen by giving the + /// following switch to the configure script: + /// ./configure --enable-integer-samples + /// However, if you still prefer to select the sample format here + /// also in GNU environment, then please #undef the INTEGER_SAMPLE + /// and FLOAT_SAMPLE defines first as in comments above. + //#define SOUNDTOUCH_INTEGER_SAMPLES 1 //< 16bit integer samples + #define SOUNDTOUCH_FLOAT_SAMPLES 1 //< 32bit float samples + + #endif + + #if (_M_IX86 || __i386__ || __x86_64__ || _M_X64) + /// Define this to allow X86-specific assembler/intrinsic optimizations. + /// Notice that library contains also usual C++ versions of each of these + /// these routines, so if you're having difficulties getting the optimized + /// routines compiled for whatever reason, you may disable these optimizations + /// to make the library compile. + + //#define SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS 0 + + /// In GNU environment, allow the user to override this setting by + /// giving the following switch to the configure script: + /// ./configure --disable-x86-optimizations + /// ./configure --enable-x86-optimizations=no + #ifdef SOUNDTOUCH_DISABLE_X86_OPTIMIZATIONS + #undef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS + #endif + #else + /// Always disable optimizations when not using a x86 systems. + #undef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS + + #endif + + // If defined, allows the SIMD-optimized routines to take minor shortcuts + // for improved performance. Undefine to require faithfully similar SIMD + // calculations as in normal C implementation. + #define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION 1 + + + #ifdef SOUNDTOUCH_INTEGER_SAMPLES + // 16bit integer sample type + typedef short SAMPLETYPE; + // data type for sample accumulation: Use 32bit integer to prevent overflows + typedef long LONG_SAMPLETYPE; + + #ifdef SOUNDTOUCH_FLOAT_SAMPLES + // check that only one sample type is defined + #error "conflicting sample types defined" + #endif // SOUNDTOUCH_FLOAT_SAMPLES + + #ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS + // Allow MMX optimizations + #define SOUNDTOUCH_ALLOW_MMX 1 + #endif + + #else + + // floating point samples + typedef float SAMPLETYPE; + // data type for sample accumulation: Use double to utilize full precision. 
+ typedef double LONG_SAMPLETYPE; + + #ifdef SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS + // Allow SSE optimizations + #define SOUNDTOUCH_ALLOW_SSE 1 + #endif + + #endif // SOUNDTOUCH_INTEGER_SAMPLES + +}; + +// define ST_NO_EXCEPTION_HANDLING switch to disable throwing std exceptions: +#define ST_NO_EXCEPTION_HANDLING 1 +#ifdef ST_NO_EXCEPTION_HANDLING + // Exceptions disabled. Throw asserts instead if enabled. + #include + #define ST_THROW_RT_ERROR(x) {assert((const char *)x);} +#else + // use c++ standard exceptions + #include + #define ST_THROW_RT_ERROR(x) {throw std::runtime_error(x);} +#endif + +// When this #define is active, eliminates a clicking sound when the "rate" or "pitch" +// parameter setting crosses from value <1 to >=1 or vice versa during processing. +// Default is off as such crossover is untypical case and involves a slight sound +// quality compromise. +//#define SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER 1 + +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/aa_filter/AAFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/aa_filter/AAFilter.cpp new file mode 100644 index 0000000..f099bce --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/aa_filter/AAFilter.cpp @@ -0,0 +1,184 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// FIR low-pass (anti-alias) filter with filter coefficient design routine and +/// MMX optimization. +/// +/// Anti-alias filter is used to prevent folding of high frequencies when +/// transposing the sample rate with interpolation. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai 'at' iki.fi +/// SoundTouch WWW: http://www.surina.net/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date: 2009-01-11 13:34:24 +0200 (Sun, 11 Jan 2009) $ +// File revision : $Revision: 4 $ +// +// $Id: AAFilter.cpp 45 2009-01-11 11:34:24Z oparviai $ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include "AAFilter.h" +#include "FIRFilter.h" + +using namespace soundtouch; + +#define PI 3.141592655357989 +#define TWOPI (2 * PI) + +/***************************************************************************** + * + * Implementation of the class 'AAFilter' + * + *****************************************************************************/ + +AAFilter::AAFilter(uint len) +{ + pFIR = FIRFilter::newInstance(); + cutoffFreq = 0.5; + setLength(len); +} + + + +AAFilter::~AAFilter() +{ + delete pFIR; +} + + + +// Sets new anti-alias filter cut-off edge frequency, scaled to +// sampling frequency (nyquist frequency = 0.5). +// The filter will cut frequencies higher than the given frequency. +void AAFilter::setCutoffFreq(double newCutoffFreq) +{ + cutoffFreq = newCutoffFreq; + calculateCoeffs(); +} + + + +// Sets number of FIR filter taps +void AAFilter::setLength(uint newLength) +{ + length = newLength; + calculateCoeffs(); +} + + + +// Calculates coefficients for a low-pass FIR filter using Hamming window +void AAFilter::calculateCoeffs() +{ + uint i; + double cntTemp, temp, tempCoeff,h, w; + double fc2, wc; + double scaleCoeff, sum; + double *work; + SAMPLETYPE *coeffs; + + assert(length >= 2); + assert(length % 4 == 0); + assert(cutoffFreq >= 0); + assert(cutoffFreq <= 0.5); + + work = new double[length]; + coeffs = new SAMPLETYPE[length]; + + fc2 = 2.0 * cutoffFreq; + wc = PI * fc2; + tempCoeff = TWOPI / (double)length; + + sum = 0; + for (i = 0; i < length; i ++) + { + cntTemp = (double)i - (double)(length / 2); + + temp = cntTemp * wc; + if (temp != 0) + { + h = fc2 * sin(temp) / temp; // sinc function + } + else + { + h = 1.0; + } + w = 0.54 + 0.46 * cos(tempCoeff * cntTemp); // hamming window + + temp = w * h; + work[i] = temp; + + // calc net sum of coefficients + sum += temp; + } + + // ensure the sum of coefficients is larger than zero + assert(sum > 0); + + // ensure we've really designed a lowpass filter... + assert(work[length/2] > 0); + assert(work[length/2 + 1] > -1e-6); + assert(work[length/2 - 1] > -1e-6); + + // Calculate a scaling coefficient in such a way that the result can be + // divided by 16384 + scaleCoeff = 16384.0f / sum; + + for (i = 0; i < length; i ++) + { + // scale & round to nearest integer + temp = work[i] * scaleCoeff; + temp += (temp >= 0) ? 0.5 : -0.5; + // ensure no overfloods + assert(temp >= -32768 && temp <= 32767); + coeffs[i] = (SAMPLETYPE)temp; + } + + // Set coefficients. Use divide factor 14 => divide result by 2^14 = 16384 + pFIR->setCoefficients(coeffs, length, 14); + + delete[] work; + delete[] coeffs; +} + + +// Applies the filter to the given sequence of samples. +// Note : The amount of outputted samples is by value of 'filter length' +// smaller than the amount of input samples. 
+uint AAFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const +{ + return pFIR->evaluate(dest, src, numSamples, numChannels); +} + + +uint AAFilter::getLength() const +{ + return pFIR->getLength(); +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/aa_filter/AAFilter.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/aa_filter/AAFilter.h new file mode 100644 index 0000000..d099757 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/aa_filter/AAFilter.h @@ -0,0 +1,91 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo +/// while maintaining the original pitch by using a time domain WSOLA-like method +/// with several performance-increasing tweaks. +/// +/// Anti-alias filter is used to prevent folding of high frequencies when +/// transposing the sample rate with interpolation. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai 'at' iki.fi +/// SoundTouch WWW: http://www.surina.net/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $ +// File revision : $Revision: 4 $ +// +// $Id: AAFilter.h 11 2008-02-10 16:26:55Z oparviai $ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef AAFilter_H +#define AAFilter_H + +#include "STTypes.h" + +namespace soundtouch +{ + +class AAFilter +{ +protected: + class FIRFilter *pFIR; + + /// Low-pass filter cut-off frequency, negative = invalid + double cutoffFreq; + + /// num of filter taps + uint length; + + /// Calculate the FIR coefficients realizing the given cutoff-frequency + void calculateCoeffs(); +public: + AAFilter(uint length); + + ~AAFilter(); + + /// Sets new anti-alias filter cut-off edge frequency, scaled to sampling + /// frequency (nyquist frequency = 0.5). The filter will cut off the + /// frequencies than that. + void setCutoffFreq(double newCutoffFreq); + + /// Sets number of FIR filter taps, i.e. ~filter complexity + void setLength(uint newLength); + + uint getLength() const; + + /// Applies the filter to the given sequence of samples. + /// Note : The amount of outputted samples is by value of 'filter length' + /// smaller than the amount of input samples. 
+ uint evaluate(SAMPLETYPE *dest, + const SAMPLETYPE *src, + uint numSamples, + uint numChannels) const; +}; + +} + +#endif diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/bpm_detect/BPMDetect.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/bpm_detect/BPMDetect.cpp new file mode 100644 index 0000000..a48cbd9 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/bpm_detect/BPMDetect.cpp @@ -0,0 +1,370 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Beats-per-minute (BPM) detection routine. +/// +/// The beat detection algorithm works as follows: +/// - Use function 'inputSamples' to input a chunks of samples to the class for +/// analysis. It's a good idea to enter a large sound file or stream in smallish +/// chunks of around few kilosamples in order not to extinguish too much RAM memory. +/// - Inputted sound data is decimated to approx 500 Hz to reduce calculation burden, +/// which is basically ok as low (bass) frequencies mostly determine the beat rate. +/// Simple averaging is used for anti-alias filtering because the resulting signal +/// quality isn't of that high importance. +/// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by +/// taking absolute value that's smoothed by sliding average. Signal levels that +/// are below a couple of times the general RMS amplitude level are cut away to +/// leave only notable peaks there. +/// - Repeating sound patterns (e.g. beats) are detected by calculating short-term +/// autocorrelation function of the enveloped signal. +/// - After whole sound data file has been analyzed as above, the bpm level is +/// detected by function 'getBpm' that finds the highest peak of the autocorrelation +/// function, calculates it's precise location and converts this reading to bpm's. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai 'at' iki.fi +/// SoundTouch WWW: http://www.surina.net/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date: 2012-08-30 22:45:25 +0300 (Thu, 30 Aug 2012) $ +// File revision : $Revision: 4 $ +// +// $Id: BPMDetect.cpp 149 2012-08-30 19:45:25Z oparviai $ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
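//
// Sketch of driving the AAFilter declared above (tap count, cutoff and buffer
// names are hypothetical): note the documented contract that the output is
// 'filter length' samples shorter than the input.
//
//     soundtouch::AAFilter aa(32);      // 32 taps; length must be a multiple of 4
//     aa.setCutoffFreq(0.25);           // scaled frequency, 0.5 == Nyquist
//     uint outFrames = aa.evaluate(dst, src, nFrames, 2);   // 2 = stereo
//     // expect roughly nFrames - 32 frames in dst
//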
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include "FIFOSampleBuffer.h" +#include "PeakFinder.h" +#include "BPMDetect.h" + +using namespace soundtouch; + +#define INPUT_BLOCK_SAMPLES 2048 +#define DECIMATED_BLOCK_SAMPLES 256 + +/// decay constant for calculating RMS volume sliding average approximation +/// (time constant is about 10 sec) +const float avgdecay = 0.99986f; + +/// Normalization coefficient for calculating RMS sliding average approximation. +const float avgnorm = (1 - avgdecay); + + +//////////////////////////////////////////////////////////////////////////////// + +// Enable following define to create bpm analysis file: + +// #define _CREATE_BPM_DEBUG_FILE + +#ifdef _CREATE_BPM_DEBUG_FILE + + #define DEBUGFILE_NAME "c:\\temp\\soundtouch-bpm-debug.txt" + + static void _SaveDebugData(const float *data, int minpos, int maxpos, double coeff) + { + FILE *fptr = fopen(DEBUGFILE_NAME, "wt"); + int i; + + if (fptr) + { + printf("\n\nWriting BPM debug data into file " DEBUGFILE_NAME "\n\n"); + for (i = minpos; i < maxpos; i ++) + { + fprintf(fptr, "%d\t%.1lf\t%f\n", i, coeff / (double)i, data[i]); + } + fclose(fptr); + } + } +#else + #define _SaveDebugData(a,b,c,d) +#endif + +//////////////////////////////////////////////////////////////////////////////// + + +BPMDetect::BPMDetect(int numChannels, int aSampleRate) +{ + this->sampleRate = aSampleRate; + this->channels = numChannels; + + decimateSum = 0; + decimateCount = 0; + + envelopeAccu = 0; + + // Initialize RMS volume accumulator to RMS level of 1500 (out of 32768) that's + // safe initial RMS signal level value for song data. This value is then adapted + // to the actual level during processing. +#ifdef SOUNDTOUCH_INTEGER_SAMPLES + // integer samples + RMSVolumeAccu = (1500 * 1500) / avgnorm; +#else + // float samples, scaled to range [-1..+1[ + RMSVolumeAccu = (0.045f * 0.045f) / avgnorm; +#endif + + // choose decimation factor so that result is approx. 1000 Hz + decimateBy = sampleRate / 1000; + assert(decimateBy > 0); + assert(INPUT_BLOCK_SAMPLES < decimateBy * DECIMATED_BLOCK_SAMPLES); + + // Calculate window length & starting item according to desired min & max bpms + windowLen = (60 * sampleRate) / (decimateBy * MIN_BPM); + windowStart = (60 * sampleRate) / (decimateBy * MAX_BPM); + + assert(windowLen > windowStart); + + // allocate new working objects + xcorr = new float[windowLen]; + memset(xcorr, 0, windowLen * sizeof(float)); + + // allocate processing buffer + buffer = new FIFOSampleBuffer(); + // we do processing in mono mode + buffer->setChannels(1); + buffer->clear(); +} + + + +BPMDetect::~BPMDetect() +{ + delete[] xcorr; + delete buffer; +} + + + +/// convert to mono, low-pass filter & decimate to about 500 Hz. +/// return number of outputted samples. +/// +/// Decimation is used to remove the unnecessary frequencies and thus to reduce +/// the amount of data needed to be processed as calculating autocorrelation +/// function is a very-very heavy operation. +/// +/// Anti-alias filtering is done simply by averaging the samples. 
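//
// A minimal sketch of "decimate by averaging" as used below (hypothetical
// helper, not in the library): sum decimateBy consecutive mono samples and
// emit their mean.
//
//     // emits one output per N inputs; acc/count persist across calls,
//     // like decimateSum/decimateCount in BPMDetect::decimate
//     bool decimate_step(float s, float &acc, int &count, int N, float &out) {
//         acc += s;
//         if (++count < N) return false;
//         out = acc / (float)N;   // averaged (crude lowpass) sample
//         acc = 0.0f; count = 0;
//         return true;
//     }
//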
This is really a +/// poor-man's anti-alias filtering, but it's not so critical in this kind of application +/// (it'd also be difficult to design a high-quality filter with steep cut-off at very +/// narrow band) +int BPMDetect::decimate(SAMPLETYPE *dest, const SAMPLETYPE *src, int numsamples) +{ + int count, outcount; + LONG_SAMPLETYPE out; + + assert(channels > 0); + assert(decimateBy > 0); + outcount = 0; + for (count = 0; count < numsamples; count ++) + { + int j; + + // convert to mono and accumulate + for (j = 0; j < channels; j ++) + { + decimateSum += src[j]; + } + src += j; + + decimateCount ++; + if (decimateCount >= decimateBy) + { + // Store every Nth sample only + out = (LONG_SAMPLETYPE)(decimateSum / (decimateBy * channels)); + decimateSum = 0; + decimateCount = 0; +#ifdef SOUNDTOUCH_INTEGER_SAMPLES + // check ranges for sure (shouldn't actually be necessary) + if (out > 32767) + { + out = 32767; + } + else if (out < -32768) + { + out = -32768; + } +#endif // SOUNDTOUCH_INTEGER_SAMPLES + dest[outcount] = (SAMPLETYPE)out; + outcount ++; + } + } + return outcount; +} + + + +// Calculates autocorrelation function of the sample history buffer +void BPMDetect::updateXCorr(int process_samples) +{ + int offs; + SAMPLETYPE *pBuffer; + + assert(buffer->numSamples() >= (uint)(process_samples + windowLen)); + + pBuffer = buffer->ptrBegin(); + for (offs = windowStart; offs < windowLen; offs ++) + { + LONG_SAMPLETYPE sum; + int i; + + sum = 0; + for (i = 0; i < process_samples; i ++) + { + sum += pBuffer[i] * pBuffer[i + offs]; // scaling the sub-result shouldn't be necessary + } +// xcorr[offs] *= xcorr_decay; // decay 'xcorr' here with suitable coefficients + // if it's desired that the system adapts automatically to + // various bpms, e.g. in processing continouos music stream. + // The 'xcorr_decay' should be a value that's smaller than but + // close to one, and should also depend on 'process_samples' value. + + xcorr[offs] += (float)sum; + } +} + + +// Calculates envelope of the sample data +void BPMDetect::calcEnvelope(SAMPLETYPE *samples, int numsamples) +{ + const static double decay = 0.7f; // decay constant for smoothing the envelope + const static double norm = (1 - decay); + + int i; + LONG_SAMPLETYPE out; + double val; + + for (i = 0; i < numsamples; i ++) + { + // calc average RMS volume + RMSVolumeAccu *= avgdecay; + val = (float)fabs((float)samples[i]); + RMSVolumeAccu += val * val; + + // cut amplitudes that are below cutoff ~2 times RMS volume + // (we're interested in peak values, not the silent moments) + if (val < 0.5 * sqrt(RMSVolumeAccu * avgnorm)) + { + val = 0; + } + + // smooth amplitude envelope + envelopeAccu *= decay; + envelopeAccu += val; + out = (LONG_SAMPLETYPE)(envelopeAccu * norm); + +#ifdef SOUNDTOUCH_INTEGER_SAMPLES + // cut peaks (shouldn't be necessary though) + if (out > 32767) out = 32767; +#endif // SOUNDTOUCH_INTEGER_SAMPLES + samples[i] = (SAMPLETYPE)out; + } +} + + + +void BPMDetect::inputSamples(const SAMPLETYPE *samples, int numSamples) +{ + SAMPLETYPE decimated[DECIMATED_BLOCK_SAMPLES]; + + // iterate so that max INPUT_BLOCK_SAMPLES processed per iteration + while (numSamples > 0) + { + int block; + int decSamples; + + block = (numSamples > INPUT_BLOCK_SAMPLES) ? INPUT_BLOCK_SAMPLES : numSamples; + + // decimate. 
note that converts to mono at the same time + decSamples = decimate(decimated, samples, block); + samples += block * channels; + numSamples -= block; + + // envelope new samples and add them to buffer + calcEnvelope(decimated, decSamples); + buffer->putSamples(decimated, decSamples); + } + + // when the buffer has enought samples for processing... + if ((int)buffer->numSamples() > windowLen) + { + int processLength; + + // how many samples are processed + processLength = (int)buffer->numSamples() - windowLen; + + // ... calculate autocorrelations for oldest samples... + updateXCorr(processLength); + // ... and remove them from the buffer + buffer->receiveSamples(processLength); + } +} + + + +void BPMDetect::removeBias() +{ + int i; + float minval = 1e12f; // arbitrary large number + + for (i = windowStart; i < windowLen; i ++) + { + if (xcorr[i] < minval) + { + minval = xcorr[i]; + } + } + + for (i = windowStart; i < windowLen; i ++) + { + xcorr[i] -= minval; + } +} + + +float BPMDetect::getBpm() +{ + double peakPos; + double coeff; + PeakFinder peakFinder; + + coeff = 60.0 * ((double)sampleRate / (double)decimateBy); + + // save bpm debug analysis data if debug data enabled + _SaveDebugData(xcorr, windowStart, windowLen, coeff); + + // remove bias from xcorr data + removeBias(); + + // find peak position + peakPos = peakFinder.detectPeak(xcorr, windowStart, windowLen); + + assert(decimateBy != 0); + if (peakPos < 1e-9) return 0.0; // detection failed. + + // calculate BPM + return (float) (coeff / peakPos); +} diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/bpm_detect/BPMDetect.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/bpm_detect/BPMDetect.h new file mode 100644 index 0000000..7248989 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/bpm_detect/BPMDetect.h @@ -0,0 +1,164 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Beats-per-minute (BPM) detection routine. +/// +/// The beat detection algorithm works as follows: +/// - Use function 'inputSamples' to input a chunks of samples to the class for +/// analysis. It's a good idea to enter a large sound file or stream in smallish +/// chunks of around few kilosamples in order not to extinguish too much RAM memory. +/// - Input sound data is decimated to approx 500 Hz to reduce calculation burden, +/// which is basically ok as low (bass) frequencies mostly determine the beat rate. +/// Simple averaging is used for anti-alias filtering because the resulting signal +/// quality isn't of that high importance. +/// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by +/// taking absolute value that's smoothed by sliding average. Signal levels that +/// are below a couple of times the general RMS amplitude level are cut away to +/// leave only notable peaks there. +/// - Repeating sound patterns (e.g. beats) are detected by calculating short-term +/// autocorrelation function of the enveloped signal. +/// - After whole sound data file has been analyzed as above, the bpm level is +/// detected by function 'getBpm' that finds the highest peak of the autocorrelation +/// function, calculates it's precise location and converts this reading to bpm's. 
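//
// Sketch of the usage documented in this header (the 4096-frame chunking and
// the read_frames() reader are hypothetical): feed the whole song through
// inputSamples in modest blocks, then ask for the tempo once at the end.
//
//     soundtouch::BPMDetect bpm(2, 44100);              // stereo, 44.1 kHz
//     while (int n = read_frames(file, buf, 4096))      // n = frames per channel
//         bpm.inputSamples(buf, n);
//     float tempo = bpm.getBpm();                       // 0.0f if detection failed
//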
+/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai 'at' iki.fi +/// SoundTouch WWW: http://www.surina.net/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date: 2012-08-30 22:53:44 +0300 (Thu, 30 Aug 2012) $ +// File revision : $Revision: 4 $ +// +// $Id: BPMDetect.h 150 2012-08-30 19:53:44Z oparviai $ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef _BPMDetect_H_ +#define _BPMDetect_H_ + +#include "STTypes.h" +#include "FIFOSampleBuffer.h" + +namespace soundtouch +{ + +/// Minimum allowed BPM rate. Used to restrict accepted result above a reasonable limit. +#define MIN_BPM 29 + +/// Maximum allowed BPM rate. Used to restrict accepted result below a reasonable limit. +#define MAX_BPM 200 + + +/// Class for calculating BPM rate for audio data. +class BPMDetect +{ +protected: + /// Auto-correlation accumulator bins. + float *xcorr; + + /// Amplitude envelope sliding average approximation level accumulator + double envelopeAccu; + + /// RMS volume sliding average approximation level accumulator + double RMSVolumeAccu; + + /// Sample average counter. + int decimateCount; + + /// Sample average accumulator for FIFO-like decimation. + soundtouch::LONG_SAMPLETYPE decimateSum; + + /// Decimate sound by this coefficient to reach approx. 500 Hz. + int decimateBy; + + /// Auto-correlation window length + int windowLen; + + /// Number of channels (1 = mono, 2 = stereo) + int channels; + + /// sample rate + int sampleRate; + + /// Beginning of auto-correlation window: Autocorrelation isn't being updated for + /// the first these many correlation bins. + int windowStart; + + /// FIFO-buffer for decimated processing samples. + soundtouch::FIFOSampleBuffer *buffer; + + /// Updates auto-correlation function for given number of decimated samples that + /// are read from the internal 'buffer' pipe (samples aren't removed from the pipe + /// though). + void updateXCorr(int process_samples /// How many samples are processed. + ); + + /// Decimates samples to approx. 500 Hz. + /// + /// \return Number of output samples. + int decimate(soundtouch::SAMPLETYPE *dest, ///< Destination buffer + const soundtouch::SAMPLETYPE *src, ///< Source sample buffer + int numsamples ///< Number of source samples. + ); + + /// Calculates amplitude envelope for the buffer of samples. + /// Result is output to 'samples'. 
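//
// Worked example of the lag-to-BPM conversion done by getBpm() (numbers are
// illustrative): with sampleRate = 44100, decimateBy = 44100 / 1000 = 44, so
// one autocorrelation lag covers 44 / 44100 s, and
//
//     coeff = 60.0 * (44100.0 / 44.0);   // ~60136
//     bpm   = coeff / peakPos;           // peakPos ~501 lags  ->  ~120 BPM
//
// i.e. a beat repeating every ~0.5 s of decimated signal reads as 120 BPM.
//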
+ void calcEnvelope(soundtouch::SAMPLETYPE *samples, ///< Pointer to input/output data buffer + int numsamples ///< Number of samples in buffer + ); + + /// remove constant bias from xcorr data + void removeBias(); + +public: + /// Constructor. + BPMDetect(int numChannels, ///< Number of channels in sample data. + int sampleRate ///< Sample rate in Hz. + ); + + /// Destructor. + virtual ~BPMDetect(); + + /// Inputs a block of samples for analyzing: Envelopes the samples and then + /// updates the autocorrelation estimation. When whole song data has been input + /// in smaller blocks using this function, read the resulting bpm with 'getBpm' + /// function. + /// + /// Notice that data in 'samples' array can be disrupted in processing. + void inputSamples(const soundtouch::SAMPLETYPE *samples, ///< Pointer to input/working data buffer + int numSamples ///< Number of samples in buffer + ); + + + /// Analyzes the results and returns the BPM rate. Use this function to read result + /// after whole song data has been input to the class by consecutive calls of + /// 'inputSamples' function. + /// + /// \return Beats-per-minute rate, or zero if detection failed. + float getBpm(); +}; + +} + +#endif // _BPMDetect_H_ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/cpu_detect/cpu_detect.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/cpu_detect/cpu_detect.h new file mode 100644 index 0000000..7859ffb --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/cpu_detect/cpu_detect.h @@ -0,0 +1,62 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// A header file for detecting the Intel MMX instructions set extension. +/// +/// Please see 'mmx_win.cpp', 'mmx_cpp.cpp' and 'mmx_non_x86.cpp' for the +/// routine implementations for x86 Windows, x86 gnu version and non-x86 +/// platforms, respectively. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai 'at' iki.fi +/// SoundTouch WWW: http://www.surina.net/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date: 2008-02-10 18:26:55 +0200 (Sun, 10 Feb 2008) $ +// File revision : $Revision: 4 $ +// +// $Id: cpu_detect.h 11 2008-02-10 16:26:55Z oparviai $ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef _CPU_DETECT_H_ +#define _CPU_DETECT_H_ + +#include "STTypes.h" + +#define SUPPORT_MMX 0x0001 +#define SUPPORT_3DNOW 0x0002 +#define SUPPORT_ALTIVEC 0x0004 +#define SUPPORT_SSE 0x0008 +#define SUPPORT_SSE2 0x0010 + +/// Checks which instruction set extensions are supported by the CPU. +/// +/// \return A bitmask of supported extensions, see SUPPORT_... defines. +uint detectCPUextensions(void); + +/// Disables given set of instruction extensions. See SUPPORT_... defines. +void disableExtensions(uint wDisableMask); + +#endif // _CPU_DETECT_H_ diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/cpu_detect/cpu_detect_x86.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/cpu_detect/cpu_detect_x86.cpp new file mode 100644 index 0000000..fff3240 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/cpu_detect/cpu_detect_x86.cpp @@ -0,0 +1,137 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Generic version of the x86 CPU extension detection routine. +/// +/// This file is for GNU & other non-Windows compilers, see 'cpu_detect_x86_win.cpp' +/// for the Microsoft compiler version. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai 'at' iki.fi +/// SoundTouch WWW: http://www.surina.net/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date: 2012-11-08 20:44:37 +0200 (Thu, 08 Nov 2012) $ +// File revision : $Revision: 4 $ +// +// $Id: cpu_detect_x86.cpp 159 2012-11-08 18:44:37Z oparviai $ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include "cpu_detect.h" +#include "STTypes.h" + +#if defined(SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS) + + #if defined(__GNUC__) && defined(__i386__) + // gcc + #include "cpuid.h" + #elif defined(_M_IX86) + // windows non-gcc + #include + #define bit_MMX (1 << 23) + #define bit_SSE (1 << 25) + #define bit_SSE2 (1 << 26) + #endif + +#endif + + +////////////////////////////////////////////////////////////////////////////// +// +// processor instructions extension detection routines +// +////////////////////////////////////////////////////////////////////////////// + +// Flag variable indicating whick ISA extensions are disabled (for debugging) +static uint _dwDisabledISA = 0x00; // 0xffffffff; //<- use this to disable all extensions + +// Disables given set of instruction extensions. See SUPPORT_... defines. +void disableExtensions(uint dwDisableMask) +{ + _dwDisabledISA = dwDisableMask; +} + + + +/// Checks which instruction set extensions are supported by the CPU. +uint detectCPUextensions(void) +{ +/// If building for a 64bit system (no Itanium) and the user wants optimizations. +/// Return the OR of SUPPORT_{MMX,SSE,SSE2}. 11001 or 0x19. +/// Keep the _dwDisabledISA test (2 more operations, could be eliminated). +#if ((defined(__GNUC__) && defined(__x86_64__)) \ + || defined(_M_X64)) \ + && defined(SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS) + return 0x19 & ~_dwDisabledISA; + +/// If building for a 32bit system and the user wants optimizations. +/// Keep the _dwDisabledISA test (2 more operations, could be eliminated). +#elif ((defined(__GNUC__) && defined(__i386__)) \ + || defined(_M_IX86)) \ + && defined(SOUNDTOUCH_ALLOW_X86_OPTIMIZATIONS) + + if (_dwDisabledISA == 0xffffffff) return 0; + + uint res = 0; + +#if defined(__GNUC__) + // GCC version of cpuid. Requires GCC 4.3.0 or later for __cpuid intrinsic support. + uint eax, ebx, ecx, edx; // unsigned int is the standard type. uint is defined by the compiler and not guaranteed to be portable. + + // Check if no cpuid support. + if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)) return 0; // always disable extensions. + + if (edx & bit_MMX) res = res | SUPPORT_MMX; + if (edx & bit_SSE) res = res | SUPPORT_SSE; + if (edx & bit_SSE2) res = res | SUPPORT_SSE2; + +#else + // Window / VS version of cpuid. Notice that Visual Studio 2005 or later required + // for __cpuid intrinsic support. + int reg[4] = {-1}; + + // Check if no cpuid support. + __cpuid(reg,0); + if ((unsigned int)reg[0] == 0) return 0; // always disable extensions. + + __cpuid(reg,1); + if ((unsigned int)reg[3] & bit_MMX) res = res | SUPPORT_MMX; + if ((unsigned int)reg[3] & bit_SSE) res = res | SUPPORT_SSE; + if ((unsigned int)reg[3] & bit_SSE2) res = res | SUPPORT_SSE2; + +#endif + + return res & ~_dwDisabledISA; + +#else + +/// One of these is true: +/// 1) We don't want optimizations. +/// 2) Using an unsupported compiler. +/// 3) Running on a non-x86 platform. 
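//
// Hypothetical caller sketch: the returned mask is ANDed against the
// SUPPORT_* bits from cpu_detect.h.
//
//     uint ext = detectCPUextensions();
//     if (ext & SUPPORT_SSE) { /* select the SSE build of a routine */ }
//     disableExtensions(SUPPORT_MMX);    // e.g. force the plain C path for MMX
//
// On x86-64 the branch above short-circuits to 0x19, i.e.
// SUPPORT_MMX | SUPPORT_SSE | SUPPORT_SSE2 (0x01 | 0x08 | 0x10), minus
// whatever has been disabled.
//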
+    return 0;
+
+#endif
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fifo_sample_buffer/FIFOSampleBuffer.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fifo_sample_buffer/FIFOSampleBuffer.cpp
new file mode 100644
index 0000000..4d9740a
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fifo_sample_buffer/FIFOSampleBuffer.cpp
@@ -0,0 +1,274 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// A buffer class for temporarily storing sound samples; operates as a
+/// first-in-first-out pipe.
+///
+/// Samples are added to the end of the sample buffer with the 'putSamples'
+/// function, and are received from the beginning of the buffer by calling
+/// the 'receiveSamples' function. The class automatically removes the
+/// outputted samples from the buffer, as well as grows the buffer size
+/// whenever necessary.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2012-11-08 20:53:01 +0200 (Thu, 08 Nov 2012) $
+// File revision : $Revision: 4 $
+//
+// $Id: FIFOSampleBuffer.cpp 160 2012-11-08 18:53:01Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <stdlib.h>
+#include <memory.h>
+#include <string.h>
+#include <assert.h>
+
+#include "FIFOSampleBuffer.h"
+
+using namespace soundtouch;
+
+// Constructor
+FIFOSampleBuffer::FIFOSampleBuffer(int numChannels)
+{
+    assert(numChannels > 0);
+    sizeInBytes = 0; // reasonable initial value
+    buffer = NULL;
+    bufferUnaligned = NULL;
+    samplesInBuffer = 0;
+    bufferPos = 0;
+    channels = (uint)numChannels;
+    ensureCapacity(32);     // allocate initial capacity
+}
+
+
+// destructor
+FIFOSampleBuffer::~FIFOSampleBuffer()
+{
+    delete[] bufferUnaligned;
+    bufferUnaligned = NULL;
+    buffer = NULL;
+}
+
+
+// Sets number of channels, 1 = mono, 2 = stereo
+void FIFOSampleBuffer::setChannels(int numChannels)
+{
+    uint usedBytes;
+
+    assert(numChannels > 0);
+    usedBytes = channels * samplesInBuffer;
+    channels = (uint)numChannels;
+    samplesInBuffer = usedBytes / channels;
+}
+
+
+// if output location pointer 'bufferPos' isn't zero, 'rewinds' the buffer and
+// zeroes this pointer by copying samples from the 'bufferPos' pointer
+// location on to the beginning of the buffer.
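//
// Note on the design (sketch with hypothetical values): receiveSamples(n)
// below is O(1) -- it only advances bufferPos and shrinks samplesInBuffer --
// and the memmove cost is deferred to rewind(), which ensureCapacity() runs
// when more room is needed.
//
//     fifo.putSamples(buf, 1000);   // samplesInBuffer = 1000, bufferPos = 0
//     fifo.receiveSamples(400);     // samplesInBuffer = 600,  bufferPos = 400
//     // the next growth/rewind memmoves the 600 live samples to the front
//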
+void FIFOSampleBuffer::rewind()
+{
+    if (buffer && bufferPos)
+    {
+        memmove(buffer, ptrBegin(), sizeof(SAMPLETYPE) * channels * samplesInBuffer);
+        bufferPos = 0;
+    }
+}
+
+
+// Adds 'numSamples' pcs of samples from the 'samples' memory position to
+// the sample buffer.
+void FIFOSampleBuffer::putSamples(const SAMPLETYPE *samples, uint nSamples)
+{
+    memcpy(ptrEnd(nSamples), samples, sizeof(SAMPLETYPE) * nSamples * channels);
+    samplesInBuffer += nSamples;
+}
+
+
+// Increases the number of samples in the buffer without copying any actual
+// samples.
+//
+// This function is used to update the number of samples in the sample buffer
+// when accessing the buffer directly with 'ptrEnd' function. Please be
+// careful though!
+void FIFOSampleBuffer::putSamples(uint nSamples)
+{
+    uint req;
+
+    req = samplesInBuffer + nSamples;
+    ensureCapacity(req);
+    samplesInBuffer += nSamples;
+}
+
+
+// Returns a pointer to the end of the used part of the sample buffer (i.e.
+// where the new samples are to be inserted). This function may be used for
+// inserting new samples into the sample buffer directly. Please be careful!
+//
+// Parameter 'slackCapacity' tells the function how much free capacity (in
+// terms of samples) there _at least_ should be, so that the caller can
+// successfully insert all the required samples into the buffer. When necessary,
+// the function grows the buffer size to comply with this requirement.
+//
+// When using this function as means for inserting new samples, also remember
+// to increase the sample count afterwards, by calling the
+// 'putSamples(numSamples)' function.
+SAMPLETYPE *FIFOSampleBuffer::ptrEnd(uint slackCapacity)
+{
+    ensureCapacity(samplesInBuffer + slackCapacity);
+    return buffer + samplesInBuffer * channels;
+}
+
+
+// Returns a pointer to the beginning of the currently non-outputted samples.
+// This function is provided for accessing the output samples directly.
+// Please be careful!
+//
+// When using this function to output samples, also remember to 'remove' the
+// outputted samples from the buffer by calling the
+// 'receiveSamples(numSamples)' function
+SAMPLETYPE *FIFOSampleBuffer::ptrBegin()
+{
+    assert(buffer);
+    return buffer + bufferPos * channels;
+}
+
+
+// Ensures that the buffer has enough capacity, i.e. space for _at least_
+// 'capacityRequirement' number of samples. The buffer is grown in steps of
+// 4 kilobytes to eliminate the need for frequently growing the buffer,
+// as well as to round the buffer size up to the virtual memory page size.
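+// E.g. a request for 1000 stereo 16-bit samples needs 4000 bytes, which is
+// rounded up to 4096; 1100 samples (4400 bytes) round up to 8192. (Worked
+// figures only, assuming sizeof(SAMPLETYPE) == 2 and channels == 2.)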
+void FIFOSampleBuffer::ensureCapacity(uint capacityRequirement)
+{
+    SAMPLETYPE *tempUnaligned, *temp;
+
+    if (capacityRequirement > getCapacity())
+    {
+        // enlarge the buffer in 4kbyte steps (round up to next 4k boundary)
+        sizeInBytes = (capacityRequirement * channels * sizeof(SAMPLETYPE) + 4095) & (uint)-4096;
+        assert(sizeInBytes % 2 == 0);
+        tempUnaligned = new SAMPLETYPE[sizeInBytes / sizeof(SAMPLETYPE) + 16 / sizeof(SAMPLETYPE)];
+        if (tempUnaligned == NULL)
+        {
+            ST_THROW_RT_ERROR("Couldn't allocate memory!\n");
+        }
+        // Align the buffer to begin at 16byte cache line boundary for optimal performance
+        temp = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(tempUnaligned);
+        if (samplesInBuffer)
+        {
+            memcpy(temp, ptrBegin(), samplesInBuffer * channels * sizeof(SAMPLETYPE));
+        }
+        delete[] bufferUnaligned;
+        buffer = temp;
+        bufferUnaligned = tempUnaligned;
+        bufferPos = 0;
+    }
+    else
+    {
+        // simply rewind the buffer (if necessary)
+        rewind();
+    }
+}
+
+
+// Returns the current buffer capacity in terms of samples
+uint FIFOSampleBuffer::getCapacity() const
+{
+    return sizeInBytes / (channels * sizeof(SAMPLETYPE));
+}
+
+
+// Returns the number of samples currently in the buffer
+uint FIFOSampleBuffer::numSamples() const
+{
+    return samplesInBuffer;
+}
+
+
+// Output samples from beginning of the sample buffer. Copies demanded number
+// of samples to output and removes them from the sample buffer. If there
+// are fewer than 'numSamples' samples in the buffer, returns all available.
+//
+// Returns number of samples copied.
+uint FIFOSampleBuffer::receiveSamples(SAMPLETYPE *output, uint maxSamples)
+{
+    uint num;
+
+    num = (maxSamples > samplesInBuffer) ? samplesInBuffer : maxSamples;
+
+    memcpy(output, ptrBegin(), channels * sizeof(SAMPLETYPE) * num);
+    return receiveSamples(num);
+}
+
+
+// Removes samples from the beginning of the sample buffer without copying them
+// anywhere. Used to reduce the number of samples in the buffer, when accessing
+// the sample buffer with the 'ptrBegin' function.
+uint FIFOSampleBuffer::receiveSamples(uint maxSamples)
+{
+    if (maxSamples >= samplesInBuffer)
+    {
+        uint temp;
+
+        temp = samplesInBuffer;
+        samplesInBuffer = 0;
+        return temp;
+    }
+
+    samplesInBuffer -= maxSamples;
+    bufferPos += maxSamples;
+
+    return maxSamples;
+}
+
+
+// Returns nonzero if the sample buffer is empty
+int FIFOSampleBuffer::isEmpty() const
+{
+    return (samplesInBuffer == 0) ? 1 : 0;
+}
+
+
+// Clears the sample buffer
+void FIFOSampleBuffer::clear()
+{
+    samplesInBuffer = 0;
+    bufferPos = 0;
+}
+
+
+/// allow trimming (downwards) amount of samples in pipeline.
+/// Returns adjusted amount of samples
+uint FIFOSampleBuffer::adjustAmountOfSamples(uint numSamples)
+{
+    if (numSamples < samplesInBuffer)
+    {
+        samplesInBuffer = numSamples;
+    }
+    return samplesInBuffer;
+}
+
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fifo_sample_buffer/FIFOSampleBuffer.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fifo_sample_buffer/FIFOSampleBuffer.h
new file mode 100644
index 0000000..3789b4d
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fifo_sample_buffer/FIFOSampleBuffer.h
@@ -0,0 +1,178 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// A buffer class for temporarily storing sound samples, operates as a
+/// first-in-first-out pipe.
+///
+/// Samples are added to the end of the sample buffer with the 'putSamples'
+/// function, and are received from the beginning of the buffer by calling
+/// the 'receiveSamples' function. The class automatically removes the
+/// output samples from the buffer as well as grows the storage size
+/// whenever necessary.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2012-06-13 22:29:53 +0300 (Wed, 13 Jun 2012) $
+// File revision : $Revision: 4 $
+//
+// $Id: FIFOSampleBuffer.h 143 2012-06-13 19:29:53Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef FIFOSampleBuffer_H
+#define FIFOSampleBuffer_H
+
+#include "FIFOSamplePipe.h"
+
+namespace soundtouch
+{
+
+/// Sample buffer working in FIFO (first-in-first-out) principle. The class takes
+/// care of storage size adjustment and data moving during input/output operations.
+///
+/// Notice that in case of stereo audio, one sample is considered to consist of
+/// both channel data.
+class FIFOSampleBuffer : public FIFOSamplePipe
+{
+private:
+    /// Sample buffer.
+    SAMPLETYPE *buffer;
+
+    // Raw unaligned buffer memory. 'buffer' is made aligned by pointing it to first
+    // 16-byte aligned location of this buffer
+    SAMPLETYPE *bufferUnaligned;
+
+    /// Sample buffer size in bytes
+    uint sizeInBytes;
+
+    /// How many samples are currently in buffer.
+    uint samplesInBuffer;
+
+    /// Channels, 1=mono, 2=stereo.
+    uint channels;
+
+    /// Current position pointer to the buffer. This pointer is increased when samples are
+    /// removed from the pipe, so that the buffer needs to be actually rewound (data moved)
+    /// only when new data is put into the pipe.
+    uint bufferPos;
+
+    /// Rewind the buffer by moving data from position pointed by 'bufferPos' to real
+    /// beginning of the buffer.
+    void rewind();
+
+    /// Ensures that the buffer has capacity for at least this many samples.
+    void ensureCapacity(uint capacityRequirement);
+
+    /// Returns current capacity.
+    uint getCapacity() const;
+
+public:
+
+    /// Constructor
+    FIFOSampleBuffer(int numChannels = 2     ///< Number of channels, 1=mono, 2=stereo.
+                                             ///< Default is stereo.
+                     );
+
+    /// destructor
+    ~FIFOSampleBuffer();
+
+    /// Returns a pointer to the beginning of the output samples.
+    /// This function is provided for accessing the output samples directly.
+    /// Please be careful not to corrupt the book-keeping!
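+    ///
+    /// A minimal direct-access sketch (with a hypothetical consume() helper):
+    ///     uint n = buf.numSamples();
+    ///     consume(buf.ptrBegin(), n * channels);  // read samples in place
+    ///     buf.receiveSamples(n);                  // then drop what was read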
+    ///
+    /// When using this function to output samples, also remember to 'remove' the
+    /// output samples from the buffer by calling the
+    /// 'receiveSamples(numSamples)' function
+    virtual SAMPLETYPE *ptrBegin();
+
+    /// Returns a pointer to the end of the used part of the sample buffer (i.e.
+    /// where the new samples are to be inserted). This function may be used for
+    /// inserting new samples into the sample buffer directly. Please be careful
+    /// not to corrupt the book-keeping!
+    ///
+    /// When using this function as means for inserting new samples, also remember
+    /// to increase the sample count afterwards, by calling the
+    /// 'putSamples(numSamples)' function.
+    SAMPLETYPE *ptrEnd(
+                uint slackCapacity   ///< How much free capacity (in samples) there _at least_
+                                     ///< should be so that the caller can successfully insert the
+                                     ///< desired samples to the buffer. If necessary, the function
+                                     ///< grows the buffer size to comply with this requirement.
+                );
+
+    /// Adds 'numSamples' pcs of samples from the 'samples' memory position to
+    /// the sample buffer.
+    virtual void putSamples(const SAMPLETYPE *samples,  ///< Pointer to samples.
+                            uint numSamples             ///< Number of samples to insert.
+                            );
+
+    /// Adjusts the book-keeping to increase number of samples in the buffer without
+    /// copying any actual samples.
+    ///
+    /// This function is used to update the number of samples in the sample buffer
+    /// when accessing the buffer directly with 'ptrEnd' function. Please be
+    /// careful though!
+    virtual void putSamples(uint numSamples   ///< Number of samples that have been inserted.
+                            );
+
+    /// Output samples from beginning of the sample buffer. Copies requested samples to
+    /// output buffer and removes them from the sample buffer. If there are fewer than
+    /// 'numSamples' samples in the buffer, returns all that are available.
+    ///
+    /// \return Number of samples returned.
+    virtual uint receiveSamples(SAMPLETYPE *output, ///< Buffer where to copy output samples.
+                                uint maxSamples     ///< How many samples to receive at max.
+                                );
+
+    /// Adjusts book-keeping so that given number of samples are removed from beginning of the
+    /// sample buffer without copying them anywhere.
+    ///
+    /// Used to reduce the number of samples in the buffer when accessing the sample buffer directly
+    /// with 'ptrBegin' function.
+    virtual uint receiveSamples(uint maxSamples   ///< Remove this many samples from the beginning of pipe.
+                                );
+
+    /// Returns number of samples currently available.
+    virtual uint numSamples() const;
+
+    /// Sets number of channels, 1 = mono, 2 = stereo.
+    void setChannels(int numChannels);
+
+    /// Returns nonzero if there aren't any samples available for outputting.
+    virtual int isEmpty() const;
+
+    /// Clears all the samples.
+    virtual void clear();
+
+    /// allow trimming (downwards) amount of samples in pipeline.
+    /// Returns adjusted amount of samples
+    uint adjustAmountOfSamples(uint numSamples);
+};
+
+}
+
+#endif
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fir_filter/FIRFilter.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fir_filter/FIRFilter.cpp
new file mode 100644
index 0000000..1570516
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fir_filter/FIRFilter.cpp
@@ -0,0 +1,259 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// General FIR digital filter routines with MMX optimization.
+///
+/// Note : MMX optimized functions reside in a separate, platform-specific file,
+/// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2011-09-02 21:56:11 +0300 (Fri, 02 Sep 2011) $
+// File revision : $Revision: 4 $
+//
+// $Id: FIRFilter.cpp 131 2011-09-02 18:56:11Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <memory.h>
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include "FIRFilter.h"
+#include "cpu_detect.h"
+
+using namespace soundtouch;
+
+/*****************************************************************************
+ *
+ * Implementation of the class 'FIRFilter'
+ *
+ *****************************************************************************/
+
+FIRFilter::FIRFilter()
+{
+    resultDivFactor = 0;
+    resultDivider = 0;
+    length = 0;
+    lengthDiv8 = 0;
+    filterCoeffs = NULL;
+}
+
+
+FIRFilter::~FIRFilter()
+{
+    delete[] filterCoeffs;
+}
+
+// Usual C-version of the filter routine for stereo sound
+uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
+{
+    uint i, j, end;
+    LONG_SAMPLETYPE suml, sumr;
+#ifdef SOUNDTOUCH_FLOAT_SAMPLES
+    // when using floating point samples, use a scaler instead of a divider
+    // because division is a much slower operation than multiplication.
+    double dScaler = 1.0 / (double)resultDivider;
+#endif
+
+    assert(length != 0);
+    assert(src != NULL);
+    assert(dest != NULL);
+    assert(filterCoeffs != NULL);
+
+    end = 2 * (numSamples - length);
+
+    for (j = 0; j < end; j += 2)
+    {
+        const SAMPLETYPE *ptr;
+
+        suml = sumr = 0;
+        ptr = src + j;
+
+        for (i = 0; i < length; i += 4)
+        {
+            // loop is unrolled by factor of 4 here for efficiency
+            suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
+                    ptr[2 * i + 2] * filterCoeffs[i + 1] +
+                    ptr[2 * i + 4] * filterCoeffs[i + 2] +
+                    ptr[2 * i + 6] * filterCoeffs[i + 3];
+            sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
+                    ptr[2 * i + 3] * filterCoeffs[i + 1] +
+                    ptr[2 * i + 5] * filterCoeffs[i + 2] +
+                    ptr[2 * i + 7] * filterCoeffs[i + 3];
+        }
+
+#ifdef SOUNDTOUCH_INTEGER_SAMPLES
+        suml >>= resultDivFactor;
+        sumr >>= resultDivFactor;
+        // saturate to 16 bit integer limits
+        suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
+        // saturate to 16 bit integer limits
+        sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
+#else
+        suml *= dScaler;
+        sumr *= dScaler;
+#endif // SOUNDTOUCH_INTEGER_SAMPLES
+        dest[j] = (SAMPLETYPE)suml;
+        dest[j + 1] = (SAMPLETYPE)sumr;
+    }
+    return numSamples - length;
+}
+
+
+
+
+// Usual C-version of the filter routine for mono sound
+uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
+{
+    uint i, j, end;
+    LONG_SAMPLETYPE sum;
+#ifdef SOUNDTOUCH_FLOAT_SAMPLES
+    // when using floating point samples, use a scaler instead of a divider
+    // because division is a much slower operation than multiplication.
+    double dScaler = 1.0 / (double)resultDivider;
+#endif
+
+
+    assert(length != 0);
+
+    end = numSamples - length;
+    for (j = 0; j < end; j ++)
+    {
+        sum = 0;
+        for (i = 0; i < length; i += 4)
+        {
+            // loop is unrolled by factor of 4 here for efficiency
+            sum += src[i + 0] * filterCoeffs[i + 0] +
+                   src[i + 1] * filterCoeffs[i + 1] +
+                   src[i + 2] * filterCoeffs[i + 2] +
+                   src[i + 3] * filterCoeffs[i + 3];
+        }
+#ifdef SOUNDTOUCH_INTEGER_SAMPLES
+        sum >>= resultDivFactor;
+        // saturate to 16 bit integer limits
+        sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
+#else
+        sum *= dScaler;
+#endif // SOUNDTOUCH_INTEGER_SAMPLES
+        dest[j] = (SAMPLETYPE)sum;
+        src ++;
+    }
+    return end;
+}
+
+
+// Set filter coefficients and length.
+//
+// Throws an exception if filter length isn't divisible by 8
+void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint uResultDivFactor)
+{
+    assert(newLength > 0);
+    if (newLength % 8) ST_THROW_RT_ERROR("FIR filter length not divisible by 8");
+
+    lengthDiv8 = newLength / 8;
+    length = lengthDiv8 * 8;
+    assert(length == newLength);
+
+    resultDivFactor = uResultDivFactor;
+    resultDivider = (SAMPLETYPE)::pow(2.0, (int)resultDivFactor);
+
+    delete[] filterCoeffs;
+    filterCoeffs = new SAMPLETYPE[length];
+    memcpy(filterCoeffs, coeffs, length * sizeof(SAMPLETYPE));
+}
+
+
+uint FIRFilter::getLength() const
+{
+    return length;
+}
+
+
+
+// Applies the filter to the given sequence of samples.
+//
+// Note : The amount of output samples is smaller than the amount of input
+// samples by the value of 'filter_length'.
+uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const
+{
+    assert(numChannels == 1 || numChannels == 2);
+
+    assert(length > 0);
+    assert(lengthDiv8 * 8 == length);
+    if (numSamples < length) return 0;
+    if (numChannels == 2)
+    {
+        return evaluateFilterStereo(dest, src, numSamples);
+    } else {
+        return evaluateFilterMono(dest, src, numSamples);
+    }
+}
+
+
+
+// Operator 'new' is overloaded so that it automatically creates a suitable instance
+// depending on whether an MMX-capable CPU is available or not.
+void * FIRFilter::operator new(size_t s)
+{
+    // Notice! don't use "new FIRFilter" directly, use "newInstance" to create a new instance instead!
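+    // (The overload throws on purpose: allocation is expected to go through
+    // newInstance() below, which picks an MMX-, SSE- or plain-C FIRFilter at
+    // runtime based on detectCPUextensions().)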
+    ST_THROW_RT_ERROR("Error in FIRFilter::new: Don't use 'new FIRFilter', use 'newInstance' member instead!");
+    return newInstance();
+}
+
+
+FIRFilter * FIRFilter::newInstance()
+{
+    uint uExtensions;
+
+    uExtensions = detectCPUextensions();
+
+    // Check if MMX/SSE instruction set extensions supported by CPU
+
+#ifdef SOUNDTOUCH_ALLOW_MMX
+    // MMX routines available only with integer sample types
+    if (uExtensions & SUPPORT_MMX)
+    {
+        return ::new FIRFilterMMX;
+    }
+    else
+#endif // SOUNDTOUCH_ALLOW_MMX
+
+#ifdef SOUNDTOUCH_ALLOW_SSE
+    if (uExtensions & SUPPORT_SSE)
+    {
+        // SSE support
+        return ::new FIRFilterSSE;
+    }
+    else
+#endif // SOUNDTOUCH_ALLOW_SSE
+
+    {
+        // ISA optimizations not supported, use plain C version
+        return ::new FIRFilter;
+    }
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fir_filter/FIRFilter.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fir_filter/FIRFilter.h
new file mode 100644
index 0000000..e156309
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/fir_filter/FIRFilter.h
@@ -0,0 +1,145 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// General FIR digital filter routines with MMX optimization.
+///
+/// Note : MMX optimized functions reside in a separate, platform-specific file,
+/// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2011-02-13 21:13:57 +0200 (Sun, 13 Feb 2011) $
+// File revision : $Revision: 4 $
+//
+// $Id: FIRFilter.h 104 2011-02-13 19:13:57Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef FIRFilter_H
+#define FIRFilter_H
+
+#include <stddef.h>
+#include "STTypes.h"
+
+namespace soundtouch
+{
+
+class FIRFilter
+{
+protected:
+    // Number of FIR filter taps
+    uint length;
+    // Number of FIR filter taps divided by 8
+    uint lengthDiv8;
+
+    // Result divider factor in 2^k format
+    uint resultDivFactor;
+
+    // Result divider value.
+    SAMPLETYPE resultDivider;
+
+    // Memory for filter coefficients
+    SAMPLETYPE *filterCoeffs;
+
+    virtual uint evaluateFilterStereo(SAMPLETYPE *dest,
+                                      const SAMPLETYPE *src,
+                                      uint numSamples) const;
+    virtual uint evaluateFilterMono(SAMPLETYPE *dest,
+                                    const SAMPLETYPE *src,
+                                    uint numSamples) const;
+
+public:
+    FIRFilter();
+    virtual ~FIRFilter();
+
+    /// Operator 'new' is overloaded so that it automatically creates a suitable instance
+    /// depending on whether an MMX-capable CPU is available or not.
+    static void * operator new(size_t s);
+
+    static FIRFilter *newInstance();
+
+    /// Applies the filter to the given sequence of samples.
+    /// Note : The amount of output samples is smaller than the amount of input
+    /// samples by the value of 'filter_length'.
+    ///
+    /// \return Number of samples copied to 'dest'.
+    uint evaluate(SAMPLETYPE *dest,
+                  const SAMPLETYPE *src,
+                  uint numSamples,
+                  uint numChannels) const;
+
+    uint getLength() const;
+
+    virtual void setCoefficients(const SAMPLETYPE *coeffs,
+                                 uint newLength,
+                                 uint uResultDivFactor);
+};
+
+
+// Optional subclasses that implement CPU-specific optimizations:
+
+#ifdef SOUNDTOUCH_ALLOW_MMX
+
+/// Class that implements MMX optimized functions exclusively for the 16-bit integer sample type.
+    class FIRFilterMMX : public FIRFilter
+    {
+    protected:
+        short *filterCoeffsUnalign;
+        short *filterCoeffsAlign;
+
+        virtual uint evaluateFilterStereo(short *dest, const short *src, uint numSamples) const;
+    public:
+        FIRFilterMMX();
+        ~FIRFilterMMX();
+
+        virtual void setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor);
+    };
+
+#endif // SOUNDTOUCH_ALLOW_MMX
+
+
+#ifdef SOUNDTOUCH_ALLOW_SSE
+    /// Class that implements SSE optimized functions exclusively for the floating point sample type.
+    class FIRFilterSSE : public FIRFilter
+    {
+    protected:
+        float *filterCoeffsUnalign;
+        float *filterCoeffsAlign;
+
+        virtual uint evaluateFilterStereo(float *dest, const float *src, uint numSamples) const;
+    public:
+        FIRFilterSSE();
+        ~FIRFilterSSE();
+
+        virtual void setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor);
+    };
+
+#endif // SOUNDTOUCH_ALLOW_SSE
+
+}
+
+#endif  // FIRFilter_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/mmx_optimized.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/mmx_optimized.cpp
new file mode 100644
index 0000000..c3e251a
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/mmx_optimized.cpp
@@ -0,0 +1,317 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// MMX optimized routines. All MMX optimized functions have been gathered into
+/// this single source code file, regardless of their class or original source
+/// code file, in order to ease porting the library to other compiler and
+/// processor platforms.
+///
+/// The MMX-optimizations are programmed using MMX compiler intrinsics that
+/// are supported both by Microsoft Visual C++ and GCC compilers, so this file
+/// should compile with both toolsets.
+///
+/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++
+/// 6.0 processor pack" update to support compiler intrinsic syntax. The update
+/// is available for download at Microsoft Developers Network, see here:
+/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2012-11-08 20:53:01 +0200 (Thu, 08 Nov 2012) $
+// File revision : $Revision: 4 $
+//
+// $Id: mmx_optimized.cpp 160 2012-11-08 18:53:01Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "STTypes.h"
+
+#ifdef SOUNDTOUCH_ALLOW_MMX
+// MMX routines available only with integer sample type
+
+using namespace soundtouch;
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of MMX optimized functions of class 'TDStretchMMX'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "TDStretch.h"
+#include <mmintrin.h>
+#include <limits.h>
+#include <math.h>
+
+
+// Calculates cross correlation of two buffers
+double TDStretchMMX::calcCrossCorr(const short *pV1, const short *pV2) const
+{
+    const __m64 *pVec1, *pVec2;
+    __m64 shifter;
+    __m64 accu, normaccu;
+    long corr, norm;
+    int i;
+
+    pVec1 = (__m64*)pV1;
+    pVec2 = (__m64*)pV2;
+
+    shifter = _m_from_int(overlapDividerBits);
+    normaccu = accu = _mm_setzero_si64();
+
+    // process 4 parallel sets of 2 * stereo samples or 4 * mono samples
+    // during each round for improved CPU-level parallelization.
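+    // For example, with stereo data (channels == 2) and overlapLength == 64
+    // the loop below runs 2 * 64 / 16 = 8 rounds, each round consuming 16
+    // short samples from both input vectors. (Illustrative values only.)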
+    for (i = 0; i < channels * overlapLength / 16; i ++)
+    {
+        __m64 temp, temp2;
+
+        // dictionary of instructions:
+        // _m_pmaddwd   : 4*16bit multiply-add, resulting two 32bits = [a0*b0+a1*b1 ; a2*b2+a3*b3]
+        // _mm_add_pi32 : 2*32bit add
+        // _m_psrad     : 32bit right-shift
+
+        temp = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec2[0]),
+                            _mm_madd_pi16(pVec1[1], pVec2[1]));
+        temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[0], pVec1[0]),
+                             _mm_madd_pi16(pVec1[1], pVec1[1]));
+        accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
+        normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
+
+        temp = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec2[2]),
+                            _mm_madd_pi16(pVec1[3], pVec2[3]));
+        temp2 = _mm_add_pi32(_mm_madd_pi16(pVec1[2], pVec1[2]),
+                             _mm_madd_pi16(pVec1[3], pVec1[3]));
+        accu = _mm_add_pi32(accu, _mm_sra_pi32(temp, shifter));
+        normaccu = _mm_add_pi32(normaccu, _mm_sra_pi32(temp2, shifter));
+
+        pVec1 += 4;
+        pVec2 += 4;
+    }
+
+    // copy hi-dword of mm0 to lo-dword of mm1, then sum mm0+mm1
+    // and finally store the result into the variable "corr"
+
+    accu = _mm_add_pi32(accu, _mm_srli_si64(accu, 32));
+    corr = _m_to_int(accu);
+
+    normaccu = _mm_add_pi32(normaccu, _mm_srli_si64(normaccu, 32));
+    norm = _m_to_int(normaccu);
+
+    // Clear MMX state
+    _m_empty();
+
+    // Normalize result by dividing by sqrt(norm) - this step is easiest
+    // done using floating point operation
+    if (norm == 0) norm = 1;    // to avoid div by zero
+
+    return (double)corr / sqrt((double)norm);
+    // Note: Warning about the missing EMMS instruction is harmless
+    // as it'll be called elsewhere.
+}
+
+
+
+void TDStretchMMX::clearCrossCorrState()
+{
+    // Clear MMX state
+    _m_empty();
+    //_asm EMMS;
+}
+
+
+
+// MMX-optimized version of the function overlapStereo
+void TDStretchMMX::overlapStereo(short *output, const short *input) const
+{
+    const __m64 *pVinput, *pVMidBuf;
+    __m64 *pVdest;
+    __m64 mix1, mix2, adder, shifter;
+    int i;
+
+    pVinput = (const __m64*)input;
+    pVMidBuf = (const __m64*)pMidBuffer;
+    pVdest = (__m64*)output;
+
+    // mix1  = mixer values for 1st stereo sample
+    // mix2  = mixer values for 2nd stereo sample
+    // adder = adder for updating mixer values after each round
+
+    mix1  = _mm_set_pi16(0, overlapLength,   0, overlapLength);
+    adder = _mm_set_pi16(1, -1, 1, -1);
+    mix2  = _mm_add_pi16(mix1, adder);
+    adder = _mm_add_pi16(adder, adder);
+
+    // Overlaplength-division by shifter. "+1" is to account for "-1" deduced in
+    // overlapDividerBits calculation earlier.
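+    // In effect each output frame i is the linear crossfade
+    //   out[i] = (midBuffer[i] * (overlapLength - i) + input[i] * i)
+    //            >> (overlapDividerBits + 1)
+    // computed four 16-bit samples at a time. (Sketch of the arithmetic only,
+    // not additional code.)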
+    shifter = _m_from_int(overlapDividerBits + 1);
+
+    for (i = 0; i < overlapLength / 4; i ++)
+    {
+        __m64 temp1, temp2;
+
+        // load & shuffle data so that input & mixbuffer data samples are paired
+        temp1 = _mm_unpacklo_pi16(pVMidBuf[0], pVinput[0]);     // = i0l m0l i0r m0r
+        temp2 = _mm_unpackhi_pi16(pVMidBuf[0], pVinput[0]);     // = i1l m1l i1r m1r
+
+        // temp = (temp .* mix) >> shifter
+        temp1 = _mm_sra_pi32(_mm_madd_pi16(temp1, mix1), shifter);
+        temp2 = _mm_sra_pi32(_mm_madd_pi16(temp2, mix2), shifter);
+        pVdest[0] = _mm_packs_pi32(temp1, temp2); // pack 2*2*32bit => 4*16bit
+
+        // update mix += adder
+        mix1 = _mm_add_pi16(mix1, adder);
+        mix2 = _mm_add_pi16(mix2, adder);
+
+        // --- second round begins here ---
+
+        // load & shuffle data so that input & mixbuffer data samples are paired
+        temp1 = _mm_unpacklo_pi16(pVMidBuf[1], pVinput[1]);       // = i2l m2l i2r m2r
+        temp2 = _mm_unpackhi_pi16(pVMidBuf[1], pVinput[1]);       // = i3l m3l i3r m3r
+
+        // temp = (temp .* mix) >> shifter
+        temp1 = _mm_sra_pi32(_mm_madd_pi16(temp1, mix1), shifter);
+        temp2 = _mm_sra_pi32(_mm_madd_pi16(temp2, mix2), shifter);
+        pVdest[1] = _mm_packs_pi32(temp1, temp2); // pack 2*2*32bit => 4*16bit
+
+        // update mix += adder
+        mix1 = _mm_add_pi16(mix1, adder);
+        mix2 = _mm_add_pi16(mix2, adder);
+
+        pVinput += 2;
+        pVMidBuf += 2;
+        pVdest += 2;
+    }
+
+    _m_empty(); // clear MMX state
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of MMX optimized functions of class 'FIRFilter'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "FIRFilter.h"
+
+
+FIRFilterMMX::FIRFilterMMX() : FIRFilter()
+{
+    filterCoeffsUnalign = NULL;
+}
+
+
+FIRFilterMMX::~FIRFilterMMX()
+{
+    delete[] filterCoeffsUnalign;
+}
+
+
+// (overloaded) Calculates filter coefficients for MMX routine
+void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor)
+{
+    uint i;
+    FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);
+
+    // Ensure that filter coeffs array is aligned to 16-byte boundary
+    delete[] filterCoeffsUnalign;
+    filterCoeffsUnalign = new short[2 * newLength + 8];
+    filterCoeffsAlign = (short *)SOUNDTOUCH_ALIGN_POINTER_16(filterCoeffsUnalign);
+
+    // rearrange the filter coefficients for mmx routines
+    for (i = 0;i < length; i += 4)
+    {
+        filterCoeffsAlign[2 * i + 0] = coeffs[i + 0];
+        filterCoeffsAlign[2 * i + 1] = coeffs[i + 2];
+        filterCoeffsAlign[2 * i + 2] = coeffs[i + 0];
+        filterCoeffsAlign[2 * i + 3] = coeffs[i + 2];
+
+        filterCoeffsAlign[2 * i + 4] = coeffs[i + 1];
+        filterCoeffsAlign[2 * i + 5] = coeffs[i + 3];
+        filterCoeffsAlign[2 * i + 6] = coeffs[i + 1];
+        filterCoeffsAlign[2 * i + 7] = coeffs[i + 3];
+    }
+}
+
+
+
+// mmx-optimized version of the filter routine for stereo sound
+uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, uint numSamples) const
+{
+    // Create stack copies of the needed member variables for asm routines :
+    uint i, j;
+    __m64 *pVdest = (__m64*)dest;
+
+    if (length < 2) return 0;
+
+    for (i = 0; i < (numSamples - length) / 2; i ++)
+    {
+        __m64 accu1;
+        __m64 accu2;
+        const __m64 *pVsrc = (const __m64*)src;
+        const __m64 *pVfilter = (const __m64*)filterCoeffsAlign;
+
+        accu1 = accu2 = _mm_setzero_si64();
+        for (j = 0; j < lengthDiv8 * 2; j ++)
+        {
+            __m64 temp1, temp2;
+
+            temp1 = _mm_unpacklo_pi16(pVsrc[0], pVsrc[1]);  // = l2 l0 r2 r0
+            temp2 = _mm_unpackhi_pi16(pVsrc[0], pVsrc[1]);  // = l3 l1 r3 r1
+
+            accu1 = _mm_add_pi32(accu1, _mm_madd_pi16(temp1, pVfilter[0]));  // += l2*f2+l0*f0 r2*f2+r0*f0
+            accu1 = _mm_add_pi32(accu1, _mm_madd_pi16(temp2, pVfilter[1]));  // += l3*f3+l1*f1 r3*f3+r1*f1
+
+            temp1 = _mm_unpacklo_pi16(pVsrc[1], pVsrc[2]);  // = l4 l2 r4 r2
+
+            accu2 = _mm_add_pi32(accu2, _mm_madd_pi16(temp2, pVfilter[0]));  // += l3*f2+l1*f0 r3*f2+r1*f0
+            accu2 = _mm_add_pi32(accu2, _mm_madd_pi16(temp1, pVfilter[1]));  // += l4*f3+l2*f1 r4*f3+r2*f1
+
+            // accu1 += l2*f2+l0*f0 r2*f2+r0*f0
+            //       += l3*f3+l1*f1 r3*f3+r1*f1
+
+            // accu2 += l3*f2+l1*f0 r3*f2+r1*f0
+            //          l4*f3+l2*f1 r4*f3+r2*f1
+
+            pVfilter += 2;
+            pVsrc += 2;
+        }
+        // accu >>= resultDivFactor
+        accu1 = _mm_srai_pi32(accu1, resultDivFactor);
+        accu2 = _mm_srai_pi32(accu2, resultDivFactor);
+
+        // pack 2*2*32bits => 4*16 bits
+        pVdest[0] = _mm_packs_pi32(accu1, accu2);
+        src += 4;
+        pVdest ++;
+    }
+
+    _m_empty();    // clear MMX state
+
+    return (numSamples & 0xfffffffe) - length;
+}
+
+#endif  // SOUNDTOUCH_ALLOW_MMX
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/peak_finder/PeakFinder.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/peak_finder/PeakFinder.cpp
new file mode 100644
index 0000000..6dbf1d8
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/peak_finder/PeakFinder.cpp
@@ -0,0 +1,276 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Peak detection routine.
+///
+/// The routine detects highest value on an array of values and calculates the
+/// precise peak location as a mass-center of the 'hump' around the peak value.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2012-12-28 21:52:47 +0200 (Fri, 28 Dec 2012) $
+// File revision : $Revision: 4 $
+//
+// $Id: PeakFinder.cpp 164 2012-12-28 19:52:47Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <math.h>
+#include <assert.h>
+
+#include "PeakFinder.h"
+
+using namespace soundtouch;
+
+#define max(x, y) (((x) > (y)) ? (x) : (y))
+
+
+PeakFinder::PeakFinder()
+{
+    minPos = maxPos = 0;
+}
+
+
+// Finds real 'top' of a peak hump from the neighbourhood of the given 'peakpos'.
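+// The search is confined to roughly peakpos ± 10 bins; if the maximum lands
+// on an edge of that window it is rejected as a slope rather than a peak.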
+int PeakFinder::findTop(const float *data, int peakpos) const
+{
+    int i;
+    int start, end;
+    float refvalue;
+
+    refvalue = data[peakpos];
+
+    // seek within ±10 points
+    start = peakpos - 10;
+    if (start < minPos) start = minPos;
+    end = peakpos + 10;
+    if (end > maxPos) end = maxPos;
+
+    for (i = start; i <= end; i ++)
+    {
+        if (data[i] > refvalue)
+        {
+            peakpos = i;
+            refvalue = data[i];
+        }
+    }
+
+    // failure if max value is at edges of seek range => it's not peak, it's at slope.
+    if ((peakpos == start) || (peakpos == end)) return 0;
+
+    return peakpos;
+}
+
+
+// Finds 'ground level' of a peak hump by starting from 'peakpos' and proceeding
+// to direction defined by 'direction' until next 'hump' after minimum value will
+// begin
+int PeakFinder::findGround(const float *data, int peakpos, int direction) const
+{
+    int lowpos;
+    int pos;
+    int climb_count;
+    float refvalue;
+    float delta;
+
+    climb_count = 0;
+    refvalue = data[peakpos];
+    lowpos = peakpos;
+
+    pos = peakpos;
+
+    while ((pos > minPos+1) && (pos < maxPos-1))
+    {
+        int prevpos;
+
+        prevpos = pos;
+        pos += direction;
+
+        // calculate derivative
+        delta = data[pos] - data[prevpos];
+        if (delta <= 0)
+        {
+            // going downhill, ok
+            if (climb_count)
+            {
+                climb_count --;  // decrease climb count
+            }
+
+            // check if new minimum found
+            if (data[pos] < refvalue)
+            {
+                // new minimum found
+                lowpos = pos;
+                refvalue = data[pos];
+            }
+        }
+        else
+        {
+            // going uphill, increase climbing counter
+            climb_count ++;
+            if (climb_count > 5) break;    // we've been climbing too long => it's next uphill => quit
+        }
+    }
+    return lowpos;
+}
+
+
+// Find offset where the value crosses the given level, when starting from 'peakpos' and
+// proceeds to direction defined in 'direction'
+int PeakFinder::findCrossingLevel(const float *data, float level, int peakpos, int direction) const
+{
+    float peaklevel;
+    int pos;
+
+    peaklevel = data[peakpos];
+    assert(peaklevel >= level);
+    pos = peakpos;
+    while ((pos >= minPos) && (pos < maxPos))
+    {
+        if (data[pos + direction] < level) return pos;   // crossing found
+        pos += direction;
+    }
+    return -1;  // not found
+}
+
+
+// Calculates the center of mass location of 'data' array items between 'firstPos' and 'lastPos'
+double PeakFinder::calcMassCenter(const float *data, int firstPos, int lastPos) const
+{
+    int i;
+    float sum;
+    float wsum;
+
+    sum = 0;
+    wsum = 0;
+    for (i = firstPos; i <= lastPos; i ++)
+    {
+        sum += (float)i * data[i];
+        wsum += data[i];
+    }
+
+    if (wsum < 1e-6) return 0;
+    return sum / wsum;
+}
+
+
+
+/// get exact center of peak near given position by calculating local mass of center
+double PeakFinder::getPeakCenter(const float *data, int peakpos) const
+{
+    float peakLevel;            // peak level
+    int crosspos1, crosspos2;   // position where the peak 'hump' crosses cutting level
+    float cutLevel;             // cutting value
+    float groundLevel;          // ground level of the peak
+    int gp1, gp2;               // bottom positions of the peak 'hump'
+
+    // find ground positions.
+    gp1 = findGround(data, peakpos, -1);
+    gp2 = findGround(data, peakpos, 1);
+
+    groundLevel = 0.5f * (data[gp1] + data[gp2]);
+    peakLevel = data[peakpos];
+
+    // calculate 70%-level of the peak
+    cutLevel = 0.70f * peakLevel + 0.30f * groundLevel;
+    // find mid-level crossings
+    crosspos1 = findCrossingLevel(data, cutLevel, peakpos, -1);
+    crosspos2 = findCrossingLevel(data, cutLevel, peakpos, 1);
+
+    if ((crosspos1 < 0) || (crosspos2 < 0)) return 0;   // no crossing, no peak..
+
+    // calculate mass center of the peak surroundings
+    return calcMassCenter(data, crosspos1, crosspos2);
+}
+
+
+
+double PeakFinder::detectPeak(const float *data, int aminPos, int amaxPos)
+{
+
+    int i;
+    int peakpos;                // position of peak level
+    double highPeak, peak;
+
+    this->minPos = aminPos;
+    this->maxPos = amaxPos;
+
+    // find absolute peak
+    peakpos = minPos;
+    peak = data[minPos];
+    for (i = minPos + 1; i < maxPos; i ++)
+    {
+        if (data[i] > peak)
+        {
+            peak = data[i];
+            peakpos = i;
+        }
+    }
+
+    // Calculate exact location of the highest peak mass center
+    highPeak = getPeakCenter(data, peakpos);
+    peak = highPeak;
+
+    // Now check if the highest peak was in fact a harmonic of the true base beat peak
+    // - sometimes the highest peak can be Nth harmonic of the true base peak yet
+    // just slightly higher than the true base
+
+    for (i = 3; i < 10; i ++)
+    {
+        double peaktmp, harmonic;
+        int i1,i2;
+
+        harmonic = (double)i * 0.5;
+        peakpos = (int)(highPeak / harmonic + 0.5f);
+        if (peakpos < minPos) break;
+        peakpos = findTop(data, peakpos);   // seek true local maximum index
+        if (peakpos == 0) continue;         // no local max here
+
+        // calculate mass-center of possible harmonic peak
+        peaktmp = getPeakCenter(data, peakpos);
+
+        // accept harmonic peak if
+        // (a) it is found
+        // (b) it is within ±4% of the expected harmonic interval
+        // (c) it has at least half the x-corr value of the max. peak
+
+        double diff = harmonic * peaktmp / highPeak;
+        if ((diff < 0.96) || (diff > 1.04)) continue;   // peak too far from expected
+
+        // now compare to highest detected peak
+        i1 = (int)(highPeak + 0.5);
+        i2 = (int)(peaktmp + 0.5);
+        if (data[i2] >= 0.4*data[i1])
+        {
+            // The harmonic is at least half as high as the primary peak,
+            // thus use the harmonic peak instead
+            peak = peaktmp;
+        }
+    }
+
+    return peak;
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/peak_finder/PeakFinder.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/peak_finder/PeakFinder.h
new file mode 100644
index 0000000..d170b1c
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/peak_finder/PeakFinder.h
@@ -0,0 +1,97 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// The routine detects highest value on an array of values and calculates the
+/// precise peak location as a mass-center of the 'hump' around the peak value.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2011-12-30 22:33:46 +0200 (Fri, 30 Dec 2011) $
+// File revision : $Revision: 4 $
+//
+// $Id: PeakFinder.h 132 2011-12-30 20:33:46Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _PeakFinder_H_
+#define _PeakFinder_H_
+
+namespace soundtouch
+{
+
+class PeakFinder
+{
+protected:
+    /// Min, max allowed peak positions within the data vector
+    int minPos, maxPos;
+
+    /// Calculates the mass center between given vector items.
+    double calcMassCenter(const float *data, ///< Data vector.
+                          int firstPos,      ///< Index of first vector item belonging to the peak.
+                          int lastPos        ///< Index of last vector item belonging to the peak.
+                          ) const;
+
+    /// Finds the data vector index where the monotonously decreasing signal crosses the
+    /// given level.
+    int findCrossingLevel(const float *data,  ///< Data vector.
+                          float level,        ///< Goal crossing level.
+                          int peakpos,        ///< Peak position index within the data vector.
+                          int direction       /// Direction where to proceed from the peak: 1 = right, -1 = left.
+                          ) const;
+
+    // Finds real 'top' of a peak hump from the neighbourhood of the given 'peakpos'.
+    int findTop(const float *data, int peakpos) const;
+
+
+    /// Finds the 'ground' level, i.e. smallest level between two neighbouring peaks, to right-
+    /// or left-hand side of the given peak position.
+    int findGround(const float *data,     /// Data vector.
+                   int peakpos,           /// Peak position index within the data vector.
+                   int direction          /// Direction where to proceed from the peak: 1 = right, -1 = left.
+                   ) const;
+
+    /// get exact center of peak near given position by calculating local mass of center
+    double getPeakCenter(const float *data, int peakpos) const;
+
+public:
+    /// Constructor.
+    PeakFinder();
+
+    /// Detect exact peak position of the data vector by finding the largest peak 'hump'
+    /// and calculating the mass-center location of the peak hump.
+    ///
+    /// \return The location of the largest base harmonic peak hump.
+    double detectPeak(const float *data, /// Data vector to be analyzed. The data vector has
+                                         /// to be at least 'maxPos' items long.
+                      int minPos,        ///< Min allowed peak location within the vector data.
+                      int maxPos         ///< Max allowed peak location within the vector data.
+                      );
+};
+
+}
+
+#endif // _PeakFinder_H_
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/rate_transposer/RateTransposer.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/rate_transposer/RateTransposer.cpp
new file mode 100644
index 0000000..0ccb1cc
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/rate_transposer/RateTransposer.cpp
@@ -0,0 +1,626 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Sample rate transposer. Changes sample rate by using linear interpolation
+/// together with anti-alias filtering (first order interpolation with anti-
+/// alias filtering should be quite adequate for this application)
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2011-09-02 21:56:11 +0300 (Fri, 02 Sep 2011) $
+// File revision : $Revision: 4 $
+//
+// $Id: RateTransposer.cpp 131 2011-09-02 18:56:11Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <memory.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "RateTransposer.h"
+#include "AAFilter.h"
+
+using namespace soundtouch;
+
+
+/// A linear samplerate transposer class that uses integer arithmetic
+/// for the transposing.
+class RateTransposerInteger : public RateTransposer
+{
+protected:
+    int iSlopeCount;
+    int iRate;
+    SAMPLETYPE sPrevSampleL, sPrevSampleR;
+
+    virtual void resetRegisters();
+
+    virtual uint transposeStereo(SAMPLETYPE *dest,
+                                 const SAMPLETYPE *src,
+                                 uint numSamples);
+    virtual uint transposeMono(SAMPLETYPE *dest,
+                               const SAMPLETYPE *src,
+                               uint numSamples);
+
+public:
+    RateTransposerInteger();
+    virtual ~RateTransposerInteger();
+
+    /// Sets new target rate. Normal rate = 1.0, smaller values represent slower
+    /// rates, larger values faster rates.
+    virtual void setRate(float newRate);
+
+};
+
+
+/// A linear samplerate transposer class that uses floating point arithmetic
+/// for the transposing.
+class RateTransposerFloat : public RateTransposer
+{
+protected:
+    float fSlopeCount;
+    SAMPLETYPE sPrevSampleL, sPrevSampleR;
+
+    virtual void resetRegisters();
+
+    virtual uint transposeStereo(SAMPLETYPE *dest,
+                                 const SAMPLETYPE *src,
+                                 uint numSamples);
+    virtual uint transposeMono(SAMPLETYPE *dest,
+                               const SAMPLETYPE *src,
+                               uint numSamples);
+
+public:
+    RateTransposerFloat();
+    virtual ~RateTransposerFloat();
+};
+
+
+
+
+// Operator 'new' is overloaded so that it automatically creates a suitable instance
+// depending on if we've a MMX/SSE/etc-capable CPU available or not.
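+// (As with FIRFilter, direct "new RateTransposer" throws; newInstance() below
+// returns a RateTransposerInteger or RateTransposerFloat depending on the
+// SOUNDTOUCH_INTEGER_SAMPLES compile-time setting.)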
+void * RateTransposer::operator new(size_t s)
+{
+    ST_THROW_RT_ERROR("Error in RateTransposer::new: don't use \"new RateTransposer\" directly, use \"newInstance\" to create a new instance instead!");
+    return newInstance();
+}
+
+
+RateTransposer *RateTransposer::newInstance()
+{
+#ifdef SOUNDTOUCH_INTEGER_SAMPLES
+    return ::new RateTransposerInteger;
+#else
+    return ::new RateTransposerFloat;
+#endif
+}
+
+
+// Constructor
+RateTransposer::RateTransposer() : FIFOProcessor(&outputBuffer)
+{
+    numChannels = 2;
+    bUseAAFilter = TRUE;
+    fRate = 0;
+
+    // Instantiates the anti-alias filter with default tap length
+    // of 32
+    pAAFilter = new AAFilter(32);
+}
+
+
+
+RateTransposer::~RateTransposer()
+{
+    delete pAAFilter;
+}
+
+
+
+/// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable
+void RateTransposer::enableAAFilter(BOOL newMode)
+{
+    bUseAAFilter = newMode;
+}
+
+
+/// Returns nonzero if anti-alias filter is enabled.
+BOOL RateTransposer::isAAFilterEnabled() const
+{
+    return bUseAAFilter;
+}
+
+
+AAFilter *RateTransposer::getAAFilter()
+{
+    return pAAFilter;
+}
+
+
+
+// Sets new target iRate. Normal iRate = 1.0, smaller values represent slower
+// rates, larger values faster rates.
+void RateTransposer::setRate(float newRate)
+{
+    double fCutoff;
+
+    fRate = newRate;
+
+    // design a new anti-alias filter
+    if (newRate > 1.0f)
+    {
+        fCutoff = 0.5f / newRate;
+    }
+    else
+    {
+        fCutoff = 0.5f * newRate;
+    }
+    pAAFilter->setCutoffFreq(fCutoff);
+}
+
+
+// Outputs as many samples of the 'outputBuffer' as possible, and if there's
+// any room left, outputs also as many of the incoming samples as possible.
+// The goal is to drive the outputBuffer empty.
+//
+// It's allowed for 'output' and 'input' parameters to point to the same
+// memory position.
+/*
+void RateTransposer::flushStoreBuffer()
+{
+    if (storeBuffer.isEmpty()) return;
+
+    outputBuffer.moveSamples(storeBuffer);
+}
+*/
+
+
+// Adds 'nSamples' pcs of samples from the 'samples' memory position into
+// the input of the object.
+void RateTransposer::putSamples(const SAMPLETYPE *samples, uint nSamples)
+{
+    processSamples(samples, nSamples);
+}
+
+
+
+// Transposes up the sample rate, causing the observed playback 'rate' of the
+// sound to decrease
+void RateTransposer::upsample(const SAMPLETYPE *src, uint nSamples)
+{
+    uint count, sizeTemp, num;
+
+    // If the parameter 'uRate' value is smaller than 'SCALE', first transpose
+    // the samples and then apply the anti-alias filter to remove aliasing.
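+    // (Rationale: with rate < 1.0 the output is longer than the input, so
+    // transposing first and filtering afterwards runs the anti-alias filter
+    // at the higher post-transpose rate where the interpolation images lie.)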
+
+    // First check that there's enough room in 'storeBuffer'
+    // (+16 is to reserve some slack in the destination buffer)
+    sizeTemp = (uint)((float)nSamples / fRate + 16.0f);
+
+    // Transpose the samples, store the result into the end of "storeBuffer"
+    count = transpose(storeBuffer.ptrEnd(sizeTemp), src, nSamples);
+    storeBuffer.putSamples(count);
+
+    // Apply the anti-alias filter to the samples in 'storeBuffer', output the
+    // result to 'outputBuffer'
+    num = storeBuffer.numSamples();
+    count = pAAFilter->evaluate(outputBuffer.ptrEnd(num),
+        storeBuffer.ptrBegin(), num, (uint)numChannels);
+    outputBuffer.putSamples(count);
+
+    // Remove the processed samples from "storeBuffer"
+    storeBuffer.receiveSamples(count);
+}
+
+
+// Transposes down the sample rate, causing the observed playback 'rate' of the
+// sound to increase
+void RateTransposer::downsample(const SAMPLETYPE *src, uint nSamples)
+{
+    uint count, sizeTemp;
+
+    // If the parameter 'uRate' value is larger than 'SCALE', first apply the
+    // anti-alias filter to remove high frequencies (prevent them from folding
+    // over the lower frequencies), then transpose.
+
+    // Add the new samples to the end of the storeBuffer
+    storeBuffer.putSamples(src, nSamples);
+
+    // Anti-alias filter the samples to prevent folding and output the filtered
+    // data to tempBuffer. Note : because of the FIR filter length, the
+    // filtering routine takes in 'filter_length' more samples than it outputs.
+    assert(tempBuffer.isEmpty());
+    sizeTemp = storeBuffer.numSamples();
+
+    count = pAAFilter->evaluate(tempBuffer.ptrEnd(sizeTemp),
+        storeBuffer.ptrBegin(), sizeTemp, (uint)numChannels);
+
+    if (count == 0) return;
+
+    // Remove the filtered samples from 'storeBuffer'
+    storeBuffer.receiveSamples(count);
+
+    // Transpose the samples (+16 is to reserve some slack in the destination buffer)
+    sizeTemp = (uint)((float)nSamples / fRate + 16.0f);
+    count = transpose(outputBuffer.ptrEnd(sizeTemp), tempBuffer.ptrBegin(), count);
+    outputBuffer.putSamples(count);
+}
+
+
+// Transposes sample rate by applying anti-alias filter to prevent folding.
+// Returns amount of samples returned in the "dest" buffer.
+// The maximum amount of samples that can be returned at a time is set by
+// the 'set_returnBuffer_size' function.
+void RateTransposer::processSamples(const SAMPLETYPE *src, uint nSamples)
+{
+    uint count;
+    uint sizeReq;
+
+    if (nSamples == 0) return;
+    assert(pAAFilter);
+
+    // If anti-alias filter is turned off, simply transpose without applying
+    // the filter
+    if (bUseAAFilter == FALSE)
+    {
+        sizeReq = (uint)((float)nSamples / fRate + 1.0f);
+        count = transpose(outputBuffer.ptrEnd(sizeReq), src, nSamples);
+        outputBuffer.putSamples(count);
+        return;
+    }
+
+    // Transpose with anti-alias filter
+    if (fRate < 1.0f)
+    {
+        upsample(src, nSamples);
+    }
+    else
+    {
+        downsample(src, nSamples);
+    }
+}
+
+
+// Transposes the sample rate of the given samples using linear interpolation.
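+// For a fractional read position p between input samples x[k] and x[k+1],
+// each output sample is (1 - frac(p)) * x[k] + frac(p) * x[k+1]; the integer
+// implementation below keeps frac(p) scaled by SCALE (65536) in 'iSlopeCount'.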
+// Returns the number of samples returned in the "dest" buffer
+inline uint RateTransposer::transpose(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    if (numChannels == 2)
+    {
+        return transposeStereo(dest, src, nSamples);
+    }
+    else
+    {
+        return transposeMono(dest, src, nSamples);
+    }
+}
+
+
+// Sets the number of channels, 1 = mono, 2 = stereo
+void RateTransposer::setChannels(int nChannels)
+{
+    assert(nChannels > 0);
+    if (numChannels == nChannels) return;
+
+    assert(nChannels == 1 || nChannels == 2);
+    numChannels = nChannels;
+
+    storeBuffer.setChannels(numChannels);
+    tempBuffer.setChannels(numChannels);
+    outputBuffer.setChannels(numChannels);
+
+    // Inits the linear interpolation registers
+    resetRegisters();
+}
+
+
+// Clears all the samples in the object
+void RateTransposer::clear()
+{
+    outputBuffer.clear();
+    storeBuffer.clear();
+}
+
+
+// Returns nonzero if there aren't any samples available for outputting.
+int RateTransposer::isEmpty() const
+{
+    int res;
+
+    res = FIFOProcessor::isEmpty();
+    if (res == 0) return 0;
+    return storeBuffer.isEmpty();
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// RateTransposerInteger - integer arithmetic implementation
+//
+
+/// fixed-point interpolation routine precision
+#define SCALE 65536
+
+// Constructor
+RateTransposerInteger::RateTransposerInteger() : RateTransposer()
+{
+    // Notice: use local function calling syntax for sake of clarity,
+    // to indicate the fact that C++ constructor can't call virtual functions.
+    RateTransposerInteger::resetRegisters();
+    RateTransposerInteger::setRate(1.0f);
+}
+
+
+RateTransposerInteger::~RateTransposerInteger()
+{
+}
+
+
+void RateTransposerInteger::resetRegisters()
+{
+    iSlopeCount = 0;
+    sPrevSampleL =
+    sPrevSampleR = 0;
+}
+
+
+
+// Transposes the sample rate of the given samples using linear interpolation.
+// 'Mono' version of the routine. Returns the number of samples returned in
+// the "dest" buffer
+uint RateTransposerInteger::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    unsigned int i, used;
+    LONG_SAMPLETYPE temp, vol1;
+
+    if (nSamples == 0) return 0;  // no samples, no work
+
+    used = 0;
+    i = 0;
+
+    // process the last sample saved from the previous call first...
+    while (iSlopeCount <= SCALE)
+    {
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = vol1 * sPrevSampleL + iSlopeCount * src[0];
+        dest[i] = (SAMPLETYPE)(temp / SCALE);
+        i++;
+        iSlopeCount += iRate;
+    }
+    // now always (iSlopeCount > SCALE)
+    iSlopeCount -= SCALE;
+
+    while (1)
+    {
+        while (iSlopeCount > SCALE)
+        {
+            iSlopeCount -= SCALE;
+            used ++;
+            if (used >= nSamples - 1) goto end;
+        }
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = src[used] * vol1 + iSlopeCount * src[used + 1];
+        dest[i] = (SAMPLETYPE)(temp / SCALE);
+
+        i++;
+        iSlopeCount += iRate;
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[nSamples - 1];
+
+    return i;
+}
+
+
+// Transposes the sample rate of the given samples using linear interpolation.
+// 'Stereo' version of the routine. Returns the number of samples returned in
+// the "dest" buffer
+uint RateTransposerInteger::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    unsigned int srcPos, i, used;
+    LONG_SAMPLETYPE temp, vol1;
+
+    if (nSamples == 0) return 0;  // no samples, no work
+
+    used = 0;
+    i = 0;
+
+    // process the last sample saved from the previous call first...
+    while (iSlopeCount <= SCALE)
+    {
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = vol1 * sPrevSampleL + iSlopeCount * src[0];
+        dest[2 * i] = (SAMPLETYPE)(temp / SCALE);
+        temp = vol1 * sPrevSampleR + iSlopeCount * src[1];
+        dest[2 * i + 1] = (SAMPLETYPE)(temp / SCALE);
+        i++;
+        iSlopeCount += iRate;
+    }
+    // now always (iSlopeCount > SCALE)
+    iSlopeCount -= SCALE;
+
+    while (1)
+    {
+        while (iSlopeCount > SCALE)
+        {
+            iSlopeCount -= SCALE;
+            used ++;
+            if (used >= nSamples - 1) goto end;
+        }
+        srcPos = 2 * used;
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = src[srcPos] * vol1 + iSlopeCount * src[srcPos + 2];
+        dest[2 * i] = (SAMPLETYPE)(temp / SCALE);
+        temp = src[srcPos + 1] * vol1 + iSlopeCount * src[srcPos + 3];
+        dest[2 * i + 1] = (SAMPLETYPE)(temp / SCALE);
+
+        i++;
+        iSlopeCount += iRate;
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[2 * nSamples - 2];
+    sPrevSampleR = src[2 * nSamples - 1];
+
+    return i;
+}
+
+
+// Sets new target rate. Normal rate = 1.0, smaller values represent slower
+// rates, larger values faster rates.
+void RateTransposerInteger::setRate(float newRate)
+{
+    iRate = (int)(newRate * SCALE + 0.5f);
+    RateTransposer::setRate(newRate);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// RateTransposerFloat - floating point arithmetic implementation
+//
+//////////////////////////////////////////////////////////////////////////////
+
+// Constructor
+RateTransposerFloat::RateTransposerFloat() : RateTransposer()
+{
+    // Notice: use local function calling syntax for sake of clarity,
+    // to indicate the fact that C++ constructor can't call virtual functions.
+    RateTransposerFloat::resetRegisters();
+    RateTransposerFloat::setRate(1.0f);
+}
+
+
+RateTransposerFloat::~RateTransposerFloat()
+{
+}
+
+
+void RateTransposerFloat::resetRegisters()
+{
+    fSlopeCount = 0;
+    sPrevSampleL =
+    sPrevSampleR = 0;
+}
+
+
+
+// Transposes the sample rate of the given samples using linear interpolation.
+// 'Mono' version of the routine. Returns the number of samples returned in
+// the "dest" buffer
+uint RateTransposerFloat::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    unsigned int i, used;
+
+    if (nSamples == 0) return 0;  // no samples, no work
+
+    used = 0;
+    i = 0;
+
+    // process the last sample saved from the previous call first...
+    while (fSlopeCount <= 1.0f)
+    {
+        dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
+        i++;
+        fSlopeCount += fRate;
+    }
+    fSlopeCount -= 1.0f;
+
+    if (nSamples > 1)
+    {
+        while (1)
+        {
+            while (fSlopeCount > 1.0f)
+            {
+                fSlopeCount -= 1.0f;
+                used ++;
+                if (used >= nSamples - 1) goto end;
+            }
+            dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[used] + fSlopeCount * src[used + 1]);
+            i++;
+            fSlopeCount += fRate;
+        }
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[nSamples - 1];
+
+    return i;
+}
+
+
+// Transposes the sample rate of the given samples using linear interpolation.
+// 'Stereo' version of the routine. Returns the number of samples returned in
+// the "dest" buffer
+uint RateTransposerFloat::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint nSamples)
+{
+    unsigned int srcPos, i, used;
+
+    if (nSamples == 0) return 0;  // no samples, no work
+
+    used = 0;
+    i = 0;
+
+    // process the last sample saved from the previous call first...
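+    /* Editorial note (not part of the original sources): the float version
+       keeps the read-position fraction directly in fSlopeCount. On average
+       one output frame is produced per fRate of input, each one the blend
+
+           out = (1.0f - frac) * prev + frac * next;   // frac in [0, 1)
+
+       so fRate = 0.5f roughly doubles the frame count and fRate = 2.0f
+       roughly halves it. */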
+    while (fSlopeCount <= 1.0f)
+    {
+        dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
+        dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
+        i++;
+        fSlopeCount += fRate;
+    }
+    // now always (fSlopeCount > 1.0f)
+    fSlopeCount -= 1.0f;
+
+    if (nSamples > 1)
+    {
+        while (1)
+        {
+            while (fSlopeCount > 1.0f)
+            {
+                fSlopeCount -= 1.0f;
+                used ++;
+                if (used >= nSamples - 1) goto end;
+            }
+            srcPos = 2 * used;
+
+            dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos]
+                + fSlopeCount * src[srcPos + 2]);
+            dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos + 1]
+                + fSlopeCount * src[srcPos + 3]);
+
+            i++;
+            fSlopeCount += fRate;
+        }
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[2 * nSamples - 2];
+    sPrevSampleR = src[2 * nSamples - 1];
+
+    return i;
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/rate_transposer/RateTransposer.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/rate_transposer/RateTransposer.h
new file mode 100644
index 0000000..48f7bed
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/rate_transposer/RateTransposer.h
@@ -0,0 +1,159 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Sample rate transposer. Changes sample rate by using linear interpolation
+/// together with anti-alias filtering (first order interpolation with anti-
+/// alias filtering should be quite adequate for this application).
+///
+/// Use either of the derived classes of 'RateTransposerInteger' or
+/// 'RateTransposerFloat' for corresponding integer/floating point transposing
+/// algorithm implementation.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2009-02-21 18:00:14 +0200 (Sat, 21 Feb 2009) $
+// File revision : $Revision: 4 $
+//
+// $Id: RateTransposer.h 63 2009-02-21 16:00:14Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef RateTransposer_H
+#define RateTransposer_H
+
+#include <stddef.h>
+#include "AAFilter.h"
+#include "FIFOSamplePipe.h"
+#include "FIFOSampleBuffer.h"
+
+#include "STTypes.h"
+
+namespace soundtouch
+{
+
+/// A common linear samplerate transposer class.
+///
+/// Note: Use function "RateTransposer::newInstance()" to create a new class
+/// instance instead of the "new" operator; that function automatically
+/// chooses a correct implementation depending on whether integer or floating
+/// point arithmetic is to be used.
+class RateTransposer : public FIFOProcessor
+{
+protected:
+    /// Anti-alias filter object
+    AAFilter *pAAFilter;
+
+    float fRate;
+
+    int numChannels;
+
+    /// Buffer for collecting samples to feed the anti-alias filter between
+    /// two batches
+    FIFOSampleBuffer storeBuffer;
+
+    /// Buffer for keeping samples between transposing & anti-alias filter
+    FIFOSampleBuffer tempBuffer;
+
+    /// Output sample buffer
+    FIFOSampleBuffer outputBuffer;
+
+    BOOL bUseAAFilter;
+
+    virtual void resetRegisters() = 0;
+
+    virtual uint transposeStereo(SAMPLETYPE *dest,
+                                 const SAMPLETYPE *src,
+                                 uint numSamples) = 0;
+    virtual uint transposeMono(SAMPLETYPE *dest,
+                               const SAMPLETYPE *src,
+                               uint numSamples) = 0;
+    inline uint transpose(SAMPLETYPE *dest,
+                          const SAMPLETYPE *src,
+                          uint numSamples);
+
+    void downsample(const SAMPLETYPE *src,
+                    uint numSamples);
+    void upsample(const SAMPLETYPE *src,
+                  uint numSamples);
+
+    /// Transposes sample rate by applying anti-alias filter to prevent folding.
+    /// Returns amount of samples returned in the "dest" buffer.
+    /// The maximum amount of samples that can be returned at a time is set by
+    /// the 'set_returnBuffer_size' function.
+    void processSamples(const SAMPLETYPE *src,
+                        uint numSamples);
+
+
+public:
+    RateTransposer();
+    virtual ~RateTransposer();
+
+    /// Operator 'new' is overloaded so that it automatically creates a suitable instance
+    /// depending on if we're to use integer or floating point arithmetics.
+    static void *operator new(size_t s);
+
+    /// Use this function instead of "new" operator to create a new instance of this class.
+    /// This function automatically chooses a correct implementation, depending on whether
+    /// integer or floating point arithmetic is to be used.
+    static RateTransposer *newInstance();
+
+    /// Returns the output buffer object
+    FIFOSamplePipe *getOutput() { return &outputBuffer; };
+
+    /// Returns the store buffer object
+    FIFOSamplePipe *getStore() { return &storeBuffer; };
+
+    /// Return anti-alias filter object
+    AAFilter *getAAFilter();
+
+    /// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable
+    void enableAAFilter(BOOL newMode);
+
+    /// Returns nonzero if anti-alias filter is enabled.
+    BOOL isAAFilterEnabled() const;
+
+    /// Sets new target rate. Normal rate = 1.0, smaller values represent slower
+    /// rate, larger faster rates.
+    virtual void setRate(float newRate);
+
+    /// Sets the number of channels, 1 = mono, 2 = stereo
+    void setChannels(int channels);
+
+    /// Adds 'numSamples' pcs of samples from the 'samples' memory position into
+    /// the input of the object.
+    void putSamples(const SAMPLETYPE *samples, uint numSamples);
+
+    /// Clears all the samples in the object
+    void clear();
+
+    /// Returns nonzero if there aren't any samples available for outputting.
+    int isEmpty() const;
+};
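+
+/* Usage illustration (editorial sketch, not part of the original header;
+   buffer names are hypothetical):
+
+       soundtouch::RateTransposer *pRT = soundtouch::RateTransposer::newInstance();
+       pRT->setChannels(2);
+       pRT->setRate(1.2f);                      // 20 % faster and higher
+       pRT->putSamples(inBuf, nInFrames);       // feed input...
+       uint n = pRT->receiveSamples(outBuf, maxOutFrames);  // ...drain output
+       delete pRT;
+
+   receiveSamples() and numSamples() come from the FIFOProcessor /
+   FIFOSamplePipe ancestors. */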
+
+}
+
+#endif
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/sound_touch/SoundTouch.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/sound_touch/SoundTouch.cpp
new file mode 100644
index 0000000..f9160ed
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/sound_touch/SoundTouch.cpp
@@ -0,0 +1,501 @@
+//////////////////////////////////////////////////////////////////////////////
+///
+/// SoundTouch - main class for tempo/pitch/rate adjusting routines.
+///
+/// Notes:
+/// - Initialize the SoundTouch object instance by setting up the sound stream
+///   parameters with functions 'setSampleRate' and 'setChannels', then set
+///   desired tempo/pitch/rate settings with the corresponding functions.
+///
+/// - The SoundTouch class behaves like a first-in-first-out pipeline: The
+///   samples that are to be processed are fed into one end of the pipe by
+///   calling function 'putSamples', while the ready processed samples can be
+///   read from the other end of the pipeline with function 'receiveSamples'.
+///
+/// - The SoundTouch processing classes require certain sized 'batches' of
+///   samples in order to process the sound. For this reason the classes buffer
+///   incoming samples until there are enough samples available for
+///   processing, then they carry out the processing step and consequently
+///   make the processed samples available for outputting.
+///
+/// - For the above reason, the processing routines introduce a certain
+///   'latency' between the input and output, so that the samples input to
+///   SoundTouch may not be immediately available in the output, nor is the
+///   amount of outputtable samples necessarily in direct relationship with
+///   the amount of previously input samples.
+///
+/// - The tempo/pitch/rate control parameters can be altered during processing.
+///   Please notice though that they aren't currently protected by semaphores,
+///   so in multi-threaded applications external semaphore protection may be
+///   required.
+///
+/// - This class utilizes classes 'TDStretch' for tempo change (without modifying
+///   pitch) and 'RateTransposer' for changing the playback rate (that is, both
+///   tempo and pitch in the same ratio) of the sound. The third available control
+///   'pitch' (change pitch but maintain tempo) is produced by combining the two
+///   other controls.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2012-06-13 22:29:53 +0300 (Wed, 13 Jun 2012) $
+// File revision : $Revision: 4 $
+//
+// $Id: SoundTouch.cpp 143 2012-06-13 19:29:53Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <assert.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <stdio.h>
+
+#include "SoundTouch.h"
+#include "TDStretch.h"
+#include "RateTransposer.h"
+#include "cpu_detect.h"
+
+using namespace soundtouch;
+
+/// test if two floating point numbers are equal
+#define TEST_FLOAT_EQUAL(a, b)  (fabs(a - b) < 1e-10)
+
+
+/// Print library version string for autoconf
+extern "C" void soundtouch_ac_test()
+{
+    printf("SoundTouch Version: %s\n", SOUNDTOUCH_VERSION);
+}
+
+
+SoundTouch::SoundTouch()
+{
+    // Initialize rate transposer and tempo changer instances
+
+    pRateTransposer = RateTransposer::newInstance();
+    pTDStretch = TDStretch::newInstance();
+
+    setOutPipe(pTDStretch);
+
+    rate = tempo = 0;
+
+    virtualPitch =
+    virtualRate =
+    virtualTempo = 1.0;
+
+    calcEffectiveRateAndTempo();
+
+    channels = 0;
+    bSrateSet = FALSE;
+}
+
+
+
+SoundTouch::~SoundTouch()
+{
+    delete pRateTransposer;
+    delete pTDStretch;
+}
+
+
+
+/// Get SoundTouch library version string
+const char *SoundTouch::getVersionString()
+{
+    static const char *_version = SOUNDTOUCH_VERSION;
+
+    return _version;
+}
+
+
+/// Get SoundTouch library version Id
+uint SoundTouch::getVersionId()
+{
+    return SOUNDTOUCH_VERSION_ID;
+}
+
+
+// Sets the number of channels, 1 = mono, 2 = stereo
+void SoundTouch::setChannels(uint numChannels)
+{
+    if (numChannels != 1 && numChannels != 2)
+    {
+        ST_THROW_RT_ERROR("Illegal number of channels");
+    }
+    channels = numChannels;
+    pRateTransposer->setChannels((int)numChannels);
+    pTDStretch->setChannels((int)numChannels);
+}
+
+
+
+// Sets new rate control value. Normal rate = 1.0, smaller values
+// represent slower rate, larger faster rates.
+void SoundTouch::setRate(float newRate)
+{
+    virtualRate = newRate;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new rate control value as a difference in percents compared
+// to the original rate (-50 .. +100 %)
+void SoundTouch::setRateChange(float newRate)
+{
+    virtualRate = 1.0f + 0.01f * newRate;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new tempo control value. Normal tempo = 1.0, smaller values
+// represent slower tempo, larger faster tempo.
+void SoundTouch::setTempo(float newTempo)
+{
+    virtualTempo = newTempo;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new tempo control value as a difference in percents compared
+// to the original tempo (-50 .. +100 %)
+void SoundTouch::setTempoChange(float newTempo)
+{
+    virtualTempo = 1.0f + 0.01f * newTempo;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new pitch control value. Original pitch = 1.0, smaller values
+// represent lower pitches, larger values higher pitch.
+void SoundTouch::setPitch(float newPitch)
+{
+    virtualPitch = newPitch;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets pitch change in octaves compared to the original pitch
+// (-1.00 .. +1.00)
+void SoundTouch::setPitchOctaves(float newPitch)
+{
+    virtualPitch = (float)exp(0.69314718056f * newPitch);
+    calcEffectiveRateAndTempo();
+}
+
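+
+/* Editorial note: 0.69314718056 is ln(2), so exp(0.69314718056f * x) == 2^x,
+   i.e. one octave doubles the pitch ratio. A semitone step is therefore
+   (sketch, not part of the original sources):
+
+       float semitoneRatio(float semiTones)
+       {
+           return (float)pow(2.0, semiTones / 12.0);   // +1 semitone -> ~1.05946
+       }
+*/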
+
+// Sets pitch change in semi-tones compared to the original pitch
+// (-12 .. +12)
+void SoundTouch::setPitchSemiTones(int newPitch)
+{
+    setPitchOctaves((float)newPitch / 12.0f);
+}
+
+
+
+void SoundTouch::setPitchSemiTones(float newPitch)
+{
+    setPitchOctaves(newPitch / 12.0f);
+}
+
+
+// Calculates 'effective' rate and tempo values from the
+// nominal control values.
+void SoundTouch::calcEffectiveRateAndTempo()
+{
+    float oldTempo = tempo;
+    float oldRate = rate;
+
+    tempo = virtualTempo / virtualPitch;
+    rate = virtualPitch * virtualRate;
+
+    if (!TEST_FLOAT_EQUAL(rate, oldRate)) pRateTransposer->setRate(rate);
+    if (!TEST_FLOAT_EQUAL(tempo, oldTempo)) pTDStretch->setTempo(tempo);
+
+#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
+    if (rate <= 1.0f)
+    {
+        if (output != pTDStretch)
+        {
+            FIFOSamplePipe *tempoOut;
+
+            assert(output == pRateTransposer);
+            // move samples in the current output buffer to the output of pTDStretch
+            tempoOut = pTDStretch->getOutput();
+            tempoOut->moveSamples(*output);
+            // move samples in pitch transposer's store buffer to tempo changer's input
+            pTDStretch->moveSamples(*pRateTransposer->getStore());
+
+            output = pTDStretch;
+        }
+    }
+    else
+#endif
+    {
+        if (output != pRateTransposer)
+        {
+            FIFOSamplePipe *transOut;
+
+            assert(output == pTDStretch);
+            // move samples in the current output buffer to the output of pRateTransposer
+            transOut = pRateTransposer->getOutput();
+            transOut->moveSamples(*output);
+            // move samples in tempo changer's input to pitch transposer's input
+            pRateTransposer->moveSamples(*pTDStretch->getInput());
+
+            output = pRateTransposer;
+        }
+    }
+}
+
+
+// Sets sample rate.
+void SoundTouch::setSampleRate(uint srate)
+{
+    bSrateSet = TRUE;
+    // set sample rate, leave other tempo changer parameters as they are.
+    pTDStretch->setParameters((int)srate);
+}
+
+
+// Adds 'numSamples' pcs of samples from the 'samples' memory position into
+// the input of the object.
+void SoundTouch::putSamples(const SAMPLETYPE *samples, uint nSamples)
+{
+    if (bSrateSet == FALSE)
+    {
+        ST_THROW_RT_ERROR("SoundTouch : Sample rate not defined");
+    }
+    else if (channels == 0)
+    {
+        ST_THROW_RT_ERROR("SoundTouch : Number of channels not defined");
+    }
+
+    // Transpose the rate of the new samples if necessary
+    /* Bypass the nominal setting - can introduce a click in sound when tempo/pitch control crosses the nominal value...
+    if (rate == 1.0f)
+    {
+        // The rate value is same as the original, simply evaluate the tempo changer.
+        assert(output == pTDStretch);
+        if (pRateTransposer->isEmpty() == 0)
+        {
+            // yet flush the last samples in the pitch transposer buffer
+            // (may happen if 'rate' changes from a non-zero value to zero)
+            pTDStretch->moveSamples(*pRateTransposer);
+        }
+        pTDStretch->putSamples(samples, nSamples);
+    }
+    */
+#ifndef SOUNDTOUCH_PREVENT_CLICK_AT_RATE_CROSSOVER
+    if (rate <= 1.0f)
+    {
+        // transpose the rate down, output the transposed sound to tempo changer buffer
+        assert(output == pTDStretch);
+        pRateTransposer->putSamples(samples, nSamples);
+        pTDStretch->moveSamples(*pRateTransposer);
+    }
+    else
+#endif
+    {
+        // evaluate the tempo changer, then transpose the rate up,
+        assert(output == pRateTransposer);
+        pTDStretch->putSamples(samples, nSamples);
+        pRateTransposer->moveSamples(*pTDStretch);
+    }
+}
+
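+
+/* Editorial note on the control model of calcEffectiveRateAndTempo() above
+   (sketch, not part of the original sources): the net playback speed is
+
+       tempo * rate = (virtualTempo / virtualPitch) * (virtualPitch * virtualRate)
+                    = virtualTempo * virtualRate
+
+   so the pitch factor cancels out of the overall duration: the rate
+   transposer shifts pitch (and speed) by virtualPitch, and TDStretch
+   compensates by running at virtualTempo / virtualPitch. For example:
+
+       float p = 1.0594631f;               // +1 semitone
+       float tempo = 1.0f / p, rate = p;   // virtualTempo = virtualRate = 1
+       // tempo * rate == 1.0f -> duration unchanged, pitch raised by p
+*/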
+
+// Flushes the last samples from the processing pipeline to the output.
+// Also clears the internal processing buffers.
+//
+// Note: This function is meant for extracting the last samples of a sound
+// stream. This function may introduce additional blank samples in the end
+// of the sound stream, and thus it's not recommended to call this function
+// in the middle of a sound stream.
+void SoundTouch::flush()
+{
+    int i;
+    int nUnprocessed;
+    int nOut;
+    SAMPLETYPE buff[64 * 2];   // note: allocate 2*64 to cater for 64 sample frames of stereo sound
+
+    // check how many samples still await processing, and scale
+    // that by tempo & rate to get expected output sample count
+    nUnprocessed = numUnprocessedSamples();
+    nUnprocessed = (int)((double)nUnprocessed / (tempo * rate) + 0.5);
+
+    nOut = numSamples();        // ready samples currently in buffer ...
+    nOut += nUnprocessed;       // ... and how many we expect there to be in the end
+
+    memset(buff, 0, 64 * channels * sizeof(SAMPLETYPE));
+    // "Push" the last active samples out from the processing pipeline by
+    // feeding blank samples into the processing pipeline until new,
+    // processed samples appear in the output (though not more than
+    // 8k samples in any case)
+    for (i = 0; i < 128; i ++)
+    {
+        putSamples(buff, 64);
+        if ((int)numSamples() >= nOut)
+        {
+            // Enough new samples have appeared into the output!
+            // As samples come from processing with bigger chunks, now truncate it
+            // back to maximum "nOut" samples to improve duration accuracy
+            adjustAmountOfSamples(nOut);
+
+            // finish
+            break;
+        }
+    }
+
+    // Clear working buffers
+    pRateTransposer->clear();
+    pTDStretch->clearInput();
+    // yet leave the 'tempoChanger' output untouched as that's where the
+    // flushed samples are!
+}
+
+
+// Changes a setting controlling the processing system behaviour. See the
+// 'SETTING_...' defines for available setting ID's.
+BOOL SoundTouch::setSetting(int settingId, int value)
+{
+    int sampleRate, sequenceMs, seekWindowMs, overlapMs;
+
+    // read current tdstretch routine parameters
+    pTDStretch->getParameters(&sampleRate, &sequenceMs, &seekWindowMs, &overlapMs);
+
+    switch (settingId)
+    {
+        case SETTING_USE_AA_FILTER :
+            // enables / disables anti-alias filter
+            pRateTransposer->enableAAFilter((value != 0) ? TRUE : FALSE);
+            return TRUE;
+
+        case SETTING_AA_FILTER_LENGTH :
+            // sets anti-alias filter length
+            pRateTransposer->getAAFilter()->setLength(value);
+            return TRUE;
+
+        case SETTING_USE_QUICKSEEK :
+            // enables / disables tempo routine quick seeking algorithm
+            pTDStretch->enableQuickSeek((value != 0) ? TRUE : FALSE);
+            return TRUE;
+
+        case SETTING_SEQUENCE_MS:
+            // change time-stretch sequence duration parameter
+            pTDStretch->setParameters(sampleRate, value, seekWindowMs, overlapMs);
+            return TRUE;
+
+        case SETTING_SEEKWINDOW_MS:
+            // change time-stretch seek window length parameter
+            pTDStretch->setParameters(sampleRate, sequenceMs, value, overlapMs);
+            return TRUE;
+
+        case SETTING_OVERLAP_MS:
+            // change time-stretch overlap length parameter
+            pTDStretch->setParameters(sampleRate, sequenceMs, seekWindowMs, value);
+            return TRUE;
+
+        default :
+            return FALSE;
+    }
+}
+
+
+// Reads a setting controlling the processing system behaviour. See the
+// 'SETTING_...' defines for available setting ID's.
+//
+// Returns the setting value.
+int SoundTouch::getSetting(int settingId) const
+{
+    int temp;
+
+    switch (settingId)
+    {
+        case SETTING_USE_AA_FILTER :
+            return (uint)pRateTransposer->isAAFilterEnabled();
+
+        case SETTING_AA_FILTER_LENGTH :
+            return pRateTransposer->getAAFilter()->getLength();
+
+        case SETTING_USE_QUICKSEEK :
+            return (uint)pTDStretch->isQuickSeekEnabled();
+
+        case SETTING_SEQUENCE_MS:
+            pTDStretch->getParameters(NULL, &temp, NULL, NULL);
+            return temp;
+
+        case SETTING_SEEKWINDOW_MS:
+            pTDStretch->getParameters(NULL, NULL, &temp, NULL);
+            return temp;
+
+        case SETTING_OVERLAP_MS:
+            pTDStretch->getParameters(NULL, NULL, NULL, &temp);
+            return temp;
+
+        case SETTING_NOMINAL_INPUT_SEQUENCE :
+            return pTDStretch->getInputSampleReq();
+
+        case SETTING_NOMINAL_OUTPUT_SEQUENCE :
+            return pTDStretch->getOutputBatchSize();
+
+        default :
+            return 0;
+    }
+}
+
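+
+/* Query illustration (editorial sketch, not part of the original sources):
+
+       SoundTouch st;
+       st.setSampleRate(44100);
+       st.setChannels(2);
+       st.setTempo(1.1f);
+       int inSeq  = st.getSetting(SETTING_NOMINAL_INPUT_SEQUENCE);
+       int outSeq = st.getSetting(SETTING_NOMINAL_OUTPUT_SEQUENCE);
+       // approximate per-batch input/output sizes in sample frames,
+       // useful for estimating the pipeline latency
+*/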
+
+// Clears all the samples in the object's output and internal processing
+// buffers.
+void SoundTouch::clear()
+{
+    pRateTransposer->clear();
+    pTDStretch->clear();
+}
+
+
+
+/// Returns number of samples currently unprocessed.
+uint SoundTouch::numUnprocessedSamples() const
+{
+    FIFOSamplePipe *psp;
+    if (pTDStretch)
+    {
+        psp = pTDStretch->getInput();
+        if (psp)
+        {
+            return psp->numSamples();
+        }
+    }
+    return 0;
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/sound_touch/SoundTouch.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/sound_touch/SoundTouch.h
new file mode 100644
index 0000000..c6af895
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/sound_touch/SoundTouch.h
@@ -0,0 +1,277 @@
+//////////////////////////////////////////////////////////////////////////////
+///
+/// SoundTouch - main class for tempo/pitch/rate adjusting routines.
+///
+/// Notes:
+/// - Initialize the SoundTouch object instance by setting up the sound stream
+///   parameters with functions 'setSampleRate' and 'setChannels', then set
+///   desired tempo/pitch/rate settings with the corresponding functions.
+///
+/// - The SoundTouch class behaves like a first-in-first-out pipeline: The
+///   samples that are to be processed are fed into one end of the pipe by
+///   calling function 'putSamples', while the ready processed samples can be
+///   read from the other end of the pipeline with function 'receiveSamples'.
+///
+/// - The SoundTouch processing classes require certain sized 'batches' of
+///   samples in order to process the sound. For this reason the classes buffer
+///   incoming samples until there are enough samples available for
+///   processing, then they carry out the processing step and consequently
+///   make the processed samples available for outputting.
+///
+/// - For the above reason, the processing routines introduce a certain
+///   'latency' between the input and output, so that the samples input to
+///   SoundTouch may not be immediately available in the output, nor is the
+///   amount of outputtable samples necessarily in direct relationship with
+///   the amount of previously input samples.
+///
+/// - The tempo/pitch/rate control parameters can be altered during processing.
+///   Please notice though that they aren't currently protected by semaphores,
+///   so in multi-threaded applications external semaphore protection may be
+///   required.
+///
+/// - This class utilizes classes 'TDStretch' for tempo change (without modifying
+///   pitch) and 'RateTransposer' for changing the playback rate (that is, both
+///   tempo and pitch in the same ratio) of the sound.
+///   The third available control 'pitch' (change pitch but maintain tempo) is
+///   produced by combining the two other controls.
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2012-12-28 21:32:59 +0200 (Fri, 28 Dec 2012) $
+// File revision : $Revision: 4 $
+//
+// $Id: SoundTouch.h 163 2012-12-28 19:32:59Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef SoundTouch_H
+#define SoundTouch_H
+
+#include "FIFOSamplePipe.h"
+#include "STTypes.h"
+
+namespace soundtouch
+{
+
+/// Soundtouch library version string
+#define SOUNDTOUCH_VERSION          "1.7.1"
+
+/// SoundTouch library version id
+#define SOUNDTOUCH_VERSION_ID       (10701)
+
+//
+// Available setting IDs for the 'setSetting' & 'get_setting' functions:
+
+/// Enable/disable anti-alias filter in pitch transposer (0 = disable)
+#define SETTING_USE_AA_FILTER       0
+
+/// Pitch transposer anti-alias filter length (8 .. 128 taps, default = 32)
+#define SETTING_AA_FILTER_LENGTH    1
+
+/// Enable/disable quick seeking algorithm in tempo changer routine
+/// (enabling quick seeking lowers CPU utilization but slightly compromises
+/// sound quality)
+#define SETTING_USE_QUICKSEEK       2
+
+/// Time-stretch algorithm single processing sequence length in milliseconds. This determines
+/// how long the sequences are that the original sound is chopped into in the time-stretch algorithm.
+/// See "STTypes.h" or README for more information.
+#define SETTING_SEQUENCE_MS         3
+
+/// Time-stretch algorithm seeking window length in milliseconds for algorithm that finds the
+/// best possible overlapping location. This determines how wide a window the algorithm
+/// may scan for an optimal joining location when mixing the sound sequences back together.
+/// See "STTypes.h" or README for more information.
+#define SETTING_SEEKWINDOW_MS       4
+
+/// Time-stretch algorithm overlap length in milliseconds. When the chopped sound sequences
+/// are mixed back together, to form a continuous sound stream, this parameter defines over
+/// how long a period the two consecutive sequences are allowed to overlap each other.
+/// See "STTypes.h" or README for more information.
+#define SETTING_OVERLAP_MS          5
+
+
+/// Call "getSetting" with this ID to query nominal average processing sequence
+/// size in samples. This value gives the approximate number of input samples
+/// SoundTouch needs to gather before it does a DSP processing run for the sample batch.
+///
+/// Notices:
+/// - This is a read-only parameter, i.e. setSetting ignores this parameter
+/// - Returned value is approximate average value, exact processing batch
+///   size may vary from time to time
+/// - This parameter value is not constant but may change depending on
+///   tempo/pitch/rate/samplerate settings.
+#define SETTING_NOMINAL_INPUT_SEQUENCE      6
+
+
+/// Call "getSetting" with this ID to query nominal average processing output
+/// size in samples. This value gives the approximate number of output samples
+/// SoundTouch outputs once it does a DSP processing run for a batch of input samples.
+///
+/// Notices:
+/// - This is a read-only parameter, i.e. setSetting ignores this parameter
+/// - Returned value is approximate average value, exact processing batch
+///   size may vary from time to time
+/// - This parameter value is not constant but may change depending on
+///   tempo/pitch/rate/samplerate settings.
+#define SETTING_NOMINAL_OUTPUT_SEQUENCE     7
+
+class SoundTouch : public FIFOProcessor
+{
+private:
+    /// Rate transposer class instance
+    class RateTransposer *pRateTransposer;
+
+    /// Time-stretch class instance
+    class TDStretch *pTDStretch;
+
+    /// Virtual rate parameter. Effective rate & tempo are calculated from these parameters.
+    float virtualRate;
+
+    /// Virtual tempo parameter. Effective rate & tempo are calculated from these parameters.
+    float virtualTempo;
+
+    /// Virtual pitch parameter. Effective rate & tempo are calculated from these parameters.
+    float virtualPitch;
+
+    /// Flag: Has sample rate been set?
+    BOOL  bSrateSet;
+
+    /// Calculates effective rate & tempo values from 'virtualRate', 'virtualTempo' and
+    /// 'virtualPitch' parameters.
+    void calcEffectiveRateAndTempo();
+
+protected :
+    /// Number of channels
+    uint  channels;
+
+    /// Effective 'rate' value calculated from 'virtualRate', 'virtualTempo' and 'virtualPitch'
+    float rate;
+
+    /// Effective 'tempo' value calculated from 'virtualRate', 'virtualTempo' and 'virtualPitch'
+    float tempo;
+
+public:
+    SoundTouch();
+    virtual ~SoundTouch();
+
+    /// Get SoundTouch library version string
+    static const char *getVersionString();
+
+    /// Get SoundTouch library version Id
+    static uint getVersionId();
+
+    /// Sets new rate control value. Normal rate = 1.0, smaller values
+    /// represent slower rate, larger faster rates.
+    void setRate(float newRate);
+
+    /// Sets new tempo control value. Normal tempo = 1.0, smaller values
+    /// represent slower tempo, larger faster tempo.
+    void setTempo(float newTempo);
+
+    /// Sets new rate control value as a difference in percents compared
+    /// to the original rate (-50 .. +100 %)
+    void setRateChange(float newRate);
+
+    /// Sets new tempo control value as a difference in percents compared
+    /// to the original tempo (-50 .. +100 %)
+    void setTempoChange(float newTempo);
+
+    /// Sets new pitch control value. Original pitch = 1.0, smaller values
+    /// represent lower pitches, larger values higher pitch.
+    void setPitch(float newPitch);
+
+    /// Sets pitch change in octaves compared to the original pitch
+    /// (-1.00 .. +1.00)
+    void setPitchOctaves(float newPitch);
+
+    /// Sets pitch change in semi-tones compared to the original pitch
+    /// (-12 .. +12)
+    void setPitchSemiTones(int newPitch);
+    void setPitchSemiTones(float newPitch);
+
+    /// Sets the number of channels, 1 = mono, 2 = stereo
+    void setChannels(uint numChannels);
+
+    /// Sets sample rate.
+    void setSampleRate(uint srate);
+
+    /// Flushes the last samples from the processing pipeline to the output.
+    /// Also clears the internal processing buffers.
+    //
+    /// Note: This function is meant for extracting the last samples of a sound
+    /// stream. This function may introduce additional blank samples in the end
+    /// of the sound stream, and thus it's not recommended to call this function
+    /// in the middle of a sound stream.
+    void flush();
+
+    /// Adds 'numSamples' pcs of samples from the 'samples' memory position into
+    /// the input of the object. Notice that sample rate _has_to_ be set before
+    /// calling this function, otherwise throws a runtime_error exception.
+    virtual void putSamples(
+            const SAMPLETYPE *samples,  ///< Pointer to sample buffer.
+            uint numSamples             ///< Number of samples in buffer. Notice
+                                        ///< that in case of stereo-sound a single sample
+                                        ///< contains data for both channels.
+            );
+
+    /// Clears all the samples in the object's output and internal processing
+    /// buffers.
+    virtual void clear();
+
+    /// Changes a setting controlling the processing system behaviour. See the
+    /// 'SETTING_...' defines for available setting ID's.
+    ///
+    /// \return 'TRUE' if the setting was successfully changed
+    BOOL setSetting(int settingId,  ///< Setting ID number. see SETTING_... defines.
+                    int value       ///< New setting value.
+                    );
+
+    /// Reads a setting controlling the processing system behaviour. See the
+    /// 'SETTING_...' defines for available setting ID's.
+    ///
+    /// \return the setting value.
+    int getSetting(int settingId    ///< Setting ID number, see SETTING_... defines.
+                   ) const;
+
+    /// Returns number of samples currently unprocessed.
+    virtual uint numUnprocessedSamples() const;
+
+
+    /// Other handy functions that are implemented in the ancestor classes (see
+    /// classes 'FIFOProcessor' and 'FIFOSamplePipe')
+    ///
+    /// - receiveSamples() : Use this function to receive 'ready' processed samples from SoundTouch.
+    /// - numSamples()     : Get number of 'ready' samples that can be received with
+    ///                      function 'receiveSamples()'
+    /// - isEmpty()        : Returns nonzero if there aren't any 'ready' samples.
+    /// - clear()          : Clears all samples from ready/processing buffers.
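+
+    /* End-to-end usage illustration (editorial sketch, not part of the
+       original header; the I/O helpers are hypothetical):
+
+           soundtouch::SoundTouch st;
+           st.setSampleRate(44100);
+           st.setChannels(2);
+           st.setPitchSemiTones(3);            // shift pitch up, keep tempo
+
+           SAMPLETYPE buf[2 * 512];            // interleaved stereo, 512 frames
+           while (readInput(buf, 512))         // hypothetical source
+           {
+               st.putSamples(buf, 512);
+               uint n;
+               do {
+                   n = st.receiveSamples(buf, 512);
+                   writeOutput(buf, n);        // hypothetical sink
+               } while (n != 0);
+           }
+           st.flush();                         // drain the pipeline tail
+    */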
+};
+
+}
+#endif
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/sse_optimized.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/sse_optimized.cpp
new file mode 100644
index 0000000..ffb6706
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/sse_optimized.cpp
@@ -0,0 +1,361 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// SSE optimized routines for Pentium-III, Athlon-XP and later CPUs. All SSE
+/// optimized functions have been gathered into this single source
+/// code file, regardless of their class or original source code file, in order
+/// to ease porting the library to other compiler and processor platforms.
+///
+/// The SSE-optimizations are programmed using SSE compiler intrinsics that
+/// are supported both by Microsoft Visual C++ and GCC compilers, so this file
+/// should compile with both toolsets.
+///
+/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++
+/// 6.0 processor pack" update to support SSE instruction set. The update is
+/// available for download at Microsoft Developers Network, see here:
+/// http://msdn.microsoft.com/en-us/vstudio/aa718349.aspx
+///
+/// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and
+/// perform a search with keywords "processor pack".
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2012-11-08 20:53:01 +0200 (Thu, 08 Nov 2012) $
+// File revision : $Revision: 4 $
+//
+// $Id: sse_optimized.cpp 160 2012-11-08 18:53:01Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "cpu_detect.h"
+#include "STTypes.h"
+
+using namespace soundtouch;
+
+#ifdef SOUNDTOUCH_ALLOW_SSE
+
+// SSE routines available only with float sample type
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of SSE optimized functions of class 'TDStretchSSE'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "TDStretch.h"
+#include <xmmintrin.h>
+#include <math.h>
+
+// Calculates cross correlation of two buffers
+double TDStretchSSE::calcCrossCorr(const float *pV1, const float *pV2) const
+{
+    int i;
+    const float *pVec1;
+    const __m128 *pVec2;
+    __m128 vSum, vNorm;
+
+    // Note. It means a major slow-down if the routine needs to tolerate
+    // unaligned __m128 memory accesses. It's way faster if we can skip
+    // unaligned slots and use _mm_load_ps instruction instead of _mm_loadu_ps.
+    // This can mean up to ~ 10-fold difference (incl. part of which is
+    // due to skipping every second round for stereo sound though).
+    //
+    // Compile-time define SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION is provided
+    // for choosing if this little cheating is allowed.
+
+#ifdef SOUNDTOUCH_ALLOW_NONEXACT_SIMD_OPTIMIZATION
+    // Little cheating allowed, return valid correlation only for
+    // aligned locations, meaning every second round for stereo sound.
+
+    #define _MM_LOAD    _mm_load_ps
+
+    if (((ulongptr)pV1) & 15) return -1e50;    // skip unaligned locations
+
+#else
+    // No cheating allowed, use unaligned load & take the resulting
+    // performance hit.
+    #define _MM_LOAD    _mm_loadu_ps
+#endif
+
+    // ensure overlapLength is divisible by 8
+    assert((overlapLength % 8) == 0);
+
+    // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
+    // Note: pV2 _must_ be aligned to a 16-byte boundary, pV1 need not.
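+    /* Editorial summary (not part of the original sources): the loop below
+       computes a normalized cross-correlation,
+
+           corr = sum_i(pV1[i] * pV2[i]) / sqrt(sum_i(pV1[i] * pV1[i]))
+
+       accumulating four float lanes at a time in vSum / vNorm and summing
+       the lanes horizontally at the end. */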
+    pVec1 = (const float*)pV1;
+    pVec2 = (const __m128*)pV2;
+    vSum = vNorm = _mm_setzero_ps();
+
+    // Unroll the loop by factor of 4 * 4 operations. Use same routine for
+    // stereo & mono, for mono it just means twice the amount of unrolling.
+    for (i = 0; i < channels * overlapLength / 16; i ++)
+    {
+        __m128 vTemp;
+        // vSum += pV1[0..3] * pV2[0..3]
+        vTemp = _MM_LOAD(pVec1);
+        vSum  = _mm_add_ps(vSum,  _mm_mul_ps(vTemp, pVec2[0]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp, vTemp));
+
+        // vSum += pV1[4..7] * pV2[4..7]
+        vTemp = _MM_LOAD(pVec1 + 4);
+        vSum  = _mm_add_ps(vSum,  _mm_mul_ps(vTemp, pVec2[1]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp, vTemp));
+
+        // vSum += pV1[8..11] * pV2[8..11]
+        vTemp = _MM_LOAD(pVec1 + 8);
+        vSum  = _mm_add_ps(vSum,  _mm_mul_ps(vTemp, pVec2[2]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp, vTemp));
+
+        // vSum += pV1[12..15] * pV2[12..15]
+        vTemp = _MM_LOAD(pVec1 + 12);
+        vSum  = _mm_add_ps(vSum,  _mm_mul_ps(vTemp, pVec2[3]));
+        vNorm = _mm_add_ps(vNorm, _mm_mul_ps(vTemp, vTemp));
+
+        pVec1 += 16;
+        pVec2 += 4;
+    }
+
+    // return value = vSum[0] + vSum[1] + vSum[2] + vSum[3]
+    float *pvNorm = (float*)&vNorm;
+    double norm = sqrt(pvNorm[0] + pvNorm[1] + pvNorm[2] + pvNorm[3]);
+    if (norm < 1e-9) norm = 1.0;    // to avoid div by zero
+
+    float *pvSum = (float*)&vSum;
+    return (double)(pvSum[0] + pvSum[1] + pvSum[2] + pvSum[3]) / norm;
+
+    /* This is approximately the corresponding routine in plain C-language,
+       with normalization:
+    double corr, norm;
+    uint i, j;
+
+    // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors
+    corr = norm = 0.0;
+    for (i = 0; i < channels * overlapLength / 16; i ++)
+    {
+        corr += pV1[0] * pV2[0] +
+                pV1[1] * pV2[1] +
+                pV1[2] * pV2[2] +
+                pV1[3] * pV2[3] +
+                pV1[4] * pV2[4] +
+                pV1[5] * pV2[5] +
+                pV1[6] * pV2[6] +
+                pV1[7] * pV2[7] +
+                pV1[8] * pV2[8] +
+                pV1[9] * pV2[9] +
+                pV1[10] * pV2[10] +
+                pV1[11] * pV2[11] +
+                pV1[12] * pV2[12] +
+                pV1[13] * pV2[13] +
+                pV1[14] * pV2[14] +
+                pV1[15] * pV2[15];
+
+        for (j = 0; j < 16; j ++) norm += pV1[j] * pV1[j];
+
+        pV1 += 16;
+        pV2 += 16;
+    }
+    return corr / sqrt(norm);
+    */
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// implementation of SSE optimized functions of class 'FIRFilter'
+//
+//////////////////////////////////////////////////////////////////////////////
+
+#include "FIRFilter.h"
+
+FIRFilterSSE::FIRFilterSSE() : FIRFilter()
+{
+    filterCoeffsAlign = NULL;
+    filterCoeffsUnalign = NULL;
+}
+
+
+FIRFilterSSE::~FIRFilterSSE()
+{
+    delete[] filterCoeffsUnalign;
+    filterCoeffsAlign = NULL;
+    filterCoeffsUnalign = NULL;
+}
+
+
+// (overloaded) Calculates filter coefficients for SSE routine
+void FIRFilterSSE::setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor)
+{
+    uint i;
+    float fDivider;
+
+    FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor);
+
+    // Scale the filter coefficients so that it won't be necessary to scale the filtering result
+    // also rearrange coefficients suitably for SSE
+    // Ensure that filter coeffs array is aligned to 16-byte boundary
+    delete[] filterCoeffsUnalign;
+    filterCoeffsUnalign = new float[2 * newLength + 4];
+    filterCoeffsAlign = (float *)SOUNDTOUCH_ALIGN_POINTER_16(filterCoeffsUnalign);
+
+    fDivider = (float)resultDivider;
+
+    // rearrange the filter coefficients for mmx routines
+    for (i = 0; i < newLength; i ++)
+    {
+        filterCoeffsAlign[2 * i + 0] =
+        filterCoeffsAlign[2 * i + 1] = coeffs[i + 0] / fDivider;
+    }
+}
+
+
+
+// SSE-optimized version of
the filter routine for stereo sound +uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *source, uint numSamples) const +{ + int count = (int)((numSamples - length) & (uint)-2); + int j; + + assert(count % 2 == 0); + + if (count < 2) return 0; + + assert(source != NULL); + assert(dest != NULL); + assert((length % 8) == 0); + assert(filterCoeffsAlign != NULL); + assert(((ulongptr)filterCoeffsAlign) % 16 == 0); + + // filter is evaluated for two stereo samples with each iteration, thus use of 'j += 2' + for (j = 0; j < count; j += 2) + { + const float *pSrc; + const __m128 *pFil; + __m128 sum1, sum2; + uint i; + + pSrc = (const float*)source; // source audio data + pFil = (const __m128*)filterCoeffsAlign; // filter coefficients. NOTE: Assumes coefficients + // are aligned to 16-byte boundary + sum1 = sum2 = _mm_setzero_ps(); + + for (i = 0; i < length / 8; i ++) + { + // Unroll loop for efficiency & calculate filter for 2*2 stereo samples + // at each pass + + // sum1 is accu for 2*2 filtered stereo sound data at the primary sound data offset + // sum2 is accu for 2*2 filtered stereo sound data for the next sound sample offset. + + sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc) , pFil[0])); + sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 2), pFil[0])); + + sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc + 4), pFil[1])); + sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 6), pFil[1])); + + sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc + 8) , pFil[2])); + sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 10), pFil[2])); + + sum1 = _mm_add_ps(sum1, _mm_mul_ps(_mm_loadu_ps(pSrc + 12), pFil[3])); + sum2 = _mm_add_ps(sum2, _mm_mul_ps(_mm_loadu_ps(pSrc + 14), pFil[3])); + + pSrc += 16; + pFil += 4; + } + + // Now sum1 and sum2 both have a filtered 2-channel sample each, but we still need + // to sum the two hi- and lo-floats of these registers together. + + // post-shuffle & add the filtered values and store to dest. + _mm_storeu_ps(dest, _mm_add_ps( + _mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(1,0,3,2)), // s2_1 s2_0 s1_3 s1_2 + _mm_shuffle_ps(sum1, sum2, _MM_SHUFFLE(3,2,1,0)) // s2_3 s2_2 s1_1 s1_0 + )); + source += 4; + dest += 4; + } + + // Ideas for further improvement: + // 1. If it could be guaranteed that 'source' were always aligned to 16-byte + // boundary, a faster aligned '_mm_load_ps' instruction could be used. + // 2. If it could be guaranteed that 'dest' were always aligned to 16-byte + // boundary, a faster '_mm_store_ps' instruction could be used. + + return (uint)count; + + /* original routine in C-language. please notice the C-version has differently + organized coefficients though. + double suml1, suml2; + double sumr1, sumr2; + uint i, j; + + for (j = 0; j < count; j += 2) + { + const float *ptr; + const float *pFil; + + suml1 = sumr1 = 0.0; + suml2 = sumr2 = 0.0; + ptr = src; + pFil = filterCoeffs; + for (i = 0; i < lengthLocal; i ++) + { + // unroll loop for efficiency. 
+
+            suml1 += ptr[0] * pFil[0] +
+                     ptr[2] * pFil[2] +
+                     ptr[4] * pFil[4] +
+                     ptr[6] * pFil[6];
+
+            sumr1 += ptr[1] * pFil[1] +
+                     ptr[3] * pFil[3] +
+                     ptr[5] * pFil[5] +
+                     ptr[7] * pFil[7];
+
+            suml2 += ptr[8] * pFil[0] +
+                     ptr[10] * pFil[2] +
+                     ptr[12] * pFil[4] +
+                     ptr[14] * pFil[6];
+
+            sumr2 += ptr[9] * pFil[1] +
+                     ptr[11] * pFil[3] +
+                     ptr[13] * pFil[5] +
+                     ptr[15] * pFil[7];
+
+            ptr += 16;
+            pFil += 8;
+        }
+        dest[0] = (float)suml1;
+        dest[1] = (float)sumr1;
+        dest[2] = (float)suml2;
+        dest[3] = (float)sumr2;
+
+        src += 4;
+        dest += 4;
+    }
+    */
+}
+
+#endif  // SOUNDTOUCH_ALLOW_SSE
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/td_stretch/TDStretch.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/td_stretch/TDStretch.cpp
new file mode 100644
index 0000000..fd55e70
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/td_stretch/TDStretch.cpp
@@ -0,0 +1,808 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
+/// while maintaining the original pitch by using a time domain WSOLA-like
+/// method with several performance-increasing tweaks.
+///
+/// Note : MMX optimized functions reside in a separate, platform-specific
+/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2012-11-08 20:53:01 +0200 (Thu, 08 Nov 2012) $
+// File revision : $Revision: 1.12 $
+//
+// $Id: TDStretch.cpp 160 2012-11-08 18:53:01Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <string.h>
+#include <limits.h>
+#include <assert.h>
+#include <math.h>
+#include <float.h>
+
+#include "STTypes.h"
+#include "cpu_detect.h"
+#include "TDStretch.h"
+
+#include <stdio.h>
+
+using namespace soundtouch;
+
+#define max(x, y) (((x) > (y)) ? (x) : (y))
+
+
+/*****************************************************************************
+ *
+ * Constant definitions
+ *
+ *****************************************************************************/
+
+// Table for the hierarchical mixing position seeking algorithm
+static const short _scanOffsets[5][24]={
+    { 124,  186,  248,  310,  372,  434,  496,  558,  620,  682,  744, 806,
+      868,  930,  992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488,   0},
+    {-100,  -75,  -50,  -25,   25,   50,   75,  100,    0,    0,    0,   0,
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
+    { -20,  -15,  -10,   -5,    5,   10,   15,   20,    0,    0,    0,   0,
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
+    {  -4,   -3,   -2,   -1,    1,    2,    3,    4,    0,    0,    0,   0,
+        0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,   0},
+    { 121,  114,   97,  114,   98,  105,  108,   32,  104,   99,  117, 111,
+      116,  100,  110,  117,  111,  115,    0,    0,    0,    0,    0,   0}};
+
+/*****************************************************************************
+ *
+ * Implementation of the class 'TDStretch'
+ *
+ *****************************************************************************/
+
+
+TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
+{
+    bQuickSeek = FALSE;
+    channels = 2;
+
+    pMidBuffer = NULL;
+    pMidBufferUnaligned = NULL;
+    overlapLength = 0;
+
+    bAutoSeqSetting = TRUE;
+    bAutoSeekSetting = TRUE;
+
+//    outDebt = 0;
+    skipFract = 0;
+
+    tempo = 1.0f;
+    setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
+    setTempo(1.0f);
+
+    clear();
+}
+
+
+
+TDStretch::~TDStretch()
+{
+    delete[] pMidBufferUnaligned;
+}
+
+
+
+// Sets routine control parameters. These are certain time constants
+// defining how the sound is stretched to the desired duration.
+//
+// 'sampleRate' = sample rate of the sound
+// 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
+// 'seekwindowMS' = seeking window length for scanning the best overlapping
+//      position (default = 28 ms)
+// 'overlapMS' = overlapping length (default = 12 ms)
+
+void TDStretch::setParameters(int aSampleRate, int aSequenceMS,
+                              int aSeekWindowMS, int aOverlapMS)
+{
+    // accept only positive parameter values - if zero or negative, use old values instead
+    if (aSampleRate > 0)   this->sampleRate = aSampleRate;
+    if (aOverlapMS > 0)    this->overlapMs = aOverlapMS;
+
+    if (aSequenceMS > 0)
+    {
+        this->sequenceMs = aSequenceMS;
+        bAutoSeqSetting = FALSE;
+    }
+    else if (aSequenceMS == 0)
+    {
+        // if zero, use automatic setting
+        bAutoSeqSetting = TRUE;
+    }
+
+    if (aSeekWindowMS > 0)
+    {
+        this->seekWindowMs = aSeekWindowMS;
+        bAutoSeekSetting = FALSE;
+    }
+    else if (aSeekWindowMS == 0)
+    {
+        // if zero, use automatic setting
+        bAutoSeekSetting = TRUE;
+    }
+
+    calcSeqParameters();
+
+    calculateOverlapLength(overlapMs);
+
+    // set tempo to recalculate 'sampleReq'
+    setTempo(tempo);
+
+}
+
+
+
+/// Get routine control parameters, see setParameters() function.
+/// Any of the parameters to this function can be NULL, in such case corresponding parameter
+/// value isn't returned.
+void TDStretch::getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const
+{
+    if (pSampleRate)
+    {
+        *pSampleRate = sampleRate;
+    }
+
+    if (pSequenceMs)
+    {
+        *pSequenceMs = (bAutoSeqSetting) ? (USE_AUTO_SEQUENCE_LEN) : sequenceMs;
+    }
+
+    if (pSeekWindowMs)
+    {
+        *pSeekWindowMs = (bAutoSeekSetting) ? (USE_AUTO_SEEKWINDOW_LEN) : seekWindowMs;
+    }
+
+    if (pOverlapMs)
+    {
+        *pOverlapMs = overlapMs;
+    }
+}
+
+
+// Overlaps samples in 'midBuffer' with the samples in 'pInput'
+void TDStretch::overlapMono(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput) const
+{
+    int i;
+    SAMPLETYPE m1, m2;
+
+    m1 = (SAMPLETYPE)0;
+    m2 = (SAMPLETYPE)overlapLength;
+
+    for (i = 0; i < overlapLength ; i ++)
+    {
+        pOutput[i] = (SAMPLETYPE)((pInput[i] * m1 + pMidBuffer[i] * m2 ) / overlapLength);
+        m1 += 1;
+        m2 -= 1;
+    }
+}
+
+
+
+void TDStretch::clearMidBuffer()
+{
+    memset(pMidBuffer, 0, 2 * sizeof(SAMPLETYPE) * overlapLength);
+}
+
+
+void TDStretch::clearInput()
+{
+    inputBuffer.clear();
+    clearMidBuffer();
+}
+
+
+// Clears the sample buffers
+void TDStretch::clear()
+{
+    outputBuffer.clear();
+    clearInput();
+}
+
+
+
+// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
+// to enable
+void TDStretch::enableQuickSeek(BOOL enable)
+{
+    bQuickSeek = enable;
+}
+
+
+// Returns nonzero if the quick seeking algorithm is enabled.
+BOOL TDStretch::isQuickSeekEnabled() const
+{
+    return bQuickSeek;
+}
+
+
+// Seeks for the optimal overlap-mixing position.
+int TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
+{
+    if (bQuickSeek)
+    {
+        return seekBestOverlapPositionQuick(refPos);
+    }
+    else
+    {
+        return seekBestOverlapPositionFull(refPos);
+    }
+}
+
+
+// Overlaps samples in 'midBuffer' with the samples in 'pInputBuffer' at position
+// of 'ovlPos'.
+inline void TDStretch::overlap(SAMPLETYPE *pOutput, const SAMPLETYPE *pInput, uint ovlPos) const
+{
+    if (channels == 2)
+    {
+        // stereo sound
+        overlapStereo(pOutput, pInput + 2 * ovlPos);
+    } else {
+        // mono sound.
+        overlapMono(pOutput, pInput + ovlPos);
+    }
+}
+
+
+
+// Seeks for the optimal overlap-mixing position. The 'full' version of the
+// routine tests every possible position over the permitted range.
+//
+// The best position is determined as the position where the two overlapped
+// sample sequences are 'most alike', in terms of the highest cross-correlation
+// value over the overlapping period
+int TDStretch::seekBestOverlapPositionFull(const SAMPLETYPE *refPos)
+{
+    int bestOffs;
+    double bestCorr, corr;
+    int i;
+
+    bestCorr = -FLT_MAX;   // start below any possible value (FLT_MIN is the smallest *positive* float)
+    bestOffs = 0;
+
+    // Scans for the best correlation value by testing each possible position
+    // over the permitted range.
+    for (i = 0; i < seekLength; i ++)
+    {
+        // Calculates correlation value for the mixing position corresponding
+        // to 'i'
+        corr = calcCrossCorr(refPos + channels * i, pMidBuffer);
+        // heuristic rule to slightly favour values close to mid of the range
+        double tmp = (double)(2 * i - seekLength) / (double)seekLength;
+        corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
+
+        // Checks for the highest correlation value
+        if (corr > bestCorr)
+        {
+            bestCorr = corr;
+            bestOffs = i;
+        }
+    }
+    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
+    clearCrossCorrState();
+
+    return bestOffs;
+}
+
+
+// Seeks for the optimal overlap-mixing position. The 'quick' version of the
+// routine uses a hierarchical search instead of scanning every position.
+//
+// The best position is determined as the position where the two overlapped
+// sample sequences are 'most alike', in terms of the highest cross-correlation
+// value over the overlapping period
+int TDStretch::seekBestOverlapPositionQuick(const SAMPLETYPE *refPos)
+{
+    int j;
+    int bestOffs;
+    double bestCorr, corr;
+    int scanCount, corrOffset, tempOffset;
+
+    bestCorr = -FLT_MAX;   // start below any possible value (FLT_MIN is the smallest *positive* float)
+    bestOffs = _scanOffsets[0][0];
+    corrOffset = 0;
+    tempOffset = 0;
+
+    // Scans for the best correlation value using four-pass hierarchical search.
+    //
+    // The look-up table 'scans' has hierarchical position adjusting steps.
+    // In the first pass the routine searches for the highest correlation with
+    // relatively coarse steps, then rescans the neighbourhood of the highest
+    // correlation with better resolution and so on.
+    for (scanCount = 0; scanCount < 4; scanCount ++)
+    {
+        j = 0;
+        while (_scanOffsets[scanCount][j])
+        {
+            tempOffset = corrOffset + _scanOffsets[scanCount][j];
+            if (tempOffset >= seekLength) break;
+
+            // Calculates correlation value for the mixing position corresponding
+            // to 'tempOffset'
+            corr = (double)calcCrossCorr(refPos + channels * tempOffset, pMidBuffer);
+            // heuristic rule to slightly favour values close to mid of the range
+            double tmp = (double)(2 * tempOffset - seekLength) / seekLength;
+            corr = ((corr + 0.1) * (1.0 - 0.25 * tmp * tmp));
+
+            // Checks for the highest correlation value
+            if (corr > bestCorr)
+            {
+                bestCorr = corr;
+                bestOffs = tempOffset;
+            }
+            j ++;
+        }
+        corrOffset = bestOffs;
+    }
+    // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
+    clearCrossCorrState();
+
+    return bestOffs;
+}
+
+
+
+/// clear cross correlation routine state if necessary
+void TDStretch::clearCrossCorrState()
+{
+    // default implementation is empty.
+}
+
+
+/// Calculates processing sequence length according to tempo setting
+void TDStretch::calcSeqParameters()
+{
+    // Adjust tempo param according to tempo, so that a varying processing sequence length is used
+    // at various tempo settings, between the given low...top limits
+    #define AUTOSEQ_TEMPO_LOW   0.5     // auto setting low tempo range (-50%)
+    #define AUTOSEQ_TEMPO_TOP   2.0     // auto setting top tempo range (+100%)
+
+    // sequence-ms setting values at above low & top tempo
+    #define AUTOSEQ_AT_MIN      125.0
+    #define AUTOSEQ_AT_MAX      50.0
+    #define AUTOSEQ_K           ((AUTOSEQ_AT_MAX - AUTOSEQ_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
+    #define AUTOSEQ_C           (AUTOSEQ_AT_MIN - (AUTOSEQ_K) * (AUTOSEQ_TEMPO_LOW))
+
+    // seek-window-ms setting values at above low & top tempo
+    #define AUTOSEEK_AT_MIN     25.0
+    #define AUTOSEEK_AT_MAX     15.0
+    #define AUTOSEEK_K          ((AUTOSEEK_AT_MAX - AUTOSEEK_AT_MIN) / (AUTOSEQ_TEMPO_TOP - AUTOSEQ_TEMPO_LOW))
+    #define AUTOSEEK_C          (AUTOSEEK_AT_MIN - (AUTOSEEK_K) * (AUTOSEQ_TEMPO_LOW))
+
+    #define CHECK_LIMITS(x, mi, ma) (((x) < (mi)) ? (mi) : (((x) > (ma)) ? (ma) : (x)))
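+    /* Worked example of the linear mapping above (editorial note): with
+       AUTOSEQ_K = (50 - 125) / (2.0 - 0.5) = -50 and
+       AUTOSEQ_C = 125 - (-50) * 0.5 = 150, the automatic sequence length is
+       seq(tempo) = 150 - 50 * tempo, i.e. 125 ms at tempo 0.5, 100 ms at
+       tempo 1.0 and 50 ms at tempo 2.0, clamped to [50, 125] outside that
+       tempo range. */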
+
+    double seq, seek;
+
+    if (bAutoSeqSetting)
+    {
+        seq = AUTOSEQ_C + AUTOSEQ_K * tempo;
+        seq = CHECK_LIMITS(seq, AUTOSEQ_AT_MAX, AUTOSEQ_AT_MIN);
+        sequenceMs = (int)(seq + 0.5);
+    }
+
+    if (bAutoSeekSetting)
+    {
+        seek = AUTOSEEK_C + AUTOSEEK_K * tempo;
+        seek = CHECK_LIMITS(seek, AUTOSEEK_AT_MAX, AUTOSEEK_AT_MIN);
+        seekWindowMs = (int)(seek + 0.5);
+    }
+
+    // Update seek window lengths
+    seekWindowLength = (sampleRate * sequenceMs) / 1000;
+    if (seekWindowLength < 2 * overlapLength)
+    {
+        seekWindowLength = 2 * overlapLength;
+    }
+    seekLength = (sampleRate * seekWindowMs) / 1000;
+}
+
+
+
+// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
+// tempo, larger values a faster tempo.
+void TDStretch::setTempo(float newTempo)
+{
+    int intskip;
+
+    tempo = newTempo;
+
+    // Calculate new sequence duration
+    calcSeqParameters();
+
+    // Calculate ideal skip length (according to tempo value)
+    nominalSkip = tempo * (seekWindowLength - overlapLength);
+    intskip = (int)(nominalSkip + 0.5f);
+
+    // Calculate how many samples are needed in the 'inputBuffer' to
+    // process another batch of samples
+    //sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength / 2;
+    sampleReq = max(intskip + overlapLength, seekWindowLength) + seekLength;
+}
+
+
+
+// Sets the number of channels, 1 = mono, 2 = stereo
+void TDStretch::setChannels(int numChannels)
+{
+    assert(numChannels > 0);
+    if (channels == numChannels) return;
+    assert(numChannels == 1 || numChannels == 2);
+
+    channels = numChannels;
+    inputBuffer.setChannels(channels);
+    outputBuffer.setChannels(channels);
+}
+
+
+// nominal tempo, no need for processing, just pass the samples through
+// to outputBuffer
+/*
+void TDStretch::processNominalTempo()
+{
+    assert(tempo == 1.0f);
+
+    if (bMidBufferDirty)
+    {
+        // If there are samples in pMidBuffer waiting for overlapping,
+        // do a single sliding overlapping with them in order to prevent a
+        // clicking distortion in the output sound
+        if (inputBuffer.numSamples() < overlapLength)
+        {
+            // wait until we've got overlapLength input samples
+            return;
+        }
+        // Mix the samples in the beginning of 'inputBuffer' with the
+        // samples in 'midBuffer' using sliding overlapping
+        overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
+        outputBuffer.putSamples(overlapLength);
+        inputBuffer.receiveSamples(overlapLength);
+        clearMidBuffer();
+        // now we've caught the nominal sample flow and may switch to
+        // bypass mode
+    }
+
+    // Simply bypass samples from input to output
+    outputBuffer.moveSamples(inputBuffer);
+}
+*/
+
+#include <stdio.h>
+
+// Processes as many processing frames of the samples in 'inputBuffer' as
+// possible, and stores the result into 'outputBuffer'
+void TDStretch::processSamples()
+{
+    int ovlSkip, offset;
+    int temp;
+
+    /* Removed this small optimization - can introduce a click to the sound when the
+       tempo setting crosses the nominal value
+    if (tempo == 1.0f)
+    {
+        // tempo not changed from the original, so bypass the processing
+        processNominalTempo();
+        return;
+    }
+    */
+
+    // process samples as long as there are enough samples in 'inputBuffer'
+    // to form a processing frame.
+    while ((int)inputBuffer.numSamples() >= sampleReq)
+    {
+        // If tempo differs from the normal ('SCALE'), scan for the best overlapping
+        // position
+        offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
+
+        // Mix the samples in the 'inputBuffer' at position of 'offset' with the
+        // samples in 'midBuffer' using sliding overlapping
+        // ...
first partially overlap with the end of the previous sequence + // (that's in 'midBuffer') + overlap(outputBuffer.ptrEnd((uint)overlapLength), inputBuffer.ptrBegin(), (uint)offset); + outputBuffer.putSamples((uint)overlapLength); + + // ... then copy sequence samples from 'inputBuffer' to output: + + // length of sequence + temp = (seekWindowLength - 2 * overlapLength); + + // crosscheck that we don't have buffer overflow... + if ((int)inputBuffer.numSamples() < (offset + temp + overlapLength * 2)) + { + continue; // just in case, shouldn't really happen + } + + outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), (uint)temp); + + // Copies the end of the current sequence from 'inputBuffer' to + // 'midBuffer' for being mixed with the beginning of the next + // processing sequence and so on + assert((offset + temp + overlapLength * 2) <= (int)inputBuffer.numSamples()); + memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + temp + overlapLength), + channels * sizeof(SAMPLETYPE) * overlapLength); + + // Remove the processed samples from the input buffer. Update + // the difference between integer & nominal skip step to 'skipFract' + // in order to prevent the error from accumulating over time. + skipFract += nominalSkip; // real skip size + ovlSkip = (int)skipFract; // rounded to integer skip + skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip + inputBuffer.receiveSamples((uint)ovlSkip); + } +} + + +// Adds 'numsamples' pcs of samples from the 'samples' memory position into +// the input of the object. +void TDStretch::putSamples(const SAMPLETYPE *samples, uint nSamples) +{ + // Add the samples into the input buffer + inputBuffer.putSamples(samples, nSamples); + // process the samples in input buffer + processSamples(); +} + + + +/// Set new overlap length parameter & reallocate RefMidBuffer if necessary. +void TDStretch::acceptNewOverlapLength(int newOverlapLength) +{ + int prevOvl; + + assert(newOverlapLength >= 0); + prevOvl = overlapLength; + overlapLength = newOverlapLength; + + if (overlapLength > prevOvl) + { + delete[] pMidBufferUnaligned; + + pMidBufferUnaligned = new SAMPLETYPE[overlapLength * 2 + 16 / sizeof(SAMPLETYPE)]; + // ensure that 'pMidBuffer' is aligned to 16 byte boundary for efficiency + pMidBuffer = (SAMPLETYPE *)SOUNDTOUCH_ALIGN_POINTER_16(pMidBufferUnaligned); + + clearMidBuffer(); + } +} + + +// Operator 'new' is overloaded so that it automatically creates a suitable instance +// depending on if we've a MMX/SSE/etc-capable CPU available or not. +void * TDStretch::operator new(size_t s) +{ + // Notice! don't use "new TDStretch" directly, use "newInstance" to create a new instance instead! 
+ ST_THROW_RT_ERROR("Error in TDStretch::new: Don't use 'new TDStretch' directly, use 'newInstance' member instead!"); + return newInstance(); +} + + +TDStretch * TDStretch::newInstance() +{ + uint uExtensions; + + uExtensions = detectCPUextensions(); + + // Check if MMX/SSE instruction set extensions supported by CPU + +#ifdef SOUNDTOUCH_ALLOW_MMX + // MMX routines available only with integer sample types + if (uExtensions & SUPPORT_MMX) + { + return ::new TDStretchMMX; + } + else +#endif // SOUNDTOUCH_ALLOW_MMX + + +#ifdef SOUNDTOUCH_ALLOW_SSE + if (uExtensions & SUPPORT_SSE) + { + // SSE support + return ::new TDStretchSSE; + } + else +#endif // SOUNDTOUCH_ALLOW_SSE + + { + // ISA optimizations not supported, use plain C version + return ::new TDStretch; + } +} + + +////////////////////////////////////////////////////////////////////////////// +// +// Integer arithmetics specific algorithm implementations. +// +////////////////////////////////////////////////////////////////////////////// + +#ifdef SOUNDTOUCH_INTEGER_SAMPLES + +// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' +// version of the routine. +void TDStretch::overlapStereo(short *poutput, const short *input) const +{ + int i; + short temp; + int cnt2; + + for (i = 0; i < overlapLength ; i ++) + { + temp = (short)(overlapLength - i); + cnt2 = 2 * i; + poutput[cnt2] = (short)((input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength); + poutput[cnt2 + 1] = (short)((input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength); + } +} + +// Calculates the x having the closest 2^x value for the given value +static int _getClosest2Power(double value) +{ + return (int)(log(value) / log(2.0) + 0.5); +} + + +/// Calculates overlap period length in samples. +/// Integer version rounds overlap length to closest power of 2 +/// for a divide scaling operation. +void TDStretch::calculateOverlapLength(int aoverlapMs) +{ + int newOvl; + + assert(aoverlapMs >= 0); + + // calculate overlap length so that it's power of 2 - thus it's easy to do + // integer division by right-shifting. Term "-1" at end is to account for + // the extra most significatnt bit left unused in result by signed multiplication + overlapDividerBits = _getClosest2Power((sampleRate * aoverlapMs) / 1000.0) - 1; + if (overlapDividerBits > 9) overlapDividerBits = 9; + if (overlapDividerBits < 3) overlapDividerBits = 3; + newOvl = (int)pow(2.0, (int)overlapDividerBits + 1); // +1 => account for -1 above + + acceptNewOverlapLength(newOvl); + + // calculate sloping divider so that crosscorrelation operation won't + // overflow 32-bit register. Max. sum of the crosscorrelation sum without + // divider would be 2^30*(N^3-N)/3, where N = overlap length + slopingDivider = (newOvl * newOvl - 1) / 3; +} + + +double TDStretch::calcCrossCorr(const short *mixingPos, const short *compare) const +{ + long corr; + long norm; + int i; + + corr = norm = 0; + // Same routine for stereo and mono. For stereo, unroll loop for better + // efficiency and gives slightly better resolution against rounding. 
+ // For mono it same routine, just unrolls loop by factor of 4 + for (i = 0; i < channels * overlapLength; i += 4) + { + corr += (mixingPos[i] * compare[i] + + mixingPos[i + 1] * compare[i + 1] + + mixingPos[i + 2] * compare[i + 2] + + mixingPos[i + 3] * compare[i + 3]) >> overlapDividerBits; + norm += (mixingPos[i] * mixingPos[i] + + mixingPos[i + 1] * mixingPos[i + 1] + + mixingPos[i + 2] * mixingPos[i + 2] + + mixingPos[i + 3] * mixingPos[i + 3]) >> overlapDividerBits; + } + + // Normalize result by dividing by sqrt(norm) - this step is easiest + // done using floating point operation + if (norm == 0) norm = 1; // to avoid div by zero + return (double)corr / sqrt((double)norm); +} + +#endif // SOUNDTOUCH_INTEGER_SAMPLES + +////////////////////////////////////////////////////////////////////////////// +// +// Floating point arithmetics specific algorithm implementations. +// + +#ifdef SOUNDTOUCH_FLOAT_SAMPLES + +// Overlaps samples in 'midBuffer' with the samples in 'pInput' +void TDStretch::overlapStereo(float *pOutput, const float *pInput) const +{ + int i; + float fScale; + float f1; + float f2; + + fScale = 1.0f / (float)overlapLength; + + f1 = 0; + f2 = 1.0f; + + for (i = 0; i < 2 * (int)overlapLength ; i += 2) + { + pOutput[i + 0] = pInput[i + 0] * f1 + pMidBuffer[i + 0] * f2; + pOutput[i + 1] = pInput[i + 1] * f1 + pMidBuffer[i + 1] * f2; + + f1 += fScale; + f2 -= fScale; + } +} + + +/// Calculates overlapInMsec period length in samples. +void TDStretch::calculateOverlapLength(int overlapInMsec) +{ + int newOvl; + + assert(overlapInMsec >= 0); + newOvl = (sampleRate * overlapInMsec) / 1000; + if (newOvl < 16) newOvl = 16; + + // must be divisible by 8 + newOvl -= newOvl % 8; + + acceptNewOverlapLength(newOvl); +} + + +double TDStretch::calcCrossCorr(const float *mixingPos, const float *compare) const +{ + double corr; + double norm; + int i; + + corr = norm = 0; + // Same routine for stereo and mono. For Stereo, unroll by factor of 2. + // For mono it's same routine yet unrollsd by factor of 4. + for (i = 0; i < channels * overlapLength; i += 4) + { + corr += mixingPos[i] * compare[i] + + mixingPos[i + 1] * compare[i + 1]; + + norm += mixingPos[i] * mixingPos[i] + + mixingPos[i + 1] * mixingPos[i + 1]; + + // unroll the loop for better CPU efficiency: + corr += mixingPos[i + 2] * compare[i + 2] + + mixingPos[i + 3] * compare[i + 3]; + + norm += mixingPos[i + 2] * mixingPos[i + 2] + + mixingPos[i + 3] * mixingPos[i + 3]; + } + + if (norm < 1e-9) norm = 1.0; // to avoid div by zero + return corr / sqrt(norm); +} + +#endif // SOUNDTOUCH_FLOAT_SAMPLES diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/td_stretch/TDStretch.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/td_stretch/TDStretch.h new file mode 100644 index 0000000..6d6e735 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/tone_shift/src/td_stretch/TDStretch.h @@ -0,0 +1,268 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo +/// while maintaining the original pitch by using a time domain WSOLA-like method +/// with several performance-increasing tweaks. 
+///
+/// Note : MMX/SSE optimized functions reside in separate, platform-specific files
+/// 'mmx_optimized.cpp' and 'sse_optimized.cpp'
+///
+/// Author        : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai 'at' iki.fi
+/// SoundTouch WWW: http://www.surina.net/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed  : $Date: 2012-04-01 22:49:30 +0300 (Sun, 01 Apr 2012) $
+// File revision : $Revision: 4 $
+//
+// $Id: TDStretch.h 137 2012-04-01 19:49:30Z oparviai $
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+//  SoundTouch audio processing library
+//  Copyright (c) Olli Parviainen
+//
+//  This library is free software; you can redistribute it and/or
+//  modify it under the terms of the GNU Lesser General Public
+//  License as published by the Free Software Foundation; either
+//  version 2.1 of the License, or (at your option) any later version.
+//
+//  This library is distributed in the hope that it will be useful,
+//  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+//  Lesser General Public License for more details.
+//
+//  You should have received a copy of the GNU Lesser General Public
+//  License along with this library; if not, write to the Free Software
+//  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef TDStretch_H
+#define TDStretch_H
+
+#include <stddef.h>
+#include "STTypes.h"
+#include "RateTransposer.h"
+#include "FIFOSamplePipe.h"
+
+namespace soundtouch
+{
+
+/// Default values for sound processing parameters:
+/// Notice that the default parameters are tuned for contemporary popular music
+/// processing. For speech processing applications these parameters suit better:
+///     #define DEFAULT_SEQUENCE_MS     40
+///     #define DEFAULT_SEEKWINDOW_MS   15
+///     #define DEFAULT_OVERLAP_MS      8
+///
+
+/// Default length of a single processing sequence, in milliseconds. This determines
+/// how long the sequences are into which the original sound is chopped by the
+/// time-stretch algorithm.
+///
+/// The larger this value is, the fewer sequences are used in processing. In principle
+/// a bigger value sounds better when slowing down the tempo, but worse when increasing
+/// the tempo, and vice versa.
+///
+/// Increasing this value reduces computational burden & vice versa.
+//#define DEFAULT_SEQUENCE_MS     40
+#define DEFAULT_SEQUENCE_MS     USE_AUTO_SEQUENCE_LEN
+
+/// Giving this value for the sequence length sets automatic parameter value
+/// according to tempo setting (recommended)
+#define USE_AUTO_SEQUENCE_LEN   0
+
+/// Seeking window default length in milliseconds for the algorithm that finds the
+/// best possible overlapping location. This determines how wide a window the
+/// algorithm may scan for an optimal joining location when mixing the sound
+/// sequences back together.
+///
+/// The bigger this window setting is, the higher the possibility to find a better mixing
+/// position will become, but at the same time large values may cause a "drifting" artifact
+/// because consecutive sequences will be taken at more uneven intervals.
+///
+/// If there's a disturbing artifact that sounds as if a constant frequency were drifting
+/// around, try reducing this setting.
+///
+/// Increasing this value increases computational burden & vice versa.
+//#define DEFAULT_SEEKWINDOW_MS 15 +#define DEFAULT_SEEKWINDOW_MS USE_AUTO_SEEKWINDOW_LEN + +/// Giving this value for the seek window length sets automatic parameter value +/// according to tempo setting (recommended) +#define USE_AUTO_SEEKWINDOW_LEN 0 + +/// Overlap length in milliseconds. When the chopped sound sequences are mixed back together, +/// to form a continuous sound stream, this parameter defines over how long period the two +/// consecutive sequences are let to overlap each other. +/// +/// This shouldn't be that critical parameter. If you reduce the DEFAULT_SEQUENCE_MS setting +/// by a large amount, you might wish to try a smaller value on this. +/// +/// Increasing this value increases computational burden & vice versa. +#define DEFAULT_OVERLAP_MS 8 + + +/// Class that does the time-stretch (tempo change) effect for the processed +/// sound. +class TDStretch : public FIFOProcessor +{ +protected: + int channels; + int sampleReq; + float tempo; + + SAMPLETYPE *pMidBuffer; + SAMPLETYPE *pMidBufferUnaligned; + int overlapLength; + int seekLength; + int seekWindowLength; + int overlapDividerBits; + int slopingDivider; + float nominalSkip; + float skipFract; + FIFOSampleBuffer outputBuffer; + FIFOSampleBuffer inputBuffer; + BOOL bQuickSeek; + + int sampleRate; + int sequenceMs; + int seekWindowMs; + int overlapMs; + BOOL bAutoSeqSetting; + BOOL bAutoSeekSetting; + + void acceptNewOverlapLength(int newOverlapLength); + + virtual void clearCrossCorrState(); + void calculateOverlapLength(int overlapMs); + + virtual double calcCrossCorr(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const; + + virtual int seekBestOverlapPositionFull(const SAMPLETYPE *refPos); + virtual int seekBestOverlapPositionQuick(const SAMPLETYPE *refPos); + int seekBestOverlapPosition(const SAMPLETYPE *refPos); + + virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const; + virtual void overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const; + + void clearMidBuffer(); + void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const; + + void calcSeqParameters(); + + /// Changes the tempo of the given sound samples. + /// Returns amount of samples returned in the "output" buffer. + /// The maximum amount of samples that can be returned at a time is set by + /// the 'set_returnBuffer_size' function. + void processSamples(); + +public: + TDStretch(); + virtual ~TDStretch(); + + /// Operator 'new' is overloaded so that it automatically creates a suitable instance + /// depending on if we've a MMX/SSE/etc-capable CPU available or not. + static void *operator new(size_t s); + + /// Use this function instead of "new" operator to create a new instance of this class. + /// This function automatically chooses a correct feature set depending on if the CPU + /// supports MMX/SSE/etc extensions. + static TDStretch *newInstance(); + + /// Returns the output buffer object + FIFOSamplePipe *getOutput() { return &outputBuffer; }; + + /// Returns the input buffer object + FIFOSamplePipe *getInput() { return &inputBuffer; }; + + /// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower + /// tempo, larger faster tempo. + void setTempo(float newTempo); + + /// Returns nonzero if there aren't any samples available for outputting. 
+ virtual void clear(); + + /// Clears the input buffer + void clearInput(); + + /// Sets the number of channels, 1 = mono, 2 = stereo + void setChannels(int numChannels); + + /// Enables/disables the quick position seeking algorithm. Zero to disable, + /// nonzero to enable + void enableQuickSeek(BOOL enable); + + /// Returns nonzero if the quick seeking algorithm is enabled. + BOOL isQuickSeekEnabled() const; + + /// Sets routine control parameters. These control are certain time constants + /// defining how the sound is stretched to the desired duration. + // + /// 'sampleRate' = sample rate of the sound + /// 'sequenceMS' = one processing sequence length in milliseconds + /// 'seekwindowMS' = seeking window length for scanning the best overlapping + /// position + /// 'overlapMS' = overlapping length + void setParameters(int sampleRate, ///< Samplerate of sound being processed (Hz) + int sequenceMS = -1, ///< Single processing sequence length (ms) + int seekwindowMS = -1, ///< Offset seeking window length (ms) + int overlapMS = -1 ///< Sequence overlapping length (ms) + ); + + /// Get routine control parameters, see setParameters() function. + /// Any of the parameters to this function can be NULL, in such case corresponding parameter + /// value isn't returned. + void getParameters(int *pSampleRate, int *pSequenceMs, int *pSeekWindowMs, int *pOverlapMs) const; + + /// Adds 'numsamples' pcs of samples from the 'samples' memory position into + /// the input of the object. + virtual void putSamples( + const SAMPLETYPE *samples, ///< Input sample data + uint numSamples ///< Number of samples in 'samples' so that one sample + ///< contains both channels if stereo + ); + + /// return nominal input sample requirement for triggering a processing batch + int getInputSampleReq() const + { + return (int)(nominalSkip + 0.5); + } + + /// return nominal output sample amount when running a processing batch + int getOutputBatchSize() const + { + return seekWindowLength - overlapLength; + } +}; + + + +// Implementation-specific class declarations: + +#ifdef SOUNDTOUCH_ALLOW_MMX + /// Class that implements MMX optimized routines for 16bit integer samples type. + class TDStretchMMX : public TDStretch + { + protected: + double calcCrossCorr(const short *mixingPos, const short *compare) const; + virtual void overlapStereo(short *output, const short *input) const; + virtual void clearCrossCorrState(); + }; +#endif /// SOUNDTOUCH_ALLOW_MMX + + +#ifdef SOUNDTOUCH_ALLOW_SSE + /// Class that implements SSE optimized routines for floating point samples type. 
+    class TDStretchSSE : public TDStretch
+    {
+    protected:
+        double calcCrossCorr(const float *mixingPos, const float *compare) const;
+    };
+
+#endif /// SOUNDTOUCH_ALLOW_SSE
+
+}
+#endif /// TDStretch_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/CMakeLists.txt
new file mode 100644
index 0000000..fc1cfe0
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/CMakeLists.txt
@@ -0,0 +1,4 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_WAVES_SRCS)
+add_library(waves ${DIR_WAVES_SRCS})
+#set_target_properties(waves PROPERTIES CXX_VISIBILITY_PRESET hidden)
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/inc/ExtraMono.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/inc/ExtraMono.h
new file mode 100755
index 0000000..280fab0
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/inc/ExtraMono.h
@@ -0,0 +1,230 @@
+
+#include <stdio.h>
+#include <string.h>
+#include <string>
+
+#define SIZE_LONG 4
+#define SIZE_SHORT 2
+
+#define SIZE_FLAG 4
+#define FMT_TAG 0x0001
+
+#define BITS_PER_BYTE 8
+
+#ifndef AFS_CMPL_MAX_WAV
+#define AFS_CMPL_MAX_WAV 15360000 // max duration 16 minutes (960 * 16000)
+#endif
+
+//+---------------------------------------------------------------------------+
+//+ Read one 32-bit value from the file
+//+---------------------------------------------------------------------------+
+unsigned long fa_read_u32(FILE* fp)
+{
+    unsigned long cx;
+    unsigned char temp[SIZE_LONG];
+
+    fread(temp, sizeof(unsigned char), SIZE_LONG, fp);
+    cx = (unsigned long)temp[0];
+    cx |= (unsigned long)temp[1] << 8;
+    cx |= (unsigned long)temp[2] << 16;
+    cx |= (unsigned long)temp[3] << 24;
+    return cx;
+}
+
+//+---------------------------------------------------------------------------+
+//+ Read one 16-bit value from the file
+//+---------------------------------------------------------------------------+
+unsigned short fa_read_u16(FILE *fp)
+{
+    unsigned short cx;
+    unsigned char temp[SIZE_SHORT];
+
+    fread(temp, sizeof(unsigned char), SIZE_SHORT, fp);
+    cx = temp[0] | (temp[1] * 256);
+    return cx;
+}
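+
+// Note: both helpers decode little-endian values, the byte order used throughout
+// RIFF/WAVE files; e.g. the byte sequence 10 00 00 00 decodes to 0x00000010 = 16,
+// the body size of a standard PCM "fmt " chunk.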
+int GetWaveHeadLen(const char* pszFile, unsigned short &channels, int &nPos, int& nLength)
+{
+    //+---------------------------------------------------------------------------+
+    //+ Read the WAVE header information
+    //+---------------------------------------------------------------------------+
+    unsigned char temp[SIZE_FLAG];
+    unsigned short bits_per_sample;
+    unsigned long x_size;
+    unsigned long n_skip;
+
+    unsigned short format;
+    //unsigned short channels;
+    unsigned long sample_rate;
+    unsigned short block_align;
+    unsigned long data_size;
+    int nCnt = 0;
+
+    /* Read the common header fields */
+    FILE* pWavFile = fopen(pszFile, "rb");
+    if ( pWavFile == NULL )
+    {
+        printf("Input file can not be opened!\n");
+        return -1;
+    }
+
+    fseek(pWavFile, 0, SEEK_END );
+    nLength = ftell(pWavFile);
+    fseek(pWavFile, 0, SEEK_SET );
+
+    // Check that the resource tag is "RIFF"
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "RIFF", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Resource flag is not RIFF!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+    nCnt += SIZE_LONG;
+
+    // Check that the file tag is "WAVE"
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "WAVE", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "File flag is not WAVE\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    // Check that the format tag is "fmt "
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "fmt ", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Format flag is not FMT!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    x_size = fa_read_u32(pWavFile);
+    nCnt += SIZE_LONG;
+
+    // Check that the encoding format is 0x0001 (plain PCM)
+    format = fa_read_u16(pWavFile);
+    nCnt += SIZE_SHORT;
+    if ( format != FMT_TAG )
+    {
+        fprintf(stderr, "Encoding format is not 0x0001!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+
+    // Read the channel count and the sample rate
+    channels = fa_read_u16(pWavFile);
+    sample_rate = fa_read_u32(pWavFile);
+
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+
+    // Read the block alignment and the bits per sample
+    block_align = fa_read_u16(pWavFile);
+    bits_per_sample = fa_read_u16(pWavFile);
+
+    /* Skip any extra fmt bytes */
+    x_size -= (4*SIZE_SHORT + 2*SIZE_LONG);
+    if ( x_size != 0 )
+    {
+        fseek(pWavFile, x_size, SEEK_CUR);
+    }
+
+    // Locate the "data" chunk and read the data size
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    while ( memcmp(temp, "data", SIZE_FLAG) != 0 )
+    {
+        n_skip = fa_read_u32(pWavFile);
+        fseek(pWavFile, n_skip, SEEK_CUR);
+
+        fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    }
+
+    data_size = fa_read_u32(pWavFile);
+    fclose(pWavFile);
+
+    //+---------------------------------------------------------------------------+
+    //+ Return the length of the WAVE header
+    //+---------------------------------------------------------------------------+
+    nPos = nCnt;
+    int nHeadLength = nLength - data_size;
+    return nHeadLength;
+}
+
+bool ExtraMono(const std::string &sInput, const std::string &sOutput)
+{
+    FILE *pFile = fopen(sInput.c_str(), "rb");
+    if ( NULL == pFile )
+    {
+        printf("Fopen Error %s", sInput.c_str());
+        return false;
+    }
+
+    FILE *pFile2 = fopen(sOutput.c_str(), "wb");
+    if ( NULL == pFile2 )
+    {
+        printf("Fopen2 Error %s", sOutput.c_str());
+        fclose(pFile);
+        return false;
+    }
+
+    short *pBuf = new short[AFS_CMPL_MAX_WAV];
+    int nLen = 0;
+
+    nLen = fread(pBuf, sizeof(short), AFS_CMPL_MAX_WAV, pFile);
+    if ( nLen <= 0 )
+    {
+        perror("Fread Error!");
+        delete []pBuf;
+        fclose(pFile);
+        fclose(pFile2);
+        return false;
+    }
+
+    unsigned short channels = 0;
+    int nPos;
+    int nLength;
+    int nHeadByte = GetWaveHeadLen(sInput.c_str(), channels, nPos, nLength);
+    if ( nHeadByte <= 0 )
+    {
+        delete []pBuf;
+        fclose(pFile);
+        fclose(pFile2);
+        return false;
+    }
+    int nHeadShort = nHeadByte / 2;
+
+    if (channels == 1)
+    {
+        // Already mono: copy the file through unchanged, header included
+        fwrite(pBuf, sizeof(short), nLen, pFile2);
+    }
+    else
+    {
+        short *pBuf2 = new short[AFS_CMPL_MAX_WAV];
+        memcpy( pBuf2, pBuf, nHeadShort*sizeof(short));
+
+        // Patch the channel-count field of the copied header to 1.
+        // nPos is a byte offset, while pBuf2 indexes 16-bit words.
+        pBuf2[nPos / 2] = 1;
+
+        // Keep only the left channel of the interleaved stereo data
+        short *pWav = pBuf + nHeadShort;
+        nLen -= nHeadShort;
+
+        int halfnlen = nLen / 2;
+        for (int i = 0; i < halfnlen; i++ )
+        {
+            pBuf2[nHeadShort + i] = *(pWav + i * 2);
+        }
+        // NOTE: the size fields inside the copied header still describe the
+        // original stereo stream.
+        fwrite(pBuf2, sizeof(short), nHeadShort + halfnlen, pFile2);
+
+        delete []pBuf2;
+        pBuf2 = NULL;
+    }
+
+    delete []pBuf;
+    pBuf = NULL;
+
+    fclose(pFile);
+    fclose(pFile2);
+    return true;
+}
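+
+// Usage sketch (the file names are placeholders): extract the left channel of a
+// 16-bit PCM stereo WAV into a mono file.
+//
+//     if (!ExtraMono("vocal_stereo.wav", "vocal_mono.wav"))
+//         fprintf(stderr, "mono extraction failed\n");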
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/inc/STWaveFile.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/inc/STWaveFile.h
new file mode 100755
index 0000000..0593058
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/inc/STWaveFile.h
@@ -0,0 +1,74 @@
+#ifndef WAVE_FILE_H
+#define WAVE_FILE_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+
+typedef enum SAMPLE_FORMAT
+{
+    SF_U8 = 8,
+    SF_S16 = 16,
+    SF_S24 = 24,
+    SF_S32 = 32,
+    SF_IEEE_FLOAT = 0x100 + 32,
+    SF_IEEE_DOUBLE = 0x100 + 64,
+    SF_MAX,
+} SAMPLE_FORMAT;
+
+/* Main processing object **/
+class STCWaveFile
+{
+public:
+    /* The constructor takes the file name and whether to write or to read **/
+    STCWaveFile(const char* Filename, bool Write);
+    virtual ~STCWaveFile();
+
+public:
+    int GetChannels();
+    int GetSampleRate();
+    double GetDuration(); // in seconds
+    uint32_t GetChannelMask();
+    void SetChannels(int Channels);
+    void SetSampleRate(int SampleRate);
+    void SetSampleFormat(SAMPLE_FORMAT Format);
+    void SetChannelMask(uint32_t Mask);
+    void Stat();
+    void SetupDone();
+    bool ReadFrameAsS16(short* FrameSamples, int Frames = 1);
+    bool ReadFrameAsDouble(double* FrameSamples, int Frames = 1);
+    bool ReadFrameAsfloat(float* FrameSamples, int Frames = 1);
+    void WriteRaw(void* Raw, int Size);
+    void WriteFrame(uint8_t* FrameSamples, int Frames = 1);
+    void WriteFrame(short* FrameSamples, int Frames = 1);
+    void WriteFrame(int32_t* FrameSamples, int Frames = 1);
+    void WriteFrameS24(int32_t* FrameSamples, int Frames = 1);
+    void WriteFrame(double* FrameSamples, int Frames = 1);
+    void WriteFrame(float* FrameSamples, int Frames=1);
+    void Seek(int FramePos, int Where = SEEK_SET);
+    bool GetStatus();
+    SAMPLE_FORMAT GetFormat();
+    int GetTotalFrames();
+    int GetFramesRead();
+
+
+protected:
+    FILE* File;
+    int Channels;                 /* channel count **/
+    int SampleRate;               /* sample rate **/
+    SAMPLE_FORMAT Format;         /* sample format (precision) **/
+    int SampleSize;               // Measured in Bits
+    unsigned int FrameStartPos;   /* start position of the audio data **/
+    unsigned long TotalFrames;    /* total frame count; with 16-bit samples one short is one frame **/
+    unsigned long FramesRead;
+    double Duration;              /* duration **/
+
+    bool ReadOnly;                /* reading or writing **/
+
+    uint32_t ChannelMask;
+
+    bool m_bOK;                   /* whether the file was opened successfully **/
+};
+
+
+#endif
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/src/STWaveFile.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/src/STWaveFile.cpp
new file mode 100755
index 0000000..29d5b32
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/ref/waves/src/STWaveFile.cpp
@@ -0,0 +1,822 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#if WIN32
+#else
+#include <unistd.h>
+#endif
+
+#include "STWaveFile.h"
+#define SPEAKER_FRONT_LEFT 0x1
+#define SPEAKER_FRONT_RIGHT 0x2
+#define SPEAKER_FRONT_CENTER 0x4
+#define SPEAKER_LOW_FREQUENCY 0x8
+#define SPEAKER_BACK_LEFT 0x10
+#define SPEAKER_BACK_RIGHT 0x20
+#define SPEAKER_FRONT_LEFT_OF_CENTER 0x40
+#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80
+#define SPEAKER_BACK_CENTER 0x100
+#define SPEAKER_SIDE_LEFT 0x200
+#define SPEAKER_SIDE_RIGHT 0x400
+#define SPEAKER_TOP_CENTER 0x800
+#define SPEAKER_TOP_FRONT_LEFT 0x1000
+#define SPEAKER_TOP_FRONT_CENTER 0x2000
+#define SPEAKER_TOP_FRONT_RIGHT 0x4000
+#define SPEAKER_TOP_BACK_LEFT 0x8000
+#define SPEAKER_TOP_BACK_CENTER 0x10000
+#define SPEAKER_TOP_BACK_RIGHT 0x20000
+#define SPEAKER_RESERVED 0x80000000
+
+
+#define SPEAKER_REAR_CENTER_SURROUND SPEAKER_BACK_CENTER
+
+#define DCA_MONO 0
+#define DCA_CHANNEL 1
+#define DCA_STEREO 2
+#define DCA_STEREO_SUMDIFF 3
+#define DCA_STEREO_TOTAL 4
+#define DCA_3F 5
+#define DCA_2F1R 6
+#define DCA_3F1R 7
+#define DCA_2F2R 8
+#define DCA_3F2R 9
+#define DCA_4F2R 10
+
+#define DCA_DOLBY 101 /* FIXME */
+
+#define DCA_CHANNEL_MAX DCA_3F2R /* We don't handle anything above that */
+#define DCA_CHANNEL_BITS 6
+#define DCA_CHANNEL_MASK 0x3F
+
+#define DCA_LFE 0x80
+#define DCA_ADJUST_LEVEL 0x100
+
+#define WAVE_FORMAT_PCM 0x0001
+#define WAVE_FORMAT_IEEE_FLOAT 0x0003
+#define WAVE_FORMAT_EXTENSIBLE 0xFFFE
+
+static uint8_t wav_header[] = {
+    'R', 'I', 'F', 'F', 0xfc, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+    'f', 'm', 't', ' ', 16, 0, 0, 0,
+    WAVE_FORMAT_PCM, WAVE_FORMAT_PCM >> 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0,
+    'd', 'a', 't', 'a', 0xd8, 0xff, 0xff, 0xff
+};
+
+static uint8_t wavmulti_header[] = {
+    'R', 'I', 'F', 'F', 0xf0, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+    'f', 'm', 't', ' ', 40, 0, 0, 0,
+    (uint8_t)(WAVE_FORMAT_EXTENSIBLE & 0xFF), WAVE_FORMAT_EXTENSIBLE >> 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 22, 0,
+    0, 0, 0, 0, 0, 0,
+    WAVE_FORMAT_IEEE_FLOAT, WAVE_FORMAT_IEEE_FLOAT >> 8,
+    0, 0, 0, 0, 0x10, 0x00, 0x80, 0, 0, 0xaa, 0, 0x38, 0x9b, 0x71,
+    'd', 'a', 't', 'a', 0xb4, 0xff, 0xff, 0xff
+};
+
+static void store4 (uint8_t * buf, int value)
+{
+    buf[0] = value;
+    buf[1] = value >> 8;
+    buf[2] = value >> 16;
+    buf[3] = value >> 24;
+}
+
+static void store2 (uint8_t * buf, int value)
+{
+    buf[0] = value;
+    buf[1] = value >> 8;
+}
+
+
+static uint32_t find_chunk(FILE * file, const uint8_t chunk_id[4])
+{
+    uint8_t buffer[8];
+    while (1) {
+        size_t chunksize;
+        size_t s = fread(buffer, 1, 8, file);
+        if (s < 8)
+            return 0;
+        chunksize = (uint32_t)buffer[4] | ((uint32_t)buffer[5] << 8) |
+            ((uint32_t)buffer[6] << 16) | ((uint32_t)buffer[7] << 24);
+        if (!memcmp(buffer, chunk_id, 4))
+            return chunksize;
+        fseek(file, chunksize, SEEK_CUR);
+    }
+}
+
+
+STCWaveFile::STCWaveFile(const char* Filename, bool Write)
+    : Duration(0), ReadOnly(false), m_bOK(false)
+{
+    Channels = 0;
+
+    /* Open the file **/
+    File = fopen(Filename, Write ? "wb":"rb");
+    if ( !File )
+        return;
+
+    /* Set the initial parameters for writing **/
+    if ( Write )
+    {
+        SampleRate = 44100;
+        Channels = 2;
+        Format = SF_S16;
+        SampleSize = 16;
+        ChannelMask = 0;
+        m_bOK = true;
+        return;
+    }
+
+    ReadOnly = true;
+
+    size_t s;
+    uint8_t buffer[8];
+    uint8_t *fmt = NULL;
+    uint32_t v;
+    uint32_t avg_bps;
+    uint32_t block_align;
+    unsigned short FormatType;
+    unsigned short SampleType;
+
+    static const uint8_t riff[4] = { 'R', 'I', 'F', 'F' };
+    static const uint8_t wave[4] = { 'W', 'A', 'V', 'E' };
+    static const uint8_t fmt_[4] = { 'f', 'm', 't', ' ' };
+    static const uint8_t data[4] = { 'd', 'a', 't', 'a' };
+
+    /* The first four bytes must be "RIFF" **/
+    s = fread(buffer, 1, 8, File);
+    if (s < 8)
+        goto err2;
+
+    if (memcmp(buffer, riff, 4))
+        goto err2;
+
+    /* Bytes 8..12 must be "WAVE" **/
+    /* TODO: check size (in buffer[4..8]) */
+    s = fread(buffer, 1, 4, File);
+    if (s < 4)
+        goto err2;
+
+    if (memcmp(buffer, wave, 4))
+        goto err2;
+
+    s = find_chunk(File, fmt_);
+    if ( s != 16 && s != 18 && s != 40 )
+        goto err2;
+
+    fmt = (uint8_t*)malloc(s);
+    if (!fmt)
+        goto err2;
+
+    if (fread(fmt, 1, s, File) != s)
+        goto err3;
+
+    /* wFormatTag */
+    v = (uint32_t)fmt[0] | ((uint32_t)fmt[1] << 8);
+    if (v != WAVE_FORMAT_PCM && v != WAVE_FORMAT_IEEE_FLOAT && v != WAVE_FORMAT_EXTENSIBLE)
+        goto err3;
+
+    FormatType = v;
+
+    if (s == 40 && 0xfffe == v)
+    {
+        // fmt begins at 0x14 of the wave file
+        v = *(unsigned short*)&fmt[0x2C - 0x14];
+    }
+
+    SampleType = v;
+
+    /* wChannels */
+    v = (uint32_t)fmt[2] | ((uint32_t)fmt[3] << 8);
+
+    Channels = v;
+
+    if (v < 1 || v > 32)
+        goto err3;
+
+    /* dwSamplesPerSec */
+    SampleRate = (uint32_t)fmt[4] | ((uint32_t)fmt[5] << 8) |
+        ((uint32_t)fmt[6] << 16) | ((uint32_t)fmt[7] << 24);
+
+    /* dwAvgBytesPerSec */
+    avg_bps = (uint32_t)fmt[8] | ((uint32_t)fmt[9] << 8) |
+        ((uint32_t)fmt[10] << 16) | ((uint32_t)fmt[11] << 24);
+
+    /* wBlockAlign */
+    block_align = (uint32_t)fmt[12] | ((uint32_t)fmt[13] << 8);
+
+    /* wBitsPerSample */
+    SampleSize = (uint32_t)fmt[14] | ((uint32_t)fmt[15] << 8);
+    if (SampleSize != 8 && SampleSize != 16 && SampleSize != 32 && SampleSize != 24 && SampleSize != 64)
+        goto err3;
+
+    switch (SampleSize)
+    {
+    case 8:
+        Format = SF_U8;
+        break;
+    case 16:
+        Format = SF_S16;
+        break;
+    case 24:
+        Format = SF_S24;
+        break;
+    case 32:
+        {
+            if
(SampleType == WAVE_FORMAT_IEEE_FLOAT) + Format = SF_IEEE_FLOAT; + else + Format = SF_S32; + + } + break; + case 64: + if (SampleType != WAVE_FORMAT_IEEE_FLOAT) + goto err3; + Format = SF_IEEE_DOUBLE; + break; + } + + + // Handle 24-bit samples individually +#if 0 + if (SampleSize == 24 && Channels <= 2) + { + int ba24 = Channels * (SampleSize / 8); // Align to 4x + + ba24 = (ba24 + 3) / 4 * 4; + + if (block_align != ba24) + goto err3; + } + else +#endif + { + if (block_align != Channels * (SampleSize / 8)) + goto err3; + } + + if (avg_bps != block_align * SampleRate) + goto err3; + + v = find_chunk(File, data); + + if (v == 0 || v % block_align != 0) + goto err3; + + TotalFrames = v / block_align; + + FramesRead = 0; + + if (FormatType == WAVE_FORMAT_EXTENSIBLE) + { + ChannelMask = *(unsigned int*)(&fmt[0x14]); + } + else + { + ChannelMask = 0; + } + + FrameStartPos = ftell(File); + + free(fmt); + m_bOK = true; + return; + +err3: + free(fmt); +err2: + fclose(File); + + File = NULL; +} + +bool STCWaveFile::GetStatus() +{ + return m_bOK; +} + +SAMPLE_FORMAT STCWaveFile::GetFormat() +{ + return Format; +} + +int STCWaveFile::GetTotalFrames() +{ + return TotalFrames; +} + +int STCWaveFile::GetFramesRead() +{ + return FramesRead; +} + +STCWaveFile::~STCWaveFile() +{ + if (File != NULL) + { + if (!ReadOnly) + { + unsigned int Size = ftell(File) - FrameStartPos;// 44; + + fseek(File, FrameStartPos - 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + + Size += FrameStartPos - 8; + + fseek(File, 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + } + + fclose(File); + } +} + +int STCWaveFile::GetSampleRate() +{ + return SampleRate; +} + +void STCWaveFile::SetSampleRate(int SampleRate) +{ + this->SampleRate = SampleRate; +} + +void STCWaveFile::SetupDone() +{ + unsigned char Header[68]; + + fseek(File, 0, SEEK_SET); + + SampleSize = Format & 0xFF; + + if (ChannelMask) + { + memcpy(Header, wavmulti_header, sizeof(wavmulti_header)); + + if (Format < SF_IEEE_FLOAT) + { + // store2(Header + 20, WAVE_FORMAT_PCM); + store2(Header + 44, WAVE_FORMAT_PCM); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + store2(Header + 38, SampleSize / 8 * 8); + store4(Header + 40, ChannelMask); + + fwrite(Header, sizeof(wavmulti_header), 1, File); + } + else + { + memcpy(Header, wav_header, sizeof(wav_header)); + + if (Format >= SF_IEEE_FLOAT) + { + store2(Header + 20, WAVE_FORMAT_IEEE_FLOAT); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + fwrite(Header, sizeof(wav_header), 1, File); + } + + + FrameStartPos = ftell(File); +} + + +void STCWaveFile::Seek(int FramePos, int Where) +{ + // Ignoring Where + + fseek(File, FrameStartPos + FramePos * Channels* (SampleSize / 8), Where); + + FramesRead = FramePos; + +} + +int STCWaveFile::GetChannels() +{ + return Channels; +} + +void STCWaveFile::SetChannels(int Channels) +{ + this->Channels = Channels; +} + +void STCWaveFile::SetSampleFormat(SAMPLE_FORMAT Format) +{ + this->Format = Format; +} + +uint32_t STCWaveFile::GetChannelMask() +{ + return ChannelMask; +} + +void STCWaveFile::SetChannelMask(uint32_t Mask) +{ + ChannelMask = Mask; +} + +bool STCWaveFile::ReadFrameAsS16(short* FrameSamples, int Frames) +{ + if (FramesRead 
>= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) << 8; + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + return Frames == fread(FrameSamples, sizeof(FrameSamples[0])*Channels, Frames, File); + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 8); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 16); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[Frames * Channels]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + double DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + } + return false; +} + +bool STCWaveFile::ReadFrameAsfloat(float* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t 
DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + if(fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; +// float DirectSamples[32]; +// +// if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) +// { +// for (int frame = 0; frame < Frames; frame++) +// { +// for (int ch = 0; ch < Channels; ch++) +// { +// FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); +// } +// } +// return true; +// } +// return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +bool STCWaveFile::ReadFrameAsDouble(double* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample ))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +void STCWaveFile::WriteRaw(void* Raw, int Size) +{ + fwrite(Raw, Size, 1, File); +} + + +void STCWaveFile::WriteFrame(uint8_t* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void 
STCWaveFile::WriteFrame(short* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+void STCWaveFile::WriteFrame(int32_t* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+void STCWaveFile::WriteFrameS24(int32_t* FrameSamples, int Frames)
+{
+    // write the low three bytes of each 32-bit sample (little-endian packing)
+    for (int f = 0; f < Frames; f++)
+    {
+        for (int c = 0; c < Channels; c++)
+        {
+            fwrite(&FrameSamples[f * Channels + c], 3, 1, File);
+        }
+    }
+}
+
+void STCWaveFile::WriteFrame(double* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+void STCWaveFile::WriteFrame(float* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+
+double STCWaveFile::GetDuration()
+{
+    return Duration;
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/CAudioEffectsChainApi.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/CAudioEffectsChainApi.cpp
new file mode 100644
index 0000000..29805ae
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/CAudioEffectsChainApi.cpp
@@ -0,0 +1,74 @@
+//
+// Created by yangjianli on 2020-01-09.
+//
+
+#include "CAudioEffectsChainApi.h"
+#include "audio_chain/CAudioEffectsChain.h"
+
+void* ae_create_object()
+{
+    return new CAudioEffectsChain(); // construct
+}
+
+void ae_destory_object(void *p)
+{
+    ae_uninit(p);
+    delete static_cast<CAudioEffectsChain*>(p); // release
+}
+
+AE_ERR ae_init(void *p, int sample_rate, int channel)
+{
+    CAudioEffectsChain* audio_effect_chain = static_cast<CAudioEffectsChain*>(p);
+    return audio_effect_chain->init(sample_rate, channel);
+}
+
+AE_ERR ae_uninit(void *p)
+{
+    CAudioEffectsChain* audio_effect_chain = static_cast<CAudioEffectsChain*>(p);
+    return audio_effect_chain->uninit();
+}
+
+AE_ERR ae_reset(void *p)
+{
+    CAudioEffectsChain* audio_effect_chain = static_cast<CAudioEffectsChain*>(p);
+    return audio_effect_chain->reset();
+}
+
+int ae_get_latency_ms(void *p)
+{
+    CAudioEffectsChain* audio_effect_chain = static_cast<CAudioEffectsChain*>(p);
+    return audio_effect_chain->get_latency_ms();
+}
+
+AE_ERR ae_set_params(void *p, void *casw, AE_PARAMS *params)
+{
+    CAudioEffectsChain* audio_effect_chain = static_cast<CAudioEffectsChain*>(p);
+    CAudioSmoothWrapper* audio_smooth_wrapper = static_cast<CAudioSmoothWrapper*>(casw);
+    return audio_effect_chain->set_params(audio_smooth_wrapper, params);
+}
+
+AE_ERR ae_get_params(void *p, void *casw, AE_PARAMS *params)
+{
+    CAudioEffectsChain* audio_effect_chain = static_cast<CAudioEffectsChain*>(p);
+    CAudioSmoothWrapper* audio_smooth_wrapper = static_cast<CAudioSmoothWrapper*>(casw);
+    return audio_effect_chain->get_params(audio_smooth_wrapper, params);
+}
+
+AE_ERR ae_process(void *p, float *in_buf, float *out_buf, int length)
+{
+    CAudioEffectsChain* audio_effect_chain = static_cast<CAudioEffectsChain*>(p);
+    return audio_effect_chain->process(in_buf, out_buf, length);
+}
+
+void* ae_add_effect(void *p, AE_EFFECT_TYPE effects_number)
+{
+    CAudioEffectsChain* audio_effect_chain = static_cast<CAudioEffectsChain*>(p);
+    return audio_effect_chain->add_effect(effects_number);
+}
+
+AE_ERR ae_delete_effect(void *p, void *casw)
+{
+    CAudioEffectsChain* audio_effect_chain = static_cast<CAudioEffectsChain*>(p);
+    CAudioSmoothWrapper* audio_smooth_wrapper = static_cast<CAudioSmoothWrapper*>(casw);
+    return audio_effect_chain->delete_effect(audio_smooth_wrapper);
+}
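+
+// Usage sketch for the C-style API above (error handling omitted; the sample
+// rate, effect type and buffer length are illustrative):
+//
+//     void *chain = ae_create_object();
+//     ae_init(chain, 44100, 2);
+//     void *reverb = ae_add_effect(chain, AE_EFFECT_TYPE_AL_REVERB);
+//     ae_process(chain, in_buf, out_buf, n_samples); // n_samples = frames * channels
+//     ae_destory_object(chain);                      // also uninits the chain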
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/IAudioEffects.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/IAudioEffects.h
new file mode 100644
index 0000000..575178a
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/IAudioEffects.h
@@ -0,0 +1,45 @@
+//
+// Created by yangjianli on 2020-01-09.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_IAUDIOEFFECTS_H
+#define AUDIO_EFFECTS_LIB_IAUDIOEFFECTS_H
+
+/**
+ * Interface implemented by every audio effect
+ */
+#include "manager/Manager.h"
+#include "AudioEffectsDef.h"
+
+class IAudioEffects
+{
+public:
+    IAudioEffects() {};
+    virtual ~IAudioEffects() {}; // virtual destructor for the abstract class, prevents leaks when deleting through the base pointer
+public:
+    virtual int init(int sample_rate, int channel) = 0;
+    virtual int reset() = 0;
+    virtual int uninit() = 0;
+    virtual int process(float* in_buf, float* out_buf, int length) = 0;
+    virtual int get_latency_ms() = 0;
+    virtual int set_params(AE_PARAMS* param) = 0;
+    virtual int get_params(AE_PARAMS* param) = 0; // the caller allocates the output struct
+    virtual int get_effectId() = 0;               // returns the unique effect ID
+};
+
+
+class ICreator
+{
+
+public:
+    ICreator() = delete;
+    explicit ICreator(int type)
+    {
+        registered(type, this);
+    };
+
+public:
+    virtual IAudioEffects* get_inst() = 0;
+};
+
+#endif //AUDIO_EFFECTS_LIB_IAUDIOEFFECTS_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_al_reverb/CAudioAlReverb.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_al_reverb/CAudioAlReverb.cpp
new file mode 100644
index 0000000..cbcc936
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_al_reverb/CAudioAlReverb.cpp
@@ -0,0 +1,93 @@
+//
+// Created by yangjianli on 2020-01-14.
+//
+
+#include "CAudioAlReverb.h"
+#include "al_reverb/inc/AlReverbApi.h"
+
+static CAudioAlReverbCreator gs_al_reverb_api = CAudioAlReverbCreator(AE_EFFECT_TYPE_AL_REVERB);
+
+CAudioAlReverb::CAudioAlReverb()
+{
+    m_al_reverb_api = nullptr;
+    m_ae_params_al_reverb = nullptr;
+}
+
+CAudioAlReverb::~CAudioAlReverb()
+{
+    uninit();
+}
+
+int CAudioAlReverb::init(int sample_rate, int channel)
+{
+    m_al_reverb_api = new SUPERSOUND::ALREVERB::AlReverbApi();
+    m_al_reverb_api->init(sample_rate, channel, 0);
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioAlReverb::uninit()
+{
+    if(nullptr != m_al_reverb_api)
+    {
+        m_al_reverb_api->uninit();
+        delete m_al_reverb_api;
+        m_al_reverb_api = nullptr;
+    }
+
+    if(nullptr != m_ae_params_al_reverb)
+    {
+        delete m_ae_params_al_reverb;
+        m_ae_params_al_reverb = nullptr;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioAlReverb::reset()
+{
+    m_al_reverb_api->reset();
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioAlReverb::get_effectId()
+{
+    return AE_EFFECT_TYPE_AL_REVERB;
+}
+
+int CAudioAlReverb::get_latency_ms()
+{
+    return m_al_reverb_api->get_latency();
+}
+
+int CAudioAlReverb::set_params(AE_PARAMS *param)
+{
+    if(nullptr != param)
+    {
+        AE_PARAMS_AL_REVERB* tp = (AE_PARAMS_AL_REVERB*) param;
+
+        // allocate the cached parameter block on the first call
+        if(nullptr == m_ae_params_al_reverb)
+        {
+            m_ae_params_al_reverb = new AE_PARAMS_AL_REVERB();
+        }
+        memcpy(m_ae_params_al_reverb, tp, sizeof(AE_PARAMS_AL_REVERB));
+    }
+    return m_al_reverb_api->set_param((AE_PARAMS_AL_REVERB*) param);
+}
+
+int CAudioAlReverb::get_params(AE_PARAMS *param)
+{
+    if(nullptr != param && nullptr != m_ae_params_al_reverb)
+    {
+        memcpy(param, m_ae_params_al_reverb, sizeof(AE_PARAMS_AL_REVERB));
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioAlReverb::process(float *in_buf, float *out_buf, int length)
+{
+    if(in_buf != out_buf)
+    {
+        memcpy(out_buf, in_buf, sizeof(float) * length);
+    }
+    return m_al_reverb_api->process(out_buf, length);
+}
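+
+// The static gs_al_reverb_api object above is the whole registration mechanism:
+// constructing the creator calls registered(AE_EFFECT_TYPE_AL_REVERB, this)
+// (presumably declared in manager/Manager.h), so the effect chain can later
+// instantiate this effect for that type via CAudioAlReverbCreator::get_inst().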
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_al_reverb/CAudioAlReverb.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_al_reverb/CAudioAlReverb.h
new file mode 100644
index 0000000..cd85bf0
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_al_reverb/CAudioAlReverb.h
@@ -0,0 +1,48 @@
+//
+// Created by yangjianli on 2020-01-14.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CAUDIOALREVERB_H
+#define AUDIO_EFFECTS_LIB_CAUDIOALREVERB_H
+
+#include "IAudioEffects.h"
+namespace SUPERSOUND {
+    namespace ALREVERB {
+        class AlReverbApi;
+    }
+}
+
+class CAudioAlReverb : public IAudioEffects
+{
+public:
+    CAudioAlReverb();
+    ~CAudioAlReverb();
+public:
+    int init(int sample_rate, int channel) override;
+    int reset() override;
+    int uninit() override;
+    int process(float* in_buf, float* out_buf, int length) override;
+    int get_latency_ms() override;
+    int set_params(AE_PARAMS* param) override;
+    int get_params(AE_PARAMS* param) override;
+    int get_effectId() override; // returns the unique effect ID
+
+private:
+    SUPERSOUND::ALREVERB::AlReverbApi* m_al_reverb_api;
+    AE_PARAMS_AL_REVERB* m_ae_params_al_reverb;
+};
+
+
+class CAudioAlReverbCreator : public ICreator
+{
+public:
+    CAudioAlReverbCreator(int type):ICreator(type){};
+
+public:
+    IAudioEffects* get_inst() override
+    {
+        return new CAudioAlReverb();
+    };
+};
+
+#endif //AUDIO_EFFECTS_LIB_CAUDIOALREVERB_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_autotune/CAudioAutoTune.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_autotune/CAudioAutoTune.cpp
new file mode 100644
index 0000000..5db5271
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_autotune/CAudioAutoTune.cpp
@@ -0,0 +1,73 @@
+//
+// Created by yangjianli on 2020-01-13.
+//
+
+#include <stdio.h>
+#include "CAudioAutoTune.h"
+#include "autotune/inc/ATndkWrapper.h"
+
+static CAudioAutoTuneCreator gs_autotune_creator = CAudioAutoTuneCreator(AE_EFFECT_TYPE_AUTOTUNE);
+CAudioAutoTune::CAudioAutoTune()
+{
+    m_atndk_api = nullptr;
+}
+
+CAudioAutoTune::~CAudioAutoTune()
+{
+    uninit();
+}
+int CAudioAutoTune::init(int sample_rate, int channel)
+{
+    m_atndk_api = new CATndkWrapper();
+    int ret = m_atndk_api->init(sample_rate, channel);
+    if(AT_ERR_SUCCESS != ret)
+    {
+        uninit();
+        return ret;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioAutoTune::uninit()
+{
+    if(nullptr != m_atndk_api)
+    {
+        m_atndk_api->uninit();
+        delete m_atndk_api;
+        m_atndk_api = nullptr;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioAutoTune::reset()
+{
+    m_atndk_api->reset();
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioAutoTune::get_effectId()
+{
+    return AE_EFFECT_TYPE_AUTOTUNE;
+}
+
+int CAudioAutoTune::get_latency_ms()
+{
+    return m_atndk_api->get_latency_time_ms();
+}
+
+int CAudioAutoTune::set_params(AE_PARAMS *param)
+{
+    // no parameters to set for this effect
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioAutoTune::get_params(AE_PARAMS *param)
+{
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioAutoTune::process(float *in_buf, float *out_buf, int length)
+{
+    // the ms argument is not used internally, so pass 0 here
+    return m_atndk_api->process(in_buf, out_buf, length, 0);
+}
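+
+// Usage sketch of the IAudioEffects interface with this effect as the example
+// (44100/2 and the buffer length are illustrative):
+//
+//     IAudioEffects *fx = new CAudioAutoTune();
+//     if (AE_ERR_SUCCESS == fx->init(44100, 2))
+//     {
+//         fx->process(in_buf, out_buf, length); // length counts float samples
+//         fx->uninit();
+//     }
+//     delete fx;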
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CAUDIOAUTOTUNE_H
+#define AUDIO_EFFECTS_LIB_CAUDIOAUTOTUNE_H
+
+#include "IAudioEffects.h"
+
+class CATndkWrapper;
+class CAudioAutoTune : public IAudioEffects
+{
+public:
+    CAudioAutoTune();
+    ~CAudioAutoTune();
+public:
+    int init(int sample_rate, int channel) override;
+    int reset() override;
+    int uninit() override;
+    int process(float* in_buf, float* out_buf, int length) override;
+    int get_latency_ms() override;
+    int set_params(AE_PARAMS* param) override;
+    int get_params(AE_PARAMS* param) override;
+    int get_effectId() override; // unique effect ID
+
+private:
+    CATndkWrapper* m_atndk_api;
+};
+
+class CAudioAutoTuneCreator : public ICreator
+{
+public:
+    CAudioAutoTuneCreator(int type):ICreator(type){};
+
+public:
+    IAudioEffects* get_inst() override
+    {
+        return new CAudioAutoTune();
+    };
+
+};
+#endif //AUDIO_EFFECTS_LIB_CAUDIOAUTOTUNE_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_chain/CAudioEffectsChain.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_chain/CAudioEffectsChain.cpp
new file mode 100644
index 0000000..20efbc8
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_chain/CAudioEffectsChain.cpp
@@ -0,0 +1,201 @@
+//
+// Created by yangjianli on 2020-01-09.
+//
+
+#include <string.h>
+#include "CAudioEffectsChain.h"
+
+CAudioEffectsChain::CAudioEffectsChain()
+{
+    m_list.clear();
+    m_list_tmp.clear();
+    m_process_list.clear();
+    m_sample_rate = 0;
+    m_channel = 0;
+    m_list_update = false;
+}
+
+CAudioEffectsChain::~CAudioEffectsChain()
+{
+    uninit();
+}
+
+CAudioSmoothWrapper* CAudioEffectsChain::add_effect(AE_EFFECT_TYPE effects_number)
+{
+    if(0 == m_sample_rate || 0 == m_channel)
+    {
+        return nullptr;
+    }
+
+    // custom deleter: uninit() runs automatically right before the instance is freed
+    std::shared_ptr<CAudioSmoothWrapper> sw = std::shared_ptr<CAudioSmoothWrapper>(new CAudioSmoothWrapper(),
+            [](CAudioSmoothWrapper *inst){
+                inst->uninit();
+                delete inst;
+                inst = nullptr;
+            }
+    );
+
+    int ret = sw->init(effects_number, m_sample_rate, m_channel);
+    if(AE_ERR_SUCCESS != ret)
+    {
+        return nullptr;
+    }
+    // lock while appending
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        m_list.push_back(sw);
+        m_list_update = true;
+    }
+    return sw.get();
+}
+
+AE_ERR CAudioEffectsChain::init(int sample_rate, int channel)
+{
+    m_sample_rate = sample_rate;
+    m_channel = channel;
+    return AE_ERR_SUCCESS;
+}
+
+AE_ERR CAudioEffectsChain::uninit()
+{
+    // lock while clearing
+    std::lock_guard<std::mutex> lock(m_mutex);
+    m_list.clear();
+    m_list_tmp.clear();
+    m_process_list.clear();
+
+    m_sample_rate = 0;
+    m_channel = 0;
+    m_list_update = false;
+    return AE_ERR_SUCCESS;
+}
+
+AE_ERR CAudioEffectsChain::reset()
+{
+    // work on a snapshot so an entry cannot be released by process() mid-reset
+    copylist2tmp();
+    for(auto & sw : m_list_tmp)
+    {
+        sw->reset();
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioEffectsChain::get_latency_ms()
+{
+    // work on a snapshot so an entry cannot be released by process() mid-query
+    copylist2tmp();
+    int latency = 0;
+    for(auto & sw : m_list_tmp)
+    {
+        latency += sw->get_latency_ms();
+    }
+    return latency;
+}
+
+AE_ERR CAudioEffectsChain::set_params(CAudioSmoothWrapper* casw, AE_PARAMS *params)
+{
+    copylist2tmp();
+    AE_ERR ret = AE_ERR_EFFECT_NOT_IN_CHAIN;
+    for(auto & sw : m_list_tmp)
+    {
+        if(casw == sw.get())
+        {
+            ret = AE_ERR_SUCCESS;
+            sw->set_param(params);
+            break;
+        }
+    }
+    return ret;
+}
+
+AE_ERR CAudioEffectsChain::get_params(CAudioSmoothWrapper *casw, AE_PARAMS *params)
+{
+    copylist2tmp();
+    AE_ERR ret = AE_ERR_EFFECT_NOT_IN_CHAIN;
+    for(auto & sw : m_list_tmp)
+    {
+        if(casw == sw.get())
+        {
+            ret = AE_ERR_SUCCESS;
+            sw->get_param(params);
+            break;
+        }
+    }
+    return ret;
+}
+
+AE_ERR CAudioEffectsChain::process(float *in_buf, float *out_buf, int length)
+{
+    if(m_list_update){
+        std::lock_guard<std::mutex> lock(m_mutex);
+        m_list_update = false;
+        m_process_list.clear();
+        m_process_list.insert(m_process_list.end(), m_list.begin(), m_list.end());
+    }
+
+    // copy the input so there is still output even when the list is empty
+    if(in_buf != out_buf)
+    {
+        memcpy(out_buf, in_buf, sizeof(float) * length);
+    }
+
+    bool update = false;
+    std::vector<std::shared_ptr<CAudioSmoothWrapper> >::iterator it;
+    for(it=m_process_list.begin(); it != m_process_list.end();)
+    {
+        (*it)->process(in_buf, out_buf, length);
+        if(!(*it)->get_switch_status() && (*it)->get_change_status())
+        {
+            // no manual uninit needed here: the entry is a shared_ptr whose custom
+            // deleter calls uninit() once the last vector lets go of it; erasing only
+            // removes it from this list, so temporary snapshots stay usable.
+            // erase(it) returns the iterator of the next element.
+//            (*it)->uninit();
+            it=m_process_list.erase(it);
+            update = true;
+        }
+        else
+        {
+            ++it;
+        }
+
+        if(in_buf != out_buf)
+        {
+            memcpy(in_buf, out_buf, sizeof(float) * length);
+        }
+    }
+
+    // publish the updated list
+    if(update)
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        m_list.clear();
+        m_list.insert(m_list.end(), m_process_list.begin(), m_process_list.end());
+    }
+    return AE_ERR_SUCCESS;
+}
+
+AE_ERR CAudioEffectsChain::delete_effect(CAudioSmoothWrapper* casw)
+{
+    copylist2tmp();
+    AE_ERR ret = AE_ERR_EFFECT_NOT_IN_CHAIN;
+    for(auto &ae : m_list_tmp)
+    {
+        if (casw == ae.get())
+        {
+            ret = AE_ERR_SUCCESS;
+            ae->close_effect();
+            break;
+        }
+    }
+    return ret;
+}
+
+// copy the list so iteration cannot break while effects are added or removed
+void CAudioEffectsChain::copylist2tmp()
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+    m_list_tmp.clear();
+    m_list_tmp.insert(m_list_tmp.end(), m_list.begin(), m_list.end());
+}
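process() deliberately avoids taking the lock on every audio block: writers flip m_list_update under the mutex, and the audio thread re-snapshots the list only when that flag is set, holding shared_ptr copies so removed effects stay alive until the block finishes. A distilled, self-contained sketch of that pattern (names shortened; single control thread assumed, as in the chain above):

#include <mutex>
#include <vector>
#include <memory>

struct Node { /* stand-in for an effect wrapper */ };

std::mutex g_mutex;
std::vector<std::shared_ptr<Node>> g_list;     // edited by the control thread under g_mutex
std::vector<std::shared_ptr<Node>> g_snapshot; // owned by the audio thread
bool g_dirty = false;                          // set under g_mutex after every edit

void audio_thread_block()
{
    if (g_dirty) // unlocked read is tolerable: worst case we snapshot one block late
    {
        std::lock_guard<std::mutex> lock(g_mutex);
        g_dirty = false;
        g_snapshot = g_list; // shared_ptr copies keep nodes alive even if removed meanwhile
    }
    // ... iterate g_snapshot without holding the lock ...
}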
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_chain/CAudioEffectsChain.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_chain/CAudioEffectsChain.h
new file mode 100644
index 0000000..9760c4c
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_chain/CAudioEffectsChain.h
@@ -0,0 +1,61 @@
+//
+// Created by yangjianli on 2020-01-09.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CAUDIOEFFECTSCHAIN_H
+#define AUDIO_EFFECTS_LIB_CAUDIOEFFECTSCHAIN_H
+
+#include "audio_smooth_wrapper/CAudioSmoothWrapper.h"
+#include "AudioEffectsDef.h"
+#include "vector"
+#include "map"
+#include "mutex"
+#include <memory>
+/**
+ * Notes:
+ * 1. Call init() before any other operation.
+ * 2. process() runs on its own thread; reset() must run on the process thread,
+ *    and all remaining operations must share a single (control) thread.
+ * 3. uninit() may only run after process() has stopped.
+ * The effect chain is responsible for:
+ * 1. adding effects
+ * 2. removing effects
+ * 3. keeping the processing order
+ * 4. updating effect parameters
+ */
+
+class CAudioEffectsChain
+{
+public:
+    CAudioEffectsChain();
+    ~CAudioEffectsChain();
+
+public:
+    AE_ERR init(int sample_rate, int channel);
+    AE_ERR uninit();
+    AE_ERR reset();
+    int get_latency_ms();
+    AE_ERR set_params(CAudioSmoothWrapper* casw, AE_PARAMS* params);
+    AE_ERR get_params(CAudioSmoothWrapper* casw, AE_PARAMS* params);
+    AE_ERR process(float* in_buf, float* out_buf, int length);
+
+    // returns the effect's address, used externally as an opaque handle for the
+    // operations above; avoid calling through it directly
+    CAudioSmoothWrapper* add_effect(AE_EFFECT_TYPE effects_number);
+    AE_ERR delete_effect(CAudioSmoothWrapper* casw);
+
+private:
+    void copylist2tmp();
+private:
+    std::vector<std::shared_ptr<CAudioSmoothWrapper>> m_list;         // effects in processing order
+    std::vector<std::shared_ptr<CAudioSmoothWrapper>> m_list_tmp;     // snapshot used by control-thread operations
+    std::vector<std::shared_ptr<CAudioSmoothWrapper>> m_process_list; // snapshot used by process()
+    std::mutex m_mutex; // guards m_list against concurrent access from multiple threads
+    int m_sample_rate;
+    int m_channel;
+    // set whenever a non-process thread modifies the list
+    bool m_list_update;
+};
+
+
+#endif //AUDIO_EFFECTS_LIB_CAUDIOEFFECTSCHAIN_H
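Taken together, a caller drives the chain as below. A minimal usage sketch under the threading rules stated in the header (block size and the zero-initialized preset are illustrative; a real caller fills AE_PARAMS_REVERB from one of the library presets):

#include <vector>
// #include "audio_chain/CAudioEffectsChain.h" // path relative to src/, as in the library

void chain_usage_sketch()
{
    CAudioEffectsChain chain;
    chain.init(44100, 2); // must happen before anything else

    // add_effect returns an opaque handle owned by the chain
    CAudioSmoothWrapper* reverb = chain.add_effect(AE_EFFECT_TYPE_REVERB);
    AE_PARAMS_REVERB params{}; // sketch: fill with a real preset before use
    chain.set_params(reverb, (AE_PARAMS*)&params);

    std::vector<float> buf(1024 * 2); // one interleaved stereo block
    chain.process(buf.data(), buf.data(), (int)buf.size()); // audio thread

    chain.delete_effect(reverb); // effect fades out, then drops off the list
    chain.uninit();              // only after process() has stopped
}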
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_eq/CAudioEqApi.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_eq/CAudioEqApi.cpp
new file mode 100644
index 0000000..dc16372
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_eq/CAudioEqApi.cpp
@@ -0,0 +1,105 @@
+//
+// Created by yangjianli on 2020-01-09.
+//
+
+#include "CAudioEqApi.h"
+#include "stdio.h"
+#include <string.h>
+#include "iir_eq/inc/CAudaciousEqApi.h"
+
+static CEqApiCreator gs_eq_creator = CEqApiCreator(AE_EFFECT_TYPE_EQ);
+
+CAudioEqApi::CAudioEqApi()
+{
+    m_eq_api = nullptr;
+    m_eq_param = nullptr;
+}
+
+CAudioEqApi::~CAudioEqApi()
+{
+    uninit();
+}
+
+int CAudioEqApi::init(int sample_rate, int channel)
+{
+    m_eq_api = new CAudaciousEqApi();
+    int ret = m_eq_api->init(sample_rate, channel);
+    if(AUDACIOUS_EQ_ERROR_CODE_SUCCESS != ret)
+    {
+        uninit();
+        return ret;
+    }
+    m_kvs.init(channel);
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioEqApi::uninit()
+{
+    if(nullptr != m_eq_api)
+    {
+        m_eq_api->uninit();
+        delete m_eq_api;
+        m_eq_api = nullptr;
+    }
+
+    if(nullptr != m_eq_param)
+    {
+        delete m_eq_param;
+        m_eq_param = nullptr;
+    }
+
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioEqApi::reset()
+{
+    m_kvs.reset();
+    return m_eq_api->reset();
+}
+
+int CAudioEqApi::get_effectId()
+{
+    return AE_EFFECT_TYPE_EQ;
+}
+
+int CAudioEqApi::get_latency_ms()
+{
+    return m_eq_api->get_latency_ms();
+}
+
+int CAudioEqApi::set_params(AE_PARAMS* param)
+{
+    // forward the pointer in every case; the layer below handles null itself
+    AE_PARAMS_EQ* audio_effects_eq_params = (AE_PARAMS_EQ*) param;
+    if(nullptr != audio_effects_eq_params)
+    {
+        m_eq_api->set_param(audio_effects_eq_params->params);
+        if(nullptr == m_eq_param)
+        {
+            m_eq_param = new AE_PARAMS_EQ();
+        }
+        memcpy(m_eq_param, audio_effects_eq_params, sizeof(AE_PARAMS_EQ));
+    }else
+    {
+        const float* tmp = nullptr;
+        m_eq_api->set_param(tmp);
+    }
+
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioEqApi::get_params(AE_PARAMS *param)
+{
+    if(nullptr != m_eq_param && nullptr != param)
+    {
+        memcpy(param, m_eq_param, sizeof(AE_PARAMS_EQ));
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioEqApi::process(float *in_buf, float *out_buf, int length)
+{
+    m_kvs.pre_process(in_buf, length);
+    int ret = m_eq_api->process(in_buf, out_buf, length);
+    m_kvs.after_process(out_buf, length);
+    return ret;
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_eq/CAudioEqApi.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_eq/CAudioEqApi.h
new file mode 100644
index 0000000..8e5a5f5
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_eq/CAudioEqApi.h
@@ -0,0 +1,46 @@
+//
+// Created by yangjianli on 2020-01-09.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_AUDIOEQAPI_H
+#define AUDIO_EFFECTS_LIB_AUDIOEQAPI_H
+
+#include "IAudioEffects.h"
+#include "common/keep_volume_steady/CKeepVolumeSteady.h"
+class CAudaciousEqApi;
+class CAudioEqApi : public IAudioEffects
+{
+public:
+    CAudioEqApi();
+    ~CAudioEqApi();
+public:
+    int init(int sample_rate, int channel) override;
+    int reset() override;
+    int uninit() override;
+    int process(float* in_buf, float* out_buf, int length) override;
+    int get_latency_ms() override;
+    int set_params(AE_PARAMS* param) override;
+    int get_params(AE_PARAMS* param) override;
+    int get_effectId() override; // unique effect ID
+
+private:
+    CAudaciousEqApi* m_eq_api;
+    AE_PARAMS_EQ* m_eq_param;
+    CKeepVolumeSteady m_kvs;
+};
+
+
+class CEqApiCreator : public ICreator
+{
+
+public:
+    CEqApiCreator(int type):ICreator(type){};
+
+public:
+    IAudioEffects* get_inst() override
+    {
+        return new CAudioEqApi();
+    };
+};
+
+#endif //AUDIO_EFFECTS_LIB_AUDIOEQAPI_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_im_effect/CImEffectApi.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_im_effect/CImEffectApi.cpp
new file mode 100644
index 0000000..6772fde
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_im_effect/CImEffectApi.cpp
@@ -0,0 +1,167 @@
+//
+// Created by yangjianli on 2022/9/7.
+//
+
+// Requirement: each per-channel frame passed in from outside must be <= (1 << (bits - 1)) samples
+
+#include "CImEffectApi.h"
+#include "supersound/inc/supersound_err.h"
+#include "impulse_effect.h"
+#include <math.h>
+#include <string.h>
+static CImEffectApiCreator gs_im_effect_creator = CImEffectApiCreator(AE_EFFECT_TYPE_IM_EFFECT);
+
+CImEffectApi::CImEffectApi()
+{
+    m_impluse_effect = nullptr;
+    m_sample_rate = 0;
+    m_channel = 0;
+    m_latency_ms = 0;
+    m_empty_buf = nullptr;
+    m_empty_buf_len = 0;
+    m_need_push_data = false;
+}
+
+CImEffectApi::~CImEffectApi()
+{
+    uninit();
+}
+
+int CImEffectApi::init(int sample_rate, int channel)
+{
+    m_impluse_effect = new SUPERSOUND::IMPULSE::ImpulseEffect();
+    m_sample_rate = sample_rate;
+    m_channel = channel;
+    m_params = new Impulse_Param();
+    m_latency_ms = 0;
+    return AE_ERR_SUCCESS;
+}
+
+int CImEffectApi::uninit()
+{
+    if (nullptr != m_impluse_effect)
+    {
+        // the effect releases its internal buffers in its own destructor
+        delete m_impluse_effect;
+        m_impluse_effect = nullptr;
+    }
+
+    if (nullptr != m_params)
+    {
+        // audio buffers referenced by the params are owned and managed externally
+        delete m_params;
+        m_params = nullptr;
+    }
+
+    if (nullptr != m_empty_buf)
+    {
+        delete[] m_empty_buf;
+        m_empty_buf = nullptr;
+        m_empty_buf_len = 0;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CImEffectApi::get_effectId()
+{
+    return AE_EFFECT_TYPE_IM_EFFECT;
+}
+
+int CImEffectApi::get_latency_ms()
+{
+    return m_latency_ms;
+}
+
+int CImEffectApi::reset()
+{
+    m_impluse_effect->FlushOut();
+    if (m_need_push_data)
+    {
+        pre_push_data();
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CImEffectApi::set_params(AE_PARAMS *param)
+{
+    int err = 0;
+    if (param != NULL)
+    {
+        Impulse_Param *new_param = (Impulse_Param *) param;
+        copy_impluse_params(m_params, new_param);
+        // the internal frame must be able to hold one external buffer
+        int cur_frame_len = (1 << (new_param->window_bits - 1));
+        while (cur_frame_len < new_param->process_buffer_len)
+        {
+            new_param->window_bits += 1;
+            cur_frame_len = (1 << (new_param->window_bits - 1));
+        }
+
+        // high-performance mode adopts the internal frame length, which may add latency
+        int process_buffer_len = new_param->process_buffer_len;
+        if (new_param->high_performance)
+        {
+            // clamp to a minimum FFT size
+            if (new_param->window_bits <= 10)
+            {
+                new_param->window_bits = 10;
+                cur_frame_len = (1 << (new_param->window_bits - 1));
+            }
+            new_param->process_buffer_len = cur_frame_len;
+        }
+
+        new_param->fs = m_sample_rate;
+        new_param->in_channels = m_channel;
+        new_param->out_channels = m_channel;
+        err = m_impluse_effect->SetParam(param);
+        if (err != 0)
+        {
+            return err;
+        }
+
+        // if the external buffer length equals the internal frame length, processing is
+        // direct and latency is 0; if it is smaller, a full frame of zeros is pre-pushed
+        // so that from then on every push of N samples pops exactly N samples.
+        m_latency_ms = 0;
+        m_need_push_data = process_buffer_len != m_impluse_effect->GetFrameLen();
+    }
+    return err;
+}
+
+int CImEffectApi::get_params(AE_PARAMS *param)
+{
+    copy_impluse_params((Impulse_Param*)param, m_params);
+    return 0;
+}
+
+int CImEffectApi::process(float *in_buf, float *out_buf, int length)
+{
+    int out_length = 0;
+    m_impluse_effect->ProcessfInput(in_buf, length, out_length);
+    if (out_length < length)
+    {
+        // should not happen in practice
+        memset(out_buf, 0, sizeof(float) * length);
+        return 0;
+    }
+    m_impluse_effect->ProcessfOutput(out_buf, length, out_length);
+    return 0;
+}
+
+// Pre-fill one frame of zeros so later output stays in lockstep (only the very
+// start of the stream is silent). This is needed after set_params or after reset,
+// but never both back to back; the wrapper layer always calls reset() right after
+// set_params, so doing it in reset() alone is sufficient.
+void CImEffectApi::pre_push_data()
+{
+    if (m_empty_buf == nullptr || m_empty_buf_len < m_impluse_effect->GetFrameLen() * m_channel)
+    {
+        if (m_empty_buf != nullptr)
+        {
+            delete [] m_empty_buf;
+        }
+        m_empty_buf_len = m_impluse_effect->GetFrameLen() * m_channel;
+        m_empty_buf = new float[m_empty_buf_len];
+        memset(m_empty_buf, 0, sizeof(float) * m_empty_buf_len);
+    }
+    int out_length = 0;
+    m_impluse_effect->ProcessfInput(m_empty_buf, m_impluse_effect->GetFrameLen() * m_channel, out_length);
+    m_latency_ms = round(m_impluse_effect->GetFrameLen() * 1.0 / m_sample_rate * 1000);
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_im_effect/CImEffectApi.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_im_effect/CImEffectApi.h
new file mode 100644
index 0000000..43b1dca
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_im_effect/CImEffectApi.h
@@ -0,0 +1,60 @@
+//
+// Created by yangjianli on 2022/9/7.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CIMEFFECTAPI_H
+#define AUDIO_EFFECTS_LIB_CIMEFFECTAPI_H
+#include <stdio.h>
+#include "IAudioEffects.h"
+namespace SUPERSOUND
+{
+    namespace IMPULSE
+    {
+        class ImpulseEffect;
+    }
+}
+
+class CImEffectApi : public IAudioEffects
+{
+public:
+    CImEffectApi();
+    ~CImEffectApi();
+public:
+    int init(int sample_rate, int channel) override;
+    int reset() override;
+    int uninit() override;
+    int process(float* in_buf, float* out_buf, int length) override;
+    int get_latency_ms() override;
+    int set_params(AE_PARAMS* param) override;
+    int get_params(AE_PARAMS* param) override;
+    int get_effectId() override; // unique effect ID
+
+private:
+    void pre_push_data();
+private:
+    SUPERSOUND::IMPULSE::ImpulseEffect* m_impluse_effect;
+    Impulse_Param * m_params;
+    int32_t m_sample_rate;
+    int32_t m_channel;
+    int32_t m_latency_ms;
+    float* m_empty_buf;
+    int32_t m_empty_buf_len;
+    bool m_need_push_data;
+};
+
+
+class CImEffectApiCreator : public ICreator
+{
+
+public:
+    CImEffectApiCreator(int type):ICreator(type){};
+
+public:
+    IAudioEffects* get_inst() override
+    {
+        printf("IM Create !\n");
+        return new CImEffectApi();
+    };
+};
+
+
+#endif //AUDIO_EFFECTS_LIB_CIMEFFECTAPI_H
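The latency bookkeeping above is easiest to see with concrete numbers. A worked example, assuming a 44.1 kHz stream and window_bits = 11, i.e. an internal frame of 1024 samples per channel (values chosen purely for illustration):

// high_performance forces process_buffer_len up to the internal frame length:
//   window_bits = 11  ->  internal frame = 1 << (11 - 1) = 1024 samples per channel
// If the caller then feeds blocks of 512 samples per channel, one zero frame is
// pre-pushed and every later push of N samples pops exactly N samples, so:
//   latency_ms = round(1024 / 44100.0 * 1000) = 23 ms
// If the caller's block already equals 1024, nothing is pre-pushed and latency is 0.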
+// + +#include "CAudioPhonograph.h" +#include "phonograph/inc/CPhonograph.h" + +#define APG_VOLUME_RATE 1.68 //保持音量响度不变需要的倍数 + +static CAudioPhonographCreator gs_audio_phonograph_creator = CAudioPhonographCreator(AE_EFFECT_TYPE_PHONOGRAPH); + +CAudioPhonograph::CAudioPhonograph() +{ + m_phonogh_api = nullptr; +} + +CAudioPhonograph::~CAudioPhonograph() +{ + uninit(); +} + +int CAudioPhonograph::init(int sample_rate, int channel) +{ + m_phonogh_api = new CPhonograph(); + int ret = m_phonogh_api->init(sample_rate, channel); + if(0 != ret) + { + uninit(); + return ret; + } + return AE_ERR_SUCCESS; +} + +int CAudioPhonograph::uninit() +{ + if(nullptr != m_phonogh_api) + { + m_phonogh_api->uninit(); + delete m_phonogh_api; + m_phonogh_api = nullptr; + } + return AE_ERR_SUCCESS; +} + +int CAudioPhonograph::get_effectId() +{ + return AE_EFFECT_TYPE_PHONOGRAPH; +} + +int CAudioPhonograph::get_latency_ms() +{ + return 0; +} + +int CAudioPhonograph::reset() +{ + m_phonogh_api->reset(); + return AE_ERR_SUCCESS; +} + +int CAudioPhonograph::set_params(AE_PARAMS *param) +{ + return AE_ERR_SUCCESS; +} + +int CAudioPhonograph::get_params(AE_PARAMS *param) +{ + return AE_ERR_SUCCESS; +} + +int CAudioPhonograph::process(float *in_buf, float *out_buf, int length) { + if (in_buf != out_buf) { + memcpy(out_buf, in_buf, sizeof(float) * length); + } + int ret = m_phonogh_api->process(out_buf, length); + for (int i = 0; i < length; i++) + { + out_buf[i] *= APG_VOLUME_RATE; + } + return ret; +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_phonograph/CAudioPhonograph.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_phonograph/CAudioPhonograph.h new file mode 100644 index 0000000..bca5b04 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_phonograph/CAudioPhonograph.h @@ -0,0 +1,46 @@ +// +// Created by yangjianli on 2020-01-14. +// + +#ifndef AUDIO_EFFECTS_LIB_CAUDIOPHONOGRAPH_H +#define AUDIO_EFFECTS_LIB_CAUDIOPHONOGRAPH_H + + +#include "IAudioEffects.h" +#include "common/keep_volume_steady/CKeepVolumeSteady.h" +class CPhonograph; +class CAudioPhonograph : public IAudioEffects +{ +public: + CAudioPhonograph(); + ~CAudioPhonograph(); +public: + int init(int sample_rate, int channel) override; + int reset() override; + int uninit() override; + int process(float* in_buf, float* out_buf, int length) override; + int get_latency_ms() override; + int set_params(AE_PARAMS* param) override; + int get_params(AE_PARAMS* param) override; + int get_effectId() override; // 获取唯一ID + +private: + CPhonograph* m_phonogh_api; +}; + + +class CAudioPhonographCreator : public ICreator +{ + +public: + CAudioPhonographCreator(int type):ICreator(type){}; + +public: + IAudioEffects* get_inst() override + { + return new CAudioPhonograph(); + }; +}; + + +#endif //AUDIO_EFFECTS_LIB_CAUDIOPHONOGRAPH_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_reverb/CAudioReverbApi.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_reverb/CAudioReverbApi.cpp new file mode 100644 index 0000000..0a35fa2 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_reverb/CAudioReverbApi.cpp @@ -0,0 +1,115 @@ +// +// Created by yangjianli on 2020-01-14. 
+// + +#include "CAudioReverbApi.h" +#include "reverb/inc/CReverb.h" + +#define RB_VOLUME_RATE 1.68 // 对于DISTINCT需要保持不变 + +static CAudioReverbApiCreator gs_reverb_api = CAudioReverbApiCreator(AE_EFFECT_TYPE_REVERB); +CAudioReverbApi::CAudioReverbApi() +{ + m_reverb_api = nullptr; + m_reverb_param = nullptr; + + m_rate = 1.0; +} + +CAudioReverbApi::~CAudioReverbApi() +{ + uninit(); +} + +int CAudioReverbApi::init(int sample_rate, int channel) +{ + m_reverb_api = new CReverb(); + int ret = m_reverb_api->init(sample_rate, channel); + if(ret != RB_ERR_SUCCESS) + { + uninit(); + return ret; + } + return AE_ERR_SUCCESS; +} + +int CAudioReverbApi::uninit() +{ + if(nullptr != m_reverb_api) + { + m_reverb_api->uninit(); + delete m_reverb_api; + m_reverb_api = nullptr; + } + + if(nullptr != m_reverb_param) + { + delete m_reverb_param; + m_reverb_param = nullptr; + } + return AE_ERR_SUCCESS; +} + +int CAudioReverbApi::reset() +{ + m_reverb_api->reset(); + return AE_ERR_SUCCESS; +} + +int CAudioReverbApi::get_effectId() +{ + return AE_EFFECT_TYPE_REVERB; +} + +int CAudioReverbApi::get_latency_ms() +{ + return m_reverb_api->get_latency(); +} + +int CAudioReverbApi::set_params(AE_PARAMS *param) +{ + AE_PARAMS_REVERB* tp = (AE_PARAMS_REVERB*) param; + if(tp != nullptr) + { + if(nullptr == m_reverb_param) + { + m_reverb_param = new AE_PARAMS_REVERB(); + } + memcpy(m_reverb_param, tp, sizeof(AE_PARAMS_REVERB)); + + // 判断新输入的值是否与DISTINCT相等 + m_rate = params_cmp(tp, + (AE_PARAMS_REVERB*)&(gs_ae_params_reverb_params[AE_PARAMS_TYPE_REVERB_ID_15 - AE_PARAMS_TYPE_RERVERB])) + ? RB_VOLUME_RATE : 1.0; + + } + return m_reverb_api->set_params(tp); +} + +int CAudioReverbApi::get_params(AE_PARAMS *param) +{ + if(nullptr != param && nullptr != m_reverb_param) + { + memcpy(param, m_reverb_param, sizeof(AE_PARAMS_REVERB)); + } + return AE_ERR_SUCCESS; +} + +int CAudioReverbApi::process(float *in_buf, float *out_buf, int length) +{ + m_reverb_api->process(in_buf, length, out_buf, length); + if(m_rate != 1.0) + { + for(int i=0;iroom_size == b->room_size && a->wet == b->wet && a->width == b->width + && a->dry == b->dry && a->damp == b->damp && a->mode == b->mode); +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_reverb/CAudioReverbApi.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_reverb/CAudioReverbApi.h new file mode 100644 index 0000000..bf531a5 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_reverb/CAudioReverbApi.h @@ -0,0 +1,44 @@ +// +// Created by yangjianli on 2020-01-14. 
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CAUDIOREVERBAPI_H
+#define AUDIO_EFFECTS_LIB_CAUDIOREVERBAPI_H
+
+#include "IAudioEffects.h"
+class CReverb;
+class CAudioReverbApi : public IAudioEffects
+{
+public:
+    CAudioReverbApi();
+    ~CAudioReverbApi();
+public:
+    int init(int sample_rate, int channel) override;
+    int reset() override;
+    int uninit() override;
+    int process(float* in_buf, float* out_buf, int length) override;
+    int get_latency_ms() override;
+    int set_params(AE_PARAMS* param) override;
+    int get_params(AE_PARAMS* param) override;
+    int get_effectId() override; // unique effect ID
+private:
+    bool params_cmp(AE_PARAMS_REVERB* a, AE_PARAMS_REVERB* b);
+private:
+    CReverb* m_reverb_api;
+    AE_PARAMS_REVERB* m_reverb_param;
+    float m_rate;
+};
+
+class CAudioReverbApiCreator : public ICreator
+{
+public:
+    CAudioReverbApiCreator(int type):ICreator(type){};
+
+public:
+    IAudioEffects* get_inst() override
+    {
+        return new CAudioReverbApi();
+    };
+};
+
+#endif //AUDIO_EFFECTS_LIB_CAUDIOREVERBAPI_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_saudio_effects/CAudioSAudioEffectsApi.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_saudio_effects/CAudioSAudioEffectsApi.cpp
new file mode 100644
index 0000000..54b8a20
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_saudio_effects/CAudioSAudioEffectsApi.cpp
@@ -0,0 +1,99 @@
+//
+// Created by yangjianli on 2020-01-10.
+//
+
+#include "CAudioSAudioEffectsApi.h"
+#include "saudio_effects/inc/SAudioEffectsApi.h"
+
+static CAudioSAudioEffectsCreator gs_eq_creator = CAudioSAudioEffectsCreator(AE_EFFECT_TYPE_SAE);
+
+CAudioSAudioEffectsApi::CAudioSAudioEffectsApi()
+{
+    m_sae_api = nullptr;
+    m_ae_params_sae = nullptr;
+}
+
+CAudioSAudioEffectsApi::~CAudioSAudioEffectsApi()
+{
+    uninit();
+}
+
+int CAudioSAudioEffectsApi::init(int sample_rate, int channel)
+{
+    m_sae_api = new SAudioEffectsApi();
+    int ret = m_sae_api->init(sample_rate, channel);
+    if(ERROR_CODE_SUCCESS != ret)
+    {
+        uninit();
+        return ret;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSAudioEffectsApi::uninit()
+{
+    if(nullptr != m_sae_api)
+    {
+        m_sae_api->uninit();
+        delete m_sae_api;
+        m_sae_api = nullptr;
+    }
+
+    if(nullptr != m_ae_params_sae)
+    {
+        delete m_ae_params_sae;
+        m_ae_params_sae = nullptr;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSAudioEffectsApi::reset()
+{
+    m_sae_api->reset();
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSAudioEffectsApi::get_effectId()
+{
+    return AE_EFFECT_TYPE_SAE;
+}
+
+int CAudioSAudioEffectsApi::get_latency_ms()
+{
+    return m_sae_api->get_latency();
+}
+
+int CAudioSAudioEffectsApi::set_params(AE_PARAMS *param)
+{
+    if(nullptr != param)
+    {
+        if(nullptr == m_ae_params_sae)
+        {
+            m_ae_params_sae = new AE_PARAMS_SAE();
+        }
+
+        AE_PARAMS_SAE* tp = (AE_PARAMS_SAE*) param;
+        m_ae_params_sae->params_list.assign(
+                tp->params_list.begin(),tp->params_list.end()
+        );
+    }
+    return m_sae_api->set_audio_effect(param);
+}
+
+int CAudioSAudioEffectsApi::get_params(AE_PARAMS *param)
+{
+    if(nullptr != param && nullptr != m_ae_params_sae)
+    {
+        AE_PARAMS_SAE* tp = (AE_PARAMS_SAE*) param;
+        tp->params_list.assign(
+                m_ae_params_sae->params_list.begin(),
+                m_ae_params_sae->params_list.end()
+        );
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSAudioEffectsApi::process(float *in_buf, float *out_buf, int length)
+{
+    return m_sae_api->process(in_buf, out_buf, length);
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_saudio_effects/CAudioSAudioEffectsApi.h
b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_saudio_effects/CAudioSAudioEffectsApi.h
new file mode 100644
index 0000000..33348b1
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_saudio_effects/CAudioSAudioEffectsApi.h
@@ -0,0 +1,40 @@
+//
+// Created by yangjianli on 2020-01-10.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CAUDIOSAUDIOEFFECTSAPI_H
+#define AUDIO_EFFECTS_LIB_CAUDIOSAUDIOEFFECTSAPI_H
+
+#include "IAudioEffects.h"
+
+class SAudioEffectsApi;
+class CAudioSAudioEffectsApi : public IAudioEffects
+{
+public:
+    CAudioSAudioEffectsApi();
+    ~CAudioSAudioEffectsApi();
+public:
+    int init(int sample_rate, int channel) override;
+    int reset() override;
+    int uninit() override;
+    int process(float* in_buf, float* out_buf, int length) override;
+    int get_latency_ms() override;
+    int set_params(AE_PARAMS* param) override;
+    int get_params(AE_PARAMS* param) override;
+    int get_effectId() override; // unique effect ID
+private:
+    SAudioEffectsApi* m_sae_api;
+    AE_PARAMS_SAE* m_ae_params_sae;
+};
+
+class CAudioSAudioEffectsCreator : public ICreator {
+public:
+    CAudioSAudioEffectsCreator(int type):ICreator(type){};
+
+public:
+    IAudioEffects* get_inst() override
+    {
+        return new CAudioSAudioEffectsApi();
+    };
+};
+#endif //AUDIO_EFFECTS_LIB_CAUDIOSAUDIOEFFECTSAPI_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_slow_flanging/CAudioSlowFlanging.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_slow_flanging/CAudioSlowFlanging.cpp
new file mode 100644
index 0000000..c972bf6
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_slow_flanging/CAudioSlowFlanging.cpp
@@ -0,0 +1,71 @@
+//
+// Created by yangjianli on 2020-01-14.
+//
+
+#include "CAudioSlowFlanging.h"
+#include "slow_flanging/inc/CSlowFlanging.h"
+
+static CAudioSlowFlangingCreator gs_audio_slow_flaging_creator = CAudioSlowFlangingCreator(AE_EFFECT_TYPE_SLOWFLANGING);
+CAudioSlowFlanging::CAudioSlowFlanging()
+{
+    m_slow_flanging_api = nullptr;
+}
+
+CAudioSlowFlanging::~CAudioSlowFlanging()
+{
+    uninit();
+}
+
+int CAudioSlowFlanging::init(int sample_rate, int channel)
+{
+    m_slow_flanging_api = new CSlowFlanging();
+    int ret = m_slow_flanging_api->init(channel, sample_rate);
+    if(0 != ret)
+    {
+        uninit();
+        return ret;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSlowFlanging::uninit()
+{
+    if(nullptr != m_slow_flanging_api)
+    {
+        m_slow_flanging_api->uninit();
+        delete m_slow_flanging_api;
+        m_slow_flanging_api = nullptr;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSlowFlanging::get_effectId()
+{
+    return AE_EFFECT_TYPE_SLOWFLANGING;
+}
+
+int CAudioSlowFlanging::get_latency_ms()
+{
+    return 0;
+}
+
+int CAudioSlowFlanging::reset()
+{
+    m_slow_flanging_api->reset();
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSlowFlanging::set_params(AE_PARAMS *param)
+{
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSlowFlanging::get_params(AE_PARAMS *param)
+{
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSlowFlanging::process(float *in_buf, float *out_buf, int length)
+{
+    return m_slow_flanging_api->process(in_buf, out_buf, length);
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_slow_flanging/CAudioSlowFlanging.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_slow_flanging/CAudioSlowFlanging.h
new file mode 100644
index 0000000..001b4bb
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_slow_flanging/CAudioSlowFlanging.h
@@ -0,0 +1,42 @@
+//
+// Created by yangjianli on 2020-01-14.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CAUDIOSLOWFLANGING_H
+#define AUDIO_EFFECTS_LIB_CAUDIOSLOWFLANGING_H
+
+
+#include "IAudioEffects.h"
+
+class CSlowFlanging;
+class CAudioSlowFlanging : public IAudioEffects
+{
+public:
+    CAudioSlowFlanging();
+    ~CAudioSlowFlanging();
+public:
+    int init(int sample_rate, int channel) override;
+    int reset() override;
+    int uninit() override;
+    int process(float* in_buf, float* out_buf, int length) override;
+    int get_latency_ms() override;
+    int set_params(AE_PARAMS* param) override;
+    int get_params(AE_PARAMS* param) override;
+    int get_effectId() override; // unique effect ID
+private:
+    CSlowFlanging* m_slow_flanging_api;
+};
+
+class CAudioSlowFlangingCreator : public ICreator {
+public:
+    CAudioSlowFlangingCreator(int type):ICreator(type){};
+
+public:
+    IAudioEffects* get_inst() override
+    {
+        return new CAudioSlowFlanging();
+    };
+};
+
+
+#endif //AUDIO_EFFECTS_LIB_CAUDIOSLOWFLANGING_H
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_smooth_wrapper/CAudioSmoothWrapper.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_smooth_wrapper/CAudioSmoothWrapper.cpp
new file mode 100644
index 0000000..29344ec
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_smooth_wrapper/CAudioSmoothWrapper.cpp
@@ -0,0 +1,219 @@
+//
+// Created by yangjianli on 2020-01-02.
+//
+
+#include <string.h>
+#include "CAudioSmoothWrapper.h"
+#include "common/utils.h"
+
+CAudioSmoothWrapper::CAudioSmoothWrapper()
+{
+    m_update = false;
+    m_last_ms = 0;
+    m_tmp_in_buf = nullptr;
+    m_tmp_out_buf = nullptr;
+    m_current_iaudio = nullptr;
+    m_new_iaudio = nullptr;
+}
+
+CAudioSmoothWrapper::~CAudioSmoothWrapper()
+{
+    uninit();
+}
+
+int CAudioSmoothWrapper::init(int type, int sample_rate, int channel)
+{
+    m_sample_rate = sample_rate;
+    m_channel = channel;
+    m_current_iaudio = get_inst(type);
+    m_new_iaudio = get_inst(type);
+    if(!m_current_iaudio || !m_new_iaudio)
+    {
+        return AE_ERR_NO_EFFECTS;
+    }
+
+    m_new_iaudio->init(m_sample_rate, m_channel);
+    m_current_iaudio->init(m_sample_rate, m_channel);
+
+    m_update = false;
+    m_last_ms = 0;
+    m_change_status = false;
+
+    m_current_switch = false;
+    m_new_switch = false;
+    m_fade_in = false; // freshly initialized: the effect starts switched off, data is copied straight through
+
+    m_tmp_in_buf = nullptr;
+    m_tmp_out_buf = nullptr;
+    m_tmp_buf_len = 0;
+
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSmoothWrapper::uninit()
+{
+    if(nullptr != m_tmp_in_buf)
+    {
+        delete [] m_tmp_in_buf;
+        m_tmp_in_buf = nullptr;
+    }
+
+    if(nullptr != m_tmp_out_buf)
+    {
+        delete [] m_tmp_out_buf;
+        m_tmp_out_buf = nullptr;
+    }
+
+    if(nullptr != m_current_iaudio)
+    {
+        m_current_iaudio->uninit();
+        delete m_current_iaudio;
+        m_current_iaudio = nullptr;
+    }
+    if(nullptr != m_new_iaudio)
+    {
+        m_new_iaudio->uninit();
+        delete m_new_iaudio;
+        m_new_iaudio = nullptr;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+
+int CAudioSmoothWrapper::set_param(AE_PARAMS *param)
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+
+    m_new_iaudio->set_params(param);
+    m_new_iaudio->reset();
+
+    // flip the flags only after the heavy work, to keep the locked window short
+    m_update = true;
+    m_last_ms = 0;
+    m_change_status = false;
+    m_new_switch = true;
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSmoothWrapper::get_param(AE_PARAMS *param)
+{
+    m_current_iaudio->get_params(param);
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSmoothWrapper::reset()
+{
+    m_new_iaudio->reset();
+    m_current_iaudio->reset();
+    m_last_ms = 0;
+    m_fade_in = true;
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSmoothWrapper::get_latency_ms()
+{
+    return m_current_switch ? m_current_iaudio->get_latency_ms() : 0;
+}
+
+// switch this effect off
+int CAudioSmoothWrapper::close_effect()
+{
+    std::lock_guard<std::mutex> lock(m_mutex);
+    m_new_switch = false;
+    m_update = true;
+    m_last_ms = 0;
+    m_change_status = false;
+    return AE_ERR_SUCCESS;
+}
+
+// length is the total count: samples * channels
+int CAudioSmoothWrapper::process(float *in_buf, float *out_buf, int length)
+{
+    // reuse the scratch buffers across blocks
+    if (length != m_tmp_buf_len) {
+        if (nullptr != m_tmp_in_buf) {
+            delete[] m_tmp_in_buf;
+        }
+        if(nullptr != m_tmp_out_buf)
+        {
+            delete [] m_tmp_out_buf;
+        }
+        m_tmp_buf_len = length;
+        m_tmp_in_buf = new float[m_tmp_buf_len];
+        m_tmp_out_buf = new float[m_tmp_buf_len];
+    }
+
+    // after init/reset: fade in to avoid a step discontinuity
+    if(m_fade_in)
+    {
+        m_fade_in = false;
+        au_float_fade_in(in_buf, length, m_channel);
+    }
+
+    // keep a copy of the input for the incoming effect instance
+    memcpy(m_tmp_in_buf, in_buf, sizeof(float) * length);
+    if(m_current_switch)
+    {
+        m_current_iaudio->process(in_buf, out_buf, length);
+    }else if(in_buf != out_buf)
+    {
+        // effect switched off: pass the data straight through
+        memcpy(out_buf, in_buf, sizeof(float) * length);
+    }
+
+    if(m_update)
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        if(0 == m_last_ms)
+        {
+            au_float_fade_in(m_tmp_in_buf, length, m_channel);
+        }
+        if(m_new_switch)
+        {
+            m_new_iaudio->process(m_tmp_in_buf, m_tmp_out_buf, length);
+        }else
+        {
+            memcpy(m_tmp_out_buf, m_tmp_in_buf, sizeof(float) * length);
+        }
+
+        // once the new effect has settled, cross-fade and swap
+        int nLatency = m_new_switch ? m_new_iaudio->get_latency_ms() : 0;
+        if(m_last_ms >= nLatency)
+        {
+            au_float_fade_in(m_tmp_out_buf, length, m_channel);
+            au_float_cross_fade(m_tmp_out_buf, out_buf,out_buf,
+                    length, m_channel);
+
+            // swap the effect instances
+            IAudioEffects* tmp;
+            tmp = m_new_iaudio;
+            m_new_iaudio = m_current_iaudio;
+            m_current_iaudio = tmp;
+
+            // swap the on/off state
+            m_current_switch = m_new_switch;
+
+            // reset the bookkeeping
+            m_last_ms = 0;
+            m_update = false;
+            m_change_status = true;
+        } else {
+            m_last_ms += float(length * 1.0 / m_channel / m_sample_rate * 1000);
+        }
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioSmoothWrapper::get_change_status()
+{
+    return m_change_status;
+}
+
+int CAudioSmoothWrapper::get_effect_id()
+{
+    return m_current_iaudio->get_effectId();
+}
+
+int CAudioSmoothWrapper::get_switch_status()
+{
+    return m_current_switch;
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_smooth_wrapper/CAudioSmoothWrapper.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_smooth_wrapper/CAudioSmoothWrapper.h
new file mode 100644
index 0000000..92abb40
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_smooth_wrapper/CAudioSmoothWrapper.h
@@ -0,0 +1,57 @@
+//
+// Created by yangjianli on 2020-01-09.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CAUDIOSMOOTHWRAPPER_H
+#define AUDIO_EFFECTS_LIB_CAUDIOSMOOTHWRAPPER_H
+
+#include <mutex>
+#include "IAudioEffects.h"
+
+/**
+ * Wraps an IAudioEffects instance so that parameter changes
+ * cross-fade to a second instance and switch over smoothly,
+ * without audible glitches.
+ */
+
+class CAudioSmoothWrapper
+{
+public:
+    CAudioSmoothWrapper();
+    ~CAudioSmoothWrapper();
+
+public:
+    // type selects which effect to instantiate
+    int init(int type,int sample_rate, int channel);
+    int uninit();
+    int set_param(AE_PARAMS* param);
+    int get_param(AE_PARAMS* param);
+    int reset();
+    int get_latency_ms();
+    int process(float* in_buf, float* out_buf, int length);
+
+    // reports that the switch to the new effect is complete; the caller may only
+    // re-adjust its delay compensation after that, otherwise artifacts appear
+    int get_change_status();
+    int close_effect();     // switch this effect off
+    int get_switch_status();// whether the effect is currently on
+    int get_effect_id();    // unique effect ID
+
+private:
+    IAudioEffects* m_current_iaudio;
+    IAudioEffects* m_new_iaudio;
+
+    int m_sample_rate;
+    int m_channel;
+
+    float m_last_ms;        // time elapsed so far in the current switch-over
+    std::mutex m_mutex;     // guards the swap state
+    bool m_update;          // a switch to the new effect instance is pending
+    bool m_change_status;
+    bool m_current_switch;  // whether the effect is currently on
+    bool m_new_switch;
+    bool m_fade_in;         // fade in after reset
+
+    float* m_tmp_in_buf;
+    float* m_tmp_out_buf;
+    int m_tmp_buf_len;
+};
+#endif //AUDIO_EFFECTS_LIB_CAUDIOSMOOTHWRAPPER_H
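The swap logic is time-based: the new instance runs in parallel on a private copy of the input until it has flushed its own latency, and only then does a cross-fade hand the output over. A worked timeline, assuming blocks of 1024 frames at 44.1 kHz and a new effect reporting 30 ms latency (numbers purely illustrative):

// each process() call adds 1024 / 44100.0 * 1000 ≈ 23.2 ms to m_last_ms
// call 1: m_last_ms = 0    -> 0 < 30: run both instances, m_last_ms becomes 23.2
// call 2: m_last_ms = 23.2 -> 23.2 < 30: keep cross-running, m_last_ms becomes 46.4
// call 3: m_last_ms = 46.4 -> >= 30: fade the new output in, cross-fade the old one
//         out, swap m_new_iaudio/m_current_iaudio; get_change_status() now returns 1.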
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_tone_shift/CAudioToneShift.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_tone_shift/CAudioToneShift.cpp
new file mode 100644
index 0000000..e3dfcd0
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_tone_shift/CAudioToneShift.cpp
@@ -0,0 +1,123 @@
+//
+// Created by yangjianli on 2020-01-15.
+//
+
+#include <string.h>
+#include "CAudioToneShift.h"
+#include "tone_shift/inc/CToneShift.h"
+#include "common/simple_delay/CSimpleDelayWrapper.h"
+static CAudioToneShiftCreator gs_audio_tone_shift_creator = CAudioToneShiftCreator(AE_EFFECT_TYPE_TONE_SHIFT);
+
+CAudioToneShift::CAudioToneShift()
+{
+    m_tone_shift_api = nullptr;
+    m_need_process = false;
+    m_ae_params_tone_shift = nullptr;
+    m_simple_delay = nullptr;
+}
+
+CAudioToneShift::~CAudioToneShift()
+{
+    uninit();
+}
+
+int CAudioToneShift::init(int sample_rate, int channel)
+{
+    m_tone_shift_api = new CToneShift();
+    m_tone_shift_api->init(sample_rate, channel);
+    int samples = int(sample_rate * 1.0 / 1000 * m_tone_shift_api->get_latence());
+    m_simple_delay = new CSimpleDelayWrapper();
+    m_simple_delay->init(samples, channel);
+    m_need_process = false;
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioToneShift::uninit() {
+    if (nullptr != m_tone_shift_api)
+    {
+        m_tone_shift_api->uninit();
+        delete m_tone_shift_api;
+        m_tone_shift_api = nullptr;
+    }
+
+    if(nullptr != m_ae_params_tone_shift)
+    {
+        delete m_ae_params_tone_shift;
+        m_ae_params_tone_shift = nullptr;
+    }
+
+    if (nullptr != m_simple_delay)
+    {
+        delete m_simple_delay;
+        m_simple_delay = nullptr;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioToneShift::get_effectId()
+{
+    return AE_EFFECT_TYPE_TONE_SHIFT;
+}
+
+int CAudioToneShift::get_latency_ms()
+{
+    return m_tone_shift_api->get_latence();
+}
+
+int CAudioToneShift::reset()
+{
+    m_tone_shift_api->reset();
+    m_simple_delay->reset();
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioToneShift::set_params(AE_PARAMS *param)
+{
+    if(param != nullptr)
+    {
+        AE_PARAMS_TONE_SHIFT* tp = (AE_PARAMS_TONE_SHIFT*) param;
+        if(tp->shift_value != 0)
+        {
+            m_need_process = true;
+            m_tone_shift_api->set_shift_value(tp->shift_value);
+            if(nullptr == m_ae_params_tone_shift)
+            {
+                m_ae_params_tone_shift = new AE_PARAMS_TONE_SHIFT();
+                m_ae_params_tone_shift->max_value = MAX_TONE_SHIFT_VALUE;
+                m_ae_params_tone_shift->min_value = MIN_TONE_SHIFT_VALUE;
+            }
+            memcpy(m_ae_params_tone_shift, tp, sizeof(AE_PARAMS_TONE_SHIFT));
+        }
+        else
+        {
+            m_need_process = false;
+        }
+    }else
+    {
+        m_need_process = false;
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioToneShift::get_params(AE_PARAMS *param)
+{
+    if(nullptr != param && nullptr != m_ae_params_tone_shift)
+    {
+        memcpy(param, m_ae_params_tone_shift, sizeof(AE_PARAMS_TONE_SHIFT));
+    }
+    return AE_ERR_SUCCESS;
+}
+
+int CAudioToneShift::process(float *in_buf, float *out_buf, int length)
+{
+    if(in_buf != out_buf)
+    {
+        memcpy(out_buf, in_buf, sizeof(float) * length);
+    }
+
+    if(m_need_process)
+    {
+        return m_tone_shift_api->process(in_buf, length, out_buf, length);
+    }
+    m_simple_delay->process(in_buf, out_buf, length);
+    return AE_ERR_SUCCESS;
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_tone_shift/CAudioToneShift.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_tone_shift/CAudioToneShift.h
new file mode 100644
index 0000000..51d9229
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/audio_tone_shift/CAudioToneShift.h
@@ -0,0 +1,48 @@
+//
+// Created by yangjianli on 2020-01-15.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CAUDIOTONESHIFT_H
+#define AUDIO_EFFECTS_LIB_CAUDIOTONESHIFT_H
+
+#include "IAudioEffects.h"
+
+class CToneShift;
+class CSimpleDelayWrapper;
+class CAudioToneShift : public IAudioEffects
+{
+public:
+    CAudioToneShift();
+    ~CAudioToneShift();
+public:
+    int init(int sample_rate, int channel) override;
+    int reset() override;
+    int uninit() override;
+    int process(float* in_buf, float* out_buf, int length) override;
+    int get_latency_ms() override;
+    int set_params(AE_PARAMS* param) override;
+    int get_params(AE_PARAMS* param) override;
+    int get_effectId() override; // unique effect ID
+
+private:
+    CToneShift* m_tone_shift_api;
+    AE_PARAMS_TONE_SHIFT* m_ae_params_tone_shift;
+    CSimpleDelayWrapper* m_simple_delay;
+    bool m_need_process;
+};
+
+
+class CAudioToneShiftCreator : public ICreator
+{
+
+public:
+    CAudioToneShiftCreator(int type):ICreator(type){};
+
+public:
+    IAudioEffects* get_inst() override
+    {
+        return new CAudioToneShift();
+    };
+};
+
+#endif //AUDIO_EFFECTS_LIB_CAUDIOTONESHIFT_H
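When shift_value is 0 the pitch shifter is bypassed, but the bypassed path is still delayed by the shifter's reported latency so that toggling the shift on or off never moves the voice in time. A worked example with a 44.1 kHz stream and a hypothetical 20 ms shifter latency:

// samples = int(sample_rate / 1000.0 * latency_ms)
//         = int(44100 / 1000.0 * 20) = 882 samples per channel
// m_simple_delay->init(882, channel) then delays the bypassed path by exactly
// the amount the active pitch shifter would, so A/B toggles stay time-aligned.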
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/keep_volume_steady/CKeepVolumeSteady.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/keep_volume_steady/CKeepVolumeSteady.cpp
new file mode 100644
index 0000000..9c4f058
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/keep_volume_steady/CKeepVolumeSteady.cpp
@@ -0,0 +1,40 @@
+//
+// Created by yangjianli on 2020-02-05.
+//
+
+#include "CKeepVolumeSteady.h"
+#include "common/utils.h"
+#include "stdio.h"
+CKeepVolumeSteady::CKeepVolumeSteady()
+{
+    m_channel = 1; // default to mono so a missing init() cannot trigger bugs
+}
+
+CKeepVolumeSteady::~CKeepVolumeSteady()
+{
+
+}
+
+void CKeepVolumeSteady::init(int channel)
+{
+    m_channel = channel;
+    reset();
+}
+
+void CKeepVolumeSteady::reset()
+{
+    m_current_gain = 1.0;
+}
+
+void CKeepVolumeSteady::pre_process(float *buf, int len)
+{
+    m_before_rms = au_calc_rms(buf, len, m_channel);
+}
+
+void CKeepVolumeSteady::after_process(float *buf, int len)
+{
+    float after_rms = au_calc_rms(buf, len, m_channel);
+    float new_gain = after_rms > 0 ? m_before_rms / after_rms : 1.0;
+    au_float_gain_crossfade(m_current_gain, new_gain, buf, len, m_channel);
+    m_current_gain = new_gain;
+}
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/keep_volume_steady/CKeepVolumeSteady.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/keep_volume_steady/CKeepVolumeSteady.h
new file mode 100644
index 0000000..fc8f72c
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/keep_volume_steady/CKeepVolumeSteady.h
@@ -0,0 +1,32 @@
+//
+// Created by yangjianli on 2020-02-05.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CKEEPVOLUMESTEADY_H
+#define AUDIO_EFFECTS_LIB_CKEEPVOLUMESTEADY_H
+
+/**
+ * Keeps overall loudness steady by holding the RMS of the output
+ * equal to the RMS of the input.
+ * The approach works poorly for effects with long tails, e.g. echo.
+ */
+#include "AudioEffectsConf.h"
+class CKeepVolumeSteady {
+public:
+    CKeepVolumeSteady();
+    ~CKeepVolumeSteady();
+
+public:
+    void init(int channel);
+    void reset();
+
+    void pre_process(float* buf, int len);
+    void after_process(float* buf, int len);
+
+private:
+    int m_channel;
+    float m_current_gain;
+    float m_before_rms;
+};
+
+
+#endif //AUDIO_EFFECTS_LIB_CKEEPVOLUMESTEADY_H
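The RMS-matching above reduces to: measure input power, measure output power, scale the output so the two match, and smooth the gain between blocks to avoid zipper noise. A self-contained sketch of the same idea (not the class above; the utils crossfade is replaced by a plain linear ramp for clarity):

#include <cmath>

// RMS over channel 0 of an interleaved buffer, as the library's au_calc_rms does.
static float rms_ch0(const float* buf, int len, int channel)
{
    float acc = 0.f;
    int frames = len / channel;
    for (int i = 0; i < len; i += channel) acc += buf[i] * buf[i];
    return frames > 0 ? std::sqrt(acc / frames) : 0.f;
}

// Scale out so its RMS matches in, ramping from the previous gain.
// Returns the new gain, to be fed back in as prev_gain for the next block.
static float match_rms(const float* in, float* out, int len, int channel, float prev_gain)
{
    float before = rms_ch0(in, len, channel);
    float after  = rms_ch0(out, len, channel);
    float gain   = after > 0.f ? before / after : 1.f;
    for (int i = 0; i < len; i++)
    {
        float t = (float)i / len;                     // 0 -> 1 across the block
        out[i] *= prev_gain + (gain - prev_gain) * t; // linear gain ramp
    }
    return gain;
}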
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/simple_delay/CSimpleDelay.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/simple_delay/CSimpleDelay.cpp
new file mode 100644
index 0000000..536c5de
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/simple_delay/CSimpleDelay.cpp
@@ -0,0 +1,96 @@
+//
+// Created by yangjianli on 2020/12/11.
+//
+
+#include "CSimpleDelay.h"
+#include <string.h>
+int32_t simple_delay_next_power_2(int32_t x)
+{
+    if(x > 0)
+    {
+        x--;
+        x |= x >> 1;
+        x |= x >> 2;
+        x |= x >> 4;
+        x |= x >> 8;
+        x |= x >> 16;
+    }
+    return x + 1;
+}
+
+CSimpleDelay::CSimpleDelay()
+{
+    m_idx = 0;
+    m_mask = 0;
+    m_buffer_len = 0;
+    m_delay_len = 0;
+    m_cache = new float[1];
+    m_cache[0] = 0;
+}
+
+CSimpleDelay::~CSimpleDelay()
+{
+    m_idx = 0;
+    m_mask = 0;
+    m_buffer_len = 0;
+    m_delay_len = 0;
+
+    if(m_cache)
+    {
+        delete [] m_cache;
+        m_cache = NULL;
+    }
+}
+
+void CSimpleDelay::reset()
+{
+    if(m_cache)
+    {
+        memset(m_cache, 0, m_buffer_len * sizeof(float));
+    }
+}
+
+bool CSimpleDelay::set_delay(int delay_len)
+{
+    // delay unchanged: nothing to do
+    if(m_delay_len == delay_len)
+    {
+        return true;
+    }
+
+    if(m_buffer_len > delay_len)
+    {
+        reset();
+        m_delay_len = delay_len;
+        return true;
+    }
+
+    int len = simple_delay_next_power_2(delay_len);
+    float * buffer = new float[len];
+    memset(buffer, 0, len * sizeof(float));
+
+    if(m_cache)
+    {
+        delete [] m_cache;
+        m_cache = NULL;
+    }
+
+    m_idx = 0;
+    m_mask = len - 1;
+    m_buffer_len = len;
+    m_delay_len = delay_len;
+    m_cache = buffer;
+
+    return true;
+}
+
+void CSimpleDelay::process(float * buffer, int len)
+{
+    if(m_delay_len > 0)
+    {
+        for(int i = 0; i < len; i++)
+        {
+            buffer[i] = process(buffer[i]);
+        }
+    }
+}
diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/simple_delay/CSimpleDelay.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/simple_delay/CSimpleDelay.h
new file mode 100644
index 0000000..5a4d627
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/common/simple_delay/CSimpleDelay.h
@@ -0,0 +1,43 @@
+//
+// Created by yangjianli on 2020/12/11.
+//
+
+#ifndef AUDIO_EFFECTS_LIB_CSIMPLEDELAY_H
+#define AUDIO_EFFECTS_LIB_CSIMPLEDELAY_H
+
+#include "stdint.h"
+class CSimpleDelay
+{
+public:
+    CSimpleDelay();
+    ~CSimpleDelay();
+
+public:
+    bool set_delay(int delay_len);
+    void reset();
+    void process(float * buffer, int len);
+    inline float process(float in)
+    {
+        int idx = (m_idx + m_delay_len) & m_mask;
+
+        float out = m_cache[m_idx];
+
+        m_cache[idx] = in;
+
+        m_idx = (m_idx + 1) & m_mask;
+
+        return out;
+    }
+
+private:
+    int m_idx;
+    int m_mask;
+    int m_buffer_len;
+    int m_delay_len;
+    float * m_cache;
+};
+
+
+
+#endif //AUDIO_EFFECTS_LIB_CSIMPLEDELAY_H
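The power-of-two sizing exists so the ring-buffer wrap is a single AND instead of a modulo. A worked example with delay_len = 100 (values illustrative):

// simple_delay_next_power_2(100) = 128, so m_mask = 127
// write index: idx   = (m_idx + 100) & 127  // where the current input sample lands
// read  index: m_idx                        // 100 samples behind the write point
// advance:     m_idx = (m_idx + 1) & 127    // wraps 127 -> 0 with no branch or modulo
// next_power_2 smears the highest set bit of (x - 1) rightward with the shift/or
// cascade, producing all-ones below it (99 -> 127), then adds 1 (-> 128).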
+// + +#include "utils.h" +#include "math.h" + +void au_short_fade_in(short *buffer, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + buffer[i] = (short)(buffer[i] * i / size); + } + } + else + { + for(int i = 0; i < size; i += 2) + { + buffer[i] = (short)(buffer[i] * i / size); + buffer[i + 1] = (short)(buffer[i + 1] * i / size); + } + } +} + +void au_float_fade_in(float *buffer, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + buffer[i] = buffer[i] * i / size; + } + } + else + { + for(int i = 0; i < size; i += 2) + { + buffer[i] = buffer[i] * i / size; + buffer[i + 1] = buffer[i + 1] * i / size; + } + } +} + +void au_short_fade_out(short *buffer, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + buffer[i] = (short)(buffer[i] * (size - i) / size); + } + } + else + { + for(int i = 0; i < size; i += 2) + { + buffer[i] = (short)(buffer[i] * (size - i) / size); + buffer[i + 1] = (short)(buffer[i + 1] * (size - i) / size); + } + } +} + +void au_float_fade_out(float *buffer, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + buffer[i] = buffer[i] * (size - i) / size; + } + } + else + { + for(int i = 0; i < size; i += 2) + { + buffer[i] = buffer[i] * (size - i) / size; + buffer[i + 1] = buffer[i + 1] * (size - i) / size; + } + } +} + + + +void ShortToFloat(short * in, float * out, int num) +{ + for(int i = 0; i < num; i++) + { + out[i] = in[i] / 32768.0f; + } +} + +void au_float_cross_fade(float *fadein_buf, float *fadeout_buf, float *out, int size, int channel) +{ + if(1 == channel) + { + for(int i = 0; i < size; i++) + { + out[i] = fadein_buf[i] + fadeout_buf[i] * (size - i) / size; + } + } + else + { + for(int i = 0; i < size; i += 2) + { + out[i] = fadein_buf[i] + fadeout_buf[i] * (size - i) / size; + out[i + 1] = fadein_buf[i + 1] + fadeout_buf[i + 1] * (size - i) / size; + } + } +} + +float au_calc_rms(float *in, int len, int channel) +{ + float rms = 0; + // 只取用第一个声道数据 + for(int i=0;idst_gain增益平滑增长 +void au_float_gain_crossfade(float src_gain, float dst_gain, float *in, int size, int channel); + +#endif //AUDIO_EFFECTS_LIB_UTILS_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/manager/Manager.cpp b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/manager/Manager.cpp new file mode 100644 index 0000000..7f09124 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/manager/Manager.cpp @@ -0,0 +1,33 @@ +// +// Created by yangjianli on 2020-01-09. 
+// + +#include "Manager.h" +#include "IAudioEffects.h" +#include "map" + +// 通过这个方式保证静态变量的顺序 +// 因为不同的类属于不同的编译单元,不同的编译单元之间,其全局变量的初始化顺序不一致 +// 通过这个方式保证该变量被调用的时候,一定进行过初始化 +static std::map* get_map() +{ + static std::map ae_creator_map; + return &ae_creator_map; +} + +void registered(int type, ICreator* creator) +{ + std::map* ae_map = get_map(); + ae_map->insert(std::make_pair(type, creator)); +} + +IAudioEffects* get_inst(int type) +{ + std::map* ae_map = get_map(); + std::map::iterator it = ae_map->find(type); + if(it != ae_map->end()) + { + return it->second->get_inst(); + } + return nullptr; +} \ No newline at end of file diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/manager/Manager.h b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/manager/Manager.h new file mode 100644 index 0000000..5c40f36 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/src/manager/Manager.h @@ -0,0 +1,16 @@ +// +// Created by yangjianli on 2020-01-09. +// + +#ifndef AUDIO_EFFECTS_LIB_MANAGER_H +#define AUDIO_EFFECTS_LIB_MANAGER_H +#include "AudioEffectsConf.h" +class IAudioEffects; +class ICreator; + + +void registered(int type, ICreator* creator); + +IAudioEffects* get_inst(int type); + +#endif //AUDIO_EFFECTS_LIB_MANAGER_H diff --git a/AutoCoverTool/ref/tools/mixer/audio_effects_lib/toolchain/ios.toolchain.cmake b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/toolchain/ios.toolchain.cmake new file mode 100644 index 0000000..a56ea82 --- /dev/null +++ b/AutoCoverTool/ref/tools/mixer/audio_effects_lib/toolchain/ios.toolchain.cmake @@ -0,0 +1,429 @@ +# This file is part of the ios-cmake project. It was retrieved from +# https://github.com/cristeab/ios-cmake.git, which is a fork of +# https://code.google.com/p/ios-cmake/. Which in turn is based off of +# the Platform/Darwin.cmake and Platform/UnixPaths.cmake files which +# are included with CMake 2.8.4 +# +# The ios-cmake project is licensed under the new BSD license. +# +# Copyright (c) 2014, Bogdan Cristea and LTE Engineering Software, +# Kitware, Inc., Insight Software Consortium. All rights reserved. +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# +# This file is based off of the Platform/Darwin.cmake and +# Platform/UnixPaths.cmake files which are included with CMake 2.8.4 +# It has been altered for iOS development. +# +# Updated by Alex Stewart (alexs.mac@gmail.com) +# +# ***************************************************************************** +# Now maintained by Alexander Widerberg (widerbergaren [at] gmail.com) +# under the BSD-3-Clause license +# ***************************************************************************** +# +# INFORMATION / HELP +# +# The following variables control the behaviour of this toolchain: +# +# IOS_PLATFORM: OS (default) or SIMULATOR or SIMULATOR64 or TVOS or SIMULATOR_TVOS +# OS = Build for iPhoneOS. +# SIMULATOR = Build for x86 i386 iPhone Simulator. +# SIMULATOR64 = Build for x86_64 iPhone Simulator. +# TVOS = Build for AppleTVOS. +# SIMULATOR_TVOS = Build for x86_64 AppleTV Simulator. +# CMAKE_OSX_SYSROOT: Path to the iOS SDK to use. By default this is +# automatically determined from IOS_PLATFORM and xcodebuild, but +# can also be manually specified (although this should not be required). +# CMAKE_IOS_DEVELOPER_ROOT: Path to the Developer directory for the iOS platform +# being compiled for. By default this is automatically determined from +# CMAKE_OSX_SYSROOT, but can also be manually specified (although this should +# not be required). +# ENABLE_BITCODE: (1|0) Enables or disables bitcode support. Default 1 (true) +# ENABLE_ARC: (1|0) Enables or disables ARC support. Default 1 (true, ARC enabled by default) +# ENABLE_VISIBILITY: (1|0) Enables or disables symbol visibility support. Default 0 (false, visibility hidden by default) +# IOS_ARCH: (armv7 armv7s arm64 i386 x86_64) If specified, will override the default architectures for the given IOS_PLATFORM +# OS = armv7 armv7s arm64 +# SIMULATOR = i386 +# SIMULATOR64 = x86_64 +# TVOS = arm64 +# SIMULATOR_TVOS = x86_64 +# +# This toolchain defines the following variables for use externally: +# +# XCODE_VERSION: Version number (not including Build version) of Xcode detected. +# IOS_SDK_VERSION: Version of iOS SDK being used. +# CMAKE_OSX_ARCHITECTURES: Architectures being compiled for (generated from +# IOS_PLATFORM). +# +# This toolchain defines the following macros for use externally: +# +# set_xcode_property (TARGET XCODE_PROPERTY XCODE_VALUE XCODE_VARIANT) +# A convenience macro for setting xcode specific properties on targets. +# Available variants are: All, Release, RelWithDebInfo, Debug, MinSizeRel +# example: set_xcode_property (myioslib IPHONEOS_DEPLOYMENT_TARGET "3.1" "all"). +# +# find_host_package (PROGRAM ARGS) +# A macro used to find executable programs on the host system, not within the +# iOS environment. Thanks to the android-cmake project for providing the +# command. + +# Fix for PThread library not in path +set(CMAKE_THREAD_LIBS_INIT "-lpthread") +set(CMAKE_HAVE_THREADS_LIBRARY 1) +set(CMAKE_USE_WIN32_THREADS_INIT 0) +set(CMAKE_USE_PTHREADS_INIT 1) + +# Get the Xcode version being used. 
+execute_process(COMMAND xcodebuild -version
+    OUTPUT_VARIABLE XCODE_VERSION
+    ERROR_QUIET
+    OUTPUT_STRIP_TRAILING_WHITESPACE)
+string(REGEX MATCH "Xcode [0-9\\.]+" XCODE_VERSION "${XCODE_VERSION}")
+string(REGEX REPLACE "Xcode ([0-9\\.]+)" "\\1" XCODE_VERSION "${XCODE_VERSION}")
+message(STATUS "Building with Xcode version: ${XCODE_VERSION}")
+# Default to building for iPhoneOS if not specified otherwise, and we cannot
+# determine the platform from the CMAKE_OSX_ARCHITECTURES variable. The use
+# of CMAKE_OSX_ARCHITECTURES is such that try_compile() projects can correctly
+# determine the value of IOS_PLATFORM from the root project, as
+# CMAKE_OSX_ARCHITECTURES is propagated to them by CMake.
+if (NOT DEFINED IOS_PLATFORM)
+    if (CMAKE_OSX_ARCHITECTURES)
+        if (CMAKE_OSX_ARCHITECTURES MATCHES ".*arm.*")
+            set(IOS_PLATFORM "OS")
+        elseif (CMAKE_OSX_ARCHITECTURES MATCHES "i386")
+            set(IOS_PLATFORM "SIMULATOR")
+        elseif (CMAKE_OSX_ARCHITECTURES MATCHES "x86_64")
+            set(IOS_PLATFORM "SIMULATOR64")
+        endif()
+    endif()
+    if (NOT IOS_PLATFORM)
+        set(IOS_PLATFORM "OS")
+    endif()
+endif()
+set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING
+    "Type of iOS platform for which to build.")
+# Determine the platform name and architectures for use in xcodebuild commands
+# from the specified IOS_PLATFORM name.
+if (IOS_PLATFORM STREQUAL "OS")
+    set(XCODE_IOS_PLATFORM iphoneos)
+    if(NOT IOS_ARCH)
+        set(IOS_ARCH armv7 armv7s arm64)
+    endif()
+elseif (IOS_PLATFORM STREQUAL "SIMULATOR")
+    set(XCODE_IOS_PLATFORM iphonesimulator)
+    if(NOT IOS_ARCH)
+        set(IOS_ARCH i386)
+    endif()
+elseif(IOS_PLATFORM STREQUAL "SIMULATOR64")
+    set(XCODE_IOS_PLATFORM iphonesimulator)
+    if(NOT IOS_ARCH)
+        set(IOS_ARCH x86_64)
+    endif()
+elseif (IOS_PLATFORM STREQUAL "TVOS")
+    set(XCODE_IOS_PLATFORM appletvos)
+    if(NOT IOS_ARCH)
+        set(IOS_ARCH arm64)
+    endif()
+elseif (IOS_PLATFORM STREQUAL "SIMULATOR_TVOS")
+    set(XCODE_IOS_PLATFORM appletvsimulator)
+    if(NOT IOS_ARCH)
+        set(IOS_ARCH x86_64)
+    endif()
+else()
+    message(FATAL_ERROR "Invalid IOS_PLATFORM: ${IOS_PLATFORM}")
+endif()
+message(STATUS "Configuring iOS build for platform: ${IOS_PLATFORM}, "
+    "architecture(s): ${IOS_ARCH}")
+# If user did not specify the SDK root to use, then query xcodebuild for it.
+if (NOT CMAKE_OSX_SYSROOT)
+    execute_process(COMMAND xcodebuild -version -sdk ${XCODE_IOS_PLATFORM} Path
+        OUTPUT_VARIABLE CMAKE_OSX_SYSROOT
+        ERROR_QUIET
+        OUTPUT_STRIP_TRAILING_WHITESPACE)
+    message(STATUS "Using SDK: ${CMAKE_OSX_SYSROOT} for platform: ${IOS_PLATFORM}")
+endif()
+if (NOT EXISTS ${CMAKE_OSX_SYSROOT})
+    message(FATAL_ERROR "Invalid CMAKE_OSX_SYSROOT: ${CMAKE_OSX_SYSROOT} "
+        "does not exist.")
+endif()
+# Specify minimum version of deployment target.
+if (NOT DEFINED IOS_DEPLOYMENT_TARGET)
+    # Unless specified, SDK version 8.0 is used by default as minimum target version.
+    set(IOS_DEPLOYMENT_TARGET "8.0"
+        CACHE STRING "Minimum iOS version to build for." )
+    message(STATUS "Using the default min-version since IOS_DEPLOYMENT_TARGET not provided!")
+endif()
+# Use bitcode or not
+if (NOT DEFINED ENABLE_BITCODE AND NOT IOS_ARCH MATCHES "((^|, )(i386|x86_64))+")
+    # Unless specified, enable bitcode support by default
+    set(ENABLE_BITCODE TRUE CACHE BOOL "Whether or not to enable bitcode")
+    message(STATUS "Enabling bitcode support by default. ENABLE_BITCODE not provided!")
+endif()
+if (NOT DEFINED ENABLE_BITCODE)
+    message(STATUS "Disabling bitcode support by default on simulators. ENABLE_BITCODE not provided for override!")
ENABLE_BITCODE not provided for override!") +endif() +# Use ARC or not +if (NOT DEFINED ENABLE_ARC) + # Unless specified, enable ARC support by default + set(ENABLE_ARC TRUE CACHE BOOL "Whether or not to enable ARC") + message(STATUS "Enabling ARC support by default. ENABLE_ARC not provided!") +endif() +# Use hidden visibility or not +if (NOT DEFINED ENABLE_VISIBILITY) + # Unless specified, disable symbols visibility by default + set(ENABLE_VISIBILITY FALSE CACHE BOOL "Whether or not to hide symbols (-fvisibility=hidden)") + message(STATUS "Hiding symbols visibility by default. ENABLE_VISIBILITY not provided!") +endif() +# Get the SDK version information. +execute_process(COMMAND xcodebuild -sdk ${CMAKE_OSX_SYSROOT} -version SDKVersion + OUTPUT_VARIABLE IOS_SDK_VERSION + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +# Find the Developer root for the specific iOS platform being compiled for +# from CMAKE_OSX_SYSROOT. Should be ../../ from SDK specified in +# CMAKE_OSX_SYSROOT. There does not appear to be a direct way to obtain +# this information from xcrun or xcodebuild. +if (NOT CMAKE_IOS_DEVELOPER_ROOT) + get_filename_component(IOS_PLATFORM_SDK_DIR ${CMAKE_OSX_SYSROOT} PATH) + get_filename_component(CMAKE_IOS_DEVELOPER_ROOT ${IOS_PLATFORM_SDK_DIR} PATH) +endif() +if (NOT EXISTS ${CMAKE_IOS_DEVELOPER_ROOT}) + message(FATAL_ERROR "Invalid CMAKE_IOS_DEVELOPER_ROOT: " + "${CMAKE_IOS_DEVELOPER_ROOT} does not exist.") +endif() +# Find the C & C++ compilers for the specified SDK. +if (NOT CMAKE_C_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang + OUTPUT_VARIABLE CMAKE_C_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "Using C compiler: ${CMAKE_C_COMPILER}") +endif() +if (NOT CMAKE_CXX_COMPILER) + execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find clang++ + OUTPUT_VARIABLE CMAKE_CXX_COMPILER + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "Using CXX compiler: ${CMAKE_CXX_COMPILER}") +endif() +# Find (Apple's) libtool. +execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find libtool + OUTPUT_VARIABLE IOS_LIBTOOL + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +message(STATUS "Using libtool: ${IOS_LIBTOOL}") +# Configure libtool to be used instead of ar + ranlib to build static libraries. +# This is required on Xcode 7+, but should also work on previous versions of +# Xcode. +set(CMAKE_C_CREATE_STATIC_LIBRARY + "${IOS_LIBTOOL} -static -o ") +set(CMAKE_CXX_CREATE_STATIC_LIBRARY + "${IOS_LIBTOOL} -static -o ") +# Get the version of Darwin (OS X) of the host. +execute_process(COMMAND uname -r + OUTPUT_VARIABLE CMAKE_HOST_SYSTEM_VERSION + ERROR_QUIET + OUTPUT_STRIP_TRAILING_WHITESPACE) +# Standard settings. +set(CMAKE_SYSTEM_NAME Darwin CACHE INTERNAL "") +set(CMAKE_SYSTEM_VERSION ${IOS_SDK_VERSION} CACHE INTERNAL "") +set(UNIX TRUE CACHE BOOL "") +set(APPLE TRUE CACHE BOOL "") +set(IOS TRUE CACHE BOOL "") +set(CMAKE_AR ar CACHE FILEPATH "" FORCE) +set(CMAKE_RANLIB ranlib CACHE FILEPATH "" FORCE) +# Force unset of OS X-specific deployment target (otherwise autopopulated), +# required as of cmake 2.8.10. +set(CMAKE_OSX_DEPLOYMENT_TARGET "" CACHE STRING + "Must be empty for iOS builds." FORCE) +# Set the architectures for which to build. +set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE STRING "Build architecture for iOS") +# Skip the platform compiler checks for cross compiling. 
+set(CMAKE_CXX_COMPILER_FORCED TRUE)
+set(CMAKE_CXX_COMPILER_WORKS TRUE)
+set(CMAKE_C_COMPILER_FORCED TRUE)
+set(CMAKE_C_COMPILER_WORKS TRUE)
+# All iOS/Darwin specific settings - some may be redundant.
+set(CMAKE_SHARED_LIBRARY_PREFIX "lib")
+set(CMAKE_SHARED_LIBRARY_SUFFIX ".dylib")
+set(CMAKE_SHARED_MODULE_PREFIX "lib")
+set(CMAKE_SHARED_MODULE_SUFFIX ".so")
+set(CMAKE_C_COMPILER_ABI ELF)
+set(CMAKE_CXX_COMPILER_ABI ELF)
+set(CMAKE_C_HAS_ISYSROOT 1)
+set(CMAKE_CXX_HAS_ISYSROOT 1)
+set(CMAKE_MODULE_EXISTS 1)
+set(CMAKE_DL_LIBS "")
+set(CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG "-compatibility_version ")
+set(CMAKE_C_OSX_CURRENT_VERSION_FLAG "-current_version ")
+set(CMAKE_CXX_OSX_COMPATIBILITY_VERSION_FLAG "${CMAKE_C_OSX_COMPATIBILITY_VERSION_FLAG}")
+set(CMAKE_CXX_OSX_CURRENT_VERSION_FLAG "${CMAKE_C_OSX_CURRENT_VERSION_FLAG}")
+
+if(IOS_ARCH MATCHES "((^|, )(arm64|x86_64))+")
+    set(CMAKE_C_SIZEOF_DATA_PTR 8)
+    set(CMAKE_CXX_SIZEOF_DATA_PTR 8)
+    message(STATUS "Using a data_ptr size of 8")
+else()
+    set(CMAKE_C_SIZEOF_DATA_PTR 4)
+    set(CMAKE_CXX_SIZEOF_DATA_PTR 4)
+    message(STATUS "Using a data_ptr size of 4")
+endif()
+
+message(STATUS "Building for minimum iOS version: ${IOS_DEPLOYMENT_TARGET}"
+    " (SDK version: ${IOS_SDK_VERSION})")
+# Note that only Xcode 7+ supports the newer more specific:
+# -m${XCODE_IOS_PLATFORM}-version-min flags, older versions of Xcode use:
+# -m(ios/ios-simulator)-version-min instead.
+if (IOS_PLATFORM STREQUAL "OS")
+    if (XCODE_VERSION VERSION_LESS 7.0)
+        set(XCODE_IOS_PLATFORM_VERSION_FLAGS
+            "-mios-version-min=${IOS_DEPLOYMENT_TARGET}")
+    else()
+        # Xcode 7.0+ uses flags we can build directly from XCODE_IOS_PLATFORM.
+        set(XCODE_IOS_PLATFORM_VERSION_FLAGS
+            "-m${XCODE_IOS_PLATFORM}-version-min=${IOS_DEPLOYMENT_TARGET}")
+    endif()
+elseif (IOS_PLATFORM STREQUAL "TVOS")
+    set(XCODE_IOS_PLATFORM_VERSION_FLAGS
+        "-mtvos-version-min=${IOS_DEPLOYMENT_TARGET}")
+elseif (IOS_PLATFORM STREQUAL "SIMULATOR_TVOS")
+    set(XCODE_IOS_PLATFORM_VERSION_FLAGS
+        "-mtvos-simulator-version-min=${IOS_DEPLOYMENT_TARGET}")
+else()
+    # SIMULATOR or SIMULATOR64 both use -mios-simulator-version-min.
+    set(XCODE_IOS_PLATFORM_VERSION_FLAGS
+        "-mios-simulator-version-min=${IOS_DEPLOYMENT_TARGET}")
+endif()
+message(STATUS "Version flags set to: ${XCODE_IOS_PLATFORM_VERSION_FLAGS}")
+
+if (ENABLE_BITCODE)
+    set(BITCODE "-fembed-bitcode")
+    set(HEADER_PAD "")
+    message(STATUS "Enabling bitcode support.")
+else()
+    set(BITCODE "")
+    set(HEADER_PAD "-headerpad_max_install_names")
+    message(STATUS "Disabling bitcode support.")
+endif()
+
+if (ENABLE_ARC)
+    set(FOBJC_ARC "-fobjc-arc")
+    message(STATUS "Enabling ARC support.")
+else()
+    set(FOBJC_ARC "-fno-objc-arc")
+    message(STATUS "Disabling ARC support.")
+endif()
+
+if (NOT ENABLE_VISIBILITY)
+    set(VISIBILITY "-fvisibility=hidden")
+    message(STATUS "Hiding symbols (-fvisibility=hidden).")
+else()
+    set(VISIBILITY "")
+endif()
+
+set(CMAKE_C_FLAGS
+    "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${BITCODE} -fobjc-abi-version=2 ${FOBJC_ARC} ${C_FLAGS}")
+# Hidden visibility is required for C++ on iOS.
+set(CMAKE_CXX_FLAGS
+    "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${BITCODE} ${VISIBILITY} -fvisibility-inlines-hidden -fobjc-abi-version=2 ${FOBJC_ARC} ${CXX_FLAGS}")
+set(CMAKE_CXX_FLAGS_MINSIZEREL "${CMAKE_CXX_FLAGS} -DNDEBUG -Os -fomit-frame-pointer -ffast-math ${BITCODE} ${CXX_FLAGS_MINSIZEREL}")
+set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS} -DNDEBUG -O2 -g -fomit-frame-pointer -ffast-math ${BITCODE} ${CXX_FLAGS_RELWITHDEBINFO}")
+set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS} -DNDEBUG -O3 -fomit-frame-pointer -ffast-math ${BITCODE} ${CXX_FLAGS_RELEASE}")
+set(CMAKE_C_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${C_LINK_FLAGS}")
+set(CMAKE_CXX_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first ${CXX_LINK_FLAGS}")
+
+# In order to ensure that the updated compiler flags are used in try_compile()
+# tests, we have to forcibly set them in the CMake cache, not merely set them
+# in the local scope.
+list(APPEND VARS_TO_FORCE_IN_CACHE
+    CMAKE_C_FLAGS
+    CMAKE_CXX_FLAGS
+    CMAKE_CXX_FLAGS_RELWITHDEBINFO
+    CMAKE_CXX_FLAGS_MINSIZEREL
+    CMAKE_CXX_FLAGS_RELEASE
+    CMAKE_C_LINK_FLAGS
+    CMAKE_CXX_LINK_FLAGS)
+foreach(VAR_TO_FORCE ${VARS_TO_FORCE_IN_CACHE})
+    set(${VAR_TO_FORCE} "${${VAR_TO_FORCE}}" CACHE STRING "" FORCE)
+endforeach()
+
+set(CMAKE_PLATFORM_HAS_INSTALLNAME 1)
+set (CMAKE_SHARED_LINKER_FLAGS "-rpath @executable_path/Frameworks -rpath @loader_path/Frameworks")
+set(CMAKE_SHARED_LIBRARY_CREATE_C_FLAGS "-dynamiclib ${HEADER_PAD}")
+set(CMAKE_SHARED_MODULE_CREATE_C_FLAGS "-bundle ${HEADER_PAD}")
+set(CMAKE_SHARED_MODULE_LOADER_C_FLAG "-Wl,-bundle_loader,")
+set(CMAKE_SHARED_MODULE_LOADER_CXX_FLAG "-Wl,-bundle_loader,")
+set(CMAKE_FIND_LIBRARY_SUFFIXES ".dylib" ".so" ".a")
+
+# Hack: if a new cmake (which uses CMAKE_INSTALL_NAME_TOOL) runs on an old
+# build tree (where install_name_tool was hardcoded) and where
+# CMAKE_INSTALL_NAME_TOOL isn't in the cache and still cmake didn't fail in
+# CMakeFindBinUtils.cmake (because it isn't rerun) hardcode
+# CMAKE_INSTALL_NAME_TOOL here to install_name_tool, so it behaves as it did
+# before, Alex.
+if (NOT DEFINED CMAKE_INSTALL_NAME_TOOL)
+    find_program(CMAKE_INSTALL_NAME_TOOL install_name_tool)
+endif (NOT DEFINED CMAKE_INSTALL_NAME_TOOL)
+
+# Set the find root to the iOS developer roots and to user defined paths.
+set(CMAKE_FIND_ROOT_PATH ${CMAKE_IOS_DEVELOPER_ROOT} ${CMAKE_OSX_SYSROOT}
+    ${CMAKE_PREFIX_PATH} CACHE string "iOS find search path root" FORCE)
+# Default to searching for frameworks first.
+set(CMAKE_FIND_FRAMEWORK FIRST)
+# Set up the default search directories for frameworks.
+set(CMAKE_SYSTEM_FRAMEWORK_PATH
+    ${CMAKE_OSX_SYSROOT}/System/Library/Frameworks
+    ${CMAKE_OSX_SYSROOT}/System/Library/PrivateFrameworks
+    ${CMAKE_OSX_SYSROOT}/Developer/Library/Frameworks)
+# Only search the specified iOS SDK, not the remainder of the host filesystem.
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+# This little macro lets you set any Xcode specific property.
+macro(set_xcode_property TARGET XCODE_PROPERTY XCODE_VALUE XCODE_RELVERSION)
+    set(XCODE_RELVERSION_I "${XCODE_RELVERSION}")
+    if (XCODE_RELVERSION_I STREQUAL "All")
+        set_property(TARGET ${TARGET} PROPERTY
+            XCODE_ATTRIBUTE_${XCODE_PROPERTY} "${XCODE_VALUE}")
+    else()
+        set_property(TARGET ${TARGET} PROPERTY
+            XCODE_ATTRIBUTE_${XCODE_PROPERTY}[variant=${XCODE_RELVERSION_I}] "${XCODE_VALUE}")
+    endif()
+endmacro(set_xcode_property)
+# This macro lets you find executable programs on the host system.
+macro(find_host_package)
+    set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+    set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY NEVER)
+    set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE NEVER)
+    set(IOS FALSE)
+    find_package(${ARGN})
+    set(IOS TRUE)
+    set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM ONLY)
+    set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+    set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+endmacro(find_host_package)
diff --git a/AutoCoverTool/ref/tools/mixer/audio_mixer/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/audio_mixer/CMakeLists.txt
new file mode 100644
index 0000000..22884c4
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_mixer/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_AUDIO_MIXER_SRCS)
+add_library(audio_mixer ${DIR_AUDIO_MIXER_SRCS})
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/audio_mixer/inc/AudioMixer.h b/AutoCoverTool/ref/tools/mixer/audio_mixer/inc/AudioMixer.h
new file mode 100644
index 0000000..57d24fc
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_mixer/inc/AudioMixer.h
@@ -0,0 +1,78 @@
+
+#ifndef __CAUDIO_MIXER_H__
+#define __CAUDIO_MIXER_H__
+
+#define AUMIX_VOLUME_MAX (DEFAULT_VOLUME * 2 * 10)
+#define AUMIX_VOLUME_MIN 0
+#define AUMIX_VOLUME_NO_CHANGE DEFAULT_VOLUME
+
+#define AUMIX_FLT_MIN -1
+#define AUMIX_FLT_MAX 0.99996f
+#define AUMIX_LIMIT_HIGH 0.91556f
+#define AUMIX_LIMIT_LOW -0.91556f
+
+#include "alimiter.h"
+
+class CFastDelay
+{
+public:
+    CFastDelay();
+    ~CFastDelay();
+
+public:
+    bool set_delay(int delay_len);
+    void reset();
+    void process(float * buffer, int len);
+    inline float process(float in)
+    {
+        int idx = (m_idx + m_delay_len) & m_mask;
+
+        m_cache[idx] = in;
+
+        float out = m_cache[m_idx];
+
+        m_idx = (m_idx + 1) & m_mask;
+
+        return out;
+    }
+
+private:
+    int m_idx;
+    int m_mask;
+    int m_buffer_len;
+    int m_delay_len;
+    float * m_cache;
+};
+
+class CAudioMixer
+{
+public:
+    CAudioMixer();
+    ~CAudioMixer();
+
+public:
+    bool init(int fs, int nChannels);
+    int get_latency();
+    void process(float * vocal, float * acc, float * out, int len);
+    int set_acc_delay(int delay_len);
+    void set_vocal_volume(int volume);
+    void set_acc_volume(int volume);
+    void reset();
+    void uninit();
+
+private:
+    void quick_agc(float * buffer, int len, float * max);
+
+private:
+    int m_nChannels;
+    int m_vocal_volume;
+    int m_acc_volume;
+    float m_factor_last[2];
+    int m_current_acc_delay_len;
+    int m_new_acc_delay_len;
+    CFastDelay * m_acc_delay;
+    SUPERSOUND::Alimiter* m_alimiter;
+
+};
+
+#endif // !__CAUDIO_MIXER_H__
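For orientation: the volume parameters in this header are plain integers where DEFAULT_VOLUME (50) means unity gain, since CAudioMixer::process divides the volume by AUMIX_VOLUME_NO_CHANGE. A minimal standalone sketch of that mapping (illustration only, not part of the library):

#include <algorithm>
#include <cstdio>

static float volume_to_gain(int volume)
{
    const int kDefaultVolume = 50;  // AUMIX_VOLUME_NO_CHANGE
    // clamp to [AUMIX_VOLUME_MIN, AUMIX_VOLUME_MAX] = [0, 1000], i.e. up to 20x gain
    volume = std::max(0, std::min(volume, kDefaultVolume * 2 * 10));
    return volume / (float)kDefaultVolume;  // same formula as CAudioMixer::process
}

int main()
{
    std::printf("%.2f %.2f %.2f\n",
                volume_to_gain(25),    // 0.50 -> roughly -6 dB
                volume_to_gain(50),    // 1.00 -> unchanged
                volume_to_gain(100));  // 2.00 -> roughly +6 dB
    return 0;
}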
diff --git a/AutoCoverTool/ref/tools/mixer/audio_mixer/inc/common.h b/AutoCoverTool/ref/tools/mixer/audio_mixer/inc/common.h
new file mode 100644
index 0000000..2a16ec3
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_mixer/inc/common.h
@@ -0,0 +1,149 @@
+//
+// Created by 杨将 on 2017/6/27.
+//
+
+#ifndef __COMMON_H__
+#define __COMMON_H__
+
+//#include
+#include <stdio.h>   // bracketed header names were lost in extraction;
+#include <stdlib.h>  // the standard stdio/stdlib headers are assumed here
+//#include
+
+#ifdef ST_DEBUG
+#include <assert.h>
+#define ASSERT(e) assert(e)
+#else
+#define ASSERT(e)
+#endif
+
+
+// Commonly used macros and constants
+
+// Length (ms) of the ring buffers used for recording, playback and decoding
+#define RECORDER_CIRCLE_BUFFER_TIME ((int)200)
+
+// Default sample rate of vocal data written to file
+#define FILE_VOCAL_SAMPLERATE ((int)44100)
+
+// Seeks closer than this need not actually be performed
+#define SEEK_NO_OPRATE (1.01)
+
+// Base length for file reads
+#define FILE_OPERATE_LEN ((int)2048)
+// Maximum fade length applied inside a vocal file
+#define FILE_FADE_LEN ((int)240)
+// Maximum number of channels; this value must not be changed (channels may still be 1)
+#define MAX_CHANNEL ((int)2)
+
+// Midpoint of the low-level volume scale
+#define DEFAULT_VOLUME ((int)50)
+
+// Baseline loudness for the accompaniment (dB)
+#define DEFAULT_BASELINE_DB ((float)-14.57f)
+
+// Safely close a file
+#ifndef SAFE_CLOSE_FILE
+#define SAFE_CLOSE_FILE(file) \
+{ \
+    if(file) \
+    { \
+        fclose(file); \
+        file = NULL; \
+    } \
+}
+#endif //SAFE_CLOSE_FILE
+
+// Safely free memory
+#ifndef SAFE_FREE
+#define SAFE_FREE(ptr) \
+{ \
+    if(ptr) \
+    { \
+        free(ptr); \
+        ptr = NULL; \
+    } \
+}
+#endif //SAFE_FREE
+
+// Safely delete an object
+#ifndef SAFE_DELETE_OBJ
+#define SAFE_DELETE_OBJ(obj) \
+{ \
+    if(obj) \
+    { \
+        delete obj; \
+        obj = NULL; \
+    } \
+}
+#endif //SAFE_DELETE_OBJ
+
+// Safely uninitialize and delete an object
+#ifndef SAFE_UNINIT_DELETE_OBJ
+#define SAFE_UNINIT_DELETE_OBJ(obj) \
+{ \
+    if(obj) \
+    { \
+        obj->uninit(); \
+        delete obj; \
+        obj = NULL; \
+    } \
+}
+#endif //SAFE_UNINIT_DELETE_OBJ
+
+// Safely close and delete an object
+#ifndef SAFE_CLOSE_DELETE_OBJ
+#define SAFE_CLOSE_DELETE_OBJ(obj) \
+{ \
+    if(obj) \
+    { \
+        obj->close(); \
+        delete obj; \
+        obj = NULL; \
+    } \
+}
+#endif //SAFE_CLOSE_DELETE_OBJ
+
+// Safely delete an array
+#ifndef SAFE_DELETE_ARRAY
+#define SAFE_DELETE_ARRAY(array) \
+{ \
+    if(array) \
+    { \
+        delete [] array; \
+        array = NULL; \
+    } \
+}
+#endif //SAFE_DELETE_ARRAY
+
+// Maximum of two values
+#ifndef GLOBAL_MAX
+#define GLOBAL_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+// Minimum of two values
+#ifndef GLOBAL_MIN
+#define GLOBAL_MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+// Middle value (clamp)
+#ifndef GLOBAL_MID
+#define GLOBAL_MID(a, b, c) (GLOBAL_MAX(a, GLOBAL_MIN(b, c)))
+#endif
+
+// Absolute value
+#ifndef GLOBAL_ABS
+#define GLOBAL_ABS(a) ((a) < 0 ? (-(a)) : (a))
+#endif
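Two of these helpers merit a quick illustration: the SAFE_* macros null the pointer after releasing it, so an accidental second release becomes a no-op, and GLOBAL_MID(a, b, c) clamps b into the range [a, c]. A hypothetical standalone sketch (not part of the repo):

#include <cstdlib>

#define GLOBAL_MAX(a, b) (((a) > (b)) ? (a) : (b))
#define GLOBAL_MIN(a, b) (((a) < (b)) ? (a) : (b))
#define GLOBAL_MID(a, b, c) (GLOBAL_MAX(a, GLOBAL_MIN(b, c)))

int main()
{
    // GLOBAL_MID(lo, x, hi): clamp x into [lo, hi]
    int v = GLOBAL_MID(0, 1200, 1000);  // -> 1000, an AUMIX_VOLUME_MAX-style clamp
    (void)v;

    // SAFE_FREE(p) expands to: if(p) { free(p); p = NULL; }
    float* p = (float*)malloc(16 * sizeof(float));
    if (p) { free(p); p = NULL; }  // releases and nulls the pointer
    if (p) { free(p); p = NULL; }  // second pass is a harmless no-op
    return 0;
}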
+
+
+#ifndef CHECK_FLOAT_EQUAL
+#define CHECK_FLOAT_EQUAL(a, b) (fabs(a - b) < 0.001f)
+#endif
+
+
+//extern GlobParam gGlobparm;
+
+#define TYPE_PLAY_ORIGIN 1
+#define TYPE_PLAY_CORRECTION 2
+
+#endif //__COMMON_H__
diff --git a/AutoCoverTool/ref/tools/mixer/audio_mixer/src/AudioMixer.cpp b/AutoCoverTool/ref/tools/mixer/audio_mixer/src/AudioMixer.cpp
new file mode 100644
index 0000000..808f0fa
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/audio_mixer/src/AudioMixer.cpp
@@ -0,0 +1,318 @@
+
+#include <stdio.h>   // bracketed header names were lost in extraction;
+#include <stdlib.h>  // the standard C headers below are assumed
+#include <string.h>  // memset
+#include <math.h>    // fabs
+#include <new>       // std::nothrow
+#include "AudioMixer.h"
+#include "common.h"
+#include "alimiter.h"
+
+int32_t supersound_next_power_2(int32_t x)
+{
+    if(x > 0)
+    {
+        x--;
+        x |= x >> 1;
+        x |= x >> 2;
+        x |= x >> 4;
+        x |= x >> 8;
+        x |= x >> 16;
+    }
+    return x + 1;
+}
+
+CFastDelay::CFastDelay()
+{
+    m_idx = 0;
+    m_mask = 0;
+    m_buffer_len = 0;
+    m_delay_len = 0;
+    m_cache = new float[1];
+    m_cache[0] = 0;
+}
+
+CFastDelay::~CFastDelay()
+{
+    m_idx = 0;
+    m_mask = 0;
+    m_buffer_len = 0;
+    m_delay_len = 0;
+
+    if(m_cache)
+    {
+        delete [] m_cache;
+        m_cache = NULL;
+    }
+}
+
+void CFastDelay::reset()
+{
+    if(m_cache)
+    {
+        memset(m_cache, 0, m_buffer_len * sizeof(float));
+    }
+}
+
+bool CFastDelay::set_delay(int delay_len)
+{
+    // Nothing to do if the delay length is unchanged
+    if(m_delay_len == delay_len)
+    {
+        return true;
+    }
+
+    if(m_buffer_len > delay_len)
+    {
+        reset();
+        m_delay_len = delay_len;
+        return true;
+    }
+
+    // +1 so a power-of-two delay_len still gets a strictly larger buffer;
+    // with m_buffer_len == delay_len, (m_idx + m_delay_len) & m_mask would
+    // wrap back onto m_idx and the delay would silently become zero.
+    int len = supersound_next_power_2(delay_len + 1);
+    float * buffer = new float[len];
+    memset(buffer, 0, len * sizeof(float));
+
+    if(m_cache)
+    {
+        delete [] m_cache;
+        m_cache = NULL;
+    }
+
+    m_idx = 0;
+    m_mask = len - 1;
+    m_buffer_len = len;
+    m_delay_len = delay_len;
+    m_cache = buffer;
+
+    return true;
+}
+
+void CFastDelay::process(float * buffer, int len)
+{
+    if(m_delay_len > 0)
+    {
+        for(int i = 0; i < len; i++)
+        {
+            buffer[i] = process(buffer[i]);
+        }
+    }
+}
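Aside: CFastDelay rounds its capacity up to a power of two so the ring-buffer wrap is a single AND with (capacity - 1) instead of a modulo. A standalone sketch of the same trick (illustration only, not repo code):

#include <cstdio>

int main()
{
    const int delay = 3;
    const int cap = 8;            // a power of two larger than the delay
    const int mask = cap - 1;
    float cache[cap] = {0};
    int idx = 0;

    for (int n = 1; n <= 6; n++)  // feed samples 1..6
    {
        cache[(idx + delay) & mask] = (float)n;  // write "delay" slots ahead
        float out = cache[idx];                  // read what was written delay steps ago
        idx = (idx + 1) & mask;                  // wrap via AND, not modulo
        std::printf("in=%d out=%g\n", n, out);   // out lags in by 3 samples
    }
    return 0;
}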
+CAudioMixer::CAudioMixer()
+{
+    m_vocal_volume = AUMIX_VOLUME_NO_CHANGE;
+    m_acc_volume = AUMIX_VOLUME_NO_CHANGE;
+    m_factor_last[0] = 1;
+    m_factor_last[1] = 1;
+
+    m_current_acc_delay_len = 0;
+    m_new_acc_delay_len = 0;
+    m_acc_delay = NULL;
+}
+
+CAudioMixer::~CAudioMixer()
+{
+    uninit();
+}
+
+bool CAudioMixer::init(int fs, int nChannels)
+{
+    if ((nChannels != 2) && (nChannels != 1))
+    {
+        return false;
+    }
+
+    m_nChannels = nChannels;
+
+    m_acc_delay = new(std::nothrow) CFastDelay();
+    if(NULL == m_acc_delay)
+    {
+        return false;
+    }
+    m_alimiter = new(std::nothrow) SUPERSOUND::Alimiter();
+    if(NULL == m_alimiter)
+    {
+        return false;
+    }
+    m_alimiter->SetParam(fs, nChannels);
+    return true;
+}
+
+int CAudioMixer::get_latency()
+{
+    return 0;
+}
+
+void CAudioMixer::reset()
+{
+    m_factor_last[0] = 1;
+    m_factor_last[1] = 1;
+
+    if(m_acc_delay)
+    {
+        m_acc_delay->reset();
+    }
+
+//    if(m_alimiter)
+//    {
+//        m_alimiter->Flush();
+//    }
+}
+
+int CAudioMixer::set_acc_delay(int delay_len)
+{
+    if(m_new_acc_delay_len != delay_len)
+    {
+        m_new_acc_delay_len = delay_len;
+    }
+
+    return 0;
+}
+
+void CAudioMixer::process(float * vocal, float * acc, float * out, int len)
+{
+    float vocal_gain = m_vocal_volume / (float)AUMIX_VOLUME_NO_CHANGE;
+    float acc_gain = m_acc_volume / (float)AUMIX_VOLUME_NO_CHANGE;
+
+    float value = 0;
+    float max[2];
+    max[0] = max[1] = 0;
+
+    if(m_new_acc_delay_len != m_current_acc_delay_len)
+    {
+        m_current_acc_delay_len = m_new_acc_delay_len;
+        m_acc_delay->set_delay(m_current_acc_delay_len * m_nChannels);
+    }
+
+    m_acc_delay->process(acc, len);
+
+    for (int i = 0; i < len; i += m_nChannels)
+    {
+        for (int j = 0; j < m_nChannels; j++)
+        {
+            out[i + j] = vocal[i + j] * vocal_gain + acc[i + j] * acc_gain;
+            value = (float)fabs(out[i + j]);
+            if (max[j] < value)
+            {
+                max[j] = value;
+            }
+        }
+    }
+
+    quick_agc(out, len, max);
+    for (int i = 0; i < len; i++)
+    {
+        out[i] = out[i] > AUMIX_FLT_MAX ? AUMIX_FLT_MAX : (out[i] < AUMIX_FLT_MIN ? AUMIX_FLT_MIN : out[i]);
+    }
+//    float* tp = new float[len];
+//    memset(tp, 0, sizeof(float)*len);
+//    m_alimiter->Filter(out, tp, len);
+//    for(int i = 0; i < len; i++)
+//    {
+////        if(fabs(out[i] - tp[i]) > 0.5)
+////        {
+////            continue;
+////        }
+//        out[i] = tp[i];
+//    }
+//    delete [] tp;
+
+
+}
+
+void CAudioMixer::set_vocal_volume(int volume)
+{
+    if (volume == m_vocal_volume)
+        return ;
+
+    if (volume > AUMIX_VOLUME_MAX)
+        volume = AUMIX_VOLUME_MAX;
+    else if (volume < AUMIX_VOLUME_MIN)
+        volume = AUMIX_VOLUME_MIN;
+
+    m_vocal_volume = volume;
+}
+
+void CAudioMixer::set_acc_volume(int volume)
+{
+    if (volume == m_acc_volume)
+        return;
+
+    if (volume > AUMIX_VOLUME_MAX)
+        volume = AUMIX_VOLUME_MAX;
+    else if (volume < AUMIX_VOLUME_MIN)
+        volume = AUMIX_VOLUME_MIN;
+
+    m_acc_volume = volume;
+}
+
+void CAudioMixer::quick_agc(float * buffer, int len, float * max)
+{
+    float fdeta[2];
+    float fts[2];
+    int idx = len / m_nChannels / 8;
+
+    for (int i = 0; i < m_nChannels; i++)
+    {
+        if (max[i] <= AUMIX_LIMIT_HIGH)
+        {
+            fts[i] = m_factor_last[i] + 0.1f;
+            if (fts[i] > 1)
+                fts[i] = 1;
+        }
+        else
+        {
+            fts[i] = AUMIX_LIMIT_HIGH / max[i];
+            if (fts[i] < 0.34f)
+                fts[i] = 0.34f;
+        }
+
+        fdeta[i] = (fts[i] - m_factor_last[i]) / idx;
+    }
+
+    idx *= m_nChannels;
+    for (int i = 0; i < idx; i += m_nChannels)
+    {
+        for (int j = 0; j < m_nChannels; j++)
+        {
+            buffer[i + j] *= m_factor_last[j];
+            m_factor_last[j] += fdeta[j];
+        }
+    }
+    for (int i = idx; i < len; i += m_nChannels)
+    {
+        for (int j = 0; j < m_nChannels; j++)
+        {
+            buffer[i + j] *= m_factor_last[j];
+        }
+    }
+
+    for (int i = 0; i < m_nChannels; i++)
+    {
+        m_factor_last[i] = fts[i];
+    }
+}
+
+void CAudioMixer::uninit()
+{
+    m_vocal_volume = AUMIX_VOLUME_NO_CHANGE;
+    m_acc_volume = AUMIX_VOLUME_NO_CHANGE;
+    m_factor_last[0] = 1;
+    m_factor_last[1] = 1;
+
+    m_current_acc_delay_len = 0;
+    m_new_acc_delay_len = 0;
+
+    if(m_acc_delay)
+    {
+        delete m_acc_delay;
+        m_acc_delay = NULL;
+    }
+
+    if(m_alimiter)
+    {
+        delete m_alimiter;
+        m_alimiter = NULL;
+    }
+}
+
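How the pieces fit together: init with the sample rate and channel count, set integer volumes (50 is unity), then feed interleaved float buffers in [-1, 1] through process. A hypothetical driver (names come from the real header; the buffer contents are made up, real callers read them from wav files):

#include "AudioMixer.h"   // assumes the audio_mixer include path
#include <vector>

int main()
{
    const int fs = 44100, channels = 2, frames = 1024;
    std::vector<float> vocal(frames * channels, 0.1f);  // interleaved samples
    std::vector<float> acc(frames * channels, 0.2f);
    std::vector<float> out(frames * channels, 0.0f);

    CAudioMixer mixer;
    if (!mixer.init(fs, channels))
        return -1;
    mixer.set_vocal_volume(50);  // 50 == DEFAULT_VOLUME == unity gain
    mixer.set_acc_volume(60);    // mild accompaniment boost
    mixer.set_acc_delay(441);    // ~10 ms in frames; scaled by channel count internally

    // len counts interleaved samples, matching CAudioMixer::process
    mixer.process(vocal.data(), acc.data(), out.data(), frames * channels);
    mixer.uninit();
    return 0;
}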
diff --git a/AutoCoverTool/ref/tools/mixer/denoise.cpp b/AutoCoverTool/ref/tools/mixer/denoise.cpp
new file mode 100644
index 0000000..bd49530
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/denoise.cpp
@@ -0,0 +1,225 @@
+//
+// Created by yangjianli on 2019-09-09.
+//
+/**
+ * Takes an input vocal and an accompaniment and mixes them automatically.
+ * gated_loudness: measured loudness of the input
+ * gain: gain expected to reach the baseline loudness
+ */
+#include <iostream>
+#include "WaveFile.h"
+#include <math.h>
+#include "ebur128.h"
+#include "AudioMixer.h"
+#include "alimiter.h"
+#include "waves/inc/WaveFile.h"
+#include "CAudioEffectsChainApi.h"
+#include <string>
+#include "ae_server/CAeServer.h"
+#include <stdio.h>    // the five bracketed headers here were stripped in
+#include <stdlib.h>   // extraction; common standard headers are assumed
+#include <string.h>
+#include <stdint.h>
+#include <vector>
+#include "denoise/webrtc/include/WebrtcDenoise.h"
+
+#define PROC_LEN 1024
+#define DEFAULT_BASELINE_DB (float)-14.57f
+
+int short2float(short *pInBuf, int nLen, float *pOutBuf)
+{
+    for (int i = 0; i < nLen; i++)
+    {
+        pOutBuf[i] = pInBuf[i] * 1.0 / 32768;
+    }
+    return 0;
+}
+
+int float2short(float *pInBuf, int nLen, short *pOutBuf)
+{
+    for (int i = 0; i < nLen; i++)
+    {
+        pOutBuf[i] = int(pInBuf[i] * 32768);
+    }
+    return 0;
+}
+
+/**
+ * Measure the integrated loudness and derive the gain needed to reach the
+ * baseline loudness
+ * @param nChannel
+ * @param nSampleRate
+ * @param pData
+ * @param nLength
+ * @param gated_loudness
+ * @param gain
+ * @return
+ */
+int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness, double &gain)
+{
+    printf("ebur128_init start .. %d\n", nLength);
+    ebur128_state *st = NULL;
+    st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I);
+    if (NULL == st)
+    {
+        return -1;
+    }
+    int nPos = 0;
+    int nTmpLength = 0;
+    int nRet;
+    printf("process start ..\n");
+    while (nPos < nLength)
+    {
+        nTmpLength = PROC_LEN;
+        if (nLength - nPos < PROC_LEN)
+        {
+            nTmpLength = nLength - nPos;
+        }
+        nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel);
+        if (nRet != 0)
+        {
+            ebur128_destroy(&st);   // don't leak the state on error
+            return -2;
+        }
+        nPos += nTmpLength;
+    }
+    printf("process ok..\n");
+    gated_loudness = -1;
+    ebur128_loudness_global(st, &gated_loudness);
+    float db = (DEFAULT_BASELINE_DB - gated_loudness) / 20.f;
+    gain = pow(10, db);
+    printf("gated_loudness = %f db = %f gain = %f\n", gated_loudness, db, gain);
+    ebur128_destroy(&st);
+    return 0;
+}
+
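Worked example of the gain formula above: the dB gap between the baseline (-14.57 LUFS) and the measured loudness is converted to a linear amplitude factor via 10^(dB/20). The numbers below are illustrative:

#include <cstdio>
#include <cmath>

int main()
{
    const double baseline = -14.57;  // DEFAULT_BASELINE_DB, the target loudness
    const double measured = -20.57;  // e.g. a quiet vocal stem
    const double db = (baseline - measured) / 20.0;  // 6 dB gap -> 0.3
    const double gain = std::pow(10.0, db);          // ~1.995x amplitude
    std::printf("boost by %.2f dB -> linear gain %.3f\n", db * 20.0, gain);
    return 0;
}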
+/**
+ * Mix the vocal and the accompaniment
+ * @param pVocalIn
+ * @param pAccIn
+ * @param nLength
+ * @param gainVocal
+ * @param gainAcc
+ * @param pOutBuf
+ * @return
+ */
+int mix(float *pVocalIn, float *pAccIn, int nLength, double gainVocal, double gainAcc, float *pOutBuf,
+        int nSampleRate, int nChannel, int nDelay, std::string effect_file)
+{
+
+    CAudioMixer *cAudioMixer = new CAudioMixer();
+    cAudioMixer->init(nSampleRate, nChannel);
+    cAudioMixer->set_acc_delay(nDelay);
+    cAudioMixer->set_vocal_volume(int(gainVocal * 50));
+    cAudioMixer->set_acc_volume(int(gainAcc * 50));
+
+    int nPos = 0;
+    int nStep = 1024;
+    cAudioMixer->reset();
+    int cnt = 0;
+
+    CAeServer cAeServer;
+    cAeServer.init(nSampleRate, nChannel, nStep / nChannel);
+    AE_PARAMS_IM_EFFECT im_params = {
+            .effect_path = effect_file,
+    };
+    cAeServer.set_params(AE_TYPE_IM_EFFECT, (void *) &im_params);
+
+
+    while (nPos < nLength)
+    {
+        if (nLength - nPos < nStep)
+        {
+            nStep = nLength - nPos;
+        }
+        cnt++;
+        cAeServer.process(pVocalIn + nPos, pVocalIn + nPos, nStep);
+        cAudioMixer->process(pVocalIn + nPos, pAccIn + nPos, pOutBuf + nPos, nStep);
+        nPos += nStep;
+    }
+    cAeServer.uninit();
+    delete cAudioMixer;
+    return 0;
+}
+
+int denoise_webrtc(short *pInBuf, int nLength, int nChannel, int nSampleRate)
+{
+    CWebrtcDenoise cWebrtcDenoise;
+    cWebrtcDenoise.init(nSampleRate, nChannel);
+    float *pTmp = new float[nLength];
+    for (int i = 0; i < nLength; i++)
+    {
+        pTmp[i] = pInBuf[i] * 1.0 / 32768;
+    }
+    cWebrtcDenoise.set_level(kHigh);
+    int nStep = 512 * nChannel;
+
+    // Linear fade-in over the first block to avoid a click at the start
+    for (int i = 0; i < nStep; i++)
+    {
+        pTmp[i] = pTmp[i] * i * 1.0 / nStep;
+    }
+
+    // Process in fixed blocks of 512 samples per channel; a trailing partial
+    // block is left un-denoised
+    for (int i = 0, cnt = 0; i < nLength; i += nStep, cnt++)
+    {
+        if (nLength - i < nStep) continue;
+        cWebrtcDenoise.process(pTmp + i, nStep);
+    }
+
+    for (int i = 0; i < nLength; i++)
+    {
+        pInBuf[i] = short(pTmp[i] * 32768);
+    }
+    delete[] pTmp;
+    return 0;
+}
+
+double calc_power_rate(float *in_data, int32_t in_len, float *ref_data, int32_t ref_len)
+{
+    double in_power = 0;
+    double ref_power = 0;
+    int32_t min_len = in_len > ref_len ? ref_len : in_len;
+    for (int i = 0; i < min_len; i++)
+    {
+        in_power += (in_data[i]) * (in_data[i]);
+        ref_power += (ref_data[i]) * (ref_data[i]);
+    }
+    return ref_power / in_power;
+}
+
+
+int main(int argc, char *argv[])
+{
+    if (argc != 3)
+    {
+        printf("input error! example: ./main vocal_path dst_vocal_path\n");
+        return -1;
+    }
+    std::string sVocal = argv[1];
+    std::string sDstVocal = argv[2];
+
+    // Read the vocal
+    CWaveFile *oWaveFile = new CWaveFile(sVocal.c_str(), false);
+    short *pVocalBuf = new short[oWaveFile->GetTotalFrames() * oWaveFile->GetChannels()];
+    oWaveFile->ReadFrameAsS16(pVocalBuf, oWaveFile->GetTotalFrames());
+
+    // Denoise the vocal
+    denoise_webrtc(pVocalBuf, oWaveFile->GetTotalFrames() * oWaveFile->GetChannels(),
+                   oWaveFile->GetChannels(), oWaveFile->GetSampleRate());
+
+    // Write the result to file
+    printf("write2file nLength:%d path:%s!\n", oWaveFile->GetTotalFrames(), sDstVocal.c_str());
+    CWaveFile *oWaveFile2 = new CWaveFile(sDstVocal.c_str(), true);
+    oWaveFile2->SetSampleFormat(SF_S16);
+    oWaveFile2->SetSampleRate(oWaveFile->GetSampleRate());
+    oWaveFile2->SetChannels(oWaveFile->GetChannels());
+    oWaveFile2->SetupDone();
+    oWaveFile2->WriteFrame(pVocalBuf, oWaveFile->GetTotalFrames());
+
+    delete oWaveFile;
+    delete oWaveFile2;
+
+    delete[] pVocalBuf;
+    return 0;
+}
\ No newline at end of file
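One subtlety in the conversions above: float2short computes int(pInBuf[i] * 32768), and for a sample at exactly full scale that yields 32768, which does not fit in a 16-bit short. A defensive variant, as a sketch:

#include <cstdio>

static short float_to_s16_clamped(float x)
{
    int v = (int)(x * 32768.0f);
    if (v >  32767) v =  32767;   // full-scale positive must be clamped
    if (v < -32768) v = -32768;
    return (short)v;
}

int main()
{
    std::printf("%d %d %d\n",
                float_to_s16_clamped(1.0f),   // 32767, not a wrapped -32768
                float_to_s16_clamped(-1.0f),  // -32768
                float_to_s16_clamped(0.5f));  // 16384
    return 0;
}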
diff --git a/AutoCoverTool/ref/tools/mixer/denoise/CMakeLists.txt b/AutoCoverTool/ref/tools/mixer/denoise/CMakeLists.txt
new file mode 100644
index 0000000..a5b6167
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/denoise/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 2.8)
+project(denoise)
+#set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
+set(NOT_CROSSCOMPILE 0)
+
+include_directories(./)
+add_subdirectory(webrtc)
+if(NOT_CROSSCOMPILE)
+    add_subdirectory(waves)
+    add_executable(test test.cpp)
+    target_link_libraries(test ${LIBRARY_OUTPUT_PATH}/libwebrtc.a
+            ${LIBRARY_OUTPUT_PATH}/libwaves.a)
+endif()
\ No newline at end of file
diff --git a/AutoCoverTool/ref/tools/mixer/denoise/build_android.sh b/AutoCoverTool/ref/tools/mixer/denoise/build_android.sh
new file mode 100755
index 0000000..d24b9a8
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/denoise/build_android.sh
@@ -0,0 +1,64 @@
+#!/bin/sh
+
+# @Time    : 2019-06-18 17:50
+# @Author  : AlanWang
+# @FileName: build_android.sh
+
+# MY_NDK and MY_CMAKE must be changed to the corresponding paths in your own NDK installation
+MY_NDK="/Users/yangjianli/Library/Android/sdk/ndk-bundle"
+MY_CMAKE="/Users/yangjianli/Library/Android/sdk/cmake/3.6.4111459/bin/cmake"
+
+if [ -z "$MY_NDK" ]; then
+    echo "Please set MY_NDK to the Android NDK folder"
+    exit 1
+fi
+
+if [ -z "$MY_CMAKE" ]; then
+    echo "Please set MY_CMAKE to the Android CMake folder"
+    exit 1
+fi
+
+OUTPUT_LIBS="./build/libs/android"
+ANDROID_NATIVE_API_LEVEL="android-16"
+
+# arme_abis=(armeabi armeabi-v7a arm64-v8a x86 x86_64 mips mips64)
+arme_abis=(armeabi-v7a arm64-v8a x86 x86_64)
+
+function build_with_armeabi() {
+    ARME_ABI=$1
+    echo ${ARME_ABI}
+
+    BUILD_DIR="./build/android/${ARME_ABI}"
+    BUILD_REF_DIR="./build/android/${ARME_ABI}/ref"
+    OUTPUT_SO_DIR="${BUILD_DIR}/build/android/libs/${ARME_ABI}"
+
+    PRE_EXE_DIR=$(pwd)
+    echo ${PRE_EXE_DIR}
+
+    ${MY_CMAKE} \
+        -H"./" \
+        -B"${BUILD_DIR}" \
+        -DANDROID_ABI="${ARME_ABI}" \
+        -DANDROID_NDK="${MY_NDK}" \
+        -DCMAKE_LIBRARY_OUTPUT_DIRECTORY="./build/android/libs/${ARME_ABI}" \
+        -DCMAKE_BUILD_TYPE="Release" \
+        -DCMAKE_TOOLCHAIN_FILE="${MY_NDK}/build/cmake/android.toolchain.cmake" \
+        -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} \
+        -DANDROID_TOOLCHAIN="clang" \
+        -DCMAKE_C_FLAGS="-fpic -fexceptions -frtti -Wno-narrowing" \
+        -DCMAKE_CXX_FLAGS="-fpic -fexceptions -frtti -Wno-narrowing" \
+        -DANDROID_STL="c++_static"
+
+    cd ${BUILD_DIR}
+    make
+
+    cd ${PRE_EXE_DIR}
+    mkdir -p ${OUTPUT_LIBS}/${ARME_ABI}/
+    mv ${PRE_EXE_DIR}/lib/* ${OUTPUT_LIBS}/${ARME_ABI}/
+    rm -r ./build/android
+}
+
+for i in ${arme_abis[@]}; do
+    build_with_armeabi $i
+done
diff --git a/AutoCoverTool/ref/tools/mixer/denoise/build_ios.sh b/AutoCoverTool/ref/tools/mixer/denoise/build_ios.sh
new file mode 100755
index 0000000..689d274
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/denoise/build_ios.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+OUTPUT_LIBS="./build/libs/ios"
+
+function build_with_platform_and_armeabi() {
+    PLATFORM=$1
+    ARME_ABI=$2
+    echo ${PLATFORM}
+    echo ${ARME_ABI}
+
+    BUILD_DIR="./build/ios/${ARME_ABI}"
+    PRE_EXE_DIR=$(pwd)
+    echo ${PRE_EXE_DIR}
+
+    cmake \
+        -H"./" \
+        -B"${BUILD_DIR}" \
+        -DCMAKE_BUILD_TYPE="Release" \
+        -DCMAKE_TOOLCHAIN_FILE="./toolchain/ios.toolchain.cmake" \
+        -DIOS_PLATFORM=${PLATFORM} \
+        -DIOS_ARCH=${ARME_ABI}
+
+    # Build the targets
+    cd ${BUILD_DIR}
+    make
+
+    # Move the build artifacts to the output directory
+    cd ${PRE_EXE_DIR}
+    mkdir -p ${OUTPUT_LIBS}/${ARME_ABI}/
+    mv ${PRE_EXE_DIR}/lib/* ${OUTPUT_LIBS}/${ARME_ABI}/
+    rm -r ./build/ios
+}
+
+build_with_platform_and_armeabi "OS" "armv7"
+build_with_platform_and_armeabi "OS" "armv7s"
+build_with_platform_and_armeabi "OS" "arm64"
+
+build_with_platform_and_armeabi "SIMULATOR64" "x86_64"
+build_with_platform_and_armeabi "SIMULATOR" "i386"
diff --git a/AutoCoverTool/ref/tools/mixer/denoise/test.cpp b/AutoCoverTool/ref/tools/mixer/denoise/test.cpp
new file mode 100644
index 0000000..d3b2e3b
--- /dev/null
+++ b/AutoCoverTool/ref/tools/mixer/denoise/test.cpp
@@ -0,0 +1,96 @@
+//
+// Created by yangjianli on 2020-02-13.
+//
+#include <cstdio>
+#include <iostream>
+#include "webrtc/include/WebrtcDenoise.h"
+#include <string>
+#include "waves/inc/WaveFile.h"
+/**
+ * Split the input into four segments and test the four denoise levels.
+ * https://people.xiph.org/~jm/demo/rnnoise/
+ * Noisy test material can be taken from that page.
+ * @param pInBuf
+ * @param nLength
+ * @param nChannel
+ * @param nSampleRate
+ * @return
+ */
+int denoise_webrtc(short* pInBuf, int nLength, int nChannel, int nSampleRate)
+{
+    CWebrtcDenoise cWebrtcDenoise;
+    cWebrtcDenoise.init(nSampleRate, nChannel);
+    float* pTmp = new float[nLength];
+    for(int i = 0; i < nLength; i++)
+    {
+        pTmp[i] = pInBuf[i] * 1.0 / 32768;
+    }
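test.cpp breaks off above; judging from its header comment, the intent is to run each quarter of the input through one of the four denoise levels. A hypothetical sketch of that staging (level names other than kHigh, which appears in denoise.cpp, are invented for illustration):

#include <cstdio>

enum DenoiseLevel { kNone = 0, kLow, kMid, kHigh };  // only kHigh is confirmed by denoise.cpp

static void process_stub(float* buf, int len, DenoiseLevel level)
{
    (void)buf;
    std::printf("processing %d samples at level %d\n", len, (int)level);
}

int main()
{
    const int nLength = 4096;
    float buf[4096] = {0};
    const DenoiseLevel levels[4] = { kNone, kLow, kMid, kHigh };
    const int seg = nLength / 4;
    for (int s = 0; s < 4; s++)
    {
        // one quarter of the audio per denoise level, so the levels
        // can be compared side by side in a single output file
        process_stub(buf + s * seg, seg, levels[s]);
    }
    return 0;
}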