diff --git a/AIMeiSheng/Dockerfile b/AIMeiSheng/Dockerfile
new file mode 100644
index 0000000..49f62d5
--- /dev/null
+++ b/AIMeiSheng/Dockerfile
@@ -0,0 +1,13 @@
+# syntax=docker/dockerfile:1
+
+FROM python:3.10-bullseye
+
+EXPOSE 7865
+
+WORKDIR /app
+
+COPY . .
+
+RUN pip3 install -r requirements.txt
+
+CMD ["python3", "infer-web.py"]
\ No newline at end of file
diff --git a/AIMeiSheng/LICENSE b/AIMeiSheng/LICENSE
new file mode 100644
index 0000000..4bb30b3
--- /dev/null
+++ b/AIMeiSheng/LICENSE
@@ -0,0 +1,22 @@
+MIT License
+
+Copyright (c) 2023 liujing04
+Copyright (c) 2023 源文雨
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
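The Dockerfile above gives the WebUI a one-command deployment path. A minimal build-and-run sketch follows; the `aimeisheng` image tag and the build-context path are illustrative choices, not names from the repo:

```bash
# Build the image from the directory containing the Dockerfile.
docker build -t aimeisheng ./AIMeiSheng

# Start the WebUI; 7865 is the port the image EXPOSEs for infer-web.py.
docker run --rm --gpus all -p 7865:7865 aimeisheng
```

`--gpus all` assumes the NVIDIA container toolkit is installed on the host; drop it to run on CPU only.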
diff --git a/AIMeiSheng/MDXNet.py b/AIMeiSheng/MDXNet.py
new file mode 100644
index 0000000..19164b2
--- /dev/null
+++ b/AIMeiSheng/MDXNet.py
@@ -0,0 +1,272 @@
+import soundfile as sf
+import torch, pdb, os, warnings, librosa
+import numpy as np
+import onnxruntime as ort
+from tqdm import tqdm
+
+dim_c = 4
+
+
+class Conv_TDF_net_trim:
+    def __init__(
+        self, device, model_name, target_name, L, dim_f, dim_t, n_fft, hop=1024
+    ):
+        super(Conv_TDF_net_trim, self).__init__()
+
+        self.dim_f = dim_f
+        self.dim_t = 2**dim_t
+        self.n_fft = n_fft
+        self.hop = hop
+        self.n_bins = self.n_fft // 2 + 1
+        self.chunk_size = hop * (self.dim_t - 1)
+        self.window = torch.hann_window(window_length=self.n_fft, periodic=True).to(
+            device
+        )
+        self.target_name = target_name
+        self.blender = "blender" in model_name
+
+        out_c = dim_c * 4 if target_name == "*" else dim_c
+        self.freq_pad = torch.zeros(
+            [1, out_c, self.n_bins - self.dim_f, self.dim_t]
+        ).to(device)
+
+        self.n = L // 2
+
+    def stft(self, x):
+        x = x.reshape([-1, self.chunk_size])
+        x = torch.stft(
+            x,
+            n_fft=self.n_fft,
+            hop_length=self.hop,
+            window=self.window,
+            center=True,
+            return_complex=True,
+        )
+        x = torch.view_as_real(x)
+        x = x.permute([0, 3, 1, 2])
+        x = x.reshape([-1, 2, 2, self.n_bins, self.dim_t]).reshape(
+            [-1, dim_c, self.n_bins, self.dim_t]
+        )
+        return x[:, :, : self.dim_f]
+
+    def istft(self, x, freq_pad=None):
+        freq_pad = (
+            self.freq_pad.repeat([x.shape[0], 1, 1, 1])
+            if freq_pad is None
+            else freq_pad
+        )
+        x = torch.cat([x, freq_pad], -2)
+        c = 4 * 2 if self.target_name == "*" else 2
+        x = x.reshape([-1, c, 2, self.n_bins, self.dim_t]).reshape(
+            [-1, 2, self.n_bins, self.dim_t]
+        )
+        x = x.permute([0, 2, 3, 1])
+        x = x.contiguous()
+        x = torch.view_as_complex(x)
+        x = torch.istft(
+            x, n_fft=self.n_fft, hop_length=self.hop, window=self.window, center=True
+        )
+        return x.reshape([-1, c, self.chunk_size])
+
+
+def get_models(device, dim_f, dim_t, n_fft):
+    return Conv_TDF_net_trim(
+        device=device,
+        model_name="Conv-TDF",
+        target_name="vocals",
+        L=11,
+        dim_f=dim_f,
+        dim_t=dim_t,
+        n_fft=n_fft,
+    )
+
+
+warnings.filterwarnings("ignore")
+cpu = torch.device("cpu")
+if torch.cuda.is_available():
+    device = torch.device("cuda:0")
+elif torch.backends.mps.is_available():
+    device = torch.device("mps")
+else:
+    device = torch.device("cpu")
+
+
+class Predictor:
+    def __init__(self, args):
+        self.args = args
+        self.model_ = get_models(
+            device=cpu, dim_f=args.dim_f, dim_t=args.dim_t, n_fft=args.n_fft
+        )
+        self.model = ort.InferenceSession(
+            os.path.join(args.onnx, self.model_.target_name + ".onnx"),
+            providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+        )
+        print("onnx load done")
+
+    def demix(self, mix):
+        samples = mix.shape[-1]
+        margin = self.args.margin
+        chunk_size = self.args.chunks * 44100
+        assert margin != 0, "margin cannot be zero!"
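+        # The mix is cut into windows of `chunks * 44100` samples with `margin`
+        # samples of shared context on each side; demix_base later trims those
+        # margins again so the per-chunk outputs concatenate without seams.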
+ if margin > chunk_size: + margin = chunk_size + + segmented_mix = {} + + if self.args.chunks == 0 or samples < chunk_size: + chunk_size = samples + + counter = -1 + for skip in range(0, samples, chunk_size): + counter += 1 + + s_margin = 0 if counter == 0 else margin + end = min(skip + chunk_size + margin, samples) + + start = skip - s_margin + + segmented_mix[skip] = mix[:, start:end].copy() + if end == samples: + break + + sources = self.demix_base(segmented_mix, margin_size=margin) + """ + mix:(2,big_sample) + segmented_mix:offset->(2,small_sample) + sources:(1,2,big_sample) + """ + return sources + + def demix_base(self, mixes, margin_size): + chunked_sources = [] + progress_bar = tqdm(total=len(mixes)) + progress_bar.set_description("Processing") + for mix in mixes: + cmix = mixes[mix] + sources = [] + n_sample = cmix.shape[1] + model = self.model_ + trim = model.n_fft // 2 + gen_size = model.chunk_size - 2 * trim + pad = gen_size - n_sample % gen_size + mix_p = np.concatenate( + (np.zeros((2, trim)), cmix, np.zeros((2, pad)), np.zeros((2, trim))), 1 + ) + mix_waves = [] + i = 0 + while i < n_sample + pad: + waves = np.array(mix_p[:, i : i + model.chunk_size]) + mix_waves.append(waves) + i += gen_size + mix_waves = torch.tensor(mix_waves, dtype=torch.float32).to(cpu) + with torch.no_grad(): + _ort = self.model + spek = model.stft(mix_waves) + if self.args.denoise: + spec_pred = ( + -_ort.run(None, {"input": -spek.cpu().numpy()})[0] * 0.5 + + _ort.run(None, {"input": spek.cpu().numpy()})[0] * 0.5 + ) + tar_waves = model.istft(torch.tensor(spec_pred)) + else: + tar_waves = model.istft( + torch.tensor(_ort.run(None, {"input": spek.cpu().numpy()})[0]) + ) + tar_signal = ( + tar_waves[:, :, trim:-trim] + .transpose(0, 1) + .reshape(2, -1) + .numpy()[:, :-pad] + ) + + start = 0 if mix == 0 else margin_size + end = None if mix == list(mixes.keys())[::-1][0] else -margin_size + if margin_size == 0: + end = None + sources.append(tar_signal[:, start:end]) + + progress_bar.update(1) + + chunked_sources.append(sources) + _sources = np.concatenate(chunked_sources, axis=-1) + # del self.model + progress_bar.close() + return _sources + + def prediction(self, m, vocal_root, others_root, format): + os.makedirs(vocal_root, exist_ok=True) + os.makedirs(others_root, exist_ok=True) + basename = os.path.basename(m) + mix, rate = librosa.load(m, mono=False, sr=44100) + if mix.ndim == 1: + mix = np.asfortranarray([mix, mix]) + mix = mix.T + sources = self.demix(mix.T) + opt = sources[0].T + if format in ["wav", "flac"]: + sf.write( + "%s/%s_main_vocal.%s" % (vocal_root, basename, format), mix - opt, rate + ) + sf.write("%s/%s_others.%s" % (others_root, basename, format), opt, rate) + else: + path_vocal = "%s/%s_main_vocal.wav" % (vocal_root, basename) + path_other = "%s/%s_others.wav" % (others_root, basename) + sf.write(path_vocal, mix - opt, rate) + sf.write(path_other, opt, rate) + if os.path.exists(path_vocal): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path_vocal, path_vocal[:-4] + ".%s" % format) + ) + if os.path.exists(path_other): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path_other, path_other[:-4] + ".%s" % format) + ) + + +class MDXNetDereverb: + def __init__(self, chunks): + self.onnx = "uvr5_weights/onnx_dereverb_By_FoxJoy" + self.shifts = 10 #'Predict with randomised equivariant stabilisation' + self.mixing = "min_mag" # ['default','min_mag','max_mag'] + self.chunks = chunks + self.margin = 44100 + self.dim_t = 9 + self.dim_f = 3072 + self.n_fft = 6144 + self.denoise = True 
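+        # With denoise=True, demix_base runs the ONNX model twice (on the
+        # spectrogram and its negation) and averages the two passes, trading
+        # roughly double the compute for a slightly cleaner estimate.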
+        self.pred = Predictor(self)
+
+    def _path_audio_(self, input, vocal_root, others_root, format):
+        self.pred.prediction(input, vocal_root, others_root, format)
+
+
+if __name__ == "__main__":
+    dereverb = MDXNetDereverb(15)
+    from time import time as ttime
+
+    t0 = ttime()
+    dereverb._path_audio_(
+        "雪雪伴奏对消HP5.wav",
+        "vocal",
+        "others",
+        "wav",  # output format; _path_audio_ takes four arguments
+    )
+    t1 = ttime()
+    print(t1 - t0)
+
+
+"""
+
+runtime\python.exe MDXNet.py
+
+6G:
+15/9:0.8G->6.8G
+14:0.8G->6.5G
+25:炸
+
+half15:0.7G->6.6G,22.69s
+fp32-15:0.7G->6.6G,20.85s
+
+"""
diff --git "a/AIMeiSheng/MIT\345\215\217\350\256\256\346\232\250\347\233\270\345\205\263\345\274\225\347\224\250\345\272\223\345\215\217\350\256\256" "b/AIMeiSheng/MIT\345\215\217\350\256\256\346\232\250\347\233\270\345\205\263\345\274\225\347\224\250\345\272\223\345\215\217\350\256\256"
new file mode 100644
index 0000000..dbb6c6d
--- /dev/null
+++ "b/AIMeiSheng/MIT\345\215\217\350\256\256\346\232\250\347\233\270\345\205\263\345\274\225\347\224\250\345\272\223\345\215\217\350\256\256"
@@ -0,0 +1,45 @@
+本软件及其相关代码以MIT协议开源,作者不对软件具备任何控制力,使用软件者、传播软件导出的声音者自负全责。
+如不认可该条款,则不能使用或引用软件包内任何代码和文件。
+
+特此授予任何获得本软件和相关文档文件(以下简称“软件”)副本的人免费使用、复制、修改、合并、出版、分发、再授权和/或销售本软件的权利,以及授予本软件所提供的人使用本软件的权利,但须符合以下条件:
+上述版权声明和本许可声明应包含在软件的所有副本或实质部分中。
+软件是“按原样”提供的,没有任何明示或暗示的保证,包括但不限于适销性、适用于特定目的和不侵权的保证。在任何情况下,作者或版权持有人均不承担因软件或软件的使用或其他交易而产生、产生或与之相关的任何索赔、损害赔偿或其他责任,无论是在合同诉讼、侵权诉讼还是其他诉讼中。
+
+
+The LICENCEs for related libraries are as follows.
+相关引用库协议如下:
+
+ContentVec
+https://github.com/auspicious3000/contentvec/blob/main/LICENSE
+MIT License
+
+VITS
+https://github.com/jaywalnut310/vits/blob/main/LICENSE
+MIT License
+
+HIFIGAN
+https://github.com/jik876/hifi-gan/blob/master/LICENSE
+MIT License
+
+gradio
+https://github.com/gradio-app/gradio/blob/main/LICENSE
+Apache License 2.0
+
+ffmpeg
+https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv3
+https://github.com/BtbN/FFmpeg-Builds/releases/download/autobuild-2021-02-28-12-32/ffmpeg-n4.3.2-160-gfbb9368226-win64-lgpl-4.3.zip
+LGPLv3 License
+MIT License
+
+ultimatevocalremovergui
+https://github.com/Anjok07/ultimatevocalremovergui/blob/master/LICENSE
+https://github.com/yang123qwe/vocal_separation_by_uvr5
+MIT License
+
+audio-slicer
+https://github.com/openvpi/audio-slicer/blob/main/LICENSE
+MIT License
+
+PySimpleGUI
+https://github.com/PySimpleGUI/PySimpleGUI/blob/master/license.txt
+LGPLv3 License
diff --git a/AIMeiSheng/README.md b/AIMeiSheng/README.md
new file mode 100644
index 0000000..08aebdb
--- /dev/null
+++ b/AIMeiSheng/README.md
@@ -0,0 +1,137 @@
+<div align="center">
+
+<h1>Retrieval-based-Voice-Conversion-WebUI</h1>
+一个基于VITS的简单易用的语音转换(变声器)框架<br><br>
+
+[![madewithlove](https://img.shields.io/badge/made_with-%E2%9D%A4-red?style=for-the-badge&labelColor=orange)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
+
+[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)
+[![Licence](https://img.shields.io/badge/LICENSE-MIT-green.svg?style=for-the-badge)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/LICENSE)
+[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)
+
+[![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk)
+
+[**更新日志**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_CN.md) | [**常见问题解答**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/%E5%B8%B8%E8%A7%81%E9%97%AE%E9%A2%98%E8%A7%A3%E7%AD%94) | [**AutoDL·5毛钱训练AI歌手**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/Autodl%E8%AE%AD%E7%BB%83RVC%C2%B7AI%E6%AD%8C%E6%89%8B%E6%95%99%E7%A8%8B) | [**对照实验记录**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/%E5%AF%B9%E7%85%A7%E5%AE%9E%E9%AA%8C%C2%B7%E5%AE%9E%E9%AA%8C%E8%AE%B0%E5%BD%95) | [**在线演示**](https://huggingface.co/spaces/Ricecake123/RVC-demo)
+
+</div>
+ +------ + +[**English**](./docs/README.en.md) | [**中文简体**](./README.md) | [**日本語**](./docs/README.ja.md) | [**한국어**](./docs/README.ko.md) ([**韓國語**](./docs/README.ko.han.md)) + +点此查看我们的[演示视频](https://www.bilibili.com/video/BV1pm4y1z7Gm/) ! + +> 使用了RVC的实时语音转换: [w-okada/voice-changer](https://github.com/w-okada/voice-changer) + +> 使用了RVC变声器训练的人声转木吉他模型在线demo :https://huggingface.co/spaces/lj1995/vocal2guitar + +> RVC人声转吉他效果展示视频 :https://www.bilibili.com/video/BV19W4y1D7tT/ + +> 底模使用接近50小时的开源高质量VCTK训练集训练,无版权方面的顾虑,请大家放心使用 + +> 后续会陆续加入高质量有授权歌声训练集训练底模 + +## 简介 +本仓库具有以下特点 ++ 使用top1检索替换输入源特征为训练集特征来杜绝音色泄漏 ++ 即便在相对较差的显卡上也能快速训练 ++ 使用少量数据进行训练也能得到较好结果(推荐至少收集10分钟低底噪语音数据) ++ 可以通过模型融合来改变音色(借助ckpt处理选项卡中的ckpt-merge) ++ 简单易用的网页界面 ++ 可调用UVR5模型来快速分离人声和伴奏 ++ 使用最先进的[人声音高提取算法InterSpeech2023-RMVPE](#参考项目)根绝哑音问题。效果最好(显著地)但比crepe_full更快、资源占用更小 + +## 环境配置 +以下指令需在 Python 版本大于3.8的环境中执行。 + +(Windows/Linux) +首先通过 pip 安装主要依赖: +```bash +# 安装Pytorch及其核心依赖,若已安装则跳过 +# 参考自: https://pytorch.org/get-started/locally/ +pip install torch torchvision torchaudio + +#如果是win系统+Nvidia Ampere架构(RTX30xx),根据 #21 的经验,需要指定pytorch对应的cuda版本 +#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 +``` + +可以使用 poetry 来安装依赖: +```bash +# 安装 Poetry 依赖管理工具, 若已安装则跳过 +# 参考自: https://python-poetry.org/docs/#installation +curl -sSL https://install.python-poetry.org | python3 - + +# 通过poetry安装依赖 +poetry install +``` + +你也可以通过 pip 来安装依赖: +```bash +pip install -r requirements.txt +``` + +------ +Mac 用户可以通过 `run.sh` 来安装依赖: +```bash +sh ./run.sh +``` + +## 其他预模型准备 +RVC需要其他一些预模型来推理和训练。 + +你可以从我们的[Hugging Face space](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)下载到这些模型。 + +以下是一份清单,包括了所有RVC所需的预模型和其他文件的名称: +```bash +hubert_base.pt + +./pretrained + +./uvr5_weights + +想测试v2版本模型的话,需要额外下载 + +./pretrained_v2 + +如果你正在使用Windows,则你可能需要这个文件,若ffmpeg和ffprobe已安装则跳过; ubuntu/debian 用户可以通过apt install ffmpeg来安装这2个库, Mac 用户则可以通过brew install ffmpeg来安装 (需要预先安装brew) + +./ffmpeg + +https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffmpeg.exe + +./ffprobe + +https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/ffprobe.exe + +如果你想使用最新的RMVPE人声音高提取算法,则你需要下载音高提取模型参数并放置于RVC根目录 + +https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/rmvpe.pt + +``` +之后使用以下指令来启动WebUI: +```bash +python infer-web.py +``` + +如果你正在使用Windows 或 macOS,你可以直接下载并解压`RVC-beta.7z`,前者可以运行`go-web.bat`以启动WebUI,后者则运行命令`sh ./run.sh`以启动WebUI。 + +仓库内还有一份`小白简易教程.doc`以供参考。 + +## 参考项目 ++ [ContentVec](https://github.com/auspicious3000/contentvec/) ++ [VITS](https://github.com/jaywalnut310/vits) ++ [HIFIGAN](https://github.com/jik876/hifi-gan) ++ [Gradio](https://github.com/gradio-app/gradio) ++ [FFmpeg](https://github.com/FFmpeg/FFmpeg) ++ [Ultimate Vocal Remover](https://github.com/Anjok07/ultimatevocalremovergui) ++ [audio-slicer](https://github.com/openvpi/audio-slicer) ++ [Vocal pitch extraction:RMVPE](https://github.com/Dream-High/RMVPE) + + The pretrained model is trained and tested by [yxlllc](https://github.com/yxlllc/RMVPE) and [RVC-Boss](https://github.com/RVC-Boss). + +## 感谢所有贡献者作出的努力 + + + diff --git a/AIMeiSheng/RawNet3/README.md b/AIMeiSheng/RawNet3/README.md new file mode 100644 index 0000000..ab56efb --- /dev/null +++ b/AIMeiSheng/RawNet3/README.md @@ -0,0 +1,29 @@ +## Usage + +RawNet3 is hosted via two repositories. +Inference of any utterance with 16k 16bit mono format and Vox1-O benchmark is +supported in this repository. + +Training recipe, on the other hand, will be supported in +https://github.com/clovaai/voxceleb_trainer. 
+
+Model weight parameters are served by huggingface at
+https://huggingface.co/jungjee/RawNet3, which is used as a submodule here.
+
+To download the model, run:
+`git submodule update --init --recursive`
+
+### Single utterance inference
+Run: `python infererence.py --inference_utterance --input {YOUR_INPUT_FILE}`
+
+Optionally, `--out_dir` can be set to choose where the extracted speaker embedding is saved. (default: `./out.npy`)
+
+### Benchmark on the Vox1-O evaluation protocol
+Run: `python infererence.py --vox1_o_benchmark --DB_dir {YOUR_VOXCELEB1_DIR}`
+
+Note that `DB_dir` should point to the VoxCeleb1 dataset directory.
+For example, if `DB_dir`="/home/abc/db/VoxCeleb1",
+the VoxCeleb1 folder is expected to contain 1,251 subfolders, corresponding to the 1,251 speakers of the VoxCeleb1 dataset.
+
+If you successfully run the benchmark, you will get:
+`Vox1-O benchmark Finished. EER: 0.8932, minDCF:0.06690`.
diff --git a/AIMeiSheng/RawNet3/__init__.py b/AIMeiSheng/RawNet3/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/AIMeiSheng/RawNet3/__pycache__/__init__.cpython-38.pyc b/AIMeiSheng/RawNet3/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000..b977952
Binary files /dev/null and b/AIMeiSheng/RawNet3/__pycache__/__init__.cpython-38.pyc differ
diff --git a/AIMeiSheng/RawNet3/__pycache__/cal_cos_distance_folder.cpython-38.pyc b/AIMeiSheng/RawNet3/__pycache__/cal_cos_distance_folder.cpython-38.pyc
new file mode 100644
index 0000000..02da763
Binary files /dev/null and b/AIMeiSheng/RawNet3/__pycache__/cal_cos_distance_folder.cpython-38.pyc differ
diff --git a/AIMeiSheng/RawNet3/__pycache__/cal_cos_distance_folder.cpython-39.pyc b/AIMeiSheng/RawNet3/__pycache__/cal_cos_distance_folder.cpython-39.pyc
new file mode 100644
index 0000000..fabcfd3
Binary files /dev/null and b/AIMeiSheng/RawNet3/__pycache__/cal_cos_distance_folder.cpython-39.pyc differ
diff --git a/AIMeiSheng/RawNet3/__pycache__/infererence_fang_meisheng.cpython-38.pyc b/AIMeiSheng/RawNet3/__pycache__/infererence_fang_meisheng.cpython-38.pyc
new file mode 100644
index 0000000..0003ccb
Binary files /dev/null and b/AIMeiSheng/RawNet3/__pycache__/infererence_fang_meisheng.cpython-38.pyc differ
diff --git a/AIMeiSheng/RawNet3/__pycache__/infererence_fang_meisheng.cpython-39.pyc b/AIMeiSheng/RawNet3/__pycache__/infererence_fang_meisheng.cpython-39.pyc
new file mode 100644
index 0000000..be84168
Binary files /dev/null and b/AIMeiSheng/RawNet3/__pycache__/infererence_fang_meisheng.cpython-39.pyc differ
diff --git a/AIMeiSheng/RawNet3/__pycache__/multi_threads_wraper.cpython-39.pyc b/AIMeiSheng/RawNet3/__pycache__/multi_threads_wraper.cpython-39.pyc
new file mode 100644
index 0000000..ca69a96
Binary files /dev/null and b/AIMeiSheng/RawNet3/__pycache__/multi_threads_wraper.cpython-39.pyc differ
diff --git a/AIMeiSheng/RawNet3/__pycache__/utils.cpython-38.pyc b/AIMeiSheng/RawNet3/__pycache__/utils.cpython-38.pyc
new file mode 100644
index 0000000..1bf1e45
Binary files /dev/null and b/AIMeiSheng/RawNet3/__pycache__/utils.cpython-38.pyc differ
diff --git a/AIMeiSheng/RawNet3/__pycache__/utils.cpython-39.pyc b/AIMeiSheng/RawNet3/__pycache__/utils.cpython-39.pyc
new file mode 100644
index 0000000..231937c
Binary files /dev/null and b/AIMeiSheng/RawNet3/__pycache__/utils.cpython-39.pyc differ
diff --git a/AIMeiSheng/RawNet3/cal_cos_distance_folder.py b/AIMeiSheng/RawNet3/cal_cos_distance_folder.py
new file mode 100644
index 0000000..4f7fa61
--- /dev/null
+++ b/AIMeiSheng/RawNet3/cal_cos_distance_folder.py
@@ -0,0 +1,42 @@
+import os
+import sys
+
+import numpy as np
+import torch
+
+
+def l2_norm(s1, s2):
+    # elementwise product summed over the last dim, i.e. a batched dot product
+    norm = torch.sum(s1 * s2, -1, keepdim=True)
+    return norm
+
+
+def cos_distance(s1, s2, eps=1e-8):
+    s1_s2_norm = l2_norm(s1, s2)
+    s2_s2_norm = l2_norm(s2, s2)
+    s1_s1_norm = l2_norm(s1, s1)
+    #print('s1_s1_norm: ', s1_s1_norm)
+    #print('s1_s2_norm: ', s1_s2_norm)
+    #print('s2_s2_norm: ', s2_s2_norm)
+    # cosine similarity <s1, s2> / (|s1| |s2|); higher means more similar
+    loss = s1_s2_norm / (torch.sqrt(s2_s2_norm * s1_s1_norm) + eps)
+    return loss
+
+
+def load_and_cal_distance(npy_name1, npy_name2):
+    spk1_embead = torch.from_numpy(np.load(npy_name1))
+    spk2_embead = torch.from_numpy(np.load(npy_name2))
+    loss = cos_distance(spk1_embead, spk2_embead)
+    print("file:", os.path.basename(npy_name2), "cos distance:", loss)
+    return loss
+
+
+def cal_cos_folder(target_npy, test_folder):
+    all_files = os.listdir(test_folder)
+    for npy_test in all_files:
+        npy_filename = os.path.join(test_folder, npy_test)
+        load_and_cal_distance(target_npy, npy_filename)
+
+
+if __name__ == '__main__':
+    npy_name1 = sys.argv[1]  #"../../test_wav/xiafan_RawNet3/zihao.npy"
+    npy_name2 = sys.argv[2]  #"../../test_wav/xiafan_RawNet3/"
+
+    #load_and_cal_distance(npy_name1, npy_name2)
+    cal_cos_folder(npy_name1, npy_name2)
diff --git a/AIMeiSheng/RawNet3/infererence.py b/AIMeiSheng/RawNet3/infererence.py
new file mode 100644
index 0000000..96a7c8b
--- /dev/null
+++ b/AIMeiSheng/RawNet3/infererence.py
@@ -0,0 +1,183 @@
+import argparse
+import itertools
+import os
+import sys
+from typing import Dict
+
+import numpy as np
+import soundfile as sf
+import torch
+import torch.nn.functional as F
+from tqdm import tqdm
+
+from models.RawNet3 import RawNet3
+from models.RawNetBasicBlock import Bottle2neck
+from utils import tuneThresholdfromScore, ComputeErrorRates, ComputeMinDcf
+
+
+def main(args: Dict) -> None:
+    model = RawNet3(
+        Bottle2neck,
+        model_scale=8,
+        context=True,
+        summed=True,
+        encoder_type="ECA",
+        nOut=256,
+        out_bn=False,
+        sinc_stride=10,
+        log_sinc=True,
+        norm_sinc="mean",
+        grad_mult=1,
+    )
+    gpu = False
+
+    model.load_state_dict(
+        torch.load(
+            "./models/weights/model.pt",
+            map_location=lambda storage, loc: storage,
+        )["model"]
+    )
+    model.eval()
+    print("RawNet3 initialised & weights loaded!")
+
+    if torch.cuda.is_available():
+        print("Cuda available, conducting inference on GPU")
+        model = model.to("cuda")
+        gpu = True
+
+    if args.inference_utterance:
+        output = extract_speaker_embd(
+            model,
+            fn=args.input,
+            n_samples=48000,
+            n_segments=args.n_segments,
+            gpu=gpu,
+        ).mean(0)
+
+        np.save(args.out_dir, output.detach().cpu().numpy())
+        return
+
+    if args.vox1_o_benchmark:
+        with open("../../trials/cleaned_test_list.txt", "r") as f:
+            trials = f.readlines()
+
+        ## Get a list of unique file names
+        files = list(itertools.chain(*[x.strip().split()[-2:] for x in trials]))
+
+        setfiles = list(set(files))
+        setfiles.sort()
+
+        embd_dic = {}
+        for f in tqdm(setfiles):
+            embd_dic[f] = extract_speaker_embd(
+                model, os.path.join(args.DB_dir, f), n_samples=64000, gpu=gpu
+            )
+
+        labels, scores = [], []
+        for line in trials:
+            data = line.split()
+            ref_feat = F.normalize(embd_dic[data[1]], p=2, dim=1)
+            com_feat = F.normalize(embd_dic[data[2]], p=2, dim=1)
+
+            if gpu:
+                ref_feat = ref_feat.cuda()
+                com_feat = com_feat.cuda()
+
+            dist = (
+                torch.cdist(
+                    ref_feat.reshape((args.n_segments, -1)),
+                    com_feat.reshape((args.n_segments, -1)),
+                )
+                .detach()
+                .cpu()
+                .numpy()
+            )
+            score = -1.0 * np.mean(dist)
+            labels.append(int(data[0]))
+            scores.append(score)
+
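+        # Each trial is scored with the negative mean pairwise distance between
+        # the two utterances' L2-normalised segment embeddings, so larger
+        # (less negative) scores mean the speakers are more likely the same.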
+ result = tuneThresholdfromScore(scores, labels, [1, 0.1]) + + fnrs, fprs, thresholds = ComputeErrorRates(scores, labels) + p_target, c_miss, c_fa = 0.05, 1, 1 + mindcf, _ = ComputeMinDcf( + fnrs, fprs, thresholds, p_target, c_miss, c_fa + ) + print( + "Vox1-O benchmark Finished. EER: %2.4f, minDCF:%.5f" + % (result[1], mindcf) + ) + + +def extract_speaker_embd( + model, fn: str, n_samples: int, n_segments: int = 10, gpu: bool = False +) -> np.ndarray: + audio, sample_rate = sf.read(fn) + if len(audio.shape) > 1: + raise ValueError( + f"RawNet3 supports mono input only. Input data has a shape of {audio.shape}." + ) + + if sample_rate != 16000: + raise ValueError( + f"RawNet3 supports 16k sampling rate only. Input data's sampling rate is {sample_rate}." + ) + + if ( + len(audio) < n_samples + ): # RawNet3 was trained using utterances of 3 seconds + shortage = n_samples - len(audio) + 1 + audio = np.pad(audio, (0, shortage), "wrap") + + audios = [] + startframe = np.linspace(0, len(audio) - n_samples, num=n_segments) + for asf in startframe: + audios.append(audio[int(asf) : int(asf) + n_samples]) + + audios = torch.from_numpy(np.stack(audios, axis=0).astype(np.float32)) + if gpu: + audios = audios.to("cuda") + with torch.no_grad(): + output = model(audios) + + return output + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="RawNet3 inference") + + parser.add_argument( + "--inference_utterance", default=False, action="store_true" + ) + parser.add_argument( + "--input", + type=str, + default="", + help="Input file to extract embedding. Required when 'inference_utterance' is True", + ) + parser.add_argument( + "--vox1_o_benchmark", default=False, action="store_true" + ) + parser.add_argument( + "--DB_dir", + type=str, + default="", + help="Directory for VoxCeleb1. 
Required when 'vox1_o_benchmark' is True", + ) + parser.add_argument("--out_dir", type=str, default="./out.npy") + parser.add_argument( + "--n_segments", + type=int, + default=10, + help="number of segments to make using each utterance", + ) + args = parser.parse_args() + + assert args.inference_utterance or args.vox1_o_benchmark + if args.inference_utterance: + assert args.input != "" + + if args.vox1_o_benchmark: + assert args.DB_dir != "" + + sys.exit(main(args)) diff --git a/AIMeiSheng/RawNet3/infererence_fang_meisheng.py b/AIMeiSheng/RawNet3/infererence_fang_meisheng.py new file mode 100644 index 0000000..471f92a --- /dev/null +++ b/AIMeiSheng/RawNet3/infererence_fang_meisheng.py @@ -0,0 +1,269 @@ +import argparse +import itertools +import os +import sys +from typing import Dict + +import numpy as np +import soundfile as sf +import torch +import torch.nn.functional as F +from tqdm import tqdm + +from models.RawNet3 import RawNet3 +from models.RawNetBasicBlock import Bottle2neck +from utils import tuneThresholdfromScore, ComputeErrorRates, ComputeMinDcf +#model_directory = '/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3' +#sys.path.append(os.path.abspath(model_directory)) + +def get_embed_model(): + model = RawNet3( + Bottle2neck, + model_scale=8, + context=True, + summed=True, + encoder_type="ECA", + nOut=256, + out_bn=False, + sinc_stride=10, + log_sinc=True, + norm_sinc="mean", + grad_mult=1, + ) + + + model.load_state_dict( + torch.load( + "/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3/models/weights/model.pt", + map_location=lambda storage, loc: storage, + )["model"] + ) + model.eval() + + return model + +def main(args: Dict, model=None) -> None: + + if model == None: + + model = RawNet3( + Bottle2neck, + model_scale=8, + context=True, + summed=True, + encoder_type="ECA", + nOut=256, + out_bn=False, + sinc_stride=10, + log_sinc=True, + norm_sinc="mean", + grad_mult=1, + ) + + model.load_state_dict( + torch.load( + "./models/weights/model.pt", + map_location=lambda storage, loc: storage, + )["model"] + ) + + model.eval() + + # gpu = False + gpu = True if torch.cuda.is_available() else False + + + #print("RawNet3 initialised & weights loaded!") + + if torch.cuda.is_available(): + #print("Cuda available, conducting inference on GPU") + model = model.to("cuda") + gpu = True + + if args.inference_utterance: + output = extract_speaker_embd( + model, + fn=args.input, + n_samples=48000, + n_segments=args.n_segments, + gpu=gpu, + ).mean(0) + #print("embead shape:", output.size()) + np.save(args.out_dir, output.detach().cpu().numpy()) + + return + + if args.vox1_o_benchmark: + with open("../../trials/cleaned_test_list.txt", "r") as f: + trials = f.readlines() + + ## Get a list of unique file names + files = list(itertools.chain(*[x.strip().split()[-2:] for x in trials])) + + setfiles = list(set(files)) + setfiles.sort() + + embd_dic = {} + for f in tqdm(setfiles): + embd_dic[f] = extract_speaker_embd( + model, os.path.join(args.DB_dir, f), n_samples=64000, gpu=gpu + ) + + labels, scores = [], [] + for line in trials: + data = line.split() + ref_feat = F.normalize(embd_dic[data[1]], p=2, dim=1) + com_feat = F.normalize(embd_dic[data[2]], p=2, dim=1) + + if gpu: + ref_feat = ref_feat.cuda() + com_feat = com_feat.cuda() + + dist = ( + torch.cdist( + ref_feat.reshape((args.n_segments, -1)), + com_feat.reshape((args.n_segments, -1)), + ) + .detach() + .cpu() + .numpy() + ) + score = -1.0 * np.mean(dist) + labels.append(int(data[0])) + scores.append(score) + + result = 
tuneThresholdfromScore(scores, labels, [1, 0.1]) + + fnrs, fprs, thresholds = ComputeErrorRates(scores, labels) + p_target, c_miss, c_fa = 0.05, 1, 1 + mindcf, _ = ComputeMinDcf( + fnrs, fprs, thresholds, p_target, c_miss, c_fa + ) + print( + "Vox1-O benchmark Finished. EER: %2.4f, minDCF:%.5f" + % (result[1], mindcf) + ) + +import librosa +def extract_speaker_embd( + model, fn: str, n_samples: int, n_segments: int = 10, gpu: bool = False +) -> np.ndarray: + #audio, sample_rate = sf.read(fn) + audio, sample_rate = librosa.load(fn,sr=16000) ##fang add + + if len(audio.shape) > 1: + raise ValueError( + f"RawNet3 supports mono input only. Input data has a shape of {audio.shape}." + ) + + if sample_rate != 16000: + raise ValueError( + f"RawNet3 supports 16k sampling rate only. Input data's sampling rate is {sample_rate}." + ) + + if ( + len(audio) < n_samples + ): # RawNet3 was trained using utterances of 3 seconds + shortage = n_samples - len(audio) + 1 + audio = np.pad(audio, (0, shortage), "wrap") + + audios = [] + startframe = np.linspace(0, len(audio) - n_samples, num=n_segments) + for asf in startframe: + audios.append(audio[int(asf) : int(asf) + n_samples]) + + audios = torch.from_numpy(np.stack(audios, axis=0).astype(np.float32)) + if gpu: + audios = audios.to("cuda") + with torch.no_grad(): + output = model(audios) + + return output + +def get_embed(target_wav, embed_npy, model=None): + parser = argparse.ArgumentParser(description="RawNet3 inference") + + parser.add_argument( + "--inference_utterance", default=True, action="store_true" + ) + parser.add_argument( + "--input", + type=str, + default="", + help="Input file to extract embedding. Required when 'inference_utterance' is True", + ) + parser.add_argument( + "--vox1_o_benchmark", default=False, action="store_true" + ) + parser.add_argument( + "--DB_dir", + type=str, + default="", + help="Directory for VoxCeleb1. Required when 'vox1_o_benchmark' is True", + ) + parser.add_argument("--out_dir", type=str, default="./out.npy") + parser.add_argument( + "--n_segments", + type=int, + default=10, + help="number of segments to make using each utterance", + ) + + args = parser.parse_args() + + args.input = target_wav + args.out_dir = embed_npy + + + assert args.inference_utterance or args.vox1_o_benchmark + if args.inference_utterance: + assert args.input != "" + + if args.vox1_o_benchmark: + assert args.DB_dir != "" + + + #sys.exit(main(args,model)) + main(args,model) + + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="RawNet3 inference") + + parser.add_argument( + "--inference_utterance", default=False, action="store_true" + ) + + parser.add_argument( + "--input", + type=str, + default="", + help="Input file to extract embedding. Required when 'inference_utterance' is True", + ) + parser.add_argument( + "--vox1_o_benchmark", default=False, action="store_true" + ) + parser.add_argument( + "--DB_dir", + type=str, + default="", + help="Directory for VoxCeleb1. 
Required when 'vox1_o_benchmark' is True", + ) + parser.add_argument("--out_dir", type=str, default="./out.npy") + parser.add_argument( + "--n_segments", + type=int, + default=10, + help="number of segments to make using each utterance", + ) + args = parser.parse_args() + + assert args.inference_utterance or args.vox1_o_benchmark + if args.inference_utterance: + assert args.input != "" + + if args.vox1_o_benchmark: + assert args.DB_dir != "" + + sys.exit(main(args)) diff --git a/AIMeiSheng/RawNet3/models/RawNet3.py b/AIMeiSheng/RawNet3/models/RawNet3.py new file mode 100644 index 0000000..a611dea --- /dev/null +++ b/AIMeiSheng/RawNet3/models/RawNet3.py @@ -0,0 +1,144 @@ +# -*- encoding: utf-8 -*- + +import torch +import torch.nn as nn +from asteroid_filterbanks import Encoder, ParamSincFB + +from models.RawNetBasicBlock import Bottle2neck, PreEmphasis + + +class RawNet3(nn.Module): + def __init__(self, block, model_scale, context, summed, C=1024, **kwargs): + super().__init__() + + nOut = kwargs["nOut"] + + self.context = context + self.encoder_type = kwargs["encoder_type"] + self.log_sinc = kwargs["log_sinc"] + self.norm_sinc = kwargs["norm_sinc"] + self.out_bn = kwargs["out_bn"] + self.summed = summed + + self.preprocess = nn.Sequential( + PreEmphasis(), nn.InstanceNorm1d(1, eps=1e-4, affine=True) + ) + self.conv1 = Encoder( + ParamSincFB( + C // 4, + 251, + stride=kwargs["sinc_stride"], + ) + ) + self.relu = nn.ReLU() + self.bn1 = nn.BatchNorm1d(C // 4) + + self.layer1 = block( + C // 4, C, kernel_size=3, dilation=2, scale=model_scale, pool=5 + ) + self.layer2 = block( + C, C, kernel_size=3, dilation=3, scale=model_scale, pool=3 + ) + self.layer3 = block(C, C, kernel_size=3, dilation=4, scale=model_scale) + self.layer4 = nn.Conv1d(3 * C, 1536, kernel_size=1) + + if self.context: + attn_input = 1536 * 3 + else: + attn_input = 1536 + print("self.encoder_type", self.encoder_type) + if self.encoder_type == "ECA": + attn_output = 1536 + elif self.encoder_type == "ASP": + attn_output = 1 + else: + raise ValueError("Undefined encoder") + + self.attention = nn.Sequential( + nn.Conv1d(attn_input, 128, kernel_size=1), + nn.ReLU(), + nn.BatchNorm1d(128), + nn.Conv1d(128, attn_output, kernel_size=1), + nn.Softmax(dim=2), + ) + + self.bn5 = nn.BatchNorm1d(3072) + + self.fc6 = nn.Linear(3072, nOut) + self.bn6 = nn.BatchNorm1d(nOut) + + self.mp3 = nn.MaxPool1d(3) + + def forward(self, x): + """ + :param x: input mini-batch (bs, samp) + """ + + with torch.cuda.amp.autocast(enabled=False): + x = self.preprocess(x) + x = torch.abs(self.conv1(x)) + if self.log_sinc: + x = torch.log(x + 1e-6) + if self.norm_sinc == "mean": + x = x - torch.mean(x, dim=-1, keepdim=True) + elif self.norm_sinc == "mean_std": + m = torch.mean(x, dim=-1, keepdim=True) + s = torch.std(x, dim=-1, keepdim=True) + s[s < 0.001] = 0.001 + x = (x - m) / s + + if self.summed: + x1 = self.layer1(x) + x2 = self.layer2(x1) + x3 = self.layer3(self.mp3(x1) + x2) + else: + x1 = self.layer1(x) + x2 = self.layer2(x1) + x3 = self.layer3(x2) + + x = self.layer4(torch.cat((self.mp3(x1), x2, x3), dim=1)) + x = self.relu(x) + + t = x.size()[-1] + + if self.context: + global_x = torch.cat( + ( + x, + torch.mean(x, dim=2, keepdim=True).repeat(1, 1, t), + torch.sqrt( + torch.var(x, dim=2, keepdim=True).clamp( + min=1e-4, max=1e4 + ) + ).repeat(1, 1, t), + ), + dim=1, + ) + else: + global_x = x + + w = self.attention(global_x) + + mu = torch.sum(x * w, dim=2) + sg = torch.sqrt( + (torch.sum((x**2) * w, dim=2) - mu**2).clamp(min=1e-4, max=1e4) + ) + + x 
= torch.cat((mu, sg), 1) + + x = self.bn5(x) + + x = self.fc6(x) + + if self.out_bn: + x = self.bn6(x) + + return x + + +def MainModel(**kwargs): + + model = RawNet3( + Bottle2neck, model_scale=8, context=True, summed=True, **kwargs + ) + return model diff --git a/AIMeiSheng/RawNet3/models/RawNetBasicBlock.py b/AIMeiSheng/RawNet3/models/RawNetBasicBlock.py new file mode 100644 index 0000000..2cf7609 --- /dev/null +++ b/AIMeiSheng/RawNet3/models/RawNetBasicBlock.py @@ -0,0 +1,142 @@ +import math + +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class PreEmphasis(torch.nn.Module): + def __init__(self, coef: float = 0.97) -> None: + super().__init__() + self.coef = coef + # make kernel + # In pytorch, the convolution operation uses cross-correlation. So, filter is flipped. + self.register_buffer( + "flipped_filter", + torch.FloatTensor([-self.coef, 1.0]).unsqueeze(0).unsqueeze(0), + ) + + def forward(self, input: torch.tensor) -> torch.tensor: + assert ( + len(input.size()) == 2 + ), "The number of dimensions of input tensor must be 2!" + # reflect padding to match lengths of in/out + input = input.unsqueeze(1) + input = F.pad(input, (1, 0), "reflect") + return F.conv1d(input, self.flipped_filter) + + +class AFMS(nn.Module): + """ + Alpha-Feature map scaling, added to the output of each residual block[1,2]. + + Reference: + [1] RawNet2 : https://www.isca-speech.org/archive/Interspeech_2020/pdfs/1011.pdf + [2] AMFS : https://www.koreascience.or.kr/article/JAKO202029757857763.page + """ + + def __init__(self, nb_dim: int) -> None: + super().__init__() + self.alpha = nn.Parameter(torch.ones((nb_dim, 1))) + self.fc = nn.Linear(nb_dim, nb_dim) + self.sig = nn.Sigmoid() + + def forward(self, x): + y = F.adaptive_avg_pool1d(x, 1).view(x.size(0), -1) + y = self.sig(self.fc(y)).view(x.size(0), x.size(1), -1) + + x = x + self.alpha + x = x * y + return x + + +class Bottle2neck(nn.Module): + def __init__( + self, + inplanes, + planes, + kernel_size=None, + dilation=None, + scale=4, + pool=False, + ): + + super().__init__() + + width = int(math.floor(planes / scale)) + + self.conv1 = nn.Conv1d(inplanes, width * scale, kernel_size=1) + self.bn1 = nn.BatchNorm1d(width * scale) + + self.nums = scale - 1 + + convs = [] + bns = [] + + num_pad = math.floor(kernel_size / 2) * dilation + + for i in range(self.nums): + convs.append( + nn.Conv1d( + width, + width, + kernel_size=kernel_size, + dilation=dilation, + padding=num_pad, + ) + ) + bns.append(nn.BatchNorm1d(width)) + + self.convs = nn.ModuleList(convs) + self.bns = nn.ModuleList(bns) + + self.conv3 = nn.Conv1d(width * scale, planes, kernel_size=1) + self.bn3 = nn.BatchNorm1d(planes) + + self.relu = nn.ReLU() + + self.width = width + + self.mp = nn.MaxPool1d(pool) if pool else False + self.afms = AFMS(planes) + + if inplanes != planes: # if change in number of filters + self.residual = nn.Sequential( + nn.Conv1d(inplanes, planes, kernel_size=1, stride=1, bias=False) + ) + else: + self.residual = nn.Identity() + + def forward(self, x): + residual = self.residual(x) + + out = self.conv1(x) + out = self.relu(out) + out = self.bn1(out) + + spx = torch.split(out, self.width, 1) + for i in range(self.nums): + if i == 0: + sp = spx[i] + else: + sp = sp + spx[i] + sp = self.convs[i](sp) + sp = self.relu(sp) + sp = self.bns[i](sp) + if i == 0: + out = sp + else: + out = torch.cat((out, sp), 1) + + out = torch.cat((out, spx[self.nums]), 1) + + out = self.conv3(out) + out = self.relu(out) + out = self.bn3(out) + + out += residual + if 
self.mp: + out = self.mp(out) + out = self.afms(out) + + return out diff --git a/AIMeiSheng/RawNet3/models/__init__.py b/AIMeiSheng/RawNet3/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AIMeiSheng/RawNet3/models/__pycache__/RawNet3.cpython-38.pyc b/AIMeiSheng/RawNet3/models/__pycache__/RawNet3.cpython-38.pyc new file mode 100644 index 0000000..e5499c1 Binary files /dev/null and b/AIMeiSheng/RawNet3/models/__pycache__/RawNet3.cpython-38.pyc differ diff --git a/AIMeiSheng/RawNet3/models/__pycache__/RawNet3.cpython-39.pyc b/AIMeiSheng/RawNet3/models/__pycache__/RawNet3.cpython-39.pyc new file mode 100644 index 0000000..643c555 Binary files /dev/null and b/AIMeiSheng/RawNet3/models/__pycache__/RawNet3.cpython-39.pyc differ diff --git a/AIMeiSheng/RawNet3/models/__pycache__/RawNetBasicBlock.cpython-38.pyc b/AIMeiSheng/RawNet3/models/__pycache__/RawNetBasicBlock.cpython-38.pyc new file mode 100644 index 0000000..a29ed34 Binary files /dev/null and b/AIMeiSheng/RawNet3/models/__pycache__/RawNetBasicBlock.cpython-38.pyc differ diff --git a/AIMeiSheng/RawNet3/models/__pycache__/RawNetBasicBlock.cpython-39.pyc b/AIMeiSheng/RawNet3/models/__pycache__/RawNetBasicBlock.cpython-39.pyc new file mode 100644 index 0000000..cffbcf6 Binary files /dev/null and b/AIMeiSheng/RawNet3/models/__pycache__/RawNetBasicBlock.cpython-39.pyc differ diff --git a/AIMeiSheng/RawNet3/models/__pycache__/__init__.cpython-38.pyc b/AIMeiSheng/RawNet3/models/__pycache__/__init__.cpython-38.pyc new file mode 100644 index 0000000..f00a24c Binary files /dev/null and b/AIMeiSheng/RawNet3/models/__pycache__/__init__.cpython-38.pyc differ diff --git a/AIMeiSheng/RawNet3/models/__pycache__/__init__.cpython-39.pyc b/AIMeiSheng/RawNet3/models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..a0da605 Binary files /dev/null and b/AIMeiSheng/RawNet3/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/AIMeiSheng/RawNet3/models/weights/model.pt b/AIMeiSheng/RawNet3/models/weights/model.pt new file mode 100644 index 0000000..c328608 Binary files /dev/null and b/AIMeiSheng/RawNet3/models/weights/model.pt differ diff --git a/AIMeiSheng/RawNet3/utils.py b/AIMeiSheng/RawNet3/utils.py new file mode 100644 index 0000000..6d76e9a --- /dev/null +++ b/AIMeiSheng/RawNet3/utils.py @@ -0,0 +1,91 @@ +#!/usr/bin/python +#-*- coding: utf-8 -*- +""" +Source code from: +https://github.com/clovaai/voxceleb_trainer/blob/master/tuneThreshold.py +""" + +import os +import glob +import sys +import time +from sklearn import metrics +import numpy +import pdb +from operator import itemgetter + +def tuneThresholdfromScore(scores, labels, target_fa, target_fr = None): + + fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=1) + fnr = 1 - tpr + + tunedThreshold = []; + if target_fr: + for tfr in target_fr: + idx = numpy.nanargmin(numpy.absolute((tfr - fnr))) + tunedThreshold.append([thresholds[idx], fpr[idx], fnr[idx]]); + + for tfa in target_fa: + idx = numpy.nanargmin(numpy.absolute((tfa - fpr))) # numpy.where(fpr<=tfa)[0][-1] + tunedThreshold.append([thresholds[idx], fpr[idx], fnr[idx]]); + + idxE = numpy.nanargmin(numpy.absolute((fnr - fpr))) + eer = max(fpr[idxE],fnr[idxE])*100 + + return (tunedThreshold, eer, fpr, fnr); + +# Creates a list of false-negative rates, a list of false-positive rates +# and a list of decision thresholds that give those error-rates. 
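+# For example, scores=[0.1, 0.8, 0.9] with labels=[0, 1, 1] yields
+# fnrs=[0.0, 0.5, 1.0] and fprs=[0.0, 0.0, 0.0] at the three thresholds.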
+def ComputeErrorRates(scores, labels):
+
+    # Sort the scores from smallest to largest, and also get the corresponding
+    # indexes of the sorted scores. We will treat the sorted scores as the
+    # thresholds at which the error-rates are evaluated.
+    sorted_indexes, thresholds = zip(*sorted(
+        [(index, threshold) for index, threshold in enumerate(scores)],
+        key=itemgetter(1)))
+    labels = [labels[i] for i in sorted_indexes]
+    fnrs = []
+    fprs = []
+
+    # At the end of this loop, fnrs[i] is the number of errors made by
+    # incorrectly rejecting scores less than thresholds[i]. And, fprs[i]
+    # is the total number of times that we have correctly accepted scores
+    # greater than thresholds[i].
+    for i in range(0, len(labels)):
+        if i == 0:
+            fnrs.append(labels[i])
+            fprs.append(1 - labels[i])
+        else:
+            fnrs.append(fnrs[i-1] + labels[i])
+            fprs.append(fprs[i-1] + 1 - labels[i])
+    fnrs_norm = sum(labels)
+    fprs_norm = len(labels) - fnrs_norm
+
+    # Now divide by the total number of false negative errors to
+    # obtain the false negative rates across all thresholds
+    fnrs = [x / float(fnrs_norm) for x in fnrs]
+
+    # Divide by the total number of correct positives to get the
+    # true positive rate. Subtract these quantities from 1 to
+    # get the false positive rates.
+    fprs = [1 - x / float(fprs_norm) for x in fprs]
+    return fnrs, fprs, thresholds
+
+# Computes the minimum of the detection cost function. The comments refer to
+# equations in Section 3 of the NIST 2016 Speaker Recognition Evaluation Plan.
+def ComputeMinDcf(fnrs, fprs, thresholds, p_target, c_miss, c_fa):
+    min_c_det = float("inf")
+    min_c_det_threshold = thresholds[0]
+    for i in range(0, len(fnrs)):
+        # See Equation (2). It is a weighted sum of false negative
+        # and false positive errors.
+        c_det = c_miss * fnrs[i] * p_target + c_fa * fprs[i] * (1 - p_target)
+        if c_det < min_c_det:
+            min_c_det = c_det
+            min_c_det_threshold = thresholds[i]
+    # See Equations (3) and (4). Now we normalize the cost.
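+    # c_def is the cost of the better of the two trivial systems: one that
+    # always accepts (c_fa * (1 - p_target)) and one that always rejects
+    # (c_miss * p_target); dividing by it normalises min_c_det into minDCF.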
+ c_def = min(c_miss * p_target, c_fa * (1 - p_target)) + min_dcf = min_c_det / c_def + return min_dcf, min_c_det_threshold \ No newline at end of file diff --git a/AIMeiSheng/Retrieval_based_Voice_Conversion_WebUI.ipynb b/AIMeiSheng/Retrieval_based_Voice_Conversion_WebUI.ipynb new file mode 100644 index 0000000..2da7bf9 --- /dev/null +++ b/AIMeiSheng/Retrieval_based_Voice_Conversion_WebUI.ipynb @@ -0,0 +1,384 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "private_outputs": true, + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU", + "gpuClass": "standard" + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb)" + ], + "metadata": { + "id": "ZFFCx5J80SGa" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GmFP6bN9dvOq" + }, + "outputs": [], + "source": [ + "# @title 查看显卡\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "source": [ + "# @title 安装依赖\n", + "!apt-get -y install build-essential python3-dev ffmpeg\n", + "!pip3 install --upgrade setuptools wheel\n", + "!pip3 install --upgrade pip\n", + "!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2" + ], + "metadata": { + "id": "wjddIFr1oS3W" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 克隆仓库\n", + "\n", + "!git clone --depth=1 -b stable https://github.com/fumiama/Retrieval-based-Voice-Conversion-WebUI\n", + "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", + "!mkdir -p pretrained uvr5_weights" + ], + "metadata": { + "id": "ge_97mfpgqTm" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 更新仓库(一般无需执行)\n", + "!git pull" + ], + "metadata": { + "id": "BLDEZADkvlw1" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 安装aria2\n", + "!apt -y install -qq aria2" + ], + "metadata": { + "id": "pqE0PrnuRqI2" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 下载底模\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d 
/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth" + ], + "metadata": { + "id": "UG3XpUwEomUz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 下载人声分离模型\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth" + ], + "metadata": { + "id": "HugjmZqZRuiF" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 下载hubert_base\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt" + ], + "metadata": { + "id": "2RCaT9FTR0ej" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 挂载谷歌云盘\n", + "\n", + "from google.colab import drive\n", + "\n", + "drive.mount(\"/content/drive\")" + ], + "metadata": { + "id": "jwu07JgqoFON" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 从谷歌云盘加载打包好的数据集到/content/dataset\n", + "\n", + "# @markdown 数据集位置\n", + "DATASET = (\n", + " \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" # @param {type:\"string\"}\n", + ")\n", + "\n", + "!mkdir -p /content/dataset\n", + "!unzip -d /content/dataset -B {DATASET}" + ], + "metadata": { + "id": "Mwk7Q0Loqzjx" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 
重命名数据集中的重名文件\n", + "!ls -a /content/dataset/\n", + "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*" + ], + "metadata": { + "id": "PDlFxWHWEynD" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 启动web\n", + "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", + "# %load_ext tensorboard\n", + "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n", + "!python3 infer-web.py --colab --pycmd python3" + ], + "metadata": { + "id": "7vh6vphDwO0b" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 手动将训练后的模型文件备份到谷歌云盘\n", + "# @markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", + "\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 模型epoch\n", + "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", + "\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n", + "\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth" + ], + "metadata": { + "id": "FgJuNeAwx5Y_" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 从谷歌云盘恢复pth\n", + "# @markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", + "\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 模型epoch\n", + "MODELEPOCH = 7500 # @param {type:\"integer\"}\n", + "\n", + "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", + "\n", + "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", + "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", + "!cp /content/drive/MyDrive/*.index /content/\n", + "!cp /content/drive/MyDrive/*.npy /content/\n", + "!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth" + ], + "metadata": { + "id": "OVQoLQJXS7WX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 手动预处理(不推荐)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 采样率\n", + "BITRATE = 48000 # @param {type:\"integer\"}\n", + "# @markdown 使用的进程数\n", + "THREADCOUNT = 8 # @param {type:\"integer\"}\n", + "\n", + "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True" + ], + "metadata": { + "id": "ZKAyuKb9J6dz" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 手动提取特征(不推荐)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 使用的进程数\n", + "THREADCOUNT = 8 # @param {type:\"integer\"}\n", + "# @markdown 音高提取算法\n", + "ALGO = \"harvest\" # @param {type:\"string\"}\n", + "\n", + "!python3 extract_f0_print.py 
logs/{MODELNAME} {THREADCOUNT} {ALGO}\n", + "\n", + "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME}" + ], + "metadata": { + "id": "CrxJqzAUKmPJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 手动训练(不推荐)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 使用的GPU\n", + "USEGPU = \"0\" # @param {type:\"string\"}\n", + "# @markdown 批大小\n", + "BATCHSIZE = 32 # @param {type:\"integer\"}\n", + "# @markdown 停止的epoch\n", + "MODELEPOCH = 3200 # @param {type:\"integer\"}\n", + "# @markdown 保存epoch间隔\n", + "EPOCHSAVE = 100 # @param {type:\"integer\"}\n", + "# @markdown 采样率\n", + "MODELSAMPLE = \"48k\" # @param {type:\"string\"}\n", + "# @markdown 是否缓存训练集\n", + "CACHEDATA = 1 # @param {type:\"integer\"}\n", + "# @markdown 是否仅保存最新的ckpt文件\n", + "ONLYLATEST = 0 # @param {type:\"integer\"}\n", + "\n", + "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e lulu -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}" + ], + "metadata": { + "id": "IMLPLKOaKj58" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 删除其它pth,只留选中的(慎点,仔细看代码)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 选中模型epoch\n", + "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", + "\n", + "!echo \"备份选中的模型。。。\"\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n", + "\n", + "!echo \"正在删除。。。\"\n", + "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", + "!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth\n", + "\n", + "!echo \"恢复选中的模型。。。\"\n", + "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", + "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", + "\n", + "!echo \"删除完成\"\n", + "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}" + ], + "metadata": { + "id": "haYA81hySuDl" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title 清除项目下所有文件,只留选中的模型(慎点,仔细看代码)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 选中模型epoch\n", + "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", + "\n", + "!echo \"备份选中的模型。。。\"\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n", + "\n", + "!echo \"正在删除。。。\"\n", + "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", + "!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*\n", + "\n", + "!echo \"恢复选中的模型。。。\"\n", + "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", + "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", + 
"\n", + "!echo \"删除完成\"\n", + "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}" + ], + "metadata": { + "id": "QhSiPTVPoIRh" + }, + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/AIMeiSheng/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb b/AIMeiSheng/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb new file mode 100644 index 0000000..c286d69 --- /dev/null +++ b/AIMeiSheng/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb @@ -0,0 +1,404 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "id": "ZFFCx5J80SGa" + }, + "source": [ + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI_v2.ipynb)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GmFP6bN9dvOq" + }, + "outputs": [], + "source": [ + "# @title 查看显卡\n", + "!nvidia-smi" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wjddIFr1oS3W" + }, + "outputs": [], + "source": [ + "# @title 安装依赖\n", + "!apt-get -y install build-essential python3-dev ffmpeg\n", + "!pip3 install --upgrade setuptools wheel\n", + "!pip3 install --upgrade pip\n", + "!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ge_97mfpgqTm" + }, + "outputs": [], + "source": [ + "# @title 克隆仓库\n", + "\n", + "!mkdir Retrieval-based-Voice-Conversion-WebUI\n", + "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", + "!git init\n", + "!git remote add origin https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git\n", + "!git fetch origin cfd984812804ddc9247d65b14c82cd32e56c1133 --depth=1\n", + "!git reset --hard FETCH_HEAD" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BLDEZADkvlw1" + }, + "outputs": [], + "source": [ + "# @title 更新仓库(一般无需执行)\n", + "!git pull" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pqE0PrnuRqI2" + }, + "outputs": [], + "source": [ + "# @title 安装aria2\n", + "!apt -y install -qq aria2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UG3XpUwEomUz" + }, + "outputs": [], + "source": [ + "# @title 下载底模\n", + "\n", + "# v1\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d 
/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n", + "\n", + "# v2\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D40k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D48k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G40k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G48k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D40k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M 
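The v1/v2 download cells above are a long run of per-file aria2c calls against the lj1995/VoiceConversionWebUI repo. Where aria2 is unavailable, the same fetch can be sketched with huggingface_hub instead; the repo id and the pretrained/ file layout are read off the URLs above, while hf_hub_download with local_dir assumes a reasonably recent huggingface_hub release:

```python
# Minimal alternative to the aria2c cells above: pull the same v1 base
# models via huggingface_hub (assumes `pip install huggingface_hub`).
from huggingface_hub import hf_hub_download

REPO = "lj1995/VoiceConversionWebUI"
DEST = "/content/Retrieval-based-Voice-Conversion-WebUI"

for sr in ("32k", "40k", "48k"):
    for prefix in ("D", "G", "f0D", "f0G"):
        # Mirrors the v1 URLs above; files land under DEST/pretrained/.
        hf_hub_download(repo_id=REPO, filename=f"pretrained/{prefix}{sr}.pth", local_dir=DEST)
```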
https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D48k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G32k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G40k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G48k.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G48k.pth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HugjmZqZRuiF" + }, + "outputs": [], + "source": [ + "# @title 下载人声分离模型\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2RCaT9FTR0ej" + }, + "outputs": [], + "source": [ + "# @title 下载hubert_base\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jwu07JgqoFON" + }, + "outputs": [], + "source": [ + "# @title 挂载谷歌云盘\n", + "\n", + "from google.colab import drive\n", + "\n", + "drive.mount(\"/content/drive\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mwk7Q0Loqzjx" + }, + "outputs": [], + "source": [ + "# @title 从谷歌云盘加载打包好的数据集到/content/dataset\n", + "\n", + "# @markdown 数据集位置\n", + "DATASET = (\n", + " \"/content/drive/MyDrive/dataset/lulu20230327_32k.zip\" # @param {type:\"string\"}\n", + ")\n", + "\n", + "!mkdir -p /content/dataset\n", + "!unzip -d /content/dataset -B {DATASET}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PDlFxWHWEynD" + }, + "outputs": [], + "source": [ + "# @title 重命名数据集中的重名文件\n", + "!ls -a /content/dataset/\n", + "!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /content/dataset/*.*~*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7vh6vphDwO0b" + }, + "outputs": [], + "source": [ + "# @title 启动web\n", + "%cd /content/Retrieval-based-Voice-Conversion-WebUI\n", + "# %load_ext tensorboard\n", + "# %tensorboard --logdir /content/Retrieval-based-Voice-Conversion-WebUI/logs\n", + "!python3 infer-web.py --colab --pycmd python3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FgJuNeAwx5Y_" + }, + "outputs": [], + "source": [ + "# @title 手动将训练后的模型文件备份到谷歌云盘\n", + "# @markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", + "\n", + "# @markdown 模型名\n", + "MODELNAME = 
\"lulu\" # @param {type:\"string\"}\n", + "# @markdown 模型epoch\n", + "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", + "\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/added_*.index /content/drive/MyDrive/\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/total_*.npy /content/drive/MyDrive/\n", + "\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OVQoLQJXS7WX" + }, + "outputs": [], + "source": [ + "# @title 从谷歌云盘恢复pth\n", + "# @markdown 需要自己查看logs文件夹下模型的文件名,手动修改下方命令末尾的文件名\n", + "\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 模型epoch\n", + "MODELEPOCH = 7500 # @param {type:\"integer\"}\n", + "\n", + "!mkdir -p /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", + "\n", + "!cp /content/drive/MyDrive/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", + "!cp /content/drive/MyDrive/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", + "!cp /content/drive/MyDrive/*.index /content/\n", + "!cp /content/drive/MyDrive/*.npy /content/\n", + "!cp /content/drive/MyDrive/{MODELNAME}{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/weights/{MODELNAME}.pth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZKAyuKb9J6dz" + }, + "outputs": [], + "source": [ + "# @title 手动预处理(不推荐)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 采样率\n", + "BITRATE = 48000 # @param {type:\"integer\"}\n", + "# @markdown 使用的进程数\n", + "THREADCOUNT = 8 # @param {type:\"integer\"}\n", + "\n", + "!python3 trainset_preprocess_pipeline_print.py /content/dataset {BITRATE} {THREADCOUNT} logs/{MODELNAME} True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CrxJqzAUKmPJ" + }, + "outputs": [], + "source": [ + "# @title 手动提取特征(不推荐)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 使用的进程数\n", + "THREADCOUNT = 8 # @param {type:\"integer\"}\n", + "# @markdown 音高提取算法\n", + "ALGO = \"harvest\" # @param {type:\"string\"}\n", + "\n", + "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n", + "\n", + "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IMLPLKOaKj58" + }, + "outputs": [], + "source": [ + "# @title 手动训练(不推荐)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 使用的GPU\n", + "USEGPU = \"0\" # @param {type:\"string\"}\n", + "# @markdown 批大小\n", + "BATCHSIZE = 32 # @param {type:\"integer\"}\n", + "# @markdown 停止的epoch\n", + "MODELEPOCH = 3200 # @param {type:\"integer\"}\n", + "# @markdown 保存epoch间隔\n", + "EPOCHSAVE = 100 # @param {type:\"integer\"}\n", + "# @markdown 采样率\n", + "MODELSAMPLE = \"48k\" # @param {type:\"string\"}\n", + "# @markdown 是否缓存训练集\n", + "CACHEDATA = 1 # 
@param {type:\"integer\"}\n", + "# @markdown 是否仅保存最新的ckpt文件\n", + "ONLYLATEST = 0 # @param {type:\"integer\"}\n", + "\n", + "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e {MODELNAME} -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained/f0G{MODELSAMPLE}.pth -pd pretrained/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "haYA81hySuDl" + }, + "outputs": [], + "source": [ + "# @title 删除其它pth,只留选中的(慎点,仔细看代码)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 选中模型epoch\n", + "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", + "\n", + "!echo \"备份选中的模型。。。\"\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", + "\n", + "!echo \"正在删除。。。\"\n", + "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", + "!rm /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*.pth\n", + "\n", + "!echo \"恢复选中的模型。。。\"\n", + "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", + "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", + "\n", + "!echo \"删除完成\"\n", + "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QhSiPTVPoIRh" + }, + "outputs": [], + "source": [ + "# @title 清除项目下所有文件,只留选中的模型(慎点,仔细看代码)\n", + "# @markdown 模型名\n", + "MODELNAME = \"lulu\" # @param {type:\"string\"}\n", + "# @markdown 选中模型epoch\n", + "MODELEPOCH = 9600 # @param {type:\"integer\"}\n", + "\n", + "!echo \"备份选中的模型。。。\"\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth /content/{MODELNAME}_G_{MODELEPOCH}.pth\n", + "!cp /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth /content/{MODELNAME}_D_{MODELEPOCH}.pth\n", + "\n", + "!echo \"正在删除。。。\"\n", + "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}\n", + "!rm -rf /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/*\n", + "\n", + "!echo \"恢复选中的模型。。。\"\n", + "!mv /content/{MODELNAME}_G_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/G_{MODELEPOCH}.pth\n", + "!mv /content/{MODELNAME}_D_{MODELEPOCH}.pth /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}/D_{MODELEPOCH}.pth\n", + "\n", + "!echo \"删除完成\"\n", + "!ls /content/Retrieval-based-Voice-Conversion-WebUI/logs/{MODELNAME}" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "private_outputs": true, + "provenance": [] + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/AIMeiSheng/Retrieval_based_Voice_Conversion_WebUI_v2_kaggle.ipynb b/AIMeiSheng/Retrieval_based_Voice_Conversion_WebUI_v2_kaggle.ipynb new file mode 100644 index 0000000..7624004 --- /dev/null +++ b/AIMeiSheng/Retrieval_based_Voice_Conversion_WebUI_v2_kaggle.ipynb @@ -0,0 +1,577 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { +
"_cell_guid": "fc2833ca-6157-4074-930a-1bf72464478b", + "_uuid": "97194436-68e0-4ffd-a0a4-67c0cae25aa8", + "execution": { + "iopub.execute_input": "2023-08-08T12:14:04.984722Z", + "iopub.status.busy": "2023-08-08T12:14:04.984345Z", + "iopub.status.idle": "2023-08-08T12:17:05.076497Z", + "shell.execute_reply": "2023-08-08T12:17:05.075256Z", + "shell.execute_reply.started": "2023-08-08T12:14:04.984693Z" + }, + "id": "m4lM_QISaGcT", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 安装依赖\n", + "!apt-get -y install build-essential python3-dev ffmpeg\n", + "!pip3 install --upgrade setuptools wheel\n", + "!pip3 install --upgrade pip\n", + "!pip3 install faiss-cpu==1.7.2 fairseq gradio==3.14.0 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2\n", + "!pip3 install torch torchvision torchaudio\n", + "##!pip3 install faiss-gpu fairseq gradio==3.35.1 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "d99f608b-c63c-4a06-a0bf-717475a25394", + "_uuid": "3834565d-c64b-4a3a-9720-652709f4e14f", + "execution": { + "iopub.execute_input": "2023-08-08T12:17:05.079069Z", + "iopub.status.busy": "2023-08-08T12:17:05.078716Z", + "iopub.status.idle": "2023-08-08T12:17:07.103410Z", + "shell.execute_reply": "2023-08-08T12:17:07.101967Z", + "shell.execute_reply.started": "2023-08-08T12:17:05.079034Z" + }, + "id": "c2XetvO5aGcU", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "!mkdir content\n", + "!mkdir content/Retrieval-based-Voice-Conversion-WebUI\n", + "%cd /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "979abafd-97f3-454e-81a7-c1d4642b0957", + "_uuid": "ffb4a355-d5e9-46d0-9d0d-b4c246928a87", + "execution": { + "iopub.execute_input": "2023-08-08T12:18:03.390585Z", + "iopub.status.busy": "2023-08-08T12:18:03.390144Z", + "iopub.status.idle": "2023-08-08T12:18:09.114623Z", + "shell.execute_reply": "2023-08-08T12:18:09.113349Z", + "shell.execute_reply.started": "2023-08-08T12:18:03.390549Z" + }, + "id": "dLDioOvhaGcV", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "!git clone -b 'v1.1-patch-no-ui' --single-branch --depth 1 https://github.com/elcolex777/Retrieval-based-Voice-Conversion-WebUI.git ." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "b8037221-7147-4292-8290-1012f1a1add7", + "_uuid": "286f1dfd-35fd-48c0-801c-f2e3298a7fd8", + "execution": { + "iopub.execute_input": "2023-08-08T12:18:22.450476Z", + "iopub.status.busy": "2023-08-08T12:18:22.450044Z", + "iopub.status.idle": "2023-08-08T12:19:32.561757Z", + "shell.execute_reply": "2023-08-08T12:19:32.560686Z", + "shell.execute_reply.started": "2023-08-08T12:18:22.450436Z" + }, + "id": "zlxYeiOeaGcV", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "!pip install -r requirements.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "8bf89dcd-fded-4166-be57-3d5925f3bb42", + "_uuid": "258effd7-85ef-451d-9086-3270366881f0", + "execution": { + "iopub.execute_input": "2023-08-08T12:20:31.818252Z", + "iopub.status.busy": "2023-08-08T12:20:31.817807Z", + "iopub.status.idle": "2023-08-08T12:20:40.358862Z", + "shell.execute_reply": "2023-08-08T12:20:40.357771Z", + "shell.execute_reply.started": "2023-08-08T12:20:31.818214Z" + }, + "id": "6jnyv5vIaGcV", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 安装aria2\n", + "!apt -y install -qq aria2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "5b9dfeb0-2303-47da-8229-83f80a280787", + "_uuid": "1db7a246-912f-45d0-917d-61e2f1cdaa08", + "execution": { + "iopub.execute_input": "2023-08-08T12:20:40.361304Z", + "iopub.status.busy": "2023-08-08T12:20:40.360949Z", + "iopub.status.idle": "2023-08-08T12:21:50.733941Z", + "shell.execute_reply": "2023-08-08T12:21:50.732785Z", + "shell.execute_reply.started": "2023-08-08T12:20:40.361270Z" + }, + "id": "5a1zXck3aGcW", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 下载底模\n", + "\n", + "# v1\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o D48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o G48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth -d 
/kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0D48k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G40k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained -o f0G48k.pth\n", + "\n", + "# v2\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D32k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D40k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D48k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o D48k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G32k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G40k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G48k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o G48k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D32k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D40k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D48k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0D48k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 
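Because training fails only much later if any of these checkpoint downloads was interrupted, a small existence check after the download cells can save a wasted run. A hedged sketch; the path list mirrors the 40k v2 files and hubert_base.pt that the later cells actually rely on, and is meant to be run once everything above has finished:

```python
# Sanity check after the download cells: confirm the checkpoints the
# later training cells depend on are actually on disk.
import os

ROOT = "/kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI"
required = [
    f"{ROOT}/pretrained_v2/f0G40k.pth",
    f"{ROOT}/pretrained_v2/f0D40k.pth",
    f"{ROOT}/hubert_base.pt",
]
missing = [p for p in required if not os.path.exists(p)]
print("all present" if not missing else f"missing: {missing}")
```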
-k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G32k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G32k.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G40k.pth\n", + "# !aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G48k.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/pretrained_v2 -o f0G48k.pth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "4a0d7b3d-f07c-46a8-9ec3-bf45d1d5e6b1", + "_uuid": "db1c9b13-08c8-4186-b4a9-6f489b1170f3", + "execution": { + "iopub.execute_input": "2023-08-08T12:21:50.736649Z", + "iopub.status.busy": "2023-08-08T12:21:50.736243Z", + "iopub.status.idle": "2023-08-08T12:22:02.014806Z", + "shell.execute_reply": "2023-08-08T12:22:02.013371Z", + "shell.execute_reply.started": "2023-08-08T12:21:50.736600Z" + }, + "id": "Wf6hx6-9aGcX", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 下载人声分离模型\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2-人声vocals+非人声instrumentals.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP2-人声vocals+非人声instrumentals.pth\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5-主旋律人声vocals+其他instrumentals.pth -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/uvr5_weights -o HP5-主旋律人声vocals+其他instrumentals.pth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "f16d3428-1fb8-41f2-bbaf-e1e68e54b6ed", + "_uuid": "3daf1b14-54b7-4469-8e2e-adfd9d275b6e", + "execution": { + "iopub.execute_input": "2023-08-08T12:22:02.016867Z", + "iopub.status.busy": "2023-08-08T12:22:02.016487Z", + "iopub.status.idle": "2023-08-08T12:22:06.052722Z", + "shell.execute_reply": "2023-08-08T12:22:06.051437Z", + "shell.execute_reply.started": "2023-08-08T12:22:02.016832Z" + }, + "id": "Ga3ELLPjaGcX", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 下载hubert_base\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI -o hubert_base.pt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "bb33fe84-133f-4f46-9b55-9d12ae71a11c", + "_uuid": "8837b647-df9e-4dea-9e45-290dbc202757", + "execution": { + "iopub.execute_input": "2023-08-08T12:22:06.057167Z", + "iopub.status.busy": "2023-08-08T12:22:06.056004Z", + "iopub.status.idle": "2023-08-08T12:22:16.461648Z", + "shell.execute_reply": "2023-08-08T12:22:16.460065Z", + "shell.execute_reply.started": "2023-08-08T12:22:06.057094Z" + }, + "id": "axkxH-_YaGcX", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 从谷歌云盘加载打包好的数据集到/content/dataset\n", + "\n", + "# @markdown 数据集位置\n", + "#DATASET = (\n", + "# \"/content/drive/MyDrive/dataset/lukhovchanka-medium.zip\" # 
@param {type:\"string\"}\n", + "#)\n", + "\n", + "#!mkdir -p /kaggle/working/content/dataset\n", + "#!unzip -d /kaggle/working/content/dataset -B {DATASET}\n", + "DATASET = \"lukhovchanka-sasha\"\n", + "\n", + "!mkdir -p \"dataset_raw\"\n", + "!cp -R /kaggle/input/{DATASET}/* -t \"dataset_raw/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "34e7bf7e-b72b-4ca0-b3e6-ac9183f4e89c", + "_uuid": "576c8603-f1cf-4baf-96a3-71147701bd9a", + "id": "cBTeQkVEaGcY", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 重命名数据集中的重名文件\n", + "#!ls -a /kaggle/working/content/dataset/\n", + "#!rename 's/(\\w+)\\.(\\w+)~(\\d*)/$1_$3.$2/' /kaggle/working/content/dataset/*.*~*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "b717715f-94e7-4f10-8b11-7f1fbc393160", + "_uuid": "54640c48-e7c8-4aad-b295-a4b48dba9fab", + "execution": { + "iopub.execute_input": "2023-08-08T12:22:16.464302Z", + "iopub.status.busy": "2023-08-08T12:22:16.463801Z", + "iopub.status.idle": "2023-08-08T12:22:19.326032Z", + "shell.execute_reply": "2023-08-08T12:22:19.324788Z", + "shell.execute_reply.started": "2023-08-08T12:22:16.464255Z" + }, + "id": "fqpQwMgmaGcY", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "#https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/rmvpe.pt\n", + "#!pip3 install faiss-gpu fairseq gradio==3.35.1 ffmpeg ffmpeg-python praat-parselmouth pyworld numpy==1.23.5 numba==0.56.4 librosa==0.9.2\n", + "#!pip3 install tensorboardX\n", + "!aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt -d /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI -o rmvpe.pt\n", + "#!curl -o /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/rmvpe.pt https://drive.google.com/file/d/1bL6w-k6_0t5bESCdsBz40_HbTpzmwvCf/view?usp=drive_link" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "fc2ba4fc-7dcf-4a94-9c51-569549f85e50", + "_uuid": "e01a84d7-c32f-4242-92ae-de002c15a8dc", + "execution": { + "iopub.execute_input": "2023-08-08T12:22:19.329095Z", + "iopub.status.busy": "2023-08-08T12:22:19.328572Z", + "iopub.status.idle": "2023-08-08T12:22:19.336526Z", + "shell.execute_reply": "2023-08-08T12:22:19.335614Z", + "shell.execute_reply.started": "2023-08-08T12:22:19.329044Z" + }, + "id": "-TDRrR4PaGcY", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 启动web\n", + "%cd /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI\n", + "# %load_ext tensorboard\n", + "# %tensorboard --logdir /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/logs\n", + "#!python3 infer-web.py --colab --pycmd python3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "d640cb2d-efa8-4326-9116-eedf53a389be", + "_uuid": "c77793f7-b8e0-4173-9f53-7fce3d357fcd", + "execution": { + "iopub.execute_input": "2023-08-08T12:51:08.375199Z", + "iopub.status.busy": "2023-08-08T12:51:08.374762Z", + "iopub.status.idle": "2023-08-08T12:51:09.397678Z", + "shell.execute_reply": "2023-08-08T12:51:09.396322Z", + "shell.execute_reply.started": "2023-08-08T12:51:08.375162Z" + }, + "id": "GluV-nZvaGcY", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "!mkdir -p 
logs/mi-test\n", + "!touch logs/mi-test/preprocess.log\n", + "#!touch logs/mi-test/filelist.txt\n", + "#!touch logs/filelist.txt\n", + "\n", + "!touch logs/mi-test/extract_f0_feature.log\n", + "#python extract_f0_print.py logs/mi-test $(nproc) pm\n", + "#python extract_feature_print.py cpu 1 0 0 logs/mi-test v1\n", + "\n", + "# \"/content/dataset\" Enter the path of the training folder:\n", + "# 40000 Target sample rate\n", + "# 1 Number of CPU processes used for pitch extraction and data processing:\n", + "# \"/content/Retrieval-based-Voice-Conversion-WebUI/logs/mi-test\"\n", + "# False\n", + "#!python3 trainset_preprocess_pipeline_print.py \"/kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/dataset_raw\" 40000 1 \"/kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/logs/mi-test\" False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "e99f592d-ec96-41de-9842-22aea30e4618", + "_uuid": "397de858-35b6-46d1-8ecd-3f8793ba3ae0", + "execution": { + "iopub.execute_input": "2023-08-08T12:23:53.839258Z", + "iopub.status.busy": "2023-08-08T12:23:53.838778Z", + "iopub.status.idle": "2023-08-08T12:25:40.939175Z", + "shell.execute_reply": "2023-08-08T12:25:40.937765Z", + "shell.execute_reply.started": "2023-08-08T12:23:53.839217Z" + }, + "id": "YyZXCmR-aGcY", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 手动预处理(不推荐)\n", + "# @markdown 模型名\n", + "MODELNAME = \"mi-test\" # @param {type:\"string\"}\n", + "# @markdown 采样率\n", + "BITRATE = 40000 # @param {type:\"integer\"}\n", + "# @markdown 使用的进程数\n", + "THREADCOUNT = 8 # @param {type:\"integer\"}\n", + "\n", + "!python3 trainset_preprocess_pipeline_print.py /kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/dataset_raw {BITRATE} {THREADCOUNT} logs/{MODELNAME} True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "314f3bd5-0080-4c66-a122-dc7e98123883", + "_uuid": "17f65d2a-1c0a-444d-8f54-2d043c662a54", + "id": "BI7F2pYyaGcY", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "#!python3 extract_f0_rmvpe.py 2 0 0 \"/kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/logs/mi-test\" True\n", + "#!python3 extract_f0_rmvpe.py 2 1 0 \"/kaggle/working/content/Retrieval-based-Voice-Conversion-WebUI/logs/mi-test\" True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "f11798a9-5759-463c-b467-df997f701700", + "_uuid": "872a10b1-f68d-4ba8-8208-228081e90c34", + "execution": { + "iopub.execute_input": "2023-08-08T13:01:51.981205Z", + "iopub.status.busy": "2023-08-08T13:01:51.980662Z", + "iopub.status.idle": "2023-08-08T13:10:18.050952Z", + "shell.execute_reply": "2023-08-08T13:10:18.049440Z", + "shell.execute_reply.started": "2023-08-08T13:01:51.981154Z" + }, + "id": "61uo0TZJaGcY", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 手动提取特征(不推荐)\n", + "# @markdown 模型名\n", + "MODELNAME = \"mi-test\" # @param {type:\"string\"}\n", + "# @markdown 使用的进程数\n", + "THREADCOUNT = 8 # @param {type:\"integer\"}\n", + "# @markdown 音高提取算法\n", + "ALGO = \"rmvpe\" # @param {type:\"string\"} pm|harvest|crepe|rmvpe\n", + "\n", + "!python3 extract_f0_print.py logs/{MODELNAME} {THREADCOUNT} {ALGO}\n", + "\n", + "!python3 extract_feature_print.py cpu 1 0 0 logs/{MODELNAME} v2" + ] + }, + { + "cell_type": "code", + 
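The extract_f0_print.py cell above accepts pm, harvest, crepe, or rmvpe as the pitch algorithm and writes one f0 contour per preprocessed slice. As orientation for what each worker does per file, here is a minimal sketch using pyworld's harvest (pyworld is installed by the dependency cell earlier); the floor/ceiling and hop values are typical choices, not necessarily the script's exact ones, and the example path is hypothetical:

```python
# Sketch of the per-file work extract_f0_print.py parallelises:
# estimate an f0 contour for one 16 kHz training slice.
import numpy as np
import pyworld
import librosa

def compute_f0_harvest(wav_path, sr=16000, hop_ms=10.0):
    x, _ = librosa.load(wav_path, sr=sr)
    f0, _t = pyworld.harvest(
        x.astype(np.float64), sr,
        f0_floor=50.0, f0_ceil=1100.0, frame_period=hop_ms,
    )
    return f0  # one value per hop; 0.0 where the frame is unvoiced

# f0 = compute_f0_harvest("logs/mi-test/1_16k_wavs/0_0.wav")  # hypothetical path
```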
"execution_count": null, + "metadata": { + "_cell_guid": "7a83b346-659f-40a3-b93e-a737bbea43a0", + "_uuid": "619eacd5-d0fb-4abe-bffb-18654928fc8e", + "execution": { + "iopub.execute_input": "2023-08-08T13:14:15.600958Z", + "iopub.status.busy": "2023-08-08T13:14:15.600514Z", + "iopub.status.idle": "2023-08-08T13:14:15.629697Z", + "shell.execute_reply": "2023-08-08T13:14:15.628575Z", + "shell.execute_reply.started": "2023-08-08T13:14:15.600921Z" + }, + "id": "kktagXwnaGcZ", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @markdown 模型名\n", + "MODELNAME = \"mi-test\" # @param {type:\"string\"}\n", + "# @markdown 采样率\n", + "MODELSAMPLE = \"40k\" # @param {type:\"string\"}\n", + "\n", + "!python3 create_filelist_print.py {MODELNAME} v2 True {MODELSAMPLE} 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "37667197-6f23-41a4-acab-5b0f9d552803", + "_uuid": "15eaf840-c740-49a4-a968-2dfb160126fb", + "id": "3ZtM59b6aGcZ", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @markdown 模型名\n", + "MODELNAME = \"mi-test\" # @param {type:\"string\"}\n", + "\n", + "!python3 train_index_print.py {MODELNAME} v2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "_cell_guid": "1d0dce3a-317f-4552-9a79-419ed8c642ff", + "_uuid": "0a0eabfa-0767-4350-935a-6e6ec56f207f", + "id": "lL-BhQu2aGcZ", + "jupyter": { + "outputs_hidden": false + }, + "trusted": true + }, + "outputs": [], + "source": [ + "# @title 手动训练(不推荐)\n", + "# @markdown 模型名\n", + "MODELNAME = \"mi-test\" # @param {type:\"string\"}\n", + "# @markdown 使用的GPU\n", + "USEGPU = \"1\" # @param {type:\"string\"}\n", + "# @markdown 批大小\n", + "BATCHSIZE = 7 # @param {type:\"integer\"}\n", + "# @markdown 停止的epoch\n", + "MODELEPOCH = 100 # @param {type:\"integer\"}\n", + "# @markdown 保存epoch间隔\n", + "EPOCHSAVE = 50 # @param {type:\"integer\"}\n", + "# @markdown 采样率\n", + "MODELSAMPLE = \"40k\" # @param {type:\"string\"}\n", + "# @markdown 是否缓存训练集\n", + "CACHEDATA = 0 # @param {type:\"integer\"}\n", + "# @markdown 是否仅保存最新的ckpt文件\n", + "ONLYLATEST = 0 # @param {type:\"integer\"}\n", + "\n", + "!python3 train_nsf_sim_cache_sid_load_pretrain.py -e {MODELNAME} -sr {MODELSAMPLE} -f0 1 -bs {BATCHSIZE} -g {USEGPU} -te {MODELEPOCH} -se {EPOCHSAVE} -pg pretrained_v2/f0G{MODELSAMPLE}.pth -pd pretrained_v2/f0D{MODELSAMPLE}.pth -l {ONLYLATEST} -c {CACHEDATA} -sw 1 -v v2" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/AIMeiSheng/app.py b/AIMeiSheng/app.py new file mode 100644 index 0000000..9fdd212 --- /dev/null +++ b/AIMeiSheng/app.py @@ -0,0 +1,319 @@ +import os +import torch + +# os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt") +import gradio as gr +import librosa +import numpy as np +import logging +from fairseq import checkpoint_utils +from vc_infer_pipeline import VC +import traceback +from config import Config +from lib.infer_pack.models import ( + SynthesizerTrnMs256NSFsid, + SynthesizerTrnMs256NSFsid_nono, + 
SynthesizerTrnMs768NSFsid, + SynthesizerTrnMs768NSFsid_nono, +) +from i18n import I18nAuto + +logging.getLogger("numba").setLevel(logging.WARNING) +logging.getLogger("markdown_it").setLevel(logging.WARNING) +logging.getLogger("urllib3").setLevel(logging.WARNING) +logging.getLogger("matplotlib").setLevel(logging.WARNING) + +i18n = I18nAuto() +i18n.print() + +config = Config() + +weight_root = "weights" +weight_uvr5_root = "uvr5_weights" +index_root = "logs" +names = [] +hubert_model = None +for name in os.listdir(weight_root): + if name.endswith(".pth"): + names.append(name) +index_paths = [] +for root, dirs, files in os.walk(index_root, topdown=False): + for name in files: + if name.endswith(".index") and "trained" not in name: + index_paths.append("%s/%s" % (root, name)) + + +def get_vc(sid): + global n_spk, tgt_sr, net_g, vc, cpt, version + if sid == "" or sid == []: + global hubert_model + if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 + print("clean_empty_cache") + del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt + hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None + if torch.cuda.is_available(): + torch.cuda.empty_cache() + ###楼下不这么折腾清理不干净 + if_f0 = cpt.get("f0", 1) + version = cpt.get("version", "v1") + if version == "v1": + if if_f0 == 1: + net_g = SynthesizerTrnMs256NSFsid( + *cpt["config"], is_half=config.is_half + ) + else: + net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) + elif version == "v2": + if if_f0 == 1: + net_g = SynthesizerTrnMs768NSFsid( + *cpt["config"], is_half=config.is_half + ) + else: + net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) + del net_g, cpt + if torch.cuda.is_available(): + torch.cuda.empty_cache() + cpt = None + return {"visible": False, "__type__": "update"} + person = "%s/%s" % (weight_root, sid) + print("loading %s" % person) + cpt = torch.load(person, map_location="cpu") + tgt_sr = cpt["config"][-1] + cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk + if_f0 = cpt.get("f0", 1) + version = cpt.get("version", "v1") + if version == "v1": + if if_f0 == 1: + net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half) + else: + net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) + elif version == "v2": + if if_f0 == 1: + net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half) + else: + net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) + del net_g.enc_q + print(net_g.load_state_dict(cpt["weight"], strict=False)) + net_g.eval().to(config.device) + if config.is_half: + net_g = net_g.half() + else: + net_g = net_g.float() + vc = VC(tgt_sr, config) + n_spk = cpt["config"][-3] + return {"visible": True, "maximum": n_spk, "__type__": "update"} + + +def load_hubert(): + global hubert_model + models, _, _ = checkpoint_utils.load_model_ensemble_and_task( + ["hubert_base.pt"], + suffix="", + ) + hubert_model = models[0] + hubert_model = hubert_model.to(config.device) + if config.is_half: + hubert_model = hubert_model.half() + else: + hubert_model = hubert_model.float() + hubert_model.eval() + + +def vc_single( + sid, + input_audio_path, + f0_up_key, + f0_file, + f0_method, + file_index, + file_index2, + # file_big_npy, + index_rate, + filter_radius, + resample_sr, + rms_mix_rate, + protect, +): # spk_item, input_audio0, vc_transform0,f0_file,f0method0 + global tgt_sr, net_g, vc, hubert_model, version + if input_audio_path is None: + return "You need to upload an audio", None + f0_up_key = int(f0_up_key) + try: + audio = input_audio_path[1] / 32768.0 + if 
len(audio.shape) == 2: + audio = np.mean(audio, -1) + audio = librosa.resample(audio, orig_sr=input_audio_path[0], target_sr=16000) + audio_max = np.abs(audio).max() / 0.95 + if audio_max > 1: + audio /= audio_max + times = [0, 0, 0] + if hubert_model == None: + load_hubert() + if_f0 = cpt.get("f0", 1) + file_index = ( + ( + file_index.strip(" ") + .strip('"') + .strip("\n") + .strip('"') + .strip(" ") + .replace("trained", "added") + ) + if file_index != "" + else file_index2 + ) # 防止小白写错,自动帮他替换掉 + # file_big_npy = ( + # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + # ) + audio_opt = vc.pipeline( + hubert_model, + net_g, + sid, + audio, + input_audio_path, + times, + f0_up_key, + f0_method, + file_index, + # file_big_npy, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + version, + protect, + f0_file=f0_file, + ) + if resample_sr >= 16000 and tgt_sr != resample_sr: + tgt_sr = resample_sr + index_info = ( + "Using index:%s." % file_index + if os.path.exists(file_index) + else "Index not used." + ) + return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % ( + index_info, + times[0], + times[1], + times[2], + ), (tgt_sr, audio_opt) + except: + info = traceback.format_exc() + print(info) + return info, (None, None) + + +app = gr.Blocks() +with app: + with gr.Tabs(): + with gr.TabItem("在线demo"): + gr.Markdown( + value=""" + RVC 在线demo + """ + ) + sid = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names)) + with gr.Column(): + spk_item = gr.Slider( + minimum=0, + maximum=2333, + step=1, + label=i18n("请选择说话人id"), + value=0, + visible=False, + interactive=True, + ) + sid.change( + fn=get_vc, + inputs=[sid], + outputs=[spk_item], + ) + gr.Markdown( + value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. 
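vc_single above has to condition whatever Gradio delivers before the pipeline sees it. Pulled out for clarity, that preparation step looks like the sketch below; the /32768.0 int16 scaling, the mono mixdown, the 16 kHz resample, and the 0.95 peak headroom are all taken directly from the function above:

```python
# The input-conditioning step used by vc_single, isolated for clarity.
# Gradio supplies (sample_rate, int16 ndarray); the pipeline wants mono
# float audio at 16 kHz, peak-limited to roughly 0.95.
import numpy as np
import librosa

def prepare_input(sr_and_audio):
    sr, audio = sr_and_audio
    audio = audio / 32768.0                  # int16 -> float
    if audio.ndim == 2:
        audio = np.mean(audio, axis=-1)      # stereo -> mono
    audio = librosa.resample(audio, orig_sr=sr, target_sr=16000)
    peak = np.abs(audio).max() / 0.95
    if peak > 1:
        audio /= peak
    return audio
```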
") + ) + vc_input3 = gr.Audio(label="上传音频(长度小于90秒)") + vc_transform0 = gr.Number(label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0) + f0method0 = gr.Radio( + label=i18n("选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"), + choices=["pm", "harvest", "crepe", "rmvpe"], + value="pm", + interactive=True, + ) + filter_radius0 = gr.Slider( + minimum=0, + maximum=7, + label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"), + value=3, + step=1, + interactive=True, + ) + with gr.Column(): + file_index1 = gr.Textbox( + label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"), + value="", + interactive=False, + visible=False, + ) + file_index2 = gr.Dropdown( + label=i18n("自动检测index路径,下拉式选择(dropdown)"), + choices=sorted(index_paths), + interactive=True, + ) + index_rate1 = gr.Slider( + minimum=0, + maximum=1, + label=i18n("检索特征占比"), + value=0.88, + interactive=True, + ) + resample_sr0 = gr.Slider( + minimum=0, + maximum=48000, + label=i18n("后处理重采样至最终采样率,0为不进行重采样"), + value=0, + step=1, + interactive=True, + ) + rms_mix_rate0 = gr.Slider( + minimum=0, + maximum=1, + label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), + value=1, + interactive=True, + ) + protect0 = gr.Slider( + minimum=0, + maximum=0.5, + label=i18n("保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"), + value=0.33, + step=0.01, + interactive=True, + ) + f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调")) + but0 = gr.Button(i18n("转换"), variant="primary") + vc_output1 = gr.Textbox(label=i18n("输出信息")) + vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)")) + but0.click( + vc_single, + [ + spk_item, + vc_input3, + vc_transform0, + f0_file, + f0method0, + file_index1, + file_index2, + # file_big_npy1, + index_rate1, + filter_radius0, + resample_sr0, + rms_mix_rate0, + protect0, + ], + [vc_output1, vc_output2], + ) + + +app.launch() diff --git a/AIMeiSheng/attentions_in_dec.py b/AIMeiSheng/attentions_in_dec.py new file mode 100644 index 0000000..c785b6d --- /dev/null +++ b/AIMeiSheng/attentions_in_dec.py @@ -0,0 +1,424 @@ +import copy +import math +import numpy as np +import torch +from torch import nn +from torch.nn import functional as F + +from lib.infer_pack import commons +from lib.infer_pack import modules +from lib.infer_pack.modules import LayerNorm,AdaIN1d,AdaIN2d + + +class Encoder(nn.Module): + def __init__( + self, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size=1, + p_dropout=0.0, + window_size=10, + **kwargs + ): + super().__init__() + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.window_size = window_size + + self.drop = nn.Dropout(p_dropout) + self.attn_layers = nn.ModuleList() + self.norm_layers_1 = nn.ModuleList() + self.ffn_layers = nn.ModuleList() + self.norm_layers_2 = nn.ModuleList() + for i in range(self.n_layers): + self.attn_layers.append( + MultiHeadAttention( + hidden_channels, + hidden_channels, + n_heads, + p_dropout=p_dropout, + window_size=window_size, + ) + ) + #self.norm_layers_1.append(LayerNorm(hidden_channels)) + #self.norm_layers_1.append(AdaIN1d(hidden_channels,256)) #fang add + self.norm_layers_1.append(AdaIN1d(256,192))#fang add + #print("xxxhidden_channels:",hidden_channels) + #print("xxxfilter_channels:",filter_channels) + self.ffn_layers.append( + FFN( + hidden_channels, + hidden_channels, + filter_channels, + kernel_size, + p_dropout=p_dropout, + ) + ) + self.norm_layers_2.append(LayerNorm(hidden_channels)) + + def 
forward(self, x, x_mask,g):#fang add + attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1) + x = x * x_mask + for i in range(self.n_layers): + y = self.attn_layers[i](x, x, attn_mask) + y = self.drop(y) + #print("@@@ x:",x.shape) #fang add + #x = self.norm_layers_1[i](x + y) + #print("@@g:",g.shape) + x = self.norm_layers_1[i](x + y,torch.squeeze(g,dim=-1))#fang add + #print("@@@norm x:",x.shape)#fang add + y = self.ffn_layers[i](x, x_mask) + y = self.drop(y) + x = self.norm_layers_2[i](x + y) + x = x * x_mask + return x + + +class Decoder(nn.Module): + def __init__( + self, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size=1, + p_dropout=0.0, + proximal_bias=False, + proximal_init=True, + **kwargs + ): + super().__init__() + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.proximal_bias = proximal_bias + self.proximal_init = proximal_init + + self.drop = nn.Dropout(p_dropout) + self.self_attn_layers = nn.ModuleList() + self.norm_layers_0 = nn.ModuleList() + self.encdec_attn_layers = nn.ModuleList() + self.norm_layers_1 = nn.ModuleList() + self.ffn_layers = nn.ModuleList() + self.norm_layers_2 = nn.ModuleList() + for i in range(self.n_layers): + self.self_attn_layers.append( + MultiHeadAttention( + hidden_channels, + hidden_channels, + n_heads, + p_dropout=p_dropout, + proximal_bias=proximal_bias, + proximal_init=proximal_init, + ) + ) + self.norm_layers_0.append(LayerNorm(hidden_channels)) + self.encdec_attn_layers.append( + MultiHeadAttention( + hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout + ) + ) + self.norm_layers_1.append(LayerNorm(hidden_channels)) + self.ffn_layers.append( + FFN( + hidden_channels, + hidden_channels, + filter_channels, + kernel_size, + p_dropout=p_dropout, + causal=True, + ) + ) + self.norm_layers_2.append(LayerNorm(hidden_channels)) + + def forward(self, x, x_mask, h, h_mask): + """ + x: decoder input + h: encoder output + """ + self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to( + device=x.device, dtype=x.dtype + ) + encdec_attn_mask = h_mask.unsqueeze(2) * x_mask.unsqueeze(-1) + x = x * x_mask + for i in range(self.n_layers): + y = self.self_attn_layers[i](x, x, self_attn_mask) + y = self.drop(y) + x = self.norm_layers_0[i](x + y) + + y = self.encdec_attn_layers[i](x, h, encdec_attn_mask) + y = self.drop(y) + x = self.norm_layers_1[i](x + y) + + y = self.ffn_layers[i](x, x_mask) + y = self.drop(y) + x = self.norm_layers_2[i](x + y) + x = x * x_mask + return x + + +class MultiHeadAttention(nn.Module): + def __init__( + self, + channels, + out_channels, + n_heads, + p_dropout=0.0, + window_size=None, + heads_share=True, + block_length=None, + proximal_bias=False, + proximal_init=False, + ): + super().__init__() + assert channels % n_heads == 0 + + self.channels = channels + self.out_channels = out_channels + self.n_heads = n_heads + self.p_dropout = p_dropout + self.window_size = window_size + self.heads_share = heads_share + self.block_length = block_length + self.proximal_bias = proximal_bias + self.proximal_init = proximal_init + self.attn = None + + self.k_channels = channels // n_heads + self.conv_q = nn.Conv1d(channels, channels, 1) + self.conv_k = nn.Conv1d(channels, channels, 1) + self.conv_v = nn.Conv1d(channels, channels, 1) + self.conv_o = nn.Conv1d(channels, out_channels, 1) + self.drop = nn.Dropout(p_dropout) + + if window_size is not None: + 
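The edited Encoder above swaps the post-attention LayerNorm for AdaIN1d(256, 192), so each attention residual is renormalised under the 256-dim speaker embedding g that forward now takes. The repo's AdaIN1d lives in lib.infer_pack.modules and is not shown in this diff; a common formulation (assumed here, in the StarGAN-VC2/StyleGAN style) is:

```python
# A common AdaIN1d formulation, shown for orientation only; the actual
# lib.infer_pack.modules.AdaIN1d may differ in detail. A style vector s
# (here the speaker embedding) predicts per-channel scale and shift
# applied to instance-normalised content features.
import torch
from torch import nn

class AdaIN1d(nn.Module):
    def __init__(self, style_dim, num_features):
        super().__init__()
        self.norm = nn.InstanceNorm1d(num_features, affine=False)
        self.fc = nn.Linear(style_dim, num_features * 2)

    def forward(self, x, s):                # x: [B, C, T], s: [B, style_dim]
        h = self.fc(s).unsqueeze(-1)        # [B, 2C, 1]
        gamma, beta = torch.chunk(h, 2, dim=1)
        return (1 + gamma) * self.norm(x) + beta
```

With AdaIN1d(256, 192), x is the 192-channel hidden sequence and s the squeezed speaker embedding, matching the `self.norm_layers_1[i](x + y, torch.squeeze(g, dim=-1))` call above.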
n_heads_rel = 1 if heads_share else n_heads + rel_stddev = self.k_channels**-0.5 + self.emb_rel_k = nn.Parameter( + torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) + * rel_stddev + ) + self.emb_rel_v = nn.Parameter( + torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) + * rel_stddev + ) + + nn.init.xavier_uniform_(self.conv_q.weight) + nn.init.xavier_uniform_(self.conv_k.weight) + nn.init.xavier_uniform_(self.conv_v.weight) + if proximal_init: + with torch.no_grad(): + self.conv_k.weight.copy_(self.conv_q.weight) + self.conv_k.bias.copy_(self.conv_q.bias) + + def forward(self, x, c, attn_mask=None): + q = self.conv_q(x) + k = self.conv_k(c) + v = self.conv_v(c) + + x, self.attn = self.attention(q, k, v, mask=attn_mask) + + x = self.conv_o(x) + return x + + def attention(self, query, key, value, mask=None): + # reshape [b, d, t] -> [b, n_h, t, d_k] + b, d, t_s, t_t = (*key.size(), query.size(2)) + query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3) + key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) + value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) + + scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1)) + if self.window_size is not None: + assert ( + t_s == t_t + ), "Relative attention is only available for self-attention." + key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s) + rel_logits = self._matmul_with_relative_keys( + query / math.sqrt(self.k_channels), key_relative_embeddings + ) + scores_local = self._relative_position_to_absolute_position(rel_logits) + scores = scores + scores_local + if self.proximal_bias: + assert t_s == t_t, "Proximal bias is only available for self-attention." + scores = scores + self._attention_bias_proximal(t_s).to( + device=scores.device, dtype=scores.dtype + ) + if mask is not None: + scores = scores.masked_fill(mask == 0, -1e4) + if self.block_length is not None: + assert ( + t_s == t_t + ), "Local attention is only available for self-attention." + block_mask = ( + torch.ones_like(scores) + .triu(-self.block_length) + .tril(self.block_length) + ) + scores = scores.masked_fill(block_mask == 0, -1e4) + p_attn = F.softmax(scores, dim=-1) # [b, n_h, t_t, t_s] + p_attn = self.drop(p_attn) + output = torch.matmul(p_attn, value) + if self.window_size is not None: + relative_weights = self._absolute_position_to_relative_position(p_attn) + value_relative_embeddings = self._get_relative_embeddings( + self.emb_rel_v, t_s + ) + output = output + self._matmul_with_relative_values( + relative_weights, value_relative_embeddings + ) + output = ( + output.transpose(2, 3).contiguous().view(b, d, t_t) + ) # [b, n_h, t_t, d_k] -> [b, d, t_t] + return output, p_attn + + def _matmul_with_relative_values(self, x, y): + """ + x: [b, h, l, m] + y: [h or 1, m, d] + ret: [b, h, l, d] + """ + ret = torch.matmul(x, y.unsqueeze(0)) + return ret + + def _matmul_with_relative_keys(self, x, y): + """ + x: [b, h, l, d] + y: [h or 1, m, d] + ret: [b, h, l, m] + """ + ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1)) + return ret + + def _get_relative_embeddings(self, relative_embeddings, length): + max_relative_position = 2 * self.window_size + 1 + # Pad first before slice to avoid using cond ops. 
+        pad_length = max(length - (self.window_size + 1), 0)
+        slice_start_position = max((self.window_size + 1) - length, 0)
+        slice_end_position = slice_start_position + 2 * length - 1
+        if pad_length > 0:
+            padded_relative_embeddings = F.pad(
+                relative_embeddings,
+                commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]),
+            )
+        else:
+            padded_relative_embeddings = relative_embeddings
+        used_relative_embeddings = padded_relative_embeddings[
+            :, slice_start_position:slice_end_position
+        ]
+        return used_relative_embeddings
+
+    def _relative_position_to_absolute_position(self, x):
+        """
+        x: [b, h, l, 2*l-1]
+        ret: [b, h, l, l]
+        """
+        batch, heads, length, _ = x.size()
+        # Concat columns of pad to shift from relative to absolute indexing.
+        x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]]))
+
+        # Concat extra elements so as to add up to shape (len+1, 2*len-1).
+        x_flat = x.view([batch, heads, length * 2 * length])
+        x_flat = F.pad(
+            x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [0, length - 1]])
+        )
+
+        # Reshape and slice out the padded elements.
+        x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[
+            :, :, :length, length - 1 :
+        ]
+        return x_final
+
+    def _absolute_position_to_relative_position(self, x):
+        """
+        x: [b, h, l, l]
+        ret: [b, h, l, 2*l-1]
+        """
+        batch, heads, length, _ = x.size()
+        # Pad along the column dimension.
+        x = F.pad(
+            x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]])
+        )
+        x_flat = x.view([batch, heads, length**2 + length * (length - 1)])
+        # Add zeros at the beginning that will skew the elements after the reshape.
+        x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
+        x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:]
+        return x_final
+
+    def _attention_bias_proximal(self, length):
+        """Bias for self-attention to encourage attention to close positions.
+        Args:
+            length: an integer scalar.
+ Returns: + a Tensor with shape [1, 1, length, length] + """ + r = torch.arange(length, dtype=torch.float32) + diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1) + return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0) + + +class FFN(nn.Module): + def __init__( + self, + in_channels, + out_channels, + filter_channels, + kernel_size, + p_dropout=0.0, + activation=None, + causal=False, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.filter_channels = filter_channels + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.activation = activation + self.causal = causal + + if causal: + self.padding = self._causal_padding + else: + self.padding = self._same_padding + + self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size) + self.conv_2 = nn.Conv1d(filter_channels, out_channels, kernel_size) + self.drop = nn.Dropout(p_dropout) + + def forward(self, x, x_mask): + x = self.conv_1(self.padding(x * x_mask)) + if self.activation == "gelu": + x = x * torch.sigmoid(1.702 * x) + else: + x = torch.relu(x) + x = self.drop(x) + x = self.conv_2(self.padding(x * x_mask)) + return x * x_mask + + def _causal_padding(self, x): + if self.kernel_size == 1: + return x + pad_l = self.kernel_size - 1 + pad_r = 0 + padding = [[0, 0], [0, 0], [pad_l, pad_r]] + x = F.pad(x, commons.convert_pad_shape(padding)) + return x + + def _same_padding(self, x): + if self.kernel_size == 1: + return x + pad_l = (self.kernel_size - 1) // 2 + pad_r = self.kernel_size // 2 + padding = [[0, 0], [0, 0], [pad_l, pad_r]] + x = F.pad(x, commons.convert_pad_shape(padding)) + return x diff --git a/AIMeiSheng/config.py b/AIMeiSheng/config.py new file mode 100644 index 0000000..8b7f1ff --- /dev/null +++ b/AIMeiSheng/config.py @@ -0,0 +1,141 @@ +import argparse +import sys +import torch +from multiprocessing import cpu_count + + +def use_fp32_config(): + for config_file in [ + "32k.json", + "40k.json", + "48k.json", + "48k_v2.json", + "32k_v2.json", + ]: + with open(f"configs/{config_file}", "r") as f: + strr = f.read().replace("true", "false") + with open(f"configs/{config_file}", "w") as f: + f.write(strr) + with open("trainset_preprocess_pipeline_print.py", "r") as f: + strr = f.read().replace("3.7", "3.0") + with open("trainset_preprocess_pipeline_print.py", "w") as f: + f.write(strr) + + +class Config: + def __init__(self): + self.device = "cuda:0" + self.is_half = True + self.n_cpu = 0 + self.gpu_name = None + self.gpu_mem = None + ( + self.python_cmd, + self.listen_port, + self.iscolab, + self.noparallel, + self.noautoopen, + ) = self.arg_parse() + self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() + + @staticmethod + def arg_parse() -> tuple: + exe = sys.executable or "python" + parser = argparse.ArgumentParser() + parser.add_argument("--port", type=int, default=7865, help="Listen port") + parser.add_argument("--pycmd", type=str, default=exe, help="Python command") + parser.add_argument("--colab", action="store_true", help="Launch in colab") + parser.add_argument( + "--noparallel", action="store_true", help="Disable parallel processing" + ) + parser.add_argument( + "--noautoopen", + action="store_true", + help="Do not open in browser automatically", + ) + cmd_opts = parser.parse_args() + + cmd_opts.port = cmd_opts.port if 0 <= cmd_opts.port <= 65535 else 7865 + + return ( + cmd_opts.pycmd, + cmd_opts.port, + cmd_opts.colab, + cmd_opts.noparallel, + cmd_opts.noautoopen, + ) + + # has_mps is only available in 
nightly PyTorch (for now) and macOS 12.3+.
+    # check `getattr` and try it for compatibility
+    @staticmethod
+    def has_mps() -> bool:
+        if not torch.backends.mps.is_available():
+            return False
+        try:
+            torch.zeros(1).to(torch.device("mps"))
+            return True
+        except Exception:
+            return False
+
+    def device_config(self) -> tuple:
+        if torch.cuda.is_available():
+            i_device = int(self.device.split(":")[-1])
+            self.gpu_name = torch.cuda.get_device_name(i_device)
+            if (
+                ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
+                or "P40" in self.gpu_name.upper()
+                or "1060" in self.gpu_name
+                or "1070" in self.gpu_name
+                or "1080" in self.gpu_name
+            ):
+                print("Found GPU", self.gpu_name, ", forcing fp32")
+                self.is_half = False
+                use_fp32_config()
+            else:
+                print("Found GPU", self.gpu_name)
+            self.gpu_mem = int(
+                torch.cuda.get_device_properties(i_device).total_memory
+                / 1024
+                / 1024
+                / 1024
+                + 0.4
+            )
+            if self.gpu_mem <= 4:
+                with open("trainset_preprocess_pipeline_print.py", "r") as f:
+                    strr = f.read().replace("3.7", "3.0")
+                with open("trainset_preprocess_pipeline_print.py", "w") as f:
+                    f.write(strr)
+        elif self.has_mps():
+            print("No supported Nvidia GPU found, using MPS instead")
+            self.device = "mps"
+            self.is_half = False
+            use_fp32_config()
+        else:
+            print("No supported Nvidia GPU found, using CPU instead")
+            self.device = "cpu"
+            self.is_half = False
+            use_fp32_config()
+
+        if self.n_cpu == 0:
+            self.n_cpu = cpu_count()
+
+        if self.is_half:
+            # 6GB VRAM configuration
+            x_pad = 3
+            x_query = 10
+            x_center = 60
+            x_max = 65
+        else:
+            # 5GB VRAM configuration
+            x_pad = 1
+            x_query = 6
+            x_center = 38
+            x_max = 41
+
+        if self.gpu_mem is not None and self.gpu_mem <= 4:
+            x_pad = 1
+            x_query = 5
+            x_center = 30
+            x_max = 32
+
+        return x_pad, x_query, x_center, x_max
diff --git a/AIMeiSheng/configs/32k.json b/AIMeiSheng/configs/32k.json
new file mode 100644
index 0000000..d5f16d6
--- /dev/null
+++ b/AIMeiSheng/configs/32k.json
@@ -0,0 +1,46 @@
+{
+  "train": {
+    "log_interval": 200,
+    "seed": 1234,
+    "epochs": 20000,
+    "learning_rate": 1e-4,
+    "betas": [0.8, 0.99],
+    "eps": 1e-9,
+    "batch_size": 4,
+    "fp16_run": true,
+    "lr_decay": 0.999875,
+    "segment_size": 12800,
+    "init_lr_ratio": 1,
+    "warmup_epochs": 0,
+    "c_mel": 45,
+    "c_kl": 1.0
+  },
+  "data": {
+    "max_wav_value": 32768.0,
+    "sampling_rate": 32000,
+    "filter_length": 1024,
+    "hop_length": 320,
+    "win_length": 1024,
+    "n_mel_channels": 80,
+    "mel_fmin": 0.0,
+    "mel_fmax": null
+  },
+  "model": {
+    "inter_channels": 192,
+    "hidden_channels": 192,
+    "filter_channels": 768,
+    "n_heads": 2,
+    "n_layers": 6,
+    "kernel_size": 3,
+    "p_dropout": 0,
+    "resblock": "1",
+    "resblock_kernel_sizes": [3,7,11],
+    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
+    "upsample_rates": [10,4,2,2,2],
+    "upsample_initial_channel": 512,
+    "upsample_kernel_sizes": [16,16,4,4,4],
+    "use_spectral_norm": false,
+    "gin_channels": 256,
+    "spk_embed_dim": 109
+  }
+}
diff --git a/AIMeiSheng/configs/32k_v2.json b/AIMeiSheng/configs/32k_v2.json
new file mode 100644
index 0000000..70e534f
--- /dev/null
+++ b/AIMeiSheng/configs/32k_v2.json
@@ -0,0 +1,46 @@
+{
+  "train": {
+    "log_interval": 200,
+    "seed": 1234,
+    "epochs": 20000,
+    "learning_rate": 1e-4,
+    "betas": [0.8, 0.99],
+    "eps": 1e-9,
+    "batch_size": 4,
+    "fp16_run": true,
+    "lr_decay": 0.999875,
+    "segment_size": 12800,
+    "init_lr_ratio": 1,
+    "warmup_epochs": 0,
+    "c_mel": 45,
+    "c_kl": 1.0
+  },
+  "data": {
+    "max_wav_value": 32768.0,
+    "sampling_rate": 32000,
+    "filter_length": 1024,
+    "hop_length": 320,
+    "win_length": 1024,
"n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,8,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [20,16,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/AIMeiSheng/configs/40k.json b/AIMeiSheng/configs/40k.json new file mode 100644 index 0000000..4ffc87b --- /dev/null +++ b/AIMeiSheng/configs/40k.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 12800, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 40000, + "filter_length": 2048, + "hop_length": 400, + "win_length": 2048, + "n_mel_channels": 125, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,10,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16,16,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/AIMeiSheng/configs/48k.json b/AIMeiSheng/configs/48k.json new file mode 100644 index 0000000..2d0e05b --- /dev/null +++ b/AIMeiSheng/configs/48k.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 11520, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 48000, + "filter_length": 2048, + "hop_length": 480, + "win_length": 2048, + "n_mel_channels": 128, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [10,6,2,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [16,16,4,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/AIMeiSheng/configs/48k_v2.json b/AIMeiSheng/configs/48k_v2.json new file mode 100644 index 0000000..75f770c --- /dev/null +++ b/AIMeiSheng/configs/48k_v2.json @@ -0,0 +1,46 @@ +{ + "train": { + "log_interval": 200, + "seed": 1234, + "epochs": 20000, + "learning_rate": 1e-4, + "betas": [0.8, 0.99], + "eps": 1e-9, + "batch_size": 4, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 17280, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "max_wav_value": 32768.0, + "sampling_rate": 48000, + "filter_length": 2048, + "hop_length": 480, + "win_length": 2048, + "n_mel_channels": 128, + "mel_fmin": 0.0, + "mel_fmax": null + }, + "model": { + 
"inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0, + "resblock": "1", + "resblock_kernel_sizes": [3,7,11], + "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]], + "upsample_rates": [12,10,2,2], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [24,20,4,4], + "use_spectral_norm": false, + "gin_channels": 256, + "spk_embed_dim": 109 + } +} diff --git a/AIMeiSheng/create_filelist_print.py b/AIMeiSheng/create_filelist_print.py new file mode 100644 index 0000000..5edd6f4 --- /dev/null +++ b/AIMeiSheng/create_filelist_print.py @@ -0,0 +1,88 @@ +import os, sys +from random import shuffle + +# Example: +# python3 create_filelist_print.py mi-test v2 True 40k 0 + +exp_dir1 = sys.argv[1] if len(sys.argv) > 1 else "mi-test" +version19 = sys.argv[2] if len(sys.argv) > 1 else "v2" +if_f0_3 = sys.argv[3] == "True" if len(sys.argv) > 1 else True # 模型是否带音高指导(唱歌一定要, 语音可以不要) +sr2 = sys.argv[4] if len(sys.argv) > 1 else "40k" +spk_id5 = int(sys.argv[5]) if len(sys.argv) > 1 else 0 + +#print("exp_dir1=%s, version19=%s, if_f0_3=%s, sr2=%s, spk_id5=%s" % (exp_dir1, version19,if_f0_3,sr2,spk_id5) ) + +#version19 = "v2" +now_dir = os.getcwd() +#exp_dir1 = "mi-test" +#if_f0_3 = True +#sr2 = "40k" +#spk_id5 = 0 + +# 生成filelist +exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) +os.makedirs(exp_dir, exist_ok=True) +gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir) +feature_dir = ( + "%s/3_feature256" % (exp_dir) + if version19 == "v1" + else "%s/3_feature768" % (exp_dir) +) +if if_f0_3: + f0_dir = "%s/2a_f0" % (exp_dir) + f0nsf_dir = "%s/2b-f0nsf" % (exp_dir) + names = ( + set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) + & set([name.split(".")[0] for name in os.listdir(feature_dir)]) + & set([name.split(".")[0] for name in os.listdir(f0_dir)]) + & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) + ) +else: + names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( + [name.split(".")[0] for name in os.listdir(feature_dir)] + ) +opt = [] +for name in names: + if if_f0_3: + opt.append( + "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s" + % ( + gt_wavs_dir.replace("\\", "\\\\"), + name, + feature_dir.replace("\\", "\\\\"), + name, + f0_dir.replace("\\", "\\\\"), + name, + f0nsf_dir.replace("\\", "\\\\"), + name, + spk_id5, + ) + ) + else: + opt.append( + "%s/%s.wav|%s/%s.npy|%s" + % ( + gt_wavs_dir.replace("\\", "\\\\"), + name, + feature_dir.replace("\\", "\\\\"), + name, + spk_id5, + ) + ) +fea_dim = 256 if version19 == "v1" else 768 +if if_f0_3: + for _ in range(2): + opt.append( + "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" + % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5) + ) +else: + for _ in range(2): + opt.append( + "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" + % (now_dir, sr2, now_dir, fea_dim, spk_id5) + ) +shuffle(opt) +with open("%s/filelist.txt" % exp_dir, "w") as f: + f.write("\n".join(opt)) +print("write filelist done") \ No newline at end of file diff --git a/AIMeiSheng/data_utils_embed_random075.py b/AIMeiSheng/data_utils_embed_random075.py new file mode 100644 index 0000000..b1fb65d --- /dev/null +++ b/AIMeiSheng/data_utils_embed_random075.py @@ -0,0 +1,629 @@ +import os, traceback,random +import numpy as np +import torch +import torch.utils.data + +from lib.train.mel_processing import spectrogram_torch +from 
+
+def l2_norm(s1, s2):
+    norm = np.sum(s1 * s2, -1, keepdims=True)
+    return norm
+
+def cos_distance(s1, s2, eps=1e-8):
+    # Note: despite the name, this returns the cosine *similarity*
+    # (1.0 means identical direction).
+    s1_s2_norm = l2_norm(s1, s2)
+    s2_s2_norm = l2_norm(s2, s2)
+    s1_s1_norm = l2_norm(s1, s1)
+    #print('s1_s1_norm: ', s1_s1_norm)
+    #print('s1_s2_norm: ', s1_s2_norm)
+    #print('s2_s2_norm: ', s2_s2_norm)
+    loss = s1_s2_norm / (np.sqrt(s2_s2_norm * s1_s1_norm) + eps)
+
+    return loss
+
+def load_and_cal_distance(npy_name1, npy_name2):
+    spk1_embead = np.load(npy_name1)
+    spk2_embead = np.load(npy_name2)
+    loss = cos_distance(spk1_embead, spk2_embead)
+    #print("file:", os.path.basename(npy_name1), os.path.basename(npy_name2), " cos distance:", loss)
+    return loss
+
+class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
+    """
+    1) loads audio, text pairs
+    2) normalizes text and converts them to sequences of integers
+    3) computes spectrograms from audio files.
+    """
+
+    def __init__(self, audiopaths_and_text, hparams):
+        self.audiopaths_and_text = load_filepaths_and_text(audiopaths_and_text)
+        self.max_wav_value = hparams.max_wav_value
+        self.sampling_rate = hparams.sampling_rate
+        self.filter_length = hparams.filter_length
+        self.hop_length = hparams.hop_length
+        self.win_length = hparams.win_length
+        self.min_text_len = getattr(hparams, "min_text_len", 1)
+        self.max_text_len = getattr(hparams, "max_text_len", 5000)
+        self._filter()
+
+    def _filter(self):
+        """
+        Filter text & store spec lengths
+        """
+        # Store spectrogram lengths for bucketing
+        # wav_length ~= file_size / (wav_channels * bytes per dim) = file_size / (1 * 2)
+        # spec_length = wav_length // hop_length
+        audiopaths_and_text_new = []
+        lengths = []
+        for item_tmp in self.audiopaths_and_text:
+            if len(item_tmp) != 5:
+                print("unexpected filelist entry of length", len(item_tmp))
+                print(item_tmp)
+            audiopath, text, pitch, pitchf, dv = item_tmp
+            if self.min_text_len <= len(text) and len(text) <= self.max_text_len:
+                audiopaths_and_text_new.append([audiopath, text, pitch, pitchf, dv])
+                lengths.append(os.path.getsize(audiopath) // (3 * self.hop_length))
+        self.audiopaths_and_text = audiopaths_and_text_new
+        self.lengths = lengths
+
+    def get_sid(self, sid):
+        # sid here is a path to a saved speaker-embedding .npy file, not an integer id
+        sid_embed = np.load(sid, allow_pickle=True)
+        sid_embed = torch.FloatTensor(sid_embed)
+        return sid_embed
+
+    def get_audio_text_pair(self, audiopath_and_text):
+        # separate filename and text
+        file = audiopath_and_text[0]
+        phone = audiopath_and_text[1]
+        pitch = audiopath_and_text[2]
+        pitchf = audiopath_and_text[3]
+        #dv = audiopath_and_text[4]
+        basename_tmp = os.path.basename(file)
+        dir_parent = os.path.dirname(os.path.dirname(file))
+        embed_dir = os.path.join(dir_parent, '4_embed256')
+
+        if 0:  # fang add: original behavior, always use the utterance's own embedding
+            dv = os.path.join(embed_dir, basename_tmp)[:-4] + '.npy'
+            dv = self.get_sid(dv)
+        else:
+            ## Method 1: use a random embedding from the pool directly
+            embedfile = os.listdir(embed_dir)
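+            # Pick one embedding uniformly at random from the pool.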
+            basename_tmp_rnd = embedfile[random.randint(0, len(embedfile) - 1)]
+            dv = os.path.join(embed_dir, basename_tmp_rnd)
+            ## Method 2: keep the random embedding only when it is similar enough to the target
+            dv_target = os.path.join(embed_dir, basename_tmp)[:-4] + '.npy'
+            loss = load_and_cal_distance(dv_target, dv)
+            if loss < 0.85:  # fall back to the target embedding when cosine similarity is below 0.85
+                dv = dv_target
+            dv = self.get_sid(dv)
+            ## Method 3 (disabled): 0.1 perturbation
+            '''
+            dv_target = os.path.join(embed_dir, basename_tmp)[:-4] + '.npy'
+            dv_target = self.get_sid(dv_target)
+            #loss = load_and_cal_distance(dv_target,dv)
+            dv = self.get_sid(dv)
+            dv = dv_target*0.9 + dv*0.1
+            #'''
+
+        phone, pitch, pitchf = self.get_labels(phone, pitch, pitchf)
+        spec, wav = self.get_audio(file)
+
+        len_phone = phone.size()[0]
+        len_spec = spec.size()[-1]
+        if len_phone != len_spec:
+            len_min = min(len_phone, len_spec)
+            len_wav = len_min * self.hop_length
+
+            spec = spec[:, :len_min]
+            wav = wav[:, :len_wav]
+
+            phone = phone[:len_min, :]
+            pitch = pitch[:len_min]
+            pitchf = pitchf[:len_min]
+
+        return (spec, wav, phone, pitch, pitchf, dv)
+
+    def get_labels(self, phone, pitch, pitchf):
+        phone = np.load(phone)
+        phone = np.repeat(phone, 2, axis=0)
+        pitch = np.load(pitch)
+        pitchf = np.load(pitchf)
+        n_num = min(phone.shape[0], 900)  # DistributedBucketSampler
+        phone = phone[:n_num, :]
+        pitch = pitch[:n_num]
+        pitchf = pitchf[:n_num]
+        phone = torch.FloatTensor(phone)
+        pitch = torch.LongTensor(pitch)
+        pitchf = torch.FloatTensor(pitchf)
+        return phone, pitch, pitchf
+
+    def get_audio(self, filename):
+        audio, sampling_rate = load_wav_to_torch(filename)
+        if sampling_rate != self.sampling_rate:
+            raise ValueError(
+                "{} SR doesn't match target {} SR".format(
+                    sampling_rate, self.sampling_rate
+                )
+            )
+        audio_norm = audio
+        # audio_norm = audio / self.max_wav_value
+        # audio_norm = audio / np.abs(audio).max()
+
+        audio_norm = audio_norm.unsqueeze(0)
+        spec_filename = filename.replace(".wav", ".spec.pt")
+        if os.path.exists(spec_filename):
+            try:
+                spec = torch.load(spec_filename)
+            except Exception:
+                print(spec_filename, traceback.format_exc())
+                spec = spectrogram_torch(
+                    audio_norm,
+                    self.filter_length,
+                    self.sampling_rate,
+                    self.hop_length,
+                    self.win_length,
+                    center=False,
+                )
+                spec = torch.squeeze(spec, 0)
+                torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
+        else:
+            spec = spectrogram_torch(
+                audio_norm,
+                self.filter_length,
+                self.sampling_rate,
+                self.hop_length,
+                self.win_length,
+                center=False,
+            )
+            spec = torch.squeeze(spec, 0)
+            torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
+        return spec, audio_norm
+
+    def __getitem__(self, index):
+        return self.get_audio_text_pair(self.audiopaths_and_text[index])
+
+    def __len__(self):
+        return len(self.audiopaths_and_text)
+
+
+class TextAudioCollateMultiNSFsid:
+    """Zero-pads model inputs and targets"""
+
+    def __init__(self, return_ids=False):
+        self.return_ids = return_ids
+
+    def __call__(self, batch):
+        """Collates the training batch from normalized text and audio
+        PARAMS
+        ------
+        batch: [text_normalized, spec_normalized, wav_normalized]
+        """
+        # Right zero-pad all one-hot text sequences to max input length
+        _, ids_sorted_decreasing = torch.sort(
+            torch.LongTensor([x[0].size(1) for x in batch]), dim=0, descending=True
+        )
+
+        max_spec_len = max([x[0].size(1) for x in batch])
+        max_wave_len = max([x[1].size(1) for x in batch])
+        spec_lengths = torch.LongTensor(len(batch))
+        wave_lengths =
torch.LongTensor(len(batch)) + spec_padded = torch.FloatTensor(len(batch), batch[0][0].size(0), max_spec_len) + wave_padded = torch.FloatTensor(len(batch), 1, max_wave_len) + spec_padded.zero_() + wave_padded.zero_() + + max_phone_len = max([x[2].size(0) for x in batch]) + phone_lengths = torch.LongTensor(len(batch)) + phone_padded = torch.FloatTensor( + len(batch), max_phone_len, batch[0][2].shape[1] + ) # (spec, wav, phone, pitch) + pitch_padded = torch.LongTensor(len(batch), max_phone_len) + pitchf_padded = torch.FloatTensor(len(batch), max_phone_len) + phone_padded.zero_() + pitch_padded.zero_() + pitchf_padded.zero_() + # dv = torch.FloatTensor(len(batch), 256)#gin=256 + #sid = torch.LongTensor(len(batch)) + sid = torch.FloatTensor(len(batch), 256) + + for i in range(len(ids_sorted_decreasing)): + row = batch[ids_sorted_decreasing[i]] + + spec = row[0] + spec_padded[i, :, : spec.size(1)] = spec + spec_lengths[i] = spec.size(1) + + wave = row[1] + wave_padded[i, :, : wave.size(1)] = wave + wave_lengths[i] = wave.size(1) + + phone = row[2] + phone_padded[i, : phone.size(0), :] = phone + phone_lengths[i] = phone.size(0) + + pitch = row[3] + pitch_padded[i, : pitch.size(0)] = pitch + pitchf = row[4] + #print("row[4]:",row[4].shape) + pitchf_padded[i, : pitchf.size(0)] = pitchf + #print("pitchf_padded:",pitchf_padded.shape) + + # dv[i] = row[5] + #print("row[5]:",row[5].shape) + #sid[i] = row[5] + sid_tmp = row[5] + sid[i, : sid_tmp.size(0)] = sid_tmp + #print("sid :", sid.shape) + + return ( + phone_padded, + phone_lengths, + pitch_padded, + pitchf_padded, + spec_padded, + spec_lengths, + wave_padded, + wave_lengths, + # dv + sid, + ) + + +class TextAudioLoader(torch.utils.data.Dataset): + """ + 1) loads audio, text pairs + 2) normalizes text and converts them to sequences of integers + 3) computes spectrograms from audio files. 
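+    4) substitutes a randomly drawn speaker embedding for the utterance's own
+       (see get_audio_text_pair below).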
+ """ + + def __init__(self, audiopaths_and_text, hparams): + self.audiopaths_and_text = load_filepaths_and_text(audiopaths_and_text) + #print("len@@: ", len(self.audiopaths_and_text)) + #print("len size@@: ", len(self.audiopaths_and_text[0])) + #print("len final size@@: ", len(self.audiopaths_and_text[-1])) + self.max_wav_value = hparams.max_wav_value + self.sampling_rate = hparams.sampling_rate + self.filter_length = hparams.filter_length + self.hop_length = hparams.hop_length + self.win_length = hparams.win_length + self.sampling_rate = hparams.sampling_rate + self.min_text_len = getattr(hparams, "min_text_len", 1) + self.max_text_len = getattr(hparams, "max_text_len", 5000) + self._filter() + + def _filter(self): + """ + Filter text & store spec lengths + """ + # Store spectrogram lengths for Bucketing + # wav_length ~= file_size / (wav_channels * Bytes per dim) = file_size / (1 * 2) + # spec_length = wav_length // hop_length + audiopaths_and_text_new = [] + lengths = [] + #print("self.audiopaths_and_text: ",self.audiopaths_and_text) + #for audiopath, text, dv in self.audiopaths_and_text: ##org + for audiopath, text, pitch, pitchf,dv in self.audiopaths_and_text: ##fang + if self.min_text_len <= len(text) and len(text) <= self.max_text_len: + audiopaths_and_text_new.append([audiopath, text, dv]) + lengths.append(os.path.getsize(audiopath) // (3 * self.hop_length)) + self.audiopaths_and_text = audiopaths_and_text_new + self.lengths = lengths + + def get_sid(self, sid): + #sid = torch.LongTensor([int(sid)]) + sid_embed = np.load(sid,allow_pickle=True) + #phone = np.repeat(phone, 2, axis=0) + sid_embed = torch.FloatTensor(sid_embed) + return sid_embed + + + def get_audio_text_pair(self, audiopath_and_text): + # separate filename and text + file = audiopath_and_text[0] + phone = audiopath_and_text[1] + #dv = audiopath_and_text[2] + #print("file:",file) + basename_tmp = os.path.basename(file) + dir_parent = os.path.dirname(os.path.dirname(file)) + embed_dir = os.path.join(dir_parent, '4_embed256') + if 0:#fang add + dv = os.path.join(embed_dir, basename_tmp)[:-4] + '.npy' + dv = self.get_sid(dv) + else: + ##方法一,直接使用 + embedfile= os.listdir(embed_dir) + basename_tmp_rnd = embedfile[random.randint(0, len(embedfile)-1)] + dv = os.path.join(embed_dir, basename_tmp_rnd) + #dv = self.get_sid(dv) + ##方法二. 相似度大于0.75的 + dv_target = os.path.join(embed_dir, basename_tmp)[:-4] + '.npy' + loss = load_and_cal_distance(dv_target,dv) + if(loss < 0.75 ):#相似度大于0.75 + dv = dv_target + dv = self.get_sid(dv) + ##方法三. 
+            '''
+            dv_target = os.path.join(embed_dir, basename_tmp)[:-4] + '.npy'
+            dv_target = self.get_sid(dv_target)
+            #loss = load_and_cal_distance(dv_target,dv)
+            dv = self.get_sid(dv)
+            dv = dv_target*0.9 + dv*0.1
+            #'''
+
+        phone = self.get_labels(phone)
+        spec, wav = self.get_audio(file)
+
+        len_phone = phone.size()[0]
+        len_spec = spec.size()[-1]
+        if len_phone != len_spec:
+            len_min = min(len_phone, len_spec)
+            len_wav = len_min * self.hop_length
+            spec = spec[:, :len_min]
+            wav = wav[:, :len_wav]
+            phone = phone[:len_min, :]
+        return (spec, wav, phone, dv)
+
+    def get_labels(self, phone):
+        phone = np.load(phone)
+        phone = np.repeat(phone, 2, axis=0)
+        n_num = min(phone.shape[0], 900)  # DistributedBucketSampler
+        phone = phone[:n_num, :]
+        phone = torch.FloatTensor(phone)
+        return phone
+
+    def get_audio(self, filename):
+        audio, sampling_rate = load_wav_to_torch(filename)
+        if sampling_rate != self.sampling_rate:
+            raise ValueError(
+                "{} SR doesn't match target {} SR".format(
+                    sampling_rate, self.sampling_rate
+                )
+            )
+        audio_norm = audio
+        # audio_norm = audio / self.max_wav_value
+        # audio_norm = audio / np.abs(audio).max()
+
+        audio_norm = audio_norm.unsqueeze(0)
+        spec_filename = filename.replace(".wav", ".spec.pt")
+        if os.path.exists(spec_filename):
+            try:
+                spec = torch.load(spec_filename)
+            except Exception:
+                print(spec_filename, traceback.format_exc())
+                spec = spectrogram_torch(
+                    audio_norm,
+                    self.filter_length,
+                    self.sampling_rate,
+                    self.hop_length,
+                    self.win_length,
+                    center=False,
+                )
+                spec = torch.squeeze(spec, 0)
+                torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
+        else:
+            spec = spectrogram_torch(
+                audio_norm,
+                self.filter_length,
+                self.sampling_rate,
+                self.hop_length,
+                self.win_length,
+                center=False,
+            )
+            spec = torch.squeeze(spec, 0)
+            torch.save(spec, spec_filename, _use_new_zipfile_serialization=False)
+        return spec, audio_norm
+
+    def __getitem__(self, index):
+        return self.get_audio_text_pair(self.audiopaths_and_text[index])
+
+    def __len__(self):
+        return len(self.audiopaths_and_text)
+
+
+class TextAudioCollate:
+    """Zero-pads model inputs and targets"""
+
+    def __init__(self, return_ids=False):
+        self.return_ids = return_ids
+
+    def __call__(self, batch):
+        """Collates the training batch from normalized text and audio
+        PARAMS
+        ------
+        batch: [text_normalized, spec_normalized, wav_normalized]
+        """
+        # Right zero-pad all one-hot text sequences to max input length
+        _, ids_sorted_decreasing = torch.sort(
+            torch.LongTensor([x[0].size(1) for x in batch]), dim=0, descending=True
+        )
+
+        max_spec_len = max([x[0].size(1) for x in batch])
+        max_wave_len = max([x[1].size(1) for x in batch])
+        spec_lengths = torch.LongTensor(len(batch))
+        wave_lengths = torch.LongTensor(len(batch))
+        spec_padded = torch.FloatTensor(len(batch), batch[0][0].size(0), max_spec_len)
+        wave_padded = torch.FloatTensor(len(batch), 1, max_wave_len)
+        spec_padded.zero_()
+        wave_padded.zero_()
+
+        max_phone_len = max([x[2].size(0) for x in batch])
+        phone_lengths = torch.LongTensor(len(batch))
+        phone_padded = torch.FloatTensor(
+            len(batch), max_phone_len, batch[0][2].shape[1]
+        )
+        phone_padded.zero_()
+        #sid = torch.LongTensor(len(batch))
+        sid = torch.FloatTensor(len(batch), 256)
+        for i in range(len(ids_sorted_decreasing)):
+            row = batch[ids_sorted_decreasing[i]]
+
+            spec = row[0]
+            spec_padded[i, :, : spec.size(1)] = spec
+            spec_lengths[i] = spec.size(1)
+
+            wave = row[1]
+            wave_padded[i, :, : wave.size(1)] = wave
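+            # Record the true (unpadded) waveform length so downstream code can
+            # mask out the zero-padding.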
wave_lengths[i] = wave.size(1) + + phone = row[2] + phone_padded[i, : phone.size(0), :] = phone + phone_lengths[i] = phone.size(0) + #print("phone_padded:",phone_padded.shape) + + #sid[i] = row[3] + #print("row[3]:",row[3].shape) + sid_tmp = row[3] + sid[i, : sid_tmp.size(0)] = sid_tmp + + return ( + phone_padded, + phone_lengths, + spec_padded, + spec_lengths, + wave_padded, + wave_lengths, + sid, + ) + + +class DistributedBucketSampler(torch.utils.data.distributed.DistributedSampler): + """ + Maintain similar input lengths in a batch. + Length groups are specified by boundaries. + Ex) boundaries = [b1, b2, b3] -> any batch is included either {x | b1 < length(x) <=b2} or {x | b2 < length(x) <= b3}. + + It removes samples which are not included in the boundaries. + Ex) boundaries = [b1, b2, b3] -> any x s.t. length(x) <= b1 or length(x) > b3 are discarded. + """ + + def __init__( + self, + dataset, + batch_size, + boundaries, + num_replicas=None, + rank=None, + shuffle=True, + ): + super().__init__(dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle) + self.lengths = dataset.lengths + self.batch_size = batch_size + self.boundaries = boundaries + + self.buckets, self.num_samples_per_bucket = self._create_buckets() + self.total_size = sum(self.num_samples_per_bucket) + self.num_samples = self.total_size // self.num_replicas + + def _create_buckets(self): + buckets = [[] for _ in range(len(self.boundaries) - 1)] + for i in range(len(self.lengths)): + length = self.lengths[i] + idx_bucket = self._bisect(length) + if idx_bucket != -1: + buckets[idx_bucket].append(i) + + for i in range(len(buckets) - 1, -1, -1): # + if len(buckets[i]) == 0: + buckets.pop(i) + self.boundaries.pop(i + 1) + + num_samples_per_bucket = [] + for i in range(len(buckets)): + len_bucket = len(buckets[i]) + total_batch_size = self.num_replicas * self.batch_size + rem = ( + total_batch_size - (len_bucket % total_batch_size) + ) % total_batch_size + num_samples_per_bucket.append(len_bucket + rem) + return buckets, num_samples_per_bucket + + def __iter__(self): + # deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.epoch) + + indices = [] + if self.shuffle: + for bucket in self.buckets: + indices.append(torch.randperm(len(bucket), generator=g).tolist()) + else: + for bucket in self.buckets: + indices.append(list(range(len(bucket)))) + + batches = [] + for i in range(len(self.buckets)): + bucket = self.buckets[i] + len_bucket = len(bucket) + ids_bucket = indices[i] + num_samples_bucket = self.num_samples_per_bucket[i] + + # add extra samples to make it evenly divisible + rem = num_samples_bucket - len_bucket + ids_bucket = ( + ids_bucket + + ids_bucket * (rem // len_bucket) + + ids_bucket[: (rem % len_bucket)] + ) + + # subsample + ids_bucket = ids_bucket[self.rank :: self.num_replicas] + + # batching + for j in range(len(ids_bucket) // self.batch_size): + batch = [ + bucket[idx] + for idx in ids_bucket[ + j * self.batch_size : (j + 1) * self.batch_size + ] + ] + batches.append(batch) + + if self.shuffle: + batch_ids = torch.randperm(len(batches), generator=g).tolist() + batches = [batches[i] for i in batch_ids] + self.batches = batches + + assert len(self.batches) * self.batch_size == self.num_samples + return iter(self.batches) + + def _bisect(self, x, lo=0, hi=None): + if hi is None: + hi = len(self.boundaries) - 1 + + if hi > lo: + mid = (hi + lo) // 2 + if self.boundaries[mid] < x and x <= self.boundaries[mid + 1]: + return mid + elif x <= self.boundaries[mid]: + return 
self._bisect(x, lo, mid) + else: + return self._bisect(x, mid + 1, hi) + else: + return -1 + + def __len__(self): + return self.num_samples // self.batch_size diff --git a/AIMeiSheng/docker_demo/.requirements.txt.swp b/AIMeiSheng/docker_demo/.requirements.txt.swp new file mode 100644 index 0000000..1adaec3 Binary files /dev/null and b/AIMeiSheng/docker_demo/.requirements.txt.swp differ diff --git a/AIMeiSheng/docker_demo/Dockerfile b/AIMeiSheng/docker_demo/Dockerfile new file mode 100644 index 0000000..84159ab --- /dev/null +++ b/AIMeiSheng/docker_demo/Dockerfile @@ -0,0 +1,29 @@ +# 指定基础映像 +FROM python:3.8.12 + +# 设置软件包源为中科大镜像源 +#RUN echo "deb https://mirrors.ustc.edu.cn/debian/ buster main" > /etc/apt/sources.list && \ +# echo "deb-src https://mirrors.ustc.edu.cn/debian/ buster main" >> /etc/apt/sources.list +# 更新软件包源 +#RUN apt-get update + +# 安装 libgl1-mesa-glx 软件包 +#RUN apt-get install -y libgl1-mesa-glx + +# 设置工作目录 +WORKDIR /data/bingxiao.fang/docker + +##复制工程文件放到容器中 +COPY . /data/bingxiao.fang/docker + +# 安装依赖项 +RUN pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple/ --default-timeout=60 --no-cache-dir -r requirements.txt + +# 复制 Python 文件和依赖项清单 + + + +EXPOSE 6768 + +# 指定容器启动命令 +CMD ["python", "./main.py"] diff --git a/AIMeiSheng/docker_demo/main.py b/AIMeiSheng/docker_demo/main.py new file mode 100644 index 0000000..094c2fc --- /dev/null +++ b/AIMeiSheng/docker_demo/main.py @@ -0,0 +1,12 @@ +import gradio as gr + +def greet(name): + return "Hello " + name + "!!" + +demo = gr.Interface(fn=greet, inputs="text", outputs="text") + +if __name__ == "__main__": + demo.launch(server_name="0.0.0.0") + # 注意:gradio启动项目后默认地址为127.0.0.1;使用docker部署需要将地址修改为0.0.0.0,否则会导致地址访问错误 + # 默认端口为7860,如需更改可在launch()中设置server_port=7000 +~ diff --git a/AIMeiSheng/docker_demo/requirements.txt b/AIMeiSheng/docker_demo/requirements.txt new file mode 100644 index 0000000..b68ec06 --- /dev/null +++ b/AIMeiSheng/docker_demo/requirements.txt @@ -0,0 +1,2 @@ +gradio==3.42.0 +gradio_client==0.5.0 diff --git a/AIMeiSheng/docs/Changelog_CN.md b/AIMeiSheng/docs/Changelog_CN.md new file mode 100644 index 0000000..eb67ba5 --- /dev/null +++ b/AIMeiSheng/docs/Changelog_CN.md @@ -0,0 +1,80 @@ +### 20230618更新 +- v2增加32k和48k两个新预训练模型 +- 修复非f0模型推理报错 +- 对于超过一小时的训练集的索引建立环节,自动kmeans缩小特征处理以加速索引训练、加入和查询 +- 附送一个人声转吉他玩具仓库 +- 数据处理剔除异常值切片 +- onnx导出选项卡 + +失败的实验: +- ~~特征检索增加时序维度:寄,没啥效果~~ +- ~~特征检索增加PCAR降维可选项:寄,数据大用kmeans缩小数据量,数据小降维操作耗时比省下的匹配耗时还多~~ +- ~~支持onnx推理(附带仅推理的小压缩包):寄,生成nsf还是需要pytorch~~ +- ~~训练时在音高、gender、eq、噪声等方面对输入进行随机增强:寄,没啥效果~~ + +todolist: +- 接入小型声码器调研 +- 训练集音高识别支持crepe +- crepe的精度支持和RVC-config同步 +- 对接F0编辑器 + + +### 20230528更新 +- 增加v2的jupyter notebook,韩文changelog,增加一些环境依赖 +- 增加呼吸、清辅音、齿音保护模式 +- 支持crepe-full推理 +- UVR5人声伴奏分离加上3个去延迟模型和MDX-Net去混响模型,增加HP3人声提取模型 +- 索引名称增加版本和实验名称 +- 人声伴奏分离、推理批量导出增加音频导出格式选项 +- 废弃32k模型的训练 + +### 20230513更新 +- 清除一键包内部老版本runtime内残留的lib.infer_pack和uvr5_pack +- 修复训练集预处理伪多进程的bug +- 增加harvest识别音高可选通过中值滤波削弱哑音现象,可调整中值滤波半径 +- 导出音频增加后处理重采样 +- 训练n_cpu进程数从"仅调整f0提取"改为"调整数据预处理和f0提取" +- 自动检测logs文件夹下的index路径,提供下拉列表功能 +- tab页增加"常见问题解答"(也可参考github-rvc-wiki) +- 相同路径的输入音频推理增加了音高缓存(用途:使用harvest音高提取,整个pipeline会经历漫长且重复的音高提取过程,如果不使用缓存,实验不同音色、索引、音高中值滤波半径参数的用户在第一次测试后的等待结果会非常痛苦) + +### 20230514更新 +- 音量包络对齐输入混合(可以缓解“输入静音输出小幅度噪声”的问题。如果输入音频背景底噪大则不建议开启,默认不开启(值为1可视为不开启)) +- 支持按照指定频率保存提取的小模型(假如你想尝试不同epoch下的推理效果,但是不想保存所有大checkpoint并且每次都要ckpt手工处理提取小模型,这项功能会非常实用) +- 通过设置环境变量解决服务端开了系统全局代理导致浏览器连接错误的问题 +- 支持v2预训练模型(目前只公开了40k版本进行测试,另外2个采样率还没有训练完全) +- 推理前限制超过1的过大音量 +- 微调数据预处理参数 + + +### 20230409更新 +- 
修正训练参数,提升显卡平均利用率,A100最高从25%提升至90%左右,V100:50%->90%左右,2060S:60%->85%左右,P40:25%->95%左右,训练速度显著提升 +- 修正参数:总batch_size改为每张卡的batch_size +- 修正total_epoch:最大限制100解锁至1000;默认10提升至默认20 +- 修复ckpt提取识别是否带音高错误导致推理异常的问题 +- 修复分布式训练每个rank都保存一次ckpt的问题 +- 特征提取进行nan特征过滤 +- 修复静音输入输出随机辅音or噪声的问题(老版模型需要重做训练集重训) + +### 20230416更新 +- 新增本地实时变声迷你GUI,双击go-realtime-gui.bat启动 +- 训练推理均对<50Hz的频段进行滤波过滤 +- 训练推理音高提取pyworld最低音高从默认80下降至50,50-80hz间的男声低音不会哑 +- WebUI支持根据系统区域变更语言(现支持en_US,ja_JP,zh_CN,zh_HK,zh_SG,zh_TW,不支持的默认en_US) +- 修正部分显卡识别(例如V100-16G识别失败,P4识别失败) + +### 20230428更新 +- 升级faiss索引设置,速度更快,质量更高 +- 取消total_npy依赖,后续分享模型不再需要填写total_npy +- 解锁16系限制。4G显存GPU给到4G的推理设置。 +- 修复部分音频格式下UVR5人声伴奏分离的bug +- 实时变声迷你gui增加对非40k与不懈怠音高模型的支持 + +### 后续计划: +功能: +- 支持多人训练选项卡(至多4人) + +底模: +- 收集呼吸wav加入训练集修正呼吸变声电音的问题 +- 我们正在训练增加了歌声训练集的底模,未来会公开 + diff --git a/AIMeiSheng/docs/Changelog_EN.md b/AIMeiSheng/docs/Changelog_EN.md new file mode 100644 index 0000000..20fc84c --- /dev/null +++ b/AIMeiSheng/docs/Changelog_EN.md @@ -0,0 +1,83 @@ +### 2023-06-18 +- New pretrained v2 models: 32k and 48k +- Fix non-f0 model inference errors +- For training-set exceeding 1 hour, do automatic minibatch-kmeans to reduce feature shape, so that index training, adding, and searching will be much faster. +- Provide a toy vocal2guitar huggingface space +- Auto delete outlier short cut training-set audios +- Onnx export tab + +Failed experiments: +- ~~Feature retrieval: add temporal feature retrieval: not effective~~ +- ~~Feature retrieval: add PCAR dimensionality reduction: searching is even slower~~ +- ~~Random data augmentation when training: not effective~~ + +todolist: +- Vocos-RVC (tiny vocoder) +- Crepe support for training +- Half precision crepe inference +- F0 editor support + +### 2023-05-28 +- Add v2 jupyter notebook, korean changelog, fix some environment requirments +- Add voiceless consonant and breath protection mode +- Support crepe-full pitch detect +- UVR5 vocal separation: support dereverb models and de-echo models +- Add experiment name and version on the name of index +- Support users to manually select export format of output audios when batch voice conversion processing and UVR5 vocal separation +- v1 32k model training is no more supported + +### 2023-05-13 +- Clear the redundant codes in the old version of runtime in the one-click-package: lib.infer_pack and uvr5_pack +- Fix pseudo multiprocessing bug in training set preprocessing +- Adding median filtering radius adjustment for harvest pitch recognize algorithm +- Support post processing resampling for exporting audio +- Multi processing "n_cpu" setting for training is changed from "f0 extraction" to "data preprocessing and f0 extraction" +- Automatically detect the index paths under the logs folder and provide a drop-down list function +- Add "Frequently Asked Questions and Answers" on the tab page (you can also refer to github RVC wiki) +- When inference, harvest pitch is cached when using same input audio path (purpose: using harvest pitch extraction, the entire pipeline will go through a long and repetitive pitch extraction process. If caching is not used, users who experiment with different timbre, index, and pitch median filtering radius settings will experience a very painful waiting process after the first inference) + +### 2023-05-14 +- Use volume envelope of input to mix or replace the volume envelope of output (can alleviate the problem of "input muting and output small amplitude noise". 
If the input audio background noise is high, it is not recommended to turn it on, and it is not turned on by default (1 can be considered as not turned on) +- Support saving extracted small models at a specified frequency (if you want to see the performance under different epochs, but do not want to save all large checkpoints and manually extract small models by ckpt-processing every time, this feature will be very practical) +- Resolve the issue of "connection errors" caused by the server's global proxy by setting environment variables +- Supports pre-trained v2 models (currently only 40k versions are publicly available for testing, and the other two sampling rates have not been fully trained yet) +- Limit excessive volume exceeding 1 before inference +- Slightly adjusted the settings of training-set preprocessing + + +####################### + +History changelogs: + +### 2023-04-09 +- Fixed training parameters to improve GPU utilization rate: A100 increased from 25% to around 90%, V100: 50% to around 90%, 2060S: 60% to around 85%, P40: 25% to around 95%; significantly improved training speed +- Changed parameter: total batch_size is now per GPU batch_size +- Changed total_epoch: maximum limit increased from 100 to 1000; default increased from 10 to 20 +- Fixed issue of ckpt extraction recognizing pitch incorrectly, causing abnormal inference +- Fixed issue of distributed training saving ckpt for each rank +- Applied nan feature filtering for feature extraction +- Fixed issue with silent input/output producing random consonants or noise (old models need to retrain with a new dataset) + +### 2023-04-16 Update +- Added local real-time voice changing mini-GUI, start by double-clicking go-realtime-gui.bat +- Applied filtering for frequency bands below 50Hz during training and inference +- Lowered the minimum pitch extraction of pyworld from the default 80 to 50 for training and inference, allowing male low-pitched voices between 50-80Hz not to be muted +- WebUI supports changing languages according to system locale (currently supporting en_US, ja_JP, zh_CN, zh_HK, zh_SG, zh_TW; defaults to en_US if not supported) +- Fixed recognition of some GPUs (e.g., V100-16G recognition failure, P4 recognition failure) + +### 2023-04-28 Update +- Upgraded faiss index settings for faster speed and higher quality +- Removed dependency on total_npy; future model sharing will not require total_npy input +- Unlocked restrictions for the 16-series GPUs, providing 4GB inference settings for 4GB VRAM GPUs +- Fixed bug in UVR5 vocal accompaniment separation for certain audio formats +- Real-time voice changing mini-GUI now supports non-40k and non-lazy pitch models + +### Future Plans: +Features: +- Add option: extract small models for each epoch save +- Add option: export additional mp3 to the specified path during inference +- Support multi-person training tab (up to 4 people) + +Base model: +- Collect breathing wav files to add to the training dataset to fix the issue of distorted breath sounds +- We are currently training a base model with an extended singing dataset, which will be released in the future diff --git a/AIMeiSheng/docs/Changelog_KO.md b/AIMeiSheng/docs/Changelog_KO.md new file mode 100644 index 0000000..52da1df --- /dev/null +++ b/AIMeiSheng/docs/Changelog_KO.md @@ -0,0 +1,91 @@ +### 2023년 6월 18일 업데이트 + +- v2 버전에서 새로운 32k와 48k 사전 학습 모델을 추가. +- non-f0 모델들의 추론 오류 수정. +- 학습 세트가 1시간을 넘어가는 경우, 인덱스 생성 단계에서 minibatch-kmeans을 사용해, 학습속도 가속화. 
+- [huggingface](https://huggingface.co/spaces/lj1995/vocal2guitar)에서 vocal2guitar 제공. +- 데이터 처리 단계에서 이상 값 자동으로 제거. +- ONNX로 내보내는(export) 옵션 탭 추가. + +업데이트에 적용되지 않았지만 시도한 것들 : + +- 시계열 차원을 추가하여 특징 검색을 진행했지만, 유의미한 효과는 없었습니다. +- PCA 차원 축소를 추가하여 특징 검색을 진행했지만, 유의미한 효과는 없었습니다. +- ONNX 추론을 지원하는 것에 실패했습니다. nsf 생성시, Pytorch가 필요하기 때문입니다. +- 훈련 중에 입력에 대한 음고, 성별, 이퀄라이저, 노이즈 등 무작위로 강화하는 것에, 유의미한 효과는 없었습니다. + +추후 업데이트 목록: + +- Vocos-RVC (소형 보코더) 통합 예정. +- 학습 단계에 음고 인식을 위한 Crepe 지원 예정. +- Crepe의 정밀도를 REC-config와 동기화하여 지원 예정. +- FO 에디터 지원 예정. + +### 2023년 5월 28일 업데이트 + +- v2 jupyter notebook 추가, 한국어 업데이트 로그 추가, 의존성 모듈 일부 수정. +- 무성음 및 숨소리 보호 모드 추가. +- crepe-full pitch 감지 지원. +- UVR5 보컬 분리: 디버브 및 디-에코 모델 지원. +- index 이름에 experiment 이름과 버전 추가. +- 배치 음성 변환 처리 및 UVR5 보컬 분리 시, 사용자가 수동으로 출력 오디오의 내보내기(export) 형식을 선택할 수 있도록 지원. +- 32k 훈련 모델 지원 종료. + +### 2023년 5월 13일 업데이트 + +- 원클릭 패키지의 이전 버전 런타임 내, 불필요한 코드(lib.infer_pack 및 uvr5_pack) 제거. +- 훈련 세트 전처리의 유사 다중 처리 버그 수정. +- Harvest 피치 인식 알고리즘에 대한 중위수 필터링 반경 조정 추가. +- 오디오 내보낼 때, 후처리 리샘플링 지원. +- 훈련에 대한 다중 처리 "n_cpu" 설정이 "f0 추출"에서 "데이터 전처리 및 f0 추출"로 변경. +- logs 폴더 하의 인덱스 경로를 자동으로 감지 및 드롭다운 목록 기능 제공. +- 탭 페이지에 "자주 묻는 질문과 답변" 추가. (github RVC wiki 참조 가능) +- 동일한 입력 오디오 경로를 사용할 때 추론, Harvest 피치를 캐시. + (주의: Harvest 피치 추출을 사용하면 전체 파이프라인은 길고 반복적인 피치 추출 과정을 거치게됩니다. 캐싱을 하지 않는다면, 첫 inference 이후의 단계에서 timbre, 인덱스, 피치 중위수 필터링 반경 설정 등 대기시간이 엄청나게 길어집니다!) + +### 2023년 5월 14일 업데이트 + +- 입력의 볼륨 캡슐을 사용하여 출력의 볼륨 캡슐을 혼합하거나 대체. (입력이 무음이거나 출력의 노이즈 문제를 최소화 할 수 있습니다. 입력 오디오의 배경 노이즈(소음)가 큰 경우 해당 기능을 사용하지 않는 것이 좋습니다. 기본적으로 비활성화 되어있는 옵션입니다. (1: 비활성화 상태)) +- 추출된 소형 모델을 지정된 빈도로 저장하는 기능을 지원. (다양한 에폭 하에서의 성능을 보려고 하지만 모든 대형 체크포인트를 저장하고 매번 ckpt 처리를 통해 소형 모델을 수동으로 추출하고 싶지 않은 경우 이 기능은 매우 유용합니다) +- 환경 변수를 설정하여 서버의 전역 프록시로 인한 "연결 오류" 문제 해결. +- 사전 훈련된 v2 모델 지원. (현재 40k 버전만 테스트를 위해 공개적으로 사용 가능하며, 다른 두 개의 샘플링 비율은 아직 완전히 훈련되지 않아 보류되었습니다.) +- 추론 전, 1을 초과하는 과도한 볼륨 제한. +- 데이터 전처리 매개변수 미세 조정. + +### 2023년 4월 9일 업데이트 + +- GPU 이용률 향상을 위해 훈련 파라미터 수정: A100은 25%에서 약 90%로 증가, V100: 50%에서 약 90%로 증가, 2060S: 60%에서 약 85%로 증가, P40: 25%에서 약 95%로 증가. + 훈련 속도가 크게 향상. +- 매개변수 기준 변경: total batch_size는 GPU당 batch_size를 의미. +- total_epoch 변경: 최대 한도가 100에서 1000으로 증가. 기본값이 10에서 20으로 증가. +- ckpt 추출이 피치를 잘못 인식하여 비정상적인 추론을 유발하는 문제 수정. +- 분산 훈련 과정에서 각 랭크마다 ckpt를 저장하는 문제 수정. +- 특성 추출 과정에 나노 특성 필터링 적용. +- 무음 입력/출력이 랜덤하게 소음을 생성하는 문제 수정. (이전 모델은 새 데이터셋으로 다시 훈련해야 합니다) + +### 2023년 4월 16일 업데이트 + +- 로컬 실시간 음성 변경 미니-GUI 추가, go-realtime-gui.bat를 더블 클릭하여 시작. +- 훈련 및 추론 중 50Hz 이하의 주파수 대역에 대해 필터링 적용. +- 훈련 및 추론의 pyworld 최소 피치 추출을 기본 80에서 50으로 낮춤. 이로 인해, 50-80Hz 사이의 남성 저음이 무음화되지 않습니다. +- 시스템 지역에 따른 WebUI 언어 변경 지원. (현재 en_US, ja_JP, zh_CN, zh_HK, zh_SG, zh_TW를 지원하며, 지원되지 않는 경우 기본값은 en_US) +- 일부 GPU의 인식 수정. (예: V100-16G 인식 실패, P4 인식 실패) + +### 2023년 4월 28일 업데이트 + +- Faiss 인덱스 설정 업그레이드로 속도가 더 빨라지고 품질이 향상. +- total_npy에 대한 의존성 제거. 추후의 모델 공유는 total_npy 입력을 필요로 하지 않습니다. +- 16 시리즈 GPU에 대한 제한 해제, 4GB VRAM GPU에 대한 4GB 추론 설정 제공. +- 일부 오디오 형식에 대한 UVR5 보컬 동반 분리에서의 버그 수정. +- 실시간 음성 변경 미니-GUI는 이제 non-40k 및 non-lazy 피치 모델을 지원합니다. + +### 추후 계획 + +Features: + +- 다중 사용자 훈련 탭 지원.(최대 4명) + +Base model: + +- 훈련 데이터셋에 숨소리 wav 파일을 추가하여, 보컬의 호흡이 노이즈로 변환되는 문제 수정. +- 보컬 훈련 세트의 기본 모델을 추가하기 위한 작업을 진행중이며, 이는 향후에 발표될 예정. diff --git a/AIMeiSheng/docs/README.en.md b/AIMeiSheng/docs/README.en.md new file mode 100644 index 0000000..80add24 --- /dev/null +++ b/AIMeiSheng/docs/README.en.md @@ -0,0 +1,124 @@ +
+# Retrieval-based-Voice-Conversion-WebUI
+An easy-to-use Voice Conversion framework based on VITS.
+
+ +[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) + +
+ +[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) +[![Licence](https://img.shields.io/github/license/RVC-Project/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/LICENSE) +[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/) + +[![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk) + +
+
+------
+[**Changelog**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_EN.md) | [**FAQ (Frequently Asked Questions)**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/wiki/FAQ-(Frequently-Asked-Questions))
+
+[**English**](./README.en.md) | [**中文简体**](../README.md) | [**日本語**](./README.ja.md) | [**한국어**](./README.ko.md) ([**韓國語**](./README.ko.han.md))
+
+Check out our [demo video](https://www.bilibili.com/video/BV1pm4y1z7Gm/)!
+
+Realtime voice conversion software using RVC: [w-okada/voice-changer](https://github.com/w-okada/voice-changer)
+
+> An online demo that uses RVC to convert vocals to acoustic guitar audio: https://huggingface.co/spaces/lj1995/vocal2guitar
+
+> Vocal2Guitar demo video: https://www.bilibili.com/video/BV19W4y1D7tT/
+
+> The pre-trained model was trained on nearly 50 hours of the high-quality, open-source VCTK dataset.
+
+> High-quality licensed song datasets will be added to the training set one after another for your use, without worrying about copyright infringement.
+
+## Summary
+This repository has the following features:
++ Reduces tone leakage by replacing the source feature with the training-set feature using top-1 retrieval;
++ Easy and fast training, even on relatively weak graphics cards;
++ Good results even from a small amount of training data (>=10 min of low-noise speech recommended);
++ Model fusion to change timbres (using the ckpt processing tab -> ckpt merge);
++ An easy-to-use WebUI;
++ The UVR5 model to quickly separate vocals and instruments;
++ The powerful high-pitch voice extraction algorithm [InterSpeech2023-RMVPE](#Credits) to prevent the muted-sound problem, with significantly better results, faster inference, and lower resource consumption than Crepe_full.
+
+## Preparing the environment
+The following commands need to be executed in an environment with Python 3.8 or higher.
+
+(Windows/Linux)
+First install the main dependencies through pip:
+```bash
+# Install PyTorch-related core dependencies, skip if already installed
+# Reference: https://pytorch.org/get-started/locally/
+pip install torch torchvision torchaudio
+
+# For Windows + Nvidia Ampere architecture (RTX 30xx), you need to specify the CUDA version corresponding to PyTorch according to https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/issues/21
+#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
+```
+
+Then you can use Poetry to install the other dependencies:
+```bash
+# Install the Poetry dependency management tool, skip if already installed
+# Reference: https://python-poetry.org/docs/#installation
+curl -sSL https://install.python-poetry.org | python3 -
+
+# Install the project dependencies
+poetry install
+```
+
+You can also use pip to install them:
+```bash
+pip install -r requirements.txt
+```
+
+------
+Mac users can install dependencies via `run.sh`:
+```bash
+sh ./run.sh
+```
+
+## Preparation of other pre-models
+RVC requires other pre-models for inference and training.
+
+You need to download them from our [Huggingface space](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/).
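+
+For example, a minimal download sketch using the `huggingface_hub` package (this snippet is an illustration, not part of this repo; it assumes the file names listed below are present in that space):
+
+```python
+# pip install huggingface_hub
+from huggingface_hub import hf_hub_download
+
+# Fetch one of the required files into the current directory.
+local_path = hf_hub_download(
+    repo_id="lj1995/VoiceConversionWebUI",
+    filename="hubert_base.pt",
+    local_dir=".",
+)
+print("Downloaded to", local_path)
+```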
+
+Here's a list of pre-models and other files that RVC needs:
+```bash
+hubert_base.pt
+
+./pretrained
+
+./uvr5_weights
+
+# If you want to test the v2 model (which changes the input from the 256-dimensional feature of 9-layer HuBERT+final_proj to the 768-dimensional feature of 12-layer HuBERT, and adds 3 period discriminators), you will also need these files
+./pretrained_v2
+
+# If you are using Windows, you may also need this file; skip if FFmpeg is installed
+ffmpeg.exe
+```
+Then use this command to start the WebUI:
+```bash
+python infer-web.py
+```
+If you are using Windows or macOS, you can download and extract `RVC-beta.7z` to use RVC directly, starting the WebUI with `go-web.bat` on Windows or `sh ./run.sh` on macOS.
+
+There is also a tutorial on RVC in Chinese that you can check out if needed.
+
+## Credits
++ [ContentVec](https://github.com/auspicious3000/contentvec/)
++ [VITS](https://github.com/jaywalnut310/vits)
++ [HIFIGAN](https://github.com/jik876/hifi-gan)
++ [Gradio](https://github.com/gradio-app/gradio)
++ [FFmpeg](https://github.com/FFmpeg/FFmpeg)
++ [Ultimate Vocal Remover](https://github.com/Anjok07/ultimatevocalremovergui)
++ [audio-slicer](https://github.com/openvpi/audio-slicer)
++ [Vocal pitch extraction: RMVPE](https://github.com/Dream-High/RMVPE)
+  + The pretrained model was trained and tested by [yxlllc](https://github.com/yxlllc/RMVPE) and [RVC-Boss](https://github.com/RVC-Boss).
+
+## Thanks to all contributors for their efforts
+
diff --git a/AIMeiSheng/docs/README.ja.md b/AIMeiSheng/docs/README.ja.md
new file mode 100644
index 0000000..26ce3af
--- /dev/null
+++ b/AIMeiSheng/docs/README.ja.md
@@ -0,0 +1,104 @@
+# Retrieval-based-Voice-Conversion-WebUI
+An easy-to-use voice conversion (voice changer) framework based on VITS.
+
+ +[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) + +
+ +[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) +[![Licence](https://img.shields.io/github/license/RVC-Project/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/LICENSE) +[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/) + +[![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk) + +
+ +------ + +[**更新日誌**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_CN.md) + +[**English**](./README.en.md) | [**中文简体**](../README.md) | [**日本語**](./README.ja.md) | [**한국어**](./README.ko.md) ([**韓國語**](./README.ko.han.md)) + +> デモ動画は[こちら](https://www.bilibili.com/video/BV1pm4y1z7Gm/)でご覧ください。 + +> RVCによるリアルタイム音声変換: [w-okada/voice-changer](https://github.com/w-okada/voice-changer) + +> 著作権侵害を心配することなく使用できるように、基底モデルは約50時間の高品質なオープンソースデータセットで訓練されています。 + +> 今後も、次々と使用許可のある高品質な歌声の資料集を追加し、基底モデルを訓練する予定です。 + +## はじめに +本リポジトリには下記の特徴があります。 + ++ Top1検索を用いることで、生の特徴量を訓練用データセット特徴量に変換し、トーンリーケージを削減します。 ++ 比較的貧弱なGPUでも、高速かつ簡単に訓練できます。 ++ 少量のデータセットからでも、比較的良い結果を得ることができます。(10分以上のノイズの少ない音声を推奨します。) ++ モデルを融合することで、音声を混ぜることができます。(ckpt processingタブの、ckpt mergeを使用します。) ++ 使いやすいWebUI。 ++ UVR5 Modelも含んでいるため、人の声とBGMを素早く分離できます。 + +## 環境構築 +Poetryで依存関係をインストールすることをお勧めします。 + +下記のコマンドは、Python3.8以上の環境で実行する必要があります: +```bash +# PyTorch関連の依存関係をインストール。インストール済の場合は省略。 +# 参照先: https://pytorch.org/get-started/locally/ +pip install torch torchvision torchaudio + +#Windows+ Nvidia Ampere Architecture(RTX30xx)の場合、 #21 に従い、pytorchに対応するcuda versionを指定する必要があります。 +#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 + +# PyTorch関連の依存関係をインストール。インストール済の場合は省略。 +# 参照先: https://python-poetry.org/docs/#installation +curl -sSL https://install.python-poetry.org | python3 - + +# Poetry経由で依存関係をインストール +poetry install +``` + +pipでも依存関係のインストールが可能です: + +```bash +pip install -r requirements.txt +``` + +## 基底modelsを準備 +RVCは推論/訓練のために、様々な事前訓練を行った基底モデルを必要とします。 + +modelsは[Hugging Face space](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)からダウンロードできます。 + +以下は、RVCに必要な基底モデルやその他のファイルの一覧です。 +```bash +hubert_base.pt + +./pretrained + +./uvr5_weights + +# ffmpegがすでにinstallされている場合は省略 +./ffmpeg +``` +その後、下記のコマンドでWebUIを起動します。 +```bash +python infer-web.py +``` +Windowsをお使いの方は、直接`RVC-beta.7z`をダウンロード後に展開し、`go-web.bat`をクリックすることで、WebUIを起動することができます。(7zipが必要です。) + +また、リポジトリに[小白简易教程.doc](./小白简易教程.doc)がありますので、参考にしてください(中国語版のみ)。 + +## 参考プロジェクト ++ [ContentVec](https://github.com/auspicious3000/contentvec/) ++ [VITS](https://github.com/jaywalnut310/vits) ++ [HIFIGAN](https://github.com/jik876/hifi-gan) ++ [Gradio](https://github.com/gradio-app/gradio) ++ [FFmpeg](https://github.com/FFmpeg/FFmpeg) ++ [Ultimate Vocal Remover](https://github.com/Anjok07/ultimatevocalremovergui) ++ [audio-slicer](https://github.com/openvpi/audio-slicer) + +## 貢献者(contributor)の皆様の尽力に感謝します + + + diff --git a/AIMeiSheng/docs/README.ko.han.md b/AIMeiSheng/docs/README.ko.han.md new file mode 100644 index 0000000..cac9d70 --- /dev/null +++ b/AIMeiSheng/docs/README.ko.han.md @@ -0,0 +1,100 @@ +
+ +

Retrieval-based-Voice-Conversion-WebUI

+VITS基盤의 簡單하고使用하기 쉬운音聲變換틀

+ +[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) + +
+ +[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) +[![Licence](https://img.shields.io/github/license/RVC-Project/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/LICENSE) +[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/) + +[![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk) + +
+ +------ +[**更新日誌**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_KO.md) + +[**English**](./README.en.md) | [**中文简体**](../README.md) | [**日本語**](./README.ja.md) | [**한국어**](./README.ko.md) ([**韓國語**](./README.ko.han.md)) + +> [示範映像](https://www.bilibili.com/video/BV1pm4y1z7Gm/)을 確認해 보세요! + +> RVC를活用한實時間音聲變換: [w-okada/voice-changer](https://github.com/w-okada/voice-changer) + +> 基本모델은 50時間假量의 高品質 오픈 소스 VCTK 데이터셋을 使用하였으므로, 著作權上의 念慮가 없으니 安心하고 使用하시기 바랍니다. + +> 著作權問題가 없는 高品質의 노래를 以後에도 繼續해서 訓練할 豫定입니다. + +## 紹介 +本Repo는 다음과 같은 特徵을 가지고 있습니다: ++ top1檢索을利用하여 入力音色特徵을 訓練세트音色特徵으로 代替하여 音色의漏出을 防止; ++ 相對的으로 낮은性能의 GPU에서도 빠른訓練可能; ++ 적은量의 데이터로 訓練해도 좋은 結果를 얻을 수 있음 (最小10分以上의 低雜음音聲데이터를 使用하는 것을 勸獎); ++ 모델融合을通한 音色의 變調可能 (ckpt處理탭->ckpt混合選擇); ++ 使用하기 쉬운 WebUI (웹 使用者인터페이스); ++ UVR5 모델을 利用하여 목소리와 背景音樂의 빠른 分離; + +## 環境의準備 +poetry를通해 依存를設置하는 것을 勸獎합니다. + +다음命令은 Python 버전3.8以上의環境에서 實行되어야 합니다: +```bash +# PyTorch 關聯主要依存設置, 이미設置되어 있는 境遇 건너뛰기 可能 +# 參照: https://pytorch.org/get-started/locally/ +pip install torch torchvision torchaudio + +# Windows + Nvidia Ampere Architecture(RTX30xx)를 使用하고 있다面, #21 에서 명시된 것과 같이 PyTorch에 맞는 CUDA 버전을 指定해야 합니다. +#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 + +# Poetry 設置, 이미設置되어 있는 境遇 건너뛰기 可能 +# Reference: https://python-poetry.org/docs/#installation +curl -sSL https://install.python-poetry.org | python3 - + +# 依存設置 +poetry install +``` +pip를 活用하여依存를 設置하여도 無妨합니다. + +```bash +pip install -r requirements.txt +``` + +## 其他預備모델準備 +RVC 모델은 推論과訓練을 依하여 다른 預備모델이 必要합니다. + +[Huggingface space](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)를 通해서 다운로드 할 수 있습니다. + +다음은 RVC에 必要한 預備모델 및 其他 파일 目錄입니다: +```bash +hubert_base.pt + +./pretrained + +./uvr5_weights + +# Windows를 使用하는境遇 이 사전도 必要할 수 있습니다. FFmpeg가 設置되어 있으면 건너뛰어도 됩니다. +ffmpeg.exe +``` +그後 以下의 命令을 使用하여 WebUI를 始作할 수 있습니다: +```bash +python infer-web.py +``` +Windows를 使用하는境遇 `RVC-beta.7z`를 다운로드 및 壓縮解除하여 RVC를 直接使用하거나 `go-web.bat`을 使用하여 WebUi를 直接할 수 있습니다. + +## 參考 ++ [ContentVec](https://github.com/auspicious3000/contentvec/) ++ [VITS](https://github.com/jaywalnut310/vits) ++ [HIFIGAN](https://github.com/jik876/hifi-gan) ++ [Gradio](https://github.com/gradio-app/gradio) ++ [FFmpeg](https://github.com/FFmpeg/FFmpeg) ++ [Ultimate Vocal Remover](https://github.com/Anjok07/ultimatevocalremovergui) ++ [audio-slicer](https://github.com/openvpi/audio-slicer) +## 모든寄與者분들의勞力에感謝드립니다 + + + + + diff --git a/AIMeiSheng/docs/README.ko.md b/AIMeiSheng/docs/README.ko.md new file mode 100644 index 0000000..abea8e6 --- /dev/null +++ b/AIMeiSheng/docs/README.ko.md @@ -0,0 +1,112 @@ +
+ +

Retrieval-based-Voice-Conversion-WebUI

+VITS 기반의 간단하고 사용하기 쉬운 음성 변환 프레임워크.

+ +[![madewithlove](https://forthebadge.com/images/badges/built-with-love.svg)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI) + +
+ +[![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/github/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/Retrieval_based_Voice_Conversion_WebUI.ipynb) +[![Licence](https://img.shields.io/github/license/RVC-Project/Retrieval-based-Voice-Conversion-WebUI?style=for-the-badge)](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/LICENSE) +[![Huggingface](https://img.shields.io/badge/🤗%20-Spaces-yellow.svg?style=for-the-badge)](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/) + +[![Discord](https://img.shields.io/badge/RVC%20Developers-Discord-7289DA?style=for-the-badge&logo=discord&logoColor=white)](https://discord.gg/HcsmBBGyVk) + +
+ +--- + +[**업데이트 로그**](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/blob/main/docs/Changelog_KO.md) + +[**English**](./README.en.md) | [**中文简体**](../README.md) | [**日本語**](./README.ja.md) | [**한국어**](./README.ko.md) ([**韓國語**](./README.ko.han.md)) + +> [데모 영상](https://www.bilibili.com/video/BV1pm4y1z7Gm/)을 확인해 보세요! + +> RVC를 활용한 실시간 음성변환: [w-okada/voice-changer](https://github.com/w-okada/voice-changer) + +> 기본 모델은 50시간 가량의 고퀄리티 오픈 소스 VCTK 데이터셋을 사용하였으므로, 저작권상의 염려가 없으니 안심하고 사용하시기 바랍니다. + +> 저작권 문제가 없는 고퀄리티의 노래를 이후에도 계속해서 훈련할 예정입니다. + +## 소개 + +본 Repo는 다음과 같은 특징을 가지고 있습니다: + +- top1 검색을 이용하여 입력 음색 특징을 훈련 세트 음색 특징으로 대체하여 음색의 누출을 방지; +- 상대적으로 낮은 성능의 GPU에서도 빠른 훈련 가능; +- 적은 양의 데이터로 훈련해도 좋은 결과를 얻을 수 있음 (최소 10분 이상의 저잡음 음성 데이터를 사용하는 것을 권장); +- 모델 융합을 통한 음색의 변조 가능 (ckpt 처리 탭->ckpt 병합 선택); +- 사용하기 쉬운 WebUI (웹 인터페이스); +- UVR5 모델을 이용하여 목소리와 배경음악의 빠른 분리; + +## 환경의 준비 + +poetry를 통해 dependecies를 설치하는 것을 권장합니다. + +다음 명령은 Python 버전 3.8 이상의 환경에서 실행되어야 합니다: + +```bash +# PyTorch 관련 주요 dependencies 설치, 이미 설치되어 있는 경우 건너뛰기 가능 +# 참조: https://pytorch.org/get-started/locally/ +pip install torch torchvision torchaudio + +# Windows + Nvidia Ampere Architecture(RTX30xx)를 사용하고 있다면, https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/issues/21 에서 명시된 것과 같이 PyTorch에 맞는 CUDA 버전을 지정해야 합니다. +#pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 + +# Poetry 설치, 이미 설치되어 있는 경우 건너뛰기 가능 +# Reference: https://python-poetry.org/docs/#installation +curl -sSL https://install.python-poetry.org | python3 - + +# Dependecies 설치 +poetry install +``` + +pip를 활용하여 dependencies를 설치하여도 무방합니다. + +```bash +pip install -r requirements.txt +``` + +## 기타 사전 모델 준비 + +RVC 모델은 추론과 훈련을 위하여 다른 사전 모델이 필요합니다. + +[Huggingface space](https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main/)를 통해서 다운로드 할 수 있습니다. + +다음은 RVC에 필요한 사전 모델 및 기타 파일 목록입니다: + +```bash +hubert_base.pt + +./pretrained + +./uvr5_weights + +# Windows를 사용하는 경우 이 사전도 필요할 수 있습니다. FFmpeg가 설치되어 있으면 건너뛰어도 됩니다. +ffmpeg.exe +``` + +그 후 이하의 명령을 사용하여 WebUI를 시작할 수 있습니다: + +```bash +python infer-web.py +``` + +Windows를 사용하는 경우 `RVC-beta.7z`를 다운로드 및 압축 해제하여 RVC를 직접 사용하거나 `go-web.bat`을 사용하여 WebUi를 시작할 수 있습니다. + +## 참고 + +- [ContentVec](https://github.com/auspicious3000/contentvec/) +- [VITS](https://github.com/jaywalnut310/vits) +- [HIFIGAN](https://github.com/jik876/hifi-gan) +- [Gradio](https://github.com/gradio-app/gradio) +- [FFmpeg](https://github.com/FFmpeg/FFmpeg) +- [Ultimate Vocal Remover](https://github.com/Anjok07/ultimatevocalremovergui) +- [audio-slicer](https://github.com/openvpi/audio-slicer) + +## 모든 기여자 분들의 노력에 감사드립니다. + + + + diff --git a/AIMeiSheng/docs/faiss_tips_en.md b/AIMeiSheng/docs/faiss_tips_en.md new file mode 100644 index 0000000..aafad6e --- /dev/null +++ b/AIMeiSheng/docs/faiss_tips_en.md @@ -0,0 +1,102 @@ +faiss tuning TIPS +================== +# about faiss +faiss is a library of neighborhood searches for dense vectors, developed by facebook research, which efficiently implements many approximate neighborhood search methods. +Approximate Neighbor Search finds similar vectors quickly while sacrificing some accuracy. + +## faiss in RVC +In RVC, for the embedding of features converted by HuBERT, we search for embeddings similar to the embedding generated from the training data and mix them to achieve a conversion that is closer to the original speech. However, since this search takes time if performed naively, high-speed conversion is realized by using approximate neighborhood search. 
+
+# implementation overview
+In '/logs/your-experiment/3_feature256', where the model is located, are the features extracted by HuBERT from each voice file.
+From here we read the npy files, sorted by filename, and concatenate the vectors to create big_npy. (This vector has shape [N, 256].)
+After saving big_npy as /logs/your-experiment/total_fea.npy, train it with faiss.
+
+As of 2023/04/18, faiss's index factory is used to build an IVF index based on L2 distance, with the number of IVF partitions (n_ivf) set to N//39 and n_probe to int(np.power(n_ivf, 0.3)) (see around train_index in infer-web.py).
+
+In this article, I will explain the meaning of these parameters.
+
+# Explanation of the method
+## index factory
+An index factory is faiss's own notation that expresses, as a string, a pipeline connecting multiple approximate neighborhood search methods.
+This allows you to try various approximate neighborhood search methods simply by changing the index factory string.
+In RVC it is used like this:
+
+```python
+index = faiss.index_factory(256, "IVF%s,Flat" % n_ivf)
+```
+Among the arguments of index_factory, the first is the number of dimensions of the vector, the second is the index factory string, and the third is the distance metric to use.
+
+For more detailed notation, see
+https://github.com/facebookresearch/faiss/wiki/The-index-factory
+
+## index for distance
+There are two typical metrics used for the similarity of embeddings:
+
+- Euclidean distance (METRIC_L2)
+- inner product (METRIC_INNER_PRODUCT)
+
+Euclidean distance takes the squared difference in each dimension, sums the differences over all dimensions, and then takes the square root. This is the same distance in 2D and 3D that we use on a daily basis.
+The inner product is generally not used as a similarity measure as-is; instead, cosine similarity, i.e. the inner product taken after normalizing by the L2 norm, is used.
+
+Which is better depends on the case, but cosine similarity is often used for embeddings obtained by word2vec and for similar-image retrieval models learned with ArcFace. If you want to do l2 normalization on a vector X with numpy, you can do it with the following code, with eps small enough to avoid division by zero.
+
+```python
+X_normed = X / np.maximum(eps, np.linalg.norm(X, ord=2, axis=-1, keepdims=True))
+```
+
+Also, for the index factory, you can change the distance metric used for calculation by choosing the value to pass as the third argument.
+
+```python
+index = faiss.index_factory(dimension, text, faiss.METRIC_INNER_PRODUCT)
+```
+
+## IVF
+IVF (inverted file indexes) is an algorithm similar to the inverted index in full-text search.
+During training, the search targets are clustered with kmeans, and Voronoi partitioning is performed using the cluster centers. Each data point is assigned to exactly one cluster, so we create a dictionary that looks up data points from clusters.
+
+For example, if clusters are assigned as follows:
+|index|Cluster|
+|-----|-------|
+|1|A|
+|2|B|
+|3|A|
+|4|C|
+|5|B|
+
+The resulting inverted index looks like this:
+
+|cluster|index|
+|-------|-----|
+|A|1, 3|
+|B|2, 5|
+|C|4|
+
+When searching, we first search n_probe of the clusters, and then calculate the distances to the data points belonging to each of those clusters.
+
+# Recommended parameters
+There are official guidelines on how to choose an index, so I will explain in line with them.
+https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index
+
+For datasets below 1M, 4bit-PQ is the most efficient method available in faiss as of April 2023.
+Combining this with IVF, so that 4bit-PQ first narrows down the candidates and an accurate index then recalculates the distances, can be written with the following index factory.
+
+```python
+index = faiss.index_factory(256, "IVF1024,PQ128x4fs,RFlat")
+```
+
+## Recommended parameters for IVF
+Consider the case of too many IVF partitions. For example, if coarse quantization by IVF is performed with as many partitions as there are data points, this is the same as a naive exhaustive search and is inefficient.
+For 1M or less, IVF values between 4*sqrt(N) and 16*sqrt(N) are recommended, where N is the number of data points.
+
+Since the calculation time increases in proportion to n_probe, weigh it against the accuracy you need and choose appropriately. Personally, I don't think RVC needs that much accuracy, so n_probe = 1 is fine.
+
+## FastScan
+FastScan is a method that enables high-speed approximation of distances by product quantization, performed in registers.
+Product quantization performs clustering independently for every d dimensions (usually d = 2) during training, calculates the distances between clusters in advance, and creates a lookup table. At prediction time, the distance of each dimension can be calculated in O(1) by consulting the lookup table.
+So the number you specify after PQ usually specifies half the dimension of the vector.
+
+For a more detailed description of FastScan, please refer to the official documentation.
+https://github.com/facebookresearch/faiss/wiki/Fast-accumulation-of-PQ-and-AQ-codes-(FastScan)
+
+## RFlat
+RFlat is an instruction to recalculate the rough distances computed by FastScan with the exact distance specified by the third argument of the index factory.
+When getting k neighbors, k*k_factor points are recalculated.
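+
+As a quick, self-contained illustration of the above, here is a minimal sketch (my own example, not code from the RVC repository; the random matrix stands in for the real [N, 256] big_npy):
+
+```python
+import numpy as np
+import faiss
+
+# stand-in for the real big_npy built from 3_feature256 (shape [N, 256])
+big_npy = np.random.rand(10000, 256).astype("float32")
+
+n_ivf = int(4 * np.sqrt(big_npy.shape[0]))  # within the recommended 4*sqrt(N)..16*sqrt(N)
+index = faiss.index_factory(256, "IVF%s,Flat" % n_ivf)
+index.train(big_npy)  # kmeans clustering / Voronoi partitioning
+index.add(big_npy)
+index.nprobe = 1      # number of clusters visited per query
+
+# retrieve the 8 nearest training vectors for each query embedding
+D, I = index.search(big_npy[:4], 8)  # distances and neighbor ids
+```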
diff --git a/AIMeiSheng/docs/faiss_tips_ja.md b/AIMeiSheng/docs/faiss_tips_ja.md new file mode 100644 index 0000000..89cf5ba --- /dev/null +++ b/AIMeiSheng/docs/faiss_tips_ja.md @@ -0,0 +1,101 @@ +faiss tuning TIPS +================== +# about faiss +faissはfacebook researchの開発する、密なベクトルに対する近傍探索をまとめたライブラリで、多くの近似近傍探索の手法を効率的に実装しています。 +近似近傍探索はある程度精度を犠牲にしながら高速に類似するベクトルを探します。 + +## faiss in RVC +RVCではHuBERTで変換した特徴量のEmbeddingに対し、学習データから生成されたEmbeddingと類似するものを検索し、混ぜることでより元の音声に近い変換を実現しています。ただ、この検索は愚直に行うと時間がかかるため、近似近傍探索を用いることで高速な変換を実現しています。 + +# 実装のoverview +モデルが配置されている '/logs/your-experiment/3_feature256'には各音声データからHuBERTで抽出された特徴量が配置されています。 +ここからnpyファイルをファイル名でソートした順番で読み込み、ベクトルを連結してbig_npyを作成しfaissを学習させます。(このベクトルのshapeは[N, 256]です。) + +本Tipsではまずこれらのパラメータの意味を解説します。 + +# 手法の解説 +## index factory +index factoryは複数の近似近傍探索の手法を繋げるパイプラインをstringで表記するfaiss独自の記法です。 +これにより、index factoryの文字列を変更するだけで様々な近似近傍探索の手法を試せます。 +RVCでは以下のように使われています。 + +```python +index = faiss.index_factory(256, "IVF%s,Flat" % n_ivf) +``` +index_factoryの引数のうち、1つ目はベクトルの次元数、2つ目はindex factoryの文字列で、3つ目には用いる距離を指定することができます。 + +より詳細な記法については +https://github.com/facebookresearch/faiss/wiki/The-index-factory + +## 距離指標 +embeddingの類似度として用いられる代表的な指標として以下の二つがあります。 + +- ユークリッド距離(METRIC_L2) +- 内積(METRIC_INNER_PRODUCT) + +ユークリッド距離では各次元において二乗の差をとり、全次元の差を足してから平方根をとります。これは日常的に用いる2次元、3次元での距離と同じです。 +内積はこのままでは類似度の指標として用いず、一般的にはL2ノルムで正規化してから内積をとるコサイン類似度を用います。 + +どちらがよいかは場合によりますが、word2vec等で得られるembeddingやArcFace等で学習した類似画像検索のモデルではコサイン類似度が用いられることが多いです。ベクトルXに対してl2正規化をnumpyで行う場合は、0 divisionを避けるために十分に小さな値をepsとして以下のコードで可能です。 + +```python +X_normed = X / np.maximum(eps, np.linalg.norm(X, ord=2, axis=-1, keepdims=True)) +``` + +また、index factoryには第3引数に渡す値を選ぶことで計算に用いる距離指標を変更できます。 + +```python +index = faiss.index_factory(dimention, text, faiss.METRIC_INNER_PRODUCT) +``` + +## IVF +IVF(Inverted file indexes)は全文検索における転置インデックスと似たようなアルゴリズムです。 +学習時には検索対象に対してkmeansでクラスタリングを行い、クラスタ中心を用いてボロノイ分割を行います。各データ点には一つずつクラスタが割り当てられるので、クラスタからデータ点を逆引きする辞書を作成します。 + +例えば以下のようにクラスタが割り当てられた場合 +|index|クラスタ| +|-----|-------| +|1|A| +|2|B| +|3|A| +|4|C| +|5|B| + +作成される転置インデックスは以下のようになります。 + +|クラスタ|index| +|-------|-----| +|A|1, 3| +|B|2, 5| +|C|4| + +検索時にはまずクラスタからn_probe個のクラスタを検索し、次にそれぞれのクラスタに属するデータ点について距離を計算します。 + +# 推奨されるパラメータ +indexの選び方については公式にガイドラインがあるので、それに準じて説明します。 +https://github.com/facebookresearch/faiss/wiki/Guidelines-to-choose-an-index + +1M以下のデータセットにおいては4bit-PQが2023年4月時点ではfaissで利用できる最も効率的な手法です。 +これをIVFと組み合わせ、4bit-PQで候補を絞り、最後に正確な指標で距離を再計算するには以下のindex factoryを用いることで記載できます。 + +```python +index = faiss.index_factory(256, "IVF1024,PQ128x4fs,RFlat") +``` + +## IVFの推奨パラメータ +IVFの数が多すぎる場合、たとえばデータ数の数だけIVFによる粗量子化を行うと、これは愚直な全探索と同じになり効率が悪いです。 +1M以下の場合ではIVFの値はデータ点の数Nに対して4*sqrt(N) ~ 16*sqrt(N)に推奨しています。 + +n_probeはn_probeの数に比例して計算時間が増えるので、精度と相談して適切に選んでください。個人的にはRVCにおいてそこまで精度は必要ないと思うのでn_probe = 1で良いと思います。 + +## FastScan +FastScanは直積量子化で大まかに距離を近似するのを、レジスタ内で行うことにより高速に行うようにした手法です。 +直積量子化は学習時にd次元ごと(通常はd=2)に独立してクラスタリングを行い、クラスタ同士の距離を事前計算してlookup tableを作成します。予測時はlookup tableを見ることで各次元の距離をO(1)で計算できます。 +そのため、PQの次に指定する数字は通常ベクトルの半分の次元を指定します。 + +FastScanに関するより詳細な説明は公式のドキュメントを参照してください。 +https://github.com/facebookresearch/faiss/wiki/Fast-accumulation-of-PQ-and-AQ-codes-(FastScan) + +## RFlat +RFlatはFastScanで計算した大まかな距離を、index factoryの第三引数で指定した正確な距離で再計算する指示です。 +k個の近傍を取得する際は、k*k_factor個の点について再計算が行われます。 diff --git a/AIMeiSheng/docs/faiss_tips_ko.md b/AIMeiSheng/docs/faiss_tips_ko.md new file mode 100644 index 0000000..ecd518c --- /dev/null +++ b/AIMeiSheng/docs/faiss_tips_ko.md @@ -0,0 +1,132 @@ +Facebook AI Similarity Search 
(Faiss) 팁 +================== +# Faiss에 대하여 +Faiss 는 Facebook Research가 개발하는, 고밀도 벡터 이웃 검색 라이브러리입니다. 근사 근접 탐색법 (Approximate Neigbor Search)은 약간의 정확성을 희생하여 유사 벡터를 고속으로 찾습니다. + +## RVC에 있어서 Faiss +RVC에서는 HuBERT로 변환한 feature의 embedding을 위해 훈련 데이터에서 생성된 embedding과 유사한 embadding을 검색하고 혼합하여 원래의 음성에 더욱 가까운 변환을 달성합니다. 그러나, 이 탐색법은 단순히 수행하면 시간이 다소 소모되므로, 근사 근접 탐색법을 통해 고속 변환을 가능케 하고 있습니다. + +# 구현 개요 +모델이 위치한 `/logs/your-experiment/3_feature256`에는 각 음성 데이터에서 HuBERT가 추출한 feature들이 있습니다. 여기에서 파일 이름별로 정렬된 npy 파일을 읽고, 벡터를 연결하여 big_npy ([N, 256] 모양의 벡터) 를 만듭니다. big_npy를 `/logs/your-experiment/total_fea.npy`로 저장한 후, Faiss로 학습시킵니다. + +2023/04/18 기준으로, Faiss의 Index Factory 기능을 이용해, L2 거리에 근거하는 IVF를 이용하고 있습니다. IVF의 분할수(n_ivf)는 N//39로, n_probe는 int(np.power(n_ivf, 0.3))가 사용되고 있습니다. (infer-web.py의 train_index 주위를 찾으십시오.) + +이 팁에서는 먼저 이러한 매개 변수의 의미를 설명하고, 개발자가 추후 더 나은 index를 작성할 수 있도록 하는 조언을 작성합니다. + +# 방법의 설명 +## Index factory +index factory는 여러 근사 근접 탐색법을 문자열로 연결하는 pipeline을 문자열로 표기하는 Faiss만의 독자적인 기법입니다. 이를 통해 index factory의 문자열을 변경하는 것만으로 다양한 근사 근접 탐색을 시도해 볼 수 있습니다. RVC에서는 다음과 같이 사용됩니다: + +```python +index = Faiss.index_factory(256, "IVF%s,Flat" % n_ivf) +``` +`index_factory`의 인수들 중 첫 번째는 벡터의 차원 수이고, 두번째는 index factory 문자열이며, 세번째에는 사용할 거리를 지정할 수 있습니다. + +기법의 보다 자세한 설명은 https://github.com/facebookresearch/Faiss/wiki/The-index-factory 를 확인해 주십시오. + +## 거리에 대한 index +embedding의 유사도로서 사용되는 대표적인 지표로서 이하의 2개가 있습니다. + +- 유클리드 거리 (METRIC_L2) +- 내적(内積) (METRIC_INNER_PRODUCT) + +유클리드 거리에서는 각 차원에서 제곱의 차를 구하고, 각 차원에서 구한 차를 모두 더한 후 제곱근을 취합니다. 이것은 일상적으로 사용되는 2차원, 3차원에서의 거리의 연산법과 같습니다. 내적은 그 값을 그대로 유사도 지표로 사용하지 않고, L2 정규화를 한 이후 내적을 취하는 코사인 유사도를 사용합니다. + +어느 쪽이 더 좋은지는 경우에 따라 다르지만, word2vec에서 얻은 embedding 및 ArcFace를 활용한 이미지 검색 모델은 코사인 유사성이 이용되는 경우가 많습니다. numpy를 사용하여 벡터 X에 대해 L2 정규화를 하고자 하는 경우, 0 division을 피하기 위해 충분히 작은 값을 eps로 한 뒤 이하에 코드를 활용하면 됩니다. + +```python +X_normed = X / np.maximum(eps, np.linalg.norm(X, ord=2, axis=-1, keepdims=True)) +``` + +또한, `index factory`의 3번째 인수에 건네주는 값을 선택하는 것을 통해 계산에 사용하는 거리 index를 변경할 수 있습니다. + +```python +index = Faiss.index_factory(dimention, text, Faiss.METRIC_INNER_PRODUCT) +``` + +## IVF +IVF (Inverted file indexes)는 역색인 탐색법과 유사한 알고리즘입니다. 학습시에는 검색 대상에 대해 k-평균 군집법을 실시하고 클러스터 중심을 이용해 보로노이 분할을 실시합니다. 각 데이터 포인트에는 클러스터가 할당되므로, 클러스터에서 데이터 포인트를 조회하는 dictionary를 만듭니다. + +예를 들어, 클러스터가 다음과 같이 할당된 경우 +|index|Cluster| +|-----|-------| +|1|A| +|2|B| +|3|A| +|4|C| +|5|B| + +IVF 이후의 결과는 다음과 같습니다: + +|cluster|index| +|-------|-----| +|A|1, 3| +|B|2, 5| +|C|4| + +탐색 시, 우선 클러스터에서 `n_probe`개의 클러스터를 탐색한 다음, 각 클러스터에 속한 데이터 포인트의 거리를 계산합니다. + +# 권장 매개변수 +index의 선택 방법에 대해서는 공식적으로 가이드 라인이 있으므로, 거기에 준해 설명합니다. +https://github.com/facebookresearch/Faiss/wiki/Guidelines-to-choose-an-index + +1M 이하의 데이터 세트에 있어서는 4bit-PQ가 2023년 4월 시점에서는 Faiss로 이용할 수 있는 가장 효율적인 수법입니다. 이것을 IVF와 조합해, 4bit-PQ로 후보를 추려내고, 마지막으로 이하의 index factory를 이용하여 정확한 지표로 거리를 재계산하면 됩니다. + +```python +index = Faiss.index_factory(256, "IVF1024,PQ128x4fs,RFlat") +``` + +## IVF 권장 매개변수 +IVF의 수가 너무 많으면, 가령 데이터 수의 수만큼 IVF로 양자화(Quantization)를 수행하면, 이것은 완전탐색과 같아져 효율이 나빠지게 됩니다. 1M 이하의 경우 IVF 값은 데이터 포인트 수 N에 대해 4sqrt(N) ~ 16sqrt(N)를 사용하는 것을 권장합니다. + +n_probe는 n_probe의 수에 비례하여 계산 시간이 늘어나므로 정확도와 시간을 적절히 균형을 맞추어 주십시오. 개인적으로 RVC에 있어서 그렇게까지 정확도는 필요 없다고 생각하기 때문에 n_probe = 1이면 된다고 생각합니다. + +## FastScan +FastScan은 직적 양자화를 레지스터에서 수행함으로써 거리의 고속 근사를 가능하게 하는 방법입니다.직적 양자화는 학습시에 d차원마다(보통 d=2)에 독립적으로 클러스터링을 실시해, 클러스터끼리의 거리를 사전 계산해 lookup table를 작성합니다. 예측시는 lookup table을 보면 각 차원의 거리를 O(1)로 계산할 수 있습니다. 따라서 PQ 다음에 지정하는 숫자는 일반적으로 벡터의 절반 차원을 지정합니다. 
+ +FastScan에 대한 자세한 설명은 공식 문서를 참조하십시오. +https://github.com/facebookresearch/Faiss/wiki/Fast-accumulation-of-PQ-and-AQ-codes-(FastScan) + +## RFlat +RFlat은 FastScan이 계산한 대략적인 거리를 index factory의 3번째 인수로 지정한 정확한 거리로 다시 계산하라는 인스트럭션입니다. k개의 근접 변수를 가져올 때 k*k_factor개의 점에 대해 재계산이 이루어집니다. + +# Embedding 테크닉 +## Alpha 쿼리 확장 +퀴리 확장이란 탐색에서 사용되는 기술로, 예를 들어 전문 탐색 시, 입력된 검색문에 단어를 몇 개를 추가함으로써 검색 정확도를 올리는 방법입니다. 백터 탐색을 위해서도 몇가지 방법이 제안되었는데, 그 중 α-쿼리 확장은 추가 학습이 필요 없는 매우 효과적인 방법으로 알려져 있습니다. [Attention-Based Query Expansion Learning](https://arxiv.org/abs/2007.08019)와 [2nd place solution of kaggle shopee competition](https://www.kaggle.com/code/lyakaap/2nd-place-solution/notebook) 논문에서 소개된 바 있습니다.. + +α-쿼리 확장은 한 벡터에 인접한 벡터를 유사도의 α곱한 가중치로 더해주면 됩니다. 코드로 예시를 들어 보겠습니다. big_npy를 α query expansion로 대체합니다. + +```python +alpha = 3. +index = Faiss.index_factory(256, "IVF512,PQ128x4fs,RFlat") +original_norm = np.maximum(np.linalg.norm(big_npy, ord=2, axis=1, keepdims=True), 1e-9) +big_npy /= original_norm +index.train(big_npy) +index.add(big_npy) +dist, neighbor = index.search(big_npy, num_expand) + +expand_arrays = [] +ixs = np.arange(big_npy.shape[0]) +for i in range(-(-big_npy.shape[0]//batch_size)): + ix = ixs[i*batch_size:(i+1)*batch_size] + weight = np.power(np.einsum("nd,nmd->nm", big_npy[ix], big_npy[neighbor[ix]]), alpha) + expand_arrays.append(np.sum(big_npy[neighbor[ix]] * np.expand_dims(weight, axis=2),axis=1)) +big_npy = np.concatenate(expand_arrays, axis=0) + +# index version 정규화 +big_npy = big_npy / np.maximum(np.linalg.norm(big_npy, ord=2, axis=1, keepdims=True), 1e-9) +``` + +위 테크닉은 탐색을 수행하는 쿼리에도, 탐색 대상 DB에도 적응 가능한 테크닉입니다. + +## MiniBatch KMeans에 의한 embedding 압축 + +total_fea.npy가 너무 클 경우 K-means를 이용하여 벡터를 작게 만드는 것이 가능합니다. 이하 코드로 embedding의 압축이 가능합니다. n_clusters에 압축하고자 하는 크기를 지정하고 batch_size에 256 * CPU의 코어 수를 지정함으로써 CPU 병렬화의 혜택을 충분히 얻을 수 있습니다. + +```python +import multiprocessing +from sklearn.cluster import MiniBatchKMeans +kmeans = MiniBatchKMeans(n_clusters=10000, batch_size=256 * multiprocessing.cpu_count(), init="random") +kmeans.fit(big_npy) +sample_npy = kmeans.cluster_centers_ +``` \ No newline at end of file diff --git a/AIMeiSheng/docs/faq.md b/AIMeiSheng/docs/faq.md new file mode 100644 index 0000000..3fba4a2 --- /dev/null +++ b/AIMeiSheng/docs/faq.md @@ -0,0 +1,93 @@ +## Q1:ffmpeg error/utf8 error. + +大概率不是ffmpeg问题,而是音频路径问题;
+ffmpeg读取路径带空格、()等特殊符号,可能出现ffmpeg error;训练集音频带中文路径,在写入filelist.txt的时候可能出现utf8 error;
+ +## Q2:一键训练结束没有索引 + +显示"Training is done. The program is closed."则模型训练成功,后续紧邻的报错是假的;
+
+一键训练结束后没有added开头的索引文件,可能是因为训练集太大卡住了添加索引的步骤;已通过批处理add索引解决add索引对内存需求过大的问题。临时可尝试再次点击"训练索引"按钮。
+ +## Q3:训练结束推理没看到训练集的音色 +点刷新音色再看看,如果还没有看看训练有没有报错,控制台和webui的截图,logs/实验名下的log,都可以发给开发者看看。
+ +## Q4:如何分享模型 +  rvc_root/logs/实验名 下面存储的pth不是用来分享模型用来推理的,而是为了存储实验状态供复现,以及继续训练用的。用来分享的模型应该是weights文件夹下大小为60+MB的pth文件;
+  后续将把weights/exp_name.pth和logs/exp_name/added_xxx.index合并打包成weights/exp_name.zip省去填写index的步骤,那么zip文件用来分享,不要分享pth文件,除非是想换机器继续训练;
+  如果你把logs文件夹下的几百MB的pth文件复制/分享到weights文件夹下强行用于推理,可能会出现f0,tgt_sr等各种key不存在的报错。你需要用ckpt选项卡最下面,手工或自动(本地logs下如果能找到相关信息则会自动)选择是否携带音高、目标音频采样率的选项后进行ckpt小模型提取(输入路径填G开头的那个),提取完在weights文件夹下会出现60+MB的pth文件,刷新音色后可以选择使用。
+ +## Q5:Connection Error. +也许你关闭了控制台(黑色窗口)。
+ +## Q6:WebUI弹出Expecting value: line 1 column 1 (char 0). +请关闭系统局域网代理/全局代理。
+ +这个不仅是客户端的代理,也包括服务端的代理(例如你使用autodl设置了http_proxy和https_proxy学术加速,使用时也需要unset关掉)
+ +## Q7:不用WebUI如何通过命令训练推理 +训练脚本:
+可先跑通WebUI,消息窗内会显示数据集处理和训练用命令行;
+ +推理脚本:
+https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/myinfer.py
+ +例子:
+ +runtime\python.exe myinfer.py 0 "E:\codes\py39\RVC-beta\todo-songs\1111.wav" "E:\codes\py39\logs\mi-test\added_IVF677_Flat_nprobe_7.index" harvest "test.wav" "weights/mi-test.pth" 0.6 cuda:0 True
+ +f0up_key=sys.argv[1]
+input_path=sys.argv[2]
+index_path=sys.argv[3]
+f0method=sys.argv[4]#harvest or pm
+opt_path=sys.argv[5]
+model_path=sys.argv[6]
+index_rate=float(sys.argv[7])
+device=sys.argv[8]
+is_half=bool(sys.argv[9])
+ +## Q8:Cuda error/Cuda out of memory. +小概率是cuda配置问题、设备不支持;大概率是显存不够(out of memory);
+ +训练的话缩小batch size(如果缩小到1还不够只能更换显卡训练),推理的话酌情缩小config.py结尾的x_pad,x_query,x_center,x_max。4G以下显存(例如1060(3G)和各种2G显卡)可以直接放弃,4G显存显卡还有救。
+ +## Q9:total_epoch调多少比较好 + +如果训练集音质差底噪大,20~30足够了,调太高,底模音质无法带高你的低音质训练集
+如果训练集音质高底噪低时长多,可以调高,200是ok的(训练速度很快,既然你有条件准备高音质训练集,显卡想必条件也不错,肯定不在乎多一些训练时间)
+ +## Q10:需要多少训练集时长 +  推荐10min至50min
+  保证音质高底噪低的情况下,如果有个人特色的音色统一,则多多益善
+  高水平的训练集(精简+音色有特色),5min至10min也是ok的,仓库作者本人就经常这么玩
+  也有人拿1min至2min的数据来训练并且训练成功的,但是成功经验是其他人不可复现的,不太具备参考价值。这要求训练集音色特色非常明显(比如说高频气声较明显的萝莉少女音),且音质高;
+  1min以下时长数据目前没见有人尝试(成功)过。不建议进行这种鬼畜行为。
+ +## Q11:index rate干嘛用的,怎么调(科普) +  如果底模和推理源的音质高于训练集的音质,他们可以带高推理结果的音质,但代价可能是音色往底模/推理源的音色靠,这种现象叫做"音色泄露";
+  index rate用来削减/解决音色泄露问题。调到1,则理论上不存在推理源的音色泄露问题,但音质更倾向于训练集。如果训练集音质比推理源低,则index rate调高可能降低音质。调到0,则不具备利用检索混合来保护训练集音色的效果;
+  如果训练集优质时长多,可调高total_epoch,此时模型本身不太会引用推理源和底模的音色,很少存在"音色泄露"问题,此时index_rate不重要,你甚至可以不建立/分享index索引文件。
+ +## Q11:推理怎么选gpu +config.py文件里device cuda:后面选择卡号;
+卡号和显卡的映射关系,在训练选项卡的显卡信息栏里能看到。
+ +## Q12:如何推理训练中间保存的pth +通过ckpt选项卡最下面提取小模型。
+ + +## Q13:如何中断和继续训练 +现阶段只能关闭WebUI控制台双击go-web.bat重启程序。网页参数也要刷新重新填写;
+继续训练:相同网页参数点训练模型,就会接着上次的checkpoint继续训练。
+ +## Q14:训练时出现文件页面/内存error +进程开太多了,内存炸了。你可能可以通过如下方式解决
+1、"提取音高和处理数据使用的CPU进程数" 酌情拉低;
+2、训练集音频手工切一下,不要太长。
+ + +## Q15:如何中途加数据训练 +1、所有数据新建一个实验名;
+2、拷贝上一次的最新的那个G和D文件(或者你想基于哪个中间ckpt训练,也可以拷贝中间的)到新实验名下;
+3、一键训练新实验名,他会继续上一次的最新进度训练。
+ diff --git a/AIMeiSheng/docs/faq_en.md b/AIMeiSheng/docs/faq_en.md new file mode 100644 index 0000000..6d15da2 --- /dev/null +++ b/AIMeiSheng/docs/faq_en.md @@ -0,0 +1,104 @@ +## Q1:ffmpeg error/utf8 error. +It is most likely not a FFmpeg issue, but rather an audio path issue; + +FFmpeg may encounter an error when reading paths containing special characters like spaces and (), which may cause an FFmpeg error; and when the training set's audio contains Chinese paths, writing it into filelist.txt may cause a utf8 error.
+
+## Q2:Cannot find index file after "One-click Training".
+If it displays "Training is done. The program is closed," then the model has been trained successfully, and the error messages that immediately follow are spurious;
+
+The lack of an 'added' index file after One-click training may be due to the training set being too large, causing the index-adding step to get stuck. This has been resolved by adding the index in batches, which avoids the excessive memory demand of adding it all at once. As a temporary workaround, try clicking the "Train Index" button again.
+ +## Q3:Cannot find the model in “Inferencing timbre” after training +Click “Refresh timbre list” and check again; if still not visible, check if there are any errors during training and send screenshots of the console, web UI, and logs/experiment_name/*.log to the developers for further analysis.
+
+## Q4:How to share a model/How to use others' models?
+The pth files stored in rvc_root/logs/experiment_name are not meant for sharing or inference, but for storing the experiment checkpoints for reproducibility and further training. The model to be shared should be the 60+MB pth file in the weights folder;
+
+In the future, weights/exp_name.pth and logs/exp_name/added_xxx.index will be merged into a single weights/exp_name.zip file to eliminate the need for manual index input; so share the zip file, not the pth file, unless you want to continue training on a different machine;
+
+Copying/sharing the several hundred MB pth files from the logs folder to the weights folder for forced inference may result in errors such as missing f0, tgt_sr, or other keys. You need to use the ckpt tab at the bottom to manually or automatically (if the information is found in logs/exp_name) select whether to include pitch information and the target audio sampling rate, and then extract the smaller model. After extraction, there will be a 60+MB pth file in the weights folder, and you can refresh the voices to use it.
+ +## Q5:Connection Error. +You may have closed the console (black command line window).
+
+## Q6:WebUI popup 'Expecting value: line 1 column 1 (char 0)'.
+Please disable system LAN proxy/global proxy and then refresh.
+
+This applies not only to client-side proxies but also to server-side ones (e.g. if you set http_proxy and https_proxy on autodl for academic acceleration, unset them when running the WebUI).
+ +## Q7:How to train and infer without the WebUI? +Training script:
+You can run training in WebUI first, and the command-line versions of dataset preprocessing and training will be displayed in the message window.
+ +Inference script:
+https://huggingface.co/lj1995/VoiceConversionWebUI/blob/main/myinfer.py
+ + +e.g.
+ +runtime\python.exe myinfer.py 0 "E:\codes\py39\RVC-beta\todo-songs\1111.wav" "E:\codes\py39\logs\mi-test\added_IVF677_Flat_nprobe_7.index" harvest "test.wav" "weights/mi-test.pth" 0.6 cuda:0 True
+ + +f0up_key=sys.argv[1]
+input_path=sys.argv[2]
+index_path=sys.argv[3]
+f0method=sys.argv[4]#harvest or pm
+opt_path=sys.argv[5]
+model_path=sys.argv[6]
+index_rate=float(sys.argv[7])
+device=sys.argv[8]
+is_half=bool(sys.argv[9])
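+
+Note: in Python, bool() of any non-empty string is True, so `bool(sys.argv[9])` is True even for the string "False". If you wrap this script yourself, a sketch of a safer parse (not how myinfer.py itself does it):
+
+```python
+import sys
+
+# bool("False") == True, so compare the text explicitly instead
+is_half = sys.argv[9].strip().lower() in ("true", "1", "yes")
+```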
+ +## Q8:Cuda error/Cuda out of memory. +There is a small chance that there is a problem with the CUDA configuration or the device is not supported; more likely, there is not enough memory (out of memory).
+
+For training, reduce the batch size (if reducing it to 1 is still not enough, you may need a different graphics card); for inference, reduce the x_pad, x_query, x_center, and x_max settings at the end of config.py as needed. Cards with less than 4GB of VRAM (e.g. the 1060 3G and various 2GB cards) are a lost cause, while 4GB cards still have a chance.
+
+## Q9:How many total_epoch are optimal?
+If the training dataset's audio quality is poor and the noise floor is high, 20-30 epochs are sufficient; setting it higher will not let the base model's better audio quality lift your low-quality training set.
+ +If the training set audio quality is high, the noise floor is low, and there is sufficient duration, you can increase it. 200 is acceptable (since training is fast, and if you're able to prepare a high-quality training set, your GPU likely can handle a longer training duration without issue).
+ +## Q10:How much training set duration is needed? + +A dataset of around 10min to 50min is recommended.
+
+With guaranteed high sound quality and a low noise floor, more data can be added as long as the dataset's timbre is uniform.
+ +For a high-level training set (lean + distinctive tone), 5min to 10min is fine.
+ +There are some people who have trained successfully with 1min to 2min data, but the success is not reproducible by others and is not very informative.
This requires that the training set has a very distinctive timbre (e.g. a high-frequency airy anime girl sound) and the quality of the audio is high; +Data of less than 1min duration has not been successfully attempted so far. This is not recommended.
+ + +## Q11:What is the index rate for and how to adjust it? +If the tone quality of the pre-trained model and inference source is higher than that of the training set, they can bring up the tone quality of the inference result, but at the cost of a possible tone bias towards the tone of the underlying model/inference source rather than the tone of the training set, which is generally referred to as "tone leakage".
+ +The index rate is used to reduce/resolve the timbre leakage problem. If the index rate is set to 1, theoretically there is no timbre leakage from the inference source and the timbre quality is more biased towards the training set. If the training set has a lower sound quality than the inference source, then a higher index rate may reduce the sound quality. Turning it down to 0 does not have the effect of using retrieval blending to protect the training set tones.
+ +If the training set has good audio quality and long duration, turn up the total_epoch, when the model itself is less likely to refer to the inferred source and the pretrained underlying model, and there is little "tone leakage", the index_rate is not important and you can even not create/share the index file.
+ +## Q12:How to choose the gpu when inferring? +In the config.py file, select the card number after "device cuda:".
+ +The mapping between card number and graphics card can be seen in the graphics card information section of the training tab.
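+
+For reference, the card numbers follow PyTorch's CUDA device ordering; a small, hypothetical sketch for listing them (not part of RVC):
+
+```python
+import torch
+
+# print every visible card together with the index used after "device cuda:"
+for i in range(torch.cuda.device_count()):
+    print(i, torch.cuda.get_device_name(i))
+```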
+
+## Q13:How to use the model saved in the middle of training?
+Save via model extraction at the bottom of the ckpt processing tab.
+
+## Q14:File/memory error (when training)?
+Too many processes and your memory is not enough. You may fix it by:
+
+1、decrease the input in field "Threads of CPU".
+
+2、pre-cut trainset to shorter audio files.
+
+## Q15: How to continue training using more data
+
+step1: put all wav data to path2.
+
+step2: exp_name2+path2 -> process dataset and extract feature.
+
+step3: copy the latest G and D file of exp_name1 (your previous experiment) into exp_name2 folder.
+
+step4: click "train the model", and it will continue training from the beginning of your previous exp model epoch.
+
+
diff --git a/AIMeiSheng/docs/training_tips_en.md b/AIMeiSheng/docs/training_tips_en.md
new file mode 100644
index 0000000..ab9b1f8
--- /dev/null
+++ b/AIMeiSheng/docs/training_tips_en.md
@@ -0,0 +1,65 @@
+Instructions and tips for RVC training
+======================================
+This TIPS explains how data training is done.
+
+# Training flow
+I will explain along the steps in the training tab of the GUI.
+
+## step1
+Set the experiment name here.
+
+You can also set here whether the model should take pitch into account.
+If the model doesn't consider pitch, the model will be lighter, but not suitable for singing.
+
+Data for each experiment is placed in `/logs/your-experiment-name/`.
+
+## step2a
+Loads and preprocesses audio.
+
+### load audio
+If you specify a folder with audio, the audio files in that folder will be read automatically.
+For example, if you specify `C:\Users\hoge\voices`, `C:\Users\hoge\voices\voice.mp3` will be loaded, but `C:\Users\hoge\voices\dir\voice.mp3` will not be loaded.
+
+Since ffmpeg is used internally for reading audio, any extension supported by ffmpeg will be read automatically.
+After converting to int16 with ffmpeg, the audio is converted to float32 and normalized between -1 and 1.
+
+### denoising
+The audio is smoothed by scipy's filtfilt.
+
+### Audio Split
+First, the input audio is divided by detecting parts of silence that last longer than a certain period (max_sil_kept=5 seconds?). After splitting the audio on silence, the audio is split every 4 seconds with an overlap of 0.3 seconds. Each segment of at most 4 seconds is volume-normalized and saved as a wav file to `/logs/your-experiment-name/0_gt_wavs`, then resampled to 16k and saved as a wav file to `/logs/your-experiment-name/1_16k_wavs`.
+
+## step2b
+### Extract pitch
+Extract pitch information from the wav files. Extract the pitch information (=f0) using the method built into parselmouth or pyworld and save it in `/logs/your-experiment-name/2a_f0`. Then logarithmically convert the pitch information to an integer between 1 and 255 and save it in `/logs/your-experiment-name/2b-f0nsf`.
+
+### Extract feature_print
+Convert the wav files to embeddings in advance using HuBERT. Read the wav files saved in `/logs/your-experiment-name/1_16k_wavs`, convert them to 256-dimensional features with HuBERT, and save them in npy format in `/logs/your-experiment-name/3_feature256`.
+
+## step3
+Train the model.
+### Glossary for Beginners
+In deep learning, the data set is divided and the learning proceeds little by little. In one model update (step), batch_size data are retrieved and predictions and error corrections are performed. Doing this once for the whole dataset counts as one epoch.
+
+Therefore, the learning time is the learning time per step x (the number of data in the dataset / batch size) x the number of epochs. In general, the larger the batch size, the more stable the learning becomes and the smaller (learning time per step ÷ batch size) becomes, but more GPU memory is used. GPU RAM can be checked with the nvidia-smi command. Learning can be done in a short time by increasing the batch size as far as the machine of the execution environment allows.
+
+### Specify pretrained model
+RVC starts training the model from pretrained weights instead of from 0, so it can be trained with a small dataset.
+
+By default
+
+- If you consider pitch, it loads `rvc-location/pretrained/f0G40k.pth` and `rvc-location/pretrained/f0D40k.pth`.
+- If you don't consider pitch, it loads `rvc-location/pretrained/G40k.pth` and `rvc-location/pretrained/D40k.pth`.
+
+When learning, model parameters are saved in `logs/your-experiment-name/G_{}.pth` and `logs/your-experiment-name/D_{}.pth` every save_every_epoch; by specifying these paths, you can resume training, or start training from model weights learned in a different experiment.
+
+### learning index
+RVC saves the HuBERT feature values used during training, and during inference searches for feature values similar to those used during learning to perform inference. In order to perform this search at high speed, the index is learned in advance.
+For index learning, we use the approximate neighborhood search library faiss. Read the feature values of `logs/your-experiment-name/3_feature256`, use them to learn the index, and save it as `logs/your-experiment-name/add_XXX.index`.
+
+(From the 20230428 update, total_fea.npy is read from the index, so saving or specifying it separately is no longer necessary.)
+
+### Button description
+- Train model: After executing step2b, press this button to train the model.
+- Train feature index: After training the model, perform index learning.
+- One-click training: step2b, model training and feature index training all at once.
\ No newline at end of file diff --git a/AIMeiSheng/docs/training_tips_ja.md b/AIMeiSheng/docs/training_tips_ja.md new file mode 100644 index 0000000..c5b06f2 --- /dev/null +++ b/AIMeiSheng/docs/training_tips_ja.md @@ -0,0 +1,64 @@ +RVCの訓練における説明、およびTIPS +=============================== +本TIPSではどのようにデータの訓練が行われているかを説明します。 + +# 訓練の流れ +GUIの訓練タブのstepに沿って説明します。 + +## step1 +実験名の設定を行います。 + +また、モデルに音高ガイド(ピッチ)を考慮させるかもここで設定できます。考慮させない場合はモデルは軽量になりますが、歌唱には向かなくなります。 + +各実験のデータは`/logs/実験名/`に配置されます。 + +## step2a +音声の読み込みと前処理を行います。 + +### load audio +音声のあるフォルダを指定すると、そのフォルダ内にある音声ファイルを自動で読み込みます。 +例えば`C:Users\hoge\voices`を指定した場合、`C:Users\hoge\voices\voice.mp3`は読み込まれますが、`C:Users\hoge\voices\dir\voice.mp3`は読み込まれません。 + +音声の読み込みには内部でffmpegを利用しているので、ffmpegで対応している拡張子であれば自動的に読み込まれます。 +ffmpegでint16に変換した後、float32に変換し、-1 ~ 1の間に正規化されます。 + +### denoising +音声についてscipyのfiltfiltによる平滑化を行います。 + +### 音声の分割 +入力した音声はまず、一定期間(max_sil_kept=5秒?)より長く無音が続く部分を検知して音声を分割します。無音で音声を分割した後は、0.3秒のoverlapを含む4秒ごとに音声を分割します。4秒以内に区切られた音声は、音量の正規化を行った後wavファイルを`/logs/実験名/0_gt_wavs`に、そこから16kのサンプリングレートに変換して`/logs/実験名/1_16k_wavs`にwavファイルで保存します。 + +## step2b +### ピッチの抽出 +wavファイルからピッチ(音の高低)の情報を抽出します。parselmouthやpyworldに内蔵されている手法でピッチ情報(=f0)を抽出し、`/logs/実験名/2a_f0`に保存します。その後、ピッチ情報を対数で変換して1~255の整数に変換し、`/logs/実験名/2b-f0nsf`に保存します。 + +### feature_printの抽出 +HuBERTを用いてwavファイルを事前にembeddingに変換します。`/logs/実験名/1_16k_wavs`に保存したwavファイルを読み込み、HuBERTでwavファイルを256次元の特徴量に変換し、npy形式で`/logs/実験名/3_feature256`に保存します。 + +## step3 +モデルのトレーニングを行います。 +### 初心者向け用語解説 +深層学習ではデータセットを分割し、少しずつ学習を進めていきます。一回のモデルの更新(step)では、batch_size個のデータを取り出し予測と誤差の修正を行います。これをデータセットに対して一通り行うと一epochと数えます。 + +そのため、学習時間は 1step当たりの学習時間 x (データセット内のデータ数 ÷ バッチサイズ) x epoch数 かかります。一般にバッチサイズを大きくするほど学習は安定し、(1step当たりの学習時間÷バッチサイズ)は小さくなりますが、その分GPUのメモリを多く使用します。GPUのRAMはnvidia-smiコマンド等で確認できます。実行環境のマシンに合わせてバッチサイズをできるだけ大きくするとより短時間で学習が可能です。 + +### pretrained modelの指定 +RVCではモデルの訓練を0からではなく、事前学習済みの重みから開始するため、少ないデータセットで学習を行えます。 + +デフォルトでは + +- 音高ガイドを考慮する場合、`RVCのある場所/pretrained/f0G40k.pth`と`RVCのある場所/pretrained/f0D40k.pth`を読み込みます。 +- 音高ガイドを考慮しない場合、`RVCのある場所/pretrained/G40k.pth`と`RVCのある場所/pretrained/D40k.pth`を読み込みます。 + +学習時はsave_every_epochごとにモデルのパラメータが`logs/実験名/G_{}.pth`と`logs/実験名/D_{}.pth`に保存されますが、このパスを指定することで学習を再開したり、もしくは違う実験で学習したモデルの重みから学習を開始できます。 + +### indexの学習 +RVCでは学習時に使われたHuBERTの特徴量を保存し、推論時は学習時の特徴量から近い特徴量を探してきて推論を行います。この検索を高速に行うために事前にindexの学習を行います。 +indexの学習には近似近傍探索ライブラリのfaissを用います。`/logs/実験名/3_feature256`の特徴量を読み込み、それを用いて学習したindexを`/logs/実験名/add_XXX.index`として保存します。 +(20230428updateよりtotal_fea.npyはindexから読み込むので不要になりました。) + +### ボタンの説明 +- モデルのトレーニング: step2bまでを実行した後、このボタンを押すとモデルの学習を行います。 +- 特徴インデックスのトレーニング: モデルのトレーニング後、indexの学習を行います。 +- ワンクリックトレーニング: step2bまでとモデルのトレーニング、特徴インデックスのトレーニングを一括で行います。 + diff --git a/AIMeiSheng/docs/training_tips_ko.md b/AIMeiSheng/docs/training_tips_ko.md new file mode 100644 index 0000000..8b3b624 --- /dev/null +++ b/AIMeiSheng/docs/training_tips_ko.md @@ -0,0 +1,53 @@ +RVC 훈련에 대한 설명과 팁들 +====================================== +본 팁에서는 어떻게 데이터 훈련이 이루어지고 있는지 설명합니다. + +# 훈련의 흐름 +GUI의 훈련 탭의 단계를 따라 설명합니다. + +## step1 +실험 이름을 지정합니다. 또한, 모델이 피치(소리의 높낮이)를 고려해야 하는지 여부를 여기에서 설정할 수도 있습니다.. +각 실험을 위한 데이터는 `/logs/experiment name/`에 배치됩니다.. + +## step2a +음성 파일을 불러오고 전처리합니다. + +### 음성 파일 불러오기 +음성 파일이 있는 폴더를 지정하면 해당 폴더에 있는 음성 파일이 자동으로 가져와집니다. +예를 들어 `C:Users\hoge\voices`를 지정하면 `C:Users\hoge\voices\voice.mp3`가 읽히지만 `C:Users\hoge\voices\dir\voice.mp3`는 읽히지 않습니다. + +음성 로드에는 내부적으로 ffmpeg를 이용하고 있으므로, ffmpeg로 대응하고 있는 확장자라면 자동적으로 읽힙니다. +ffmpeg에서 int16으로 변환한 후 float32로 변환하고 -1과 1 사이에 정규화됩니다. 
+ +### 잡음 제거 +음성 파일에 대해 scipy의 filtfilt를 이용하여 잡음을 처리합니다. + +### 음성 분할 +입력한 음성 파일은 먼저 일정 기간(max_sil_kept=5초?)보다 길게 무음이 지속되는 부분을 감지하여 음성을 분할합니다.무음으로 음성을 분할한 후에는 0.3초의 overlap을 포함하여 4초마다 음성을 분할합니다.4초 이내에 구분된 음성은 음량의 정규화를 실시한 후 wav 파일을 `/logs/실험명/0_gt_wavs`로, 거기에서 16k의 샘플링 레이트로 변환해 `/logs/실험명/1_16k_wavs`에 wav 파일로 저장합니다. + +## step2b +### 피치 추출 +wav 파일에서 피치(소리의 높낮이) 정보를 추출합니다. parselmouth나 pyworld에 내장되어 있는 메서드으로 피치 정보(=f0)를 추출해, `/logs/실험명/2a_f0`에 저장합니다. 그 후 피치 정보를 로그로 변환하여 1~255 정수로 변환하고 `/logs/실험명/2b-f0nsf`에 저장합니다. + +### feature_print 추출 +HuBERT를 이용하여 wav 파일을 미리 embedding으로 변환합니다. `/logs/실험명/1_16k_wavs`에 저장한 wav 파일을 읽고 HuBERT에서 wav 파일을 256차원 feature들로 변환한 후 npy 형식으로 `/logs/실험명/3_feature256`에 저장합니다. + +## step3 +모델의 훈련을 진행합니다. + +### 초보자용 용어 해설 +심층학습(딥러닝)에서는 데이터셋을 분할하여 조금씩 학습을 진행합니다.한 번의 모델 업데이트(step) 단계 당 batch_size개의 데이터를 탐색하여 예측과 오차를 수정합니다. 데이터셋 전부에 대해 이 작업을 한 번 수행하는 이를 하나의 epoch라고 계산합니다. + +따라서 학습 시간은 단계당 학습 시간 x (데이터셋 내 데이터의 수 / batch size) x epoch 수가 소요됩니다. 일반적으로 batch size가 클수록 학습이 안정적이게 됩니다. (step당 학습 시간 ÷ batch size)는 작아지지만 GPU 메모리를 더 많이 사용합니다. GPU RAM은 nvidia-smi 명령어를 통해 확인할 수 있습니다. 실행 환경에 따라 배치 크기를 최대한 늘리면 짧은 시간 내에 학습이 가능합니다. + +### 사전 학습된 모델 지정 +RVC는 적은 데이터셋으로도 훈련이 가능하도록 사전 훈련된 가중치에서 모델 훈련을 시작합니다. 기본적으로 `rvc-location/pretrained/f0G40k.pth` 및 `rvc-location/pretrained/f0D40k.pth`를 불러옵니다. 학습을 할 시에, 모델 파라미터는 각 save_every_epoch별로 `logs/experiment name/G_{}.pth` 와 `logs/experiment name/D_{}.pth`로 저장이 되는데, 이 경로를 지정함으로써 학습을 재개하거나, 다른 실험에서 학습한 모델의 가중치에서 학습을 시작할 수 있습니다. + +### index의 학습 +RVC에서는 학습시에 사용된 HuBERT의 feature값을 저장하고, 추론 시에는 학습 시 사용한 feature값과 유사한 feature 값을 탐색해 추론을 진행합니다. 이 탐색을 고속으로 수행하기 위해 사전에 index을 학습하게 됩니다. +Index 학습에는 근사 근접 탐색법 라이브러리인 Faiss를 사용하게 됩니다. `/logs/실험명/3_feature256`의 feature값을 불러와, 이를 모두 결합시킨 feature값을 `/logs/실험명/total_fea.npy`로서 저장, 그것을 사용해 학습한 index를`/logs/실험명/add_XXX.index`로 저장합니다. + +### 버튼 설명 +- モデルのトレーニング (모델 학습): step2b까지 실행한 후, 이 버튼을 눌러 모델을 학습합니다. +- 特徴インデックスのトレーニング (특징 지수 훈련): 모델의 훈련 후, index를 학습합니다. +- ワンクリックトレーニング (원클릭 트레이닝): step2b까지의 모델 훈련, feature index 훈련을 일괄로 실시합니다. 
\ No newline at end of file diff --git "a/AIMeiSheng/docs/\345\260\217\347\231\275\347\256\200\346\230\223\346\225\231\347\250\213.doc" "b/AIMeiSheng/docs/\345\260\217\347\231\275\347\256\200\346\230\223\346\225\231\347\250\213.doc" new file mode 100644 index 0000000..2e29189 Binary files /dev/null and "b/AIMeiSheng/docs/\345\260\217\347\231\275\347\256\200\346\230\223\346\225\231\347\250\213.doc" differ diff --git a/AIMeiSheng/environment_dml.yaml b/AIMeiSheng/environment_dml.yaml new file mode 100644 index 0000000..0fb3f22 --- /dev/null +++ b/AIMeiSheng/environment_dml.yaml @@ -0,0 +1,186 @@ +name: pydml +channels: + - pytorch + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/main/ + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/pkgs/free/ + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/conda-forge/ + - defaults + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/fastai/ + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/pytorch/ + - https://mirrors.tuna.tsinghua.edu.cn/anaconda/cloud/bioconda/ +dependencies: + - abseil-cpp=20211102.0=hd77b12b_0 + - absl-py=1.3.0=py310haa95532_0 + - aiohttp=3.8.3=py310h2bbff1b_0 + - aiosignal=1.2.0=pyhd3eb1b0_0 + - async-timeout=4.0.2=py310haa95532_0 + - attrs=22.1.0=py310haa95532_0 + - blas=1.0=mkl + - blinker=1.4=py310haa95532_0 + - bottleneck=1.3.5=py310h9128911_0 + - brotli=1.0.9=h2bbff1b_7 + - brotli-bin=1.0.9=h2bbff1b_7 + - brotlipy=0.7.0=py310h2bbff1b_1002 + - bzip2=1.0.8=he774522_0 + - c-ares=1.19.0=h2bbff1b_0 + - ca-certificates=2023.05.30=haa95532_0 + - cachetools=4.2.2=pyhd3eb1b0_0 + - certifi=2023.5.7=py310haa95532_0 + - cffi=1.15.1=py310h2bbff1b_3 + - charset-normalizer=2.0.4=pyhd3eb1b0_0 + - click=8.0.4=py310haa95532_0 + - colorama=0.4.6=py310haa95532_0 + - contourpy=1.0.5=py310h59b6b97_0 + - cryptography=39.0.1=py310h21b164f_0 + - cycler=0.11.0=pyhd3eb1b0_0 + - fonttools=4.25.0=pyhd3eb1b0_0 + - freetype=2.12.1=ha860e81_0 + - frozenlist=1.3.3=py310h2bbff1b_0 + - giflib=5.2.1=h8cc25b3_3 + - glib=2.69.1=h5dc1a3c_2 + - google-auth=2.6.0=pyhd3eb1b0_0 + - google-auth-oauthlib=0.4.4=pyhd3eb1b0_0 + - grpc-cpp=1.48.2=hf108199_0 + - grpcio=1.48.2=py310hf108199_0 + - gst-plugins-base=1.18.5=h9e645db_0 + - gstreamer=1.18.5=hd78058f_0 + - icu=58.2=ha925a31_3 + - idna=3.4=py310haa95532_0 + - intel-openmp=2023.1.0=h59b6b97_46319 + - jpeg=9e=h2bbff1b_1 + - kiwisolver=1.4.4=py310hd77b12b_0 + - krb5=1.19.4=h5b6d351_0 + - lerc=3.0=hd77b12b_0 + - libbrotlicommon=1.0.9=h2bbff1b_7 + - libbrotlidec=1.0.9=h2bbff1b_7 + - libbrotlienc=1.0.9=h2bbff1b_7 + - libclang=14.0.6=default_hb5a9fac_1 + - libclang13=14.0.6=default_h8e68704_1 + - libdeflate=1.17=h2bbff1b_0 + - libffi=3.4.4=hd77b12b_0 + - libiconv=1.16=h2bbff1b_2 + - libogg=1.3.5=h2bbff1b_1 + - libpng=1.6.39=h8cc25b3_0 + - libprotobuf=3.20.3=h23ce68f_0 + - libtiff=4.5.0=h6c2663c_2 + - libuv=1.44.2=h2bbff1b_0 + - libvorbis=1.3.7=he774522_0 + - libwebp=1.2.4=hbc33d0d_1 + - libwebp-base=1.2.4=h2bbff1b_1 + - libxml2=2.10.3=h0ad7f3c_0 + - libxslt=1.1.37=h2bbff1b_0 + - lz4-c=1.9.4=h2bbff1b_0 + - markdown=3.4.1=py310haa95532_0 + - markupsafe=2.1.1=py310h2bbff1b_0 + - matplotlib=3.7.1=py310haa95532_1 + - matplotlib-base=3.7.1=py310h4ed8f06_1 + - mkl=2023.1.0=h8bd8f75_46356 + - mkl-service=2.4.0=py310h2bbff1b_1 + - mkl_fft=1.3.6=py310h4ed8f06_1 + - mkl_random=1.2.2=py310h4ed8f06_1 + - multidict=6.0.2=py310h2bbff1b_0 + - munkres=1.1.4=py_0 + - numexpr=2.8.4=py310h2cd9be0_1 + - numpy=1.24.3=py310h055cbcc_1 + - 
numpy-base=1.24.3=py310h65a83cf_1 + - oauthlib=3.2.2=py310haa95532_0 + - openssl=1.1.1t=h2bbff1b_0 + - packaging=23.0=py310haa95532_0 + - pandas=1.5.3=py310h4ed8f06_0 + - pcre=8.45=hd77b12b_0 + - pillow=9.4.0=py310hd77b12b_0 + - pip=23.0.1=py310haa95532_0 + - ply=3.11=py310haa95532_0 + - protobuf=3.20.3=py310hd77b12b_0 + - pyasn1=0.4.8=pyhd3eb1b0_0 + - pyasn1-modules=0.2.8=py_0 + - pycparser=2.21=pyhd3eb1b0_0 + - pyjwt=2.4.0=py310haa95532_0 + - pyopenssl=23.0.0=py310haa95532_0 + - pyparsing=3.0.9=py310haa95532_0 + - pyqt=5.15.7=py310hd77b12b_0 + - pyqt5-sip=12.11.0=py310hd77b12b_0 + - pysocks=1.7.1=py310haa95532_0 + - python=3.10.11=h966fe2a_2 + - python-dateutil=2.8.2=pyhd3eb1b0_0 + - pytorch-mutex=1.0=cpu + - pytz=2022.7=py310haa95532_0 + - pyyaml=6.0=py310h2bbff1b_1 + - qt-main=5.15.2=he8e5bd7_8 + - qt-webengine=5.15.9=hb9a9bb5_5 + - qtwebkit=5.212=h2bbfb41_5 + - re2=2022.04.01=hd77b12b_0 + - requests=2.29.0=py310haa95532_0 + - requests-oauthlib=1.3.0=py_0 + - rsa=4.7.2=pyhd3eb1b0_1 + - setuptools=67.8.0=py310haa95532_0 + - sip=6.6.2=py310hd77b12b_0 + - six=1.16.0=pyhd3eb1b0_1 + - sqlite=3.41.2=h2bbff1b_0 + - tbb=2021.8.0=h59b6b97_0 + - tensorboard=2.10.0=py310haa95532_0 + - tensorboard-data-server=0.6.1=py310haa95532_0 + - tensorboard-plugin-wit=1.8.1=py310haa95532_0 + - tk=8.6.12=h2bbff1b_0 + - toml=0.10.2=pyhd3eb1b0_0 + - tornado=6.2=py310h2bbff1b_0 + - tqdm=4.65.0=py310h9909e9c_0 + - typing_extensions=4.5.0=py310haa95532_0 + - tzdata=2023c=h04d1e81_0 + - urllib3=1.26.16=py310haa95532_0 + - vc=14.2=h21ff451_1 + - vs2015_runtime=14.27.29016=h5e58377_2 + - werkzeug=2.2.3=py310haa95532_0 + - wheel=0.38.4=py310haa95532_0 + - win_inet_pton=1.1.0=py310haa95532_0 + - xz=5.4.2=h8cc25b3_0 + - yaml=0.2.5=he774522_0 + - yarl=1.8.1=py310h2bbff1b_0 + - zlib=1.2.13=h8cc25b3_0 + - zstd=1.5.5=hd43e919_0 + - pip: + - antlr4-python3-runtime==4.8 + - appdirs==1.4.4 + - audioread==3.0.0 + - bitarray==2.7.4 + - cython==0.29.35 + - decorator==5.1.1 + - fairseq==0.12.2 + - faiss-cpu==1.7.4 + - filelock==3.12.0 + - hydra-core==1.0.7 + - jinja2==3.1.2 + - joblib==1.2.0 + - lazy-loader==0.2 + - librosa==0.10.0.post2 + - llvmlite==0.40.0 + - lxml==4.9.2 + - mpmath==1.3.0 + - msgpack==1.0.5 + - networkx==3.1 + - noisereduce==2.0.1 + - numba==0.57.0 + - omegaconf==2.0.6 + - opencv-python==4.7.0.72 + - pooch==1.6.0 + - portalocker==2.7.0 + - pysimplegui==4.60.5 + - pywin32==306 + - pyworld==0.3.3 + - regex==2023.5.5 + - sacrebleu==2.3.1 + - scikit-learn==1.2.2 + - scipy==1.10.1 + - sounddevice==0.4.6 + - soundfile==0.12.1 + - soxr==0.3.5 + - sympy==1.12 + - tabulate==0.9.0 + - threadpoolctl==3.1.0 + - torch==2.0.0 + - torch-directml==0.2.0.dev230426 + - torchaudio==2.0.1 + - torchvision==0.15.1 + - wget==3.2 +prefix: D:\ProgramData\anaconda3_\envs\pydml diff --git a/AIMeiSheng/extract_f0_print.py b/AIMeiSheng/extract_f0_print.py new file mode 100644 index 0000000..4f6c806 --- /dev/null +++ b/AIMeiSheng/extract_f0_print.py @@ -0,0 +1,166 @@ +import os, traceback, sys, parselmouth + +now_dir = os.getcwd() +sys.path.append(now_dir) +from lib.audio import load_audio +import pyworld +import numpy as np, logging + +logging.getLogger("numba").setLevel(logging.WARNING) +from multiprocessing import Process + +exp_dir = sys.argv[1] +f = open("%s/extract_f0_feature.log" % exp_dir, "a+") + + +def printt(strr): + print(strr) + f.write("%s\n" % strr) + f.flush() + + +n_p = int(sys.argv[2]) +f0method = sys.argv[3] + + +class FeatureInput(object): + def __init__(self, samplerate=16000, hop_size=160): + self.fs = samplerate + 
self.hop = hop_size + + self.f0_bin = 256 + self.f0_max = 1100.0 + self.f0_min = 50.0 + self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) + self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) + + def compute_f0(self, path, f0_method): + x = load_audio(path, self.fs) + p_len = x.shape[0] // self.hop + if f0_method == "pm": + time_step = 160 / 16000 * 1000 + f0_min = 50 + f0_max = 1100 + f0 = ( + parselmouth.Sound(x, self.fs) + .to_pitch_ac( + time_step=time_step / 1000, + voicing_threshold=0.6, + pitch_floor=f0_min, + pitch_ceiling=f0_max, + ) + .selected_array["frequency"] + ) + pad_size = (p_len - len(f0) + 1) // 2 + if pad_size > 0 or p_len - len(f0) - pad_size > 0: + f0 = np.pad( + f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" + ) + elif f0_method == "harvest": + f0, t = pyworld.harvest( + x.astype(np.double), + fs=self.fs, + f0_ceil=self.f0_max, + f0_floor=self.f0_min, + frame_period=1000 * self.hop / self.fs, + ) + f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs) + elif f0_method == "dio": + f0, t = pyworld.dio( + x.astype(np.double), + fs=self.fs, + f0_ceil=self.f0_max, + f0_floor=self.f0_min, + frame_period=1000 * self.hop / self.fs, + ) + f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.fs) + elif f0_method == "rmvpe": + if hasattr(self, "model_rmvpe") == False: + from lib.rmvpe import RMVPE + + print("loading rmvpe model") + self.model_rmvpe = RMVPE("rmvpe.pt", is_half=False, device="cpu") + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + return f0 + + def coarse_f0(self, f0): + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * ( + self.f0_bin - 2 + ) / (self.f0_mel_max - self.f0_mel_min) + 1 + + # use 0 or 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1 + f0_coarse = np.rint(f0_mel).astype(int) + assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, ( + f0_coarse.max(), + f0_coarse.min(), + ) + return f0_coarse + + def go(self, paths, f0_method): + if len(paths) == 0: + printt("no-f0-todo") + else: + printt("todo-f0-%s" % len(paths)) + n = max(len(paths) // 5, 1) # 每个进程最多打印5条 + for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): + try: + if idx % n == 0: + printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path)) + if ( + os.path.exists(opt_path1 + ".npy") == True + and os.path.exists(opt_path2 + ".npy") == True + ): + continue + featur_pit = self.compute_f0(inp_path, f0_method) + np.save( + opt_path2, + featur_pit, + allow_pickle=False, + ) # nsf + coarse_pit = self.coarse_f0(featur_pit) + np.save( + opt_path1, + coarse_pit, + allow_pickle=False, + ) # ori + except: + printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) + + +if __name__ == "__main__": + # exp_dir=r"E:\codes\py39\dataset\mi-test" + # n_p=16 + # f = open("%s/log_extract_f0.log"%exp_dir, "w") + printt(sys.argv) + featureInput = FeatureInput() + paths = [] + inp_root = "%s/1_16k_wavs" % (exp_dir) + opt_root1 = "%s/2a_f0" % (exp_dir) + opt_root2 = "%s/2b-f0nsf" % (exp_dir) + + os.makedirs(opt_root1, exist_ok=True) + os.makedirs(opt_root2, exist_ok=True) + for name in sorted(list(os.listdir(inp_root))): + inp_path = "%s/%s" % (inp_root, name) + if "spec" in inp_path: + continue + opt_path1 = "%s/%s" % (opt_root1, name) + opt_path2 = "%s/%s" % (opt_root2, name) + paths.append([inp_path, opt_path1, opt_path2]) + + ps = [] + for i in range(n_p): + p = Process( + target=featureInput.go, + args=( + paths[i::n_p], + f0method, + ), + ) + ps.append(p) + 
p.start() + for i in range(n_p): + ps[i].join() diff --git a/AIMeiSheng/extract_f0_rmvpe.py b/AIMeiSheng/extract_f0_rmvpe.py new file mode 100644 index 0000000..55dd97b --- /dev/null +++ b/AIMeiSheng/extract_f0_rmvpe.py @@ -0,0 +1,132 @@ +import os, traceback, sys, parselmouth + +now_dir = os.getcwd() +sys.path.append(now_dir) +from lib.audio import load_audio +import pyworld +import numpy as np, logging + +logging.getLogger("numba").setLevel(logging.WARNING) + +n_part = int(sys.argv[1]) +i_part = int(sys.argv[2]) +i_gpu = sys.argv[3] +os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu) +exp_dir = sys.argv[4] +is_half = sys.argv[5] +f = open("%s/extract_f0_feature.log" % exp_dir, "a+") + + +def printt(strr): + print(strr) + f.write("%s\n" % strr) + f.flush() + + +class FeatureInput(object): + def __init__(self, samplerate=16000, hop_size=160): + self.fs = samplerate + self.hop = hop_size + + self.f0_bin = 256 + self.f0_max = 1100.0 + self.f0_min = 50.0 + self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) + self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) + + def compute_f0(self, path, f0_method): + x = load_audio(path, self.fs) + p_len = x.shape[0] // self.hop + if f0_method == "rmvpe": + if hasattr(self, "model_rmvpe") == False: + from lib.rmvpe import RMVPE + + print("loading rmvpe model") + self.model_rmvpe = RMVPE("rmvpe.pt", is_half=True, device="cuda") + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + return f0 + + def coarse_f0(self, f0): + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - self.f0_mel_min) * ( + self.f0_bin - 2 + ) / (self.f0_mel_max - self.f0_mel_min) + 1 + + # use 0 or 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > self.f0_bin - 1] = self.f0_bin - 1 + f0_coarse = np.rint(f0_mel).astype(int) + assert f0_coarse.max() <= 255 and f0_coarse.min() >= 1, ( + f0_coarse.max(), + f0_coarse.min(), + ) + return f0_coarse + + def go(self, paths, f0_method): + if len(paths) == 0: + printt("no-f0-todo") + else: + printt("todo-f0-%s" % len(paths)) + n = max(len(paths) // 5, 1) # 每个进程最多打印5条 + for idx, (inp_path, opt_path1, opt_path2) in enumerate(paths): + try: + if idx % n == 0: + printt("f0ing,now-%s,all-%s,-%s" % (idx, len(paths), inp_path)) + if ( + os.path.exists(opt_path1 + ".npy") == True + and os.path.exists(opt_path2 + ".npy") == True + ): + continue + featur_pit = self.compute_f0(inp_path, f0_method) + np.save( + opt_path2, + featur_pit, + allow_pickle=False, + ) # nsf + coarse_pit = self.coarse_f0(featur_pit) + np.save( + opt_path1, + coarse_pit, + allow_pickle=False, + ) # ori + except: + printt("f0fail-%s-%s-%s" % (idx, inp_path, traceback.format_exc())) + + +if __name__ == "__main__": + # exp_dir=r"E:\codes\py39\dataset\mi-test" + # n_p=16 + # f = open("%s/log_extract_f0.log"%exp_dir, "w") + printt(sys.argv) + featureInput = FeatureInput() + paths = [] + inp_root = "%s/1_16k_wavs" % (exp_dir) + opt_root1 = "%s/2a_f0" % (exp_dir) + opt_root2 = "%s/2b-f0nsf" % (exp_dir) + + os.makedirs(opt_root1, exist_ok=True) + os.makedirs(opt_root2, exist_ok=True) + for name in sorted(list(os.listdir(inp_root))): + inp_path = "%s/%s" % (inp_root, name) + if "spec" in inp_path: + continue + opt_path1 = "%s/%s" % (opt_root1, name) + opt_path2 = "%s/%s" % (opt_root2, name) + paths.append([inp_path, opt_path1, opt_path2]) + try: + featureInput.go(paths[i_part::n_part], "rmvpe") + except: + printt("f0_all_fail-%s" % (traceback.format_exc())) + # ps = [] + # for i in range(n_p): + # p = Process( + # target=featureInput.go, + # args=( + # 
paths[i::n_p], + # f0method, + # ), + # ) + # ps.append(p) + # p.start() + # for i in range(n_p): + # ps[i].join() diff --git a/AIMeiSheng/extract_feature_print.py b/AIMeiSheng/extract_feature_print.py new file mode 100644 index 0000000..780ffbc --- /dev/null +++ b/AIMeiSheng/extract_feature_print.py @@ -0,0 +1,123 @@ +import os, sys, traceback + +os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" +os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0" + +# device=sys.argv[1] +n_part = int(sys.argv[2]) +i_part = int(sys.argv[3]) +if len(sys.argv) == 6: + exp_dir = sys.argv[4] + version = sys.argv[5] +else: + i_gpu = sys.argv[4] + exp_dir = sys.argv[5] + os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu) + version = sys.argv[6] +import torch +import torch.nn.functional as F +import soundfile as sf +import numpy as np +from fairseq import checkpoint_utils + +device = "cpu" +if torch.cuda.is_available(): + device = "cuda" +elif torch.backends.mps.is_available(): + device = "mps" + +f = open("%s/extract_f0_feature.log" % exp_dir, "a+") + + +def printt(strr): + print(strr) + f.write("%s\n" % strr) + f.flush() + + +printt(sys.argv) +model_path = "hubert_base.pt" + +printt(exp_dir) +wavPath = "%s/1_16k_wavs" % exp_dir +outPath = ( + "%s/3_feature256" % exp_dir if version == "v1" else "%s/3_feature768" % exp_dir +) +os.makedirs(outPath, exist_ok=True) + + +# wave must be 16k, hop_size=320 +def readwave(wav_path, normalize=False): + wav, sr = sf.read(wav_path) + assert sr == 16000 + feats = torch.from_numpy(wav).float() + if feats.dim() == 2: # double channels + feats = feats.mean(-1) + assert feats.dim() == 1, feats.dim() + if normalize: + with torch.no_grad(): + feats = F.layer_norm(feats, feats.shape) + feats = feats.view(1, -1) + return feats + + +# HuBERT model +printt("load model(s) from {}".format(model_path)) +# if hubert model is exist +if os.access(model_path, os.F_OK) == False: + printt( + "Error: Extracting is shut down because %s does not exist, you may download it from https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main" + % model_path + ) + exit(0) +models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + [model_path], + suffix="", +) +model = models[0] +model = model.to(device) +printt("move model to %s" % device) +if device not in ["mps", "cpu"]: + model = model.half() +model.eval() + +todo = sorted(list(os.listdir(wavPath)))[i_part::n_part] +n = max(1, len(todo) // 10) # 最多打印十条 +if len(todo) == 0: + printt("no-feature-todo") +else: + printt("all-feature-%s" % len(todo)) + for idx, file in enumerate(todo): + try: + if file.endswith(".wav"): + wav_path = "%s/%s" % (wavPath, file) + out_path = "%s/%s" % (outPath, file.replace("wav", "npy")) + + if os.path.exists(out_path): + continue + + feats = readwave(wav_path, normalize=saved_cfg.task.normalize) + padding_mask = torch.BoolTensor(feats.shape).fill_(False) + inputs = { + "source": feats.half().to(device) + if device not in ["mps", "cpu"] + else feats.to(device), + "padding_mask": padding_mask.to(device), + "output_layer": 9 if version == "v1" else 12, # layer 9 + } + with torch.no_grad(): + logits = model.extract_features(**inputs) + feats = ( + model.final_proj(logits[0]) if version == "v1" else logits[0] + ) + + feats = feats.squeeze(0).float().cpu().numpy() + if np.isnan(feats).sum() == 0: + np.save(out_path, feats, allow_pickle=False) + else: + printt("%s-contains nan" % file) + if idx % n == 0: + printt("now-%s,all-%s,%s,%s" % (len(todo), idx, file, feats.shape)) + except: + printt(traceback.format_exc()) 
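+    # each saved .npy holds a float32 array shaped (frames, 256) for v1
+    # (HuBERT layer 9 + final_proj) or (frames, 768) for v2 (layer 12)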
+ printt("all-feature-done") diff --git a/AIMeiSheng/extract_locale.py b/AIMeiSheng/extract_locale.py new file mode 100644 index 0000000..460e75f --- /dev/null +++ b/AIMeiSheng/extract_locale.py @@ -0,0 +1,34 @@ +import json +import re + +# Define regular expression patterns +pattern = r"""i18n\([\s\n\t]*(["'][^"']+["'])[\s\n\t]*\)""" + +# Initialize the dictionary to store key-value pairs +data = {} + + +def process(fn: str): + global data + with open(fn, "r", encoding="utf-8") as f: + contents = f.read() + matches = re.findall(pattern, contents) + for key in matches: + key = eval(key) + print("extract:", key) + data[key] = key + + +print("processing infer-web.py") +process("infer-web.py") + +print("processing gui_v0.py") +process("gui_v0.py") + +print("processing gui_v1.py") +process("gui_v1.py") + +# Save as a JSON file +with open("./lib/i18n/zh_CN.json", "w", encoding="utf-8") as f: + json.dump(data, f, ensure_ascii=False, indent=4) + f.write("\n") diff --git a/AIMeiSheng/gender_classify.py b/AIMeiSheng/gender_classify.py new file mode 100644 index 0000000..d77eb2a --- /dev/null +++ b/AIMeiSheng/gender_classify.py @@ -0,0 +1,32 @@ +import sys, os +import time +sys.path.append('./voice_classification/online/') +from voice_class_online_fang import VoiceClass + + +def load_gender_model(): + model_path = "./voice_classification/online/models" + music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth") + music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth") + gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth") + gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth") + vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model) + return vc + + +if __name__ == "__main__": + # test_all() + # test_all_feature() + model_path = sys.argv[1] + voice_path = sys.argv[2] + music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth") + music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth") + gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth") + gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth") + vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model) + for i in range(0, 1): + st = time.time() + print("------------------------------>>>>>") + gender, female_rate, is_pure = vc.process(voice_path) + print("process|spend_tm=={}".format(time.time() - st)) + print("gender:{}, female_rate:{},is_pure:{}".format(gender,female_rate,is_pure)) diff --git a/AIMeiSheng/go-realtime-gui.bat b/AIMeiSheng/go-realtime-gui.bat new file mode 100644 index 0000000..8c08290 --- /dev/null +++ b/AIMeiSheng/go-realtime-gui.bat @@ -0,0 +1,2 @@ +runtime\python.exe gui_v1.py +pause diff --git a/AIMeiSheng/go-web.bat b/AIMeiSheng/go-web.bat new file mode 100644 index 0000000..db1dec5 --- /dev/null +++ b/AIMeiSheng/go-web.bat @@ -0,0 +1,2 @@ +runtime\python.exe infer-web.py --pycmd runtime\python.exe --port 7897 +pause diff --git a/AIMeiSheng/gui_v0.py b/AIMeiSheng/gui_v0.py new file mode 100644 index 0000000..407c9bc --- /dev/null +++ b/AIMeiSheng/gui_v0.py @@ -0,0 +1,696 @@ +import os, sys, traceback, re + +import json + +now_dir = os.getcwd() +sys.path.append(now_dir) +from config import Config + +Config = Config() +import PySimpleGUI as sg +import sounddevice as sd +import noisereduce as nr +import numpy as np +from fairseq import checkpoint_utils +import librosa, torch, pyworld, 
faiss, time, threading +import torch.nn.functional as F +import torchaudio.transforms as tat +import scipy.signal as signal + + +# import matplotlib.pyplot as plt +from lib.infer_pack.models import ( + SynthesizerTrnMs256NSFsid, + SynthesizerTrnMs256NSFsid_nono, + SynthesizerTrnMs768NSFsid, + SynthesizerTrnMs768NSFsid_nono, +) +from i18n import I18nAuto + +i18n = I18nAuto() +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +current_dir = os.getcwd() + + +class RVC: + def __init__( + self, key, hubert_path, pth_path, index_path, npy_path, index_rate + ) -> None: + """ + 初始化 + """ + try: + self.f0_up_key = key + self.time_step = 160 / 16000 * 1000 + self.f0_min = 50 + self.f0_max = 1100 + self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) + self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) + self.sr = 16000 + self.window = 160 + if index_rate != 0: + self.index = faiss.read_index(index_path) + # self.big_npy = np.load(npy_path) + self.big_npy = self.index.reconstruct_n(0, self.index.ntotal) + print("index search enabled") + self.index_rate = index_rate + model_path = hubert_path + print("load model(s) from {}".format(model_path)) + models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + [model_path], + suffix="", + ) + self.model = models[0] + self.model = self.model.to(device) + if Config.is_half: + self.model = self.model.half() + else: + self.model = self.model.float() + self.model.eval() + cpt = torch.load(pth_path, map_location="cpu") + self.tgt_sr = cpt["config"][-1] + cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk + self.if_f0 = cpt.get("f0", 1) + self.version = cpt.get("version", "v1") + if self.version == "v1": + if self.if_f0 == 1: + self.net_g = SynthesizerTrnMs256NSFsid( + *cpt["config"], is_half=Config.is_half + ) + else: + self.net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) + elif self.version == "v2": + if self.if_f0 == 1: + self.net_g = SynthesizerTrnMs768NSFsid( + *cpt["config"], is_half=Config.is_half + ) + else: + self.net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) + del self.net_g.enc_q + print(self.net_g.load_state_dict(cpt["weight"], strict=False)) + self.net_g.eval().to(device) + if Config.is_half: + self.net_g = self.net_g.half() + else: + self.net_g = self.net_g.float() + except: + print(traceback.format_exc()) + + def get_f0(self, x, f0_up_key, inp_f0=None): + x_pad = 1 + f0_min = 50 + f0_max = 1100 + f0_mel_min = 1127 * np.log(1 + f0_min / 700) + f0_mel_max = 1127 * np.log(1 + f0_max / 700) + f0, t = pyworld.harvest( + x.astype(np.double), + fs=self.sr, + f0_ceil=f0_max, + f0_floor=f0_min, + frame_period=10, + ) + f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr) + f0 = signal.medfilt(f0, 3) + f0 *= pow(2, f0_up_key / 12) + # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + tf0 = self.sr // self.window # 每秒f0点数 + if inp_f0 is not None: + delta_t = np.round( + (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1 + ).astype("int16") + replace_f0 = np.interp( + list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1] + ) + shape = f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)].shape[0] + f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)] = replace_f0[:shape] + # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + f0bak = f0.copy() + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / ( + f0_mel_max - f0_mel_min + ) + 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > 255] 
= 255 + f0_coarse = np.rint(f0_mel).astype(np.int) + return f0_coarse, f0bak # 1-0 + + def infer(self, feats: torch.Tensor) -> np.ndarray: + """ + 推理函数 + """ + audio = feats.clone().cpu().numpy() + assert feats.dim() == 1, feats.dim() + feats = feats.view(1, -1) + padding_mask = torch.BoolTensor(feats.shape).fill_(False) + if Config.is_half: + feats = feats.half() + else: + feats = feats.float() + inputs = { + "source": feats.to(device), + "padding_mask": padding_mask.to(device), + "output_layer": 9 if self.version == "v1" else 12, + } + torch.cuda.synchronize() + with torch.no_grad(): + logits = self.model.extract_features(**inputs) + feats = ( + self.model.final_proj(logits[0]) if self.version == "v1" else logits[0] + ) + + ####索引优化 + try: + if ( + hasattr(self, "index") + and hasattr(self, "big_npy") + and self.index_rate != 0 + ): + npy = feats[0].cpu().numpy().astype("float32") + score, ix = self.index.search(npy, k=8) + weight = np.square(1 / score) + weight /= weight.sum(axis=1, keepdims=True) + npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1) + if Config.is_half: + npy = npy.astype("float16") + feats = ( + torch.from_numpy(npy).unsqueeze(0).to(device) * self.index_rate + + (1 - self.index_rate) * feats + ) + else: + print("index search FAIL or disabled") + except: + traceback.print_exc() + print("index search FAIL") + feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) + torch.cuda.synchronize() + print(feats.shape) + if self.if_f0 == 1: + pitch, pitchf = self.get_f0(audio, self.f0_up_key) + p_len = min(feats.shape[1], 13000, pitch.shape[0]) # 太大了爆显存 + else: + pitch, pitchf = None, None + p_len = min(feats.shape[1], 13000) # 太大了爆显存 + torch.cuda.synchronize() + # print(feats.shape,pitch.shape) + feats = feats[:, :p_len, :] + if self.if_f0 == 1: + pitch = pitch[:p_len] + pitchf = pitchf[:p_len] + pitch = torch.LongTensor(pitch).unsqueeze(0).to(device) + pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device) + p_len = torch.LongTensor([p_len]).to(device) + ii = 0 # sid + sid = torch.LongTensor([ii]).to(device) + with torch.no_grad(): + if self.if_f0 == 1: + infered_audio = ( + self.net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] + .data.cpu() + .float() + ) + else: + infered_audio = ( + self.net_g.infer(feats, p_len, sid)[0][0, 0].data.cpu().float() + ) + torch.cuda.synchronize() + return infered_audio + + +class GUIConfig: + def __init__(self) -> None: + self.hubert_path: str = "" + self.pth_path: str = "" + self.index_path: str = "" + self.npy_path: str = "" + self.pitch: int = 12 + self.samplerate: int = 44100 + self.block_time: float = 1.0 # s + self.buffer_num: int = 1 + self.threhold: int = -30 + self.crossfade_time: float = 0.08 + self.extra_time: float = 0.04 + self.I_noise_reduce = False + self.O_noise_reduce = False + self.index_rate = 0.3 + + +class GUI: + def __init__(self) -> None: + self.config = GUIConfig() + self.flag_vc = False + + self.launcher() + + def load(self): + ( + input_devices, + output_devices, + input_devices_indices, + output_devices_indices, + ) = self.get_devices() + try: + with open("values1.json", "r") as j: + data = json.load(j) + except: + with open("values1.json", "w") as j: + data = { + "pth_path": "", + "index_path": "", + "sg_input_device": input_devices[ + input_devices_indices.index(sd.default.device[0]) + ], + "sg_output_device": output_devices[ + output_devices_indices.index(sd.default.device[1]) + ], + "threhold": "-45", + "pitch": "0", + "index_rate": "0", + "block_time": "1", + 
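+                    # defaults: time values are in seconds, threhold is a
+                    # gate level in dB, pitch is an offset in semitones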
"crossfade_length": "0.04", + "extra_time": "1", + } + return data + + def launcher(self): + data = self.load() + sg.theme("LightBlue3") + input_devices, output_devices, _, _ = self.get_devices() + layout = [ + [ + sg.Frame( + title=i18n("加载模型"), + layout=[ + [ + sg.Input( + default_text="hubert_base.pt", + key="hubert_path", + disabled=True, + ), + sg.FileBrowse( + i18n("Hubert模型"), + initial_folder=os.path.join(os.getcwd()), + file_types=(("pt files", "*.pt"),), + ), + ], + [ + sg.Input( + default_text=data.get("pth_path", ""), + key="pth_path", + ), + sg.FileBrowse( + i18n("选择.pth文件"), + initial_folder=os.path.join(os.getcwd(), "weights"), + file_types=(("weight files", "*.pth"),), + ), + ], + [ + sg.Input( + default_text=data.get("index_path", ""), + key="index_path", + ), + sg.FileBrowse( + i18n("选择.index文件"), + initial_folder=os.path.join(os.getcwd(), "logs"), + file_types=(("index files", "*.index"),), + ), + ], + [ + sg.Input( + default_text="你不需要填写这个You don't need write this.", + key="npy_path", + disabled=True, + ), + sg.FileBrowse( + i18n("选择.npy文件"), + initial_folder=os.path.join(os.getcwd(), "logs"), + file_types=(("feature files", "*.npy"),), + ), + ], + ], + ) + ], + [ + sg.Frame( + layout=[ + [ + sg.Text(i18n("输入设备")), + sg.Combo( + input_devices, + key="sg_input_device", + default_value=data.get("sg_input_device", ""), + ), + ], + [ + sg.Text(i18n("输出设备")), + sg.Combo( + output_devices, + key="sg_output_device", + default_value=data.get("sg_output_device", ""), + ), + ], + ], + title=i18n("音频设备(请使用同种类驱动)"), + ) + ], + [ + sg.Frame( + layout=[ + [ + sg.Text(i18n("响应阈值")), + sg.Slider( + range=(-60, 0), + key="threhold", + resolution=1, + orientation="h", + default_value=data.get("threhold", ""), + ), + ], + [ + sg.Text(i18n("音调设置")), + sg.Slider( + range=(-24, 24), + key="pitch", + resolution=1, + orientation="h", + default_value=data.get("pitch", ""), + ), + ], + [ + sg.Text(i18n("Index Rate")), + sg.Slider( + range=(0.0, 1.0), + key="index_rate", + resolution=0.01, + orientation="h", + default_value=data.get("index_rate", ""), + ), + ], + ], + title=i18n("常规设置"), + ), + sg.Frame( + layout=[ + [ + sg.Text(i18n("采样长度")), + sg.Slider( + range=(0.1, 3.0), + key="block_time", + resolution=0.1, + orientation="h", + default_value=data.get("block_time", ""), + ), + ], + [ + sg.Text(i18n("淡入淡出长度")), + sg.Slider( + range=(0.01, 0.15), + key="crossfade_length", + resolution=0.01, + orientation="h", + default_value=data.get("crossfade_length", ""), + ), + ], + [ + sg.Text(i18n("额外推理时长")), + sg.Slider( + range=(0.05, 3.00), + key="extra_time", + resolution=0.01, + orientation="h", + default_value=data.get("extra_time", ""), + ), + ], + [ + sg.Checkbox(i18n("输入降噪"), key="I_noise_reduce"), + sg.Checkbox(i18n("输出降噪"), key="O_noise_reduce"), + ], + ], + title=i18n("性能设置"), + ), + ], + [ + sg.Button(i18n("开始音频转换"), key="start_vc"), + sg.Button(i18n("停止音频转换"), key="stop_vc"), + sg.Text(i18n("推理时间(ms):")), + sg.Text("0", key="infer_time"), + ], + ] + self.window = sg.Window("RVC - GUI", layout=layout) + self.event_handler() + + def event_handler(self): + while True: + event, values = self.window.read() + if event == sg.WINDOW_CLOSED: + self.flag_vc = False + exit() + if event == "start_vc" and self.flag_vc == False: + if self.set_values(values) == True: + print("using_cuda:" + str(torch.cuda.is_available())) + self.start_vc() + settings = { + "pth_path": values["pth_path"], + "index_path": values["index_path"], + "sg_input_device": values["sg_input_device"], + "sg_output_device": 
values["sg_output_device"], + "threhold": values["threhold"], + "pitch": values["pitch"], + "index_rate": values["index_rate"], + "block_time": values["block_time"], + "crossfade_length": values["crossfade_length"], + "extra_time": values["extra_time"], + } + with open("values1.json", "w") as j: + json.dump(settings, j) + if event == "stop_vc" and self.flag_vc == True: + self.flag_vc = False + + def set_values(self, values): + if len(values["pth_path"].strip()) == 0: + sg.popup(i18n("请选择pth文件")) + return False + if len(values["index_path"].strip()) == 0: + sg.popup(i18n("请选择index文件")) + return False + pattern = re.compile("[^\x00-\x7F]+") + if pattern.findall(values["hubert_path"]): + sg.popup(i18n("hubert模型路径不可包含中文")) + return False + if pattern.findall(values["pth_path"]): + sg.popup(i18n("pth文件路径不可包含中文")) + return False + if pattern.findall(values["index_path"]): + sg.popup(i18n("index文件路径不可包含中文")) + return False + self.set_devices(values["sg_input_device"], values["sg_output_device"]) + self.config.hubert_path = os.path.join(current_dir, "hubert_base.pt") + self.config.pth_path = values["pth_path"] + self.config.index_path = values["index_path"] + self.config.npy_path = values["npy_path"] + self.config.threhold = values["threhold"] + self.config.pitch = values["pitch"] + self.config.block_time = values["block_time"] + self.config.crossfade_time = values["crossfade_length"] + self.config.extra_time = values["extra_time"] + self.config.I_noise_reduce = values["I_noise_reduce"] + self.config.O_noise_reduce = values["O_noise_reduce"] + self.config.index_rate = values["index_rate"] + return True + + def start_vc(self): + torch.cuda.empty_cache() + self.flag_vc = True + self.block_frame = int(self.config.block_time * self.config.samplerate) + self.crossfade_frame = int(self.config.crossfade_time * self.config.samplerate) + self.sola_search_frame = int(0.012 * self.config.samplerate) + self.delay_frame = int(0.01 * self.config.samplerate) # 往前预留0.02s + self.extra_frame = int(self.config.extra_time * self.config.samplerate) + self.rvc = None + self.rvc = RVC( + self.config.pitch, + self.config.hubert_path, + self.config.pth_path, + self.config.index_path, + self.config.npy_path, + self.config.index_rate, + ) + self.input_wav: np.ndarray = np.zeros( + self.extra_frame + + self.crossfade_frame + + self.sola_search_frame + + self.block_frame, + dtype="float32", + ) + self.output_wav: torch.Tensor = torch.zeros( + self.block_frame, device=device, dtype=torch.float32 + ) + self.sola_buffer: torch.Tensor = torch.zeros( + self.crossfade_frame, device=device, dtype=torch.float32 + ) + self.fade_in_window: torch.Tensor = torch.linspace( + 0.0, 1.0, steps=self.crossfade_frame, device=device, dtype=torch.float32 + ) + self.fade_out_window: torch.Tensor = 1 - self.fade_in_window + self.resampler1 = tat.Resample( + orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32 + ) + self.resampler2 = tat.Resample( + orig_freq=self.rvc.tgt_sr, + new_freq=self.config.samplerate, + dtype=torch.float32, + ) + thread_vc = threading.Thread(target=self.soundinput) + thread_vc.start() + + def soundinput(self): + """ + 接受音频输入 + """ + with sd.Stream( + channels=2, + callback=self.audio_callback, + blocksize=self.block_frame, + samplerate=self.config.samplerate, + dtype="float32", + ): + while self.flag_vc: + time.sleep(self.config.block_time) + print("Audio block passed.") + print("ENDing VC") + + def audio_callback( + self, indata: np.ndarray, outdata: np.ndarray, frames, times, status + ): + """ + 音频处理 + 
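+        mono-mix the block, optionally denoise it, apply an RMS noise gate,
+        shift it into the sliding input buffer, run RVC inference, then
+        SOLA-align and crossfade against the previous output block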
""" + start_time = time.perf_counter() + indata = librosa.to_mono(indata.T) + if self.config.I_noise_reduce: + indata[:] = nr.reduce_noise(y=indata, sr=self.config.samplerate) + + """noise gate""" + frame_length = 2048 + hop_length = 1024 + rms = librosa.feature.rms( + y=indata, frame_length=frame_length, hop_length=hop_length + ) + db_threhold = librosa.amplitude_to_db(rms, ref=1.0)[0] < self.config.threhold + # print(rms.shape,db.shape,db) + for i in range(db_threhold.shape[0]): + if db_threhold[i]: + indata[i * hop_length : (i + 1) * hop_length] = 0 + self.input_wav[:] = np.append(self.input_wav[self.block_frame :], indata) + + # infer + print("input_wav:" + str(self.input_wav.shape)) + # print('infered_wav:'+str(infer_wav.shape)) + infer_wav: torch.Tensor = self.resampler2( + self.rvc.infer(self.resampler1(torch.from_numpy(self.input_wav))) + )[-self.crossfade_frame - self.sola_search_frame - self.block_frame :].to( + device + ) + print("infer_wav:" + str(infer_wav.shape)) + + # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC + cor_nom = F.conv1d( + infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame], + self.sola_buffer[None, None, :], + ) + cor_den = torch.sqrt( + F.conv1d( + infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame] + ** 2, + torch.ones(1, 1, self.crossfade_frame, device=device), + ) + + 1e-8 + ) + sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0]) + print("sola offset: " + str(int(sola_offset))) + + # crossfade + self.output_wav[:] = infer_wav[sola_offset : sola_offset + self.block_frame] + self.output_wav[: self.crossfade_frame] *= self.fade_in_window + self.output_wav[: self.crossfade_frame] += self.sola_buffer[:] + if sola_offset < self.sola_search_frame: + self.sola_buffer[:] = ( + infer_wav[ + -self.sola_search_frame + - self.crossfade_frame + + sola_offset : -self.sola_search_frame + + sola_offset + ] + * self.fade_out_window + ) + else: + self.sola_buffer[:] = ( + infer_wav[-self.crossfade_frame :] * self.fade_out_window + ) + + if self.config.O_noise_reduce: + outdata[:] = np.tile( + nr.reduce_noise( + y=self.output_wav[:].cpu().numpy(), sr=self.config.samplerate + ), + (2, 1), + ).T + else: + outdata[:] = self.output_wav[:].repeat(2, 1).t().cpu().numpy() + total_time = time.perf_counter() - start_time + self.window["infer_time"].update(int(total_time * 1000)) + print("infer time:" + str(total_time)) + + def get_devices(self, update: bool = True): + """获取设备列表""" + if update: + sd._terminate() + sd._initialize() + devices = sd.query_devices() + hostapis = sd.query_hostapis() + for hostapi in hostapis: + for device_idx in hostapi["devices"]: + devices[device_idx]["hostapi_name"] = hostapi["name"] + input_devices = [ + f"{d['name']} ({d['hostapi_name']})" + for d in devices + if d["max_input_channels"] > 0 + ] + output_devices = [ + f"{d['name']} ({d['hostapi_name']})" + for d in devices + if d["max_output_channels"] > 0 + ] + input_devices_indices = [ + d["index"] if "index" in d else d["name"] + for d in devices + if d["max_input_channels"] > 0 + ] + output_devices_indices = [ + d["index"] if "index" in d else d["name"] + for d in devices + if d["max_output_channels"] > 0 + ] + return ( + input_devices, + output_devices, + input_devices_indices, + output_devices_indices, + ) + + def set_devices(self, input_device, output_device): + """设置输出设备""" + ( + input_devices, + output_devices, + input_device_indices, + output_device_indices, + ) = self.get_devices() + sd.default.device[0] = 
input_device_indices[input_devices.index(input_device)] + sd.default.device[1] = output_device_indices[ + output_devices.index(output_device) + ] + print("input device:" + str(sd.default.device[0]) + ":" + str(input_device)) + print("output device:" + str(sd.default.device[1]) + ":" + str(output_device)) + + +gui = GUI() diff --git a/AIMeiSheng/gui_v1.py b/AIMeiSheng/gui_v1.py new file mode 100644 index 0000000..0d45da6 --- /dev/null +++ b/AIMeiSheng/gui_v1.py @@ -0,0 +1,661 @@ +import os, sys + +if sys.platform == "darwin": + os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" + +now_dir = os.getcwd() +sys.path.append(now_dir) +import multiprocessing + + +class Harvest(multiprocessing.Process): + def __init__(self, inp_q, opt_q): + multiprocessing.Process.__init__(self) + self.inp_q = inp_q + self.opt_q = opt_q + + def run(self): + import numpy as np, pyworld + + while 1: + idx, x, res_f0, n_cpu, ts = self.inp_q.get() + f0, t = pyworld.harvest( + x.astype(np.double), + fs=16000, + f0_ceil=1100, + f0_floor=50, + frame_period=10, + ) + res_f0[idx] = f0 + if len(res_f0.keys()) >= n_cpu: + self.opt_q.put(ts) + + +if __name__ == "__main__": + from multiprocessing import Queue + from queue import Empty + import numpy as np + import multiprocessing + import traceback, re + import json + import PySimpleGUI as sg + import sounddevice as sd + import noisereduce as nr + from multiprocessing import cpu_count + import librosa, torch, time, threading + import torch.nn.functional as F + import torchaudio.transforms as tat + from i18n import I18nAuto + + i18n = I18nAuto() + device = torch.device( + "cuda" + if torch.cuda.is_available() + else ("mps" if torch.backends.mps.is_available() else "cpu") + ) + current_dir = os.getcwd() + inp_q = Queue() + opt_q = Queue() + n_cpu = min(cpu_count(), 8) + for _ in range(n_cpu): + Harvest(inp_q, opt_q).start() + from rvc_for_realtime import RVC + + class GUIConfig: + def __init__(self) -> None: + self.pth_path: str = "" + self.index_path: str = "" + self.pitch: int = 12 + self.samplerate: int = 40000 + self.block_time: float = 1.0 # s + self.buffer_num: int = 1 + self.threhold: int = -30 + self.crossfade_time: float = 0.08 + self.extra_time: float = 0.04 + self.I_noise_reduce = False + self.O_noise_reduce = False + self.index_rate = 0.3 + self.n_cpu = min(n_cpu, 8) + self.f0method = "harvest" + self.sg_input_device = "" + self.sg_output_device = "" + + class GUI: + def __init__(self) -> None: + self.config = GUIConfig() + self.flag_vc = False + + self.launcher() + + def load(self): + input_devices, output_devices, _, _ = self.get_devices() + try: + with open("values1.json", "r") as j: + data = json.load(j) + data["pm"] = data["f0method"] == "pm" + data["harvest"] = data["f0method"] == "harvest" + data["crepe"] = data["f0method"] == "crepe" + data["rmvpe"] = data["f0method"] == "rmvpe" + except: + with open("values1.json", "w") as j: + data = { + "pth_path": " ", + "index_path": " ", + "sg_input_device": input_devices[sd.default.device[0]], + "sg_output_device": output_devices[sd.default.device[1]], + "threhold": "-45", + "pitch": "0", + "index_rate": "0", + "block_time": "1", + "crossfade_length": "0.04", + "extra_time": "1", + "f0method": "rmvpe", + } + return data + + def launcher(self): + data = self.load() + sg.theme("LightBlue3") + input_devices, output_devices, _, _ = self.get_devices() + layout = [ + [ + sg.Frame( + title=i18n("加载模型"), + layout=[ + [ + sg.Input( + default_text=data.get("pth_path", ""), + key="pth_path", + ), + sg.FileBrowse( + i18n("选择.pth文件"), 
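+                            # browsing starts in ./weights, where converted
+                            # voice models (.pth) are expected to live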
+ initial_folder=os.path.join(os.getcwd(), "weights"), + file_types=((". pth"),), + ), + ], + [ + sg.Input( + default_text=data.get("index_path", ""), + key="index_path", + ), + sg.FileBrowse( + i18n("选择.index文件"), + initial_folder=os.path.join(os.getcwd(), "logs"), + file_types=((". index"),), + ), + ], + ], + ) + ], + [ + sg.Frame( + layout=[ + [ + sg.Text(i18n("输入设备")), + sg.Combo( + input_devices, + key="sg_input_device", + default_value=data.get("sg_input_device", ""), + ), + ], + [ + sg.Text(i18n("输出设备")), + sg.Combo( + output_devices, + key="sg_output_device", + default_value=data.get("sg_output_device", ""), + ), + ], + [sg.Button(i18n("重载设备列表"), key="reload_devices")], + ], + title=i18n("音频设备(请使用同种类驱动)"), + ) + ], + [ + sg.Frame( + layout=[ + [ + sg.Text(i18n("响应阈值")), + sg.Slider( + range=(-60, 0), + key="threhold", + resolution=1, + orientation="h", + default_value=data.get("threhold", ""), + ), + ], + [ + sg.Text(i18n("音调设置")), + sg.Slider( + range=(-24, 24), + key="pitch", + resolution=1, + orientation="h", + default_value=data.get("pitch", ""), + ), + ], + [ + sg.Text(i18n("Index Rate")), + sg.Slider( + range=(0.0, 1.0), + key="index_rate", + resolution=0.01, + orientation="h", + default_value=data.get("index_rate", ""), + ), + ], + [ + sg.Text(i18n("音高算法")), + sg.Radio( + "pm", + "f0method", + key="pm", + default=data.get("pm", "") == True, + ), + sg.Radio( + "harvest", + "f0method", + key="harvest", + default=data.get("harvest", "") == True, + ), + sg.Radio( + "crepe", + "f0method", + key="crepe", + default=data.get("crepe", "") == True, + ), + sg.Radio( + "rmvpe", + "f0method", + key="rmvpe", + default=data.get("rmvpe", "") == True, + ), + ], + ], + title=i18n("常规设置"), + ), + sg.Frame( + layout=[ + [ + sg.Text(i18n("采样长度")), + sg.Slider( + range=(0.12, 2.4), + key="block_time", + resolution=0.03, + orientation="h", + default_value=data.get("block_time", ""), + ), + ], + [ + sg.Text(i18n("harvest进程数")), + sg.Slider( + range=(1, n_cpu), + key="n_cpu", + resolution=1, + orientation="h", + default_value=data.get( + "n_cpu", min(self.config.n_cpu, n_cpu) + ), + ), + ], + [ + sg.Text(i18n("淡入淡出长度")), + sg.Slider( + range=(0.01, 0.15), + key="crossfade_length", + resolution=0.01, + orientation="h", + default_value=data.get("crossfade_length", ""), + ), + ], + [ + sg.Text(i18n("额外推理时长")), + sg.Slider( + range=(0.05, 3.00), + key="extra_time", + resolution=0.01, + orientation="h", + default_value=data.get("extra_time", ""), + ), + ], + [ + sg.Checkbox(i18n("输入降噪"), key="I_noise_reduce"), + sg.Checkbox(i18n("输出降噪"), key="O_noise_reduce"), + ], + ], + title=i18n("性能设置"), + ), + ], + [ + sg.Button(i18n("开始音频转换"), key="start_vc"), + sg.Button(i18n("停止音频转换"), key="stop_vc"), + sg.Text(i18n("推理时间(ms):")), + sg.Text("0", key="infer_time"), + ], + ] + self.window = sg.Window("RVC - GUI", layout=layout) + self.event_handler() + + def event_handler(self): + while True: + event, values = self.window.read() + if event == sg.WINDOW_CLOSED: + self.flag_vc = False + exit() + if event == "reload_devices": + prev_input = self.window["sg_input_device"].get() + prev_output = self.window["sg_output_device"].get() + input_devices, output_devices, _, _ = self.get_devices(update=True) + if prev_input not in input_devices: + self.config.sg_input_device = input_devices[0] + else: + self.config.sg_input_device = prev_input + self.window["sg_input_device"].Update(values=input_devices) + self.window["sg_input_device"].Update( + value=self.config.sg_input_device + ) + if prev_output not in output_devices: + 
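+                        # the previously selected output disappeared (e.g.
+                        # device unplugged); fall back to the first available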
self.config.sg_output_device = output_devices[0] + else: + self.config.sg_output_device = prev_output + self.window["sg_output_device"].Update(values=output_devices) + self.window["sg_output_device"].Update( + value=self.config.sg_output_device + ) + if event == "start_vc" and self.flag_vc == False: + if self.set_values(values) == True: + print("using_cuda:" + str(torch.cuda.is_available())) + self.start_vc() + settings = { + "pth_path": values["pth_path"], + "index_path": values["index_path"], + "sg_input_device": values["sg_input_device"], + "sg_output_device": values["sg_output_device"], + "threhold": values["threhold"], + "pitch": values["pitch"], + "index_rate": values["index_rate"], + "block_time": values["block_time"], + "crossfade_length": values["crossfade_length"], + "extra_time": values["extra_time"], + "n_cpu": values["n_cpu"], + "f0method": ["pm", "harvest", "crepe", "rmvpe"][ + [ + values["pm"], + values["harvest"], + values["crepe"], + values["rmvpe"], + ].index(True) + ], + } + with open("values1.json", "w") as j: + json.dump(settings, j) + if event == "stop_vc" and self.flag_vc == True: + self.flag_vc = False + + def set_values(self, values): + if len(values["pth_path"].strip()) == 0: + sg.popup(i18n("请选择pth文件")) + return False + if len(values["index_path"].strip()) == 0: + sg.popup(i18n("请选择index文件")) + return False + pattern = re.compile("[^\x00-\x7F]+") + if pattern.findall(values["pth_path"]): + sg.popup(i18n("pth文件路径不可包含中文")) + return False + if pattern.findall(values["index_path"]): + sg.popup(i18n("index文件路径不可包含中文")) + return False + self.set_devices(values["sg_input_device"], values["sg_output_device"]) + self.config.pth_path = values["pth_path"] + self.config.index_path = values["index_path"] + self.config.threhold = values["threhold"] + self.config.pitch = values["pitch"] + self.config.block_time = values["block_time"] + self.config.crossfade_time = values["crossfade_length"] + self.config.extra_time = values["extra_time"] + self.config.I_noise_reduce = values["I_noise_reduce"] + self.config.O_noise_reduce = values["O_noise_reduce"] + self.config.index_rate = values["index_rate"] + self.config.n_cpu = values["n_cpu"] + self.config.f0method = ["pm", "harvest", "crepe", "rmvpe"][ + [ + values["pm"], + values["harvest"], + values["crepe"], + values["rmvpe"], + ].index(True) + ] + return True + + def start_vc(self): + torch.cuda.empty_cache() + self.flag_vc = True + self.rvc = RVC( + self.config.pitch, + self.config.pth_path, + self.config.index_path, + self.config.index_rate, + self.config.n_cpu, + inp_q, + opt_q, + device, + ) + self.config.samplerate = self.rvc.tgt_sr + self.config.crossfade_time = min( + self.config.crossfade_time, self.config.block_time + ) + self.block_frame = int(self.config.block_time * self.config.samplerate) + self.crossfade_frame = int( + self.config.crossfade_time * self.config.samplerate + ) + self.sola_search_frame = int(0.01 * self.config.samplerate) + self.extra_frame = int(self.config.extra_time * self.config.samplerate) + self.zc = self.rvc.tgt_sr // 100 + self.input_wav: np.ndarray = np.zeros( + int( + np.ceil( + ( + self.extra_frame + + self.crossfade_frame + + self.sola_search_frame + + self.block_frame + ) + / self.zc + ) + * self.zc + ), + dtype="float32", + ) + self.output_wav_cache: torch.Tensor = torch.zeros( + int( + np.ceil( + ( + self.extra_frame + + self.crossfade_frame + + self.sola_search_frame + + self.block_frame + ) + / self.zc + ) + * self.zc + ), + device=device, + dtype=torch.float32, + ) + self.pitch: np.ndarray 
= np.zeros( + self.input_wav.shape[0] // self.zc, + dtype="int32", + ) + self.pitchf: np.ndarray = np.zeros( + self.input_wav.shape[0] // self.zc, + dtype="float64", + ) + self.output_wav: torch.Tensor = torch.zeros( + self.block_frame, device=device, dtype=torch.float32 + ) + self.sola_buffer: torch.Tensor = torch.zeros( + self.crossfade_frame, device=device, dtype=torch.float32 + ) + self.fade_in_window: torch.Tensor = torch.linspace( + 0.0, 1.0, steps=self.crossfade_frame, device=device, dtype=torch.float32 + ) + self.fade_out_window: torch.Tensor = 1 - self.fade_in_window + self.resampler = tat.Resample( + orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32 + ).to(device) + thread_vc = threading.Thread(target=self.soundinput) + thread_vc.start() + + def soundinput(self): + """ + 接受音频输入 + """ + channels = 1 if sys.platform == "darwin" else 2 + with sd.Stream( + channels=channels, + callback=self.audio_callback, + blocksize=self.block_frame, + samplerate=self.config.samplerate, + dtype="float32", + ): + while self.flag_vc: + time.sleep(self.config.block_time) + print("Audio block passed.") + print("ENDing VC") + + def audio_callback( + self, indata: np.ndarray, outdata: np.ndarray, frames, times, status + ): + """ + 音频处理 + """ + start_time = time.perf_counter() + indata = librosa.to_mono(indata.T) + if self.config.I_noise_reduce: + indata[:] = nr.reduce_noise(y=indata, sr=self.config.samplerate) + """noise gate""" + frame_length = 2048 + hop_length = 1024 + rms = librosa.feature.rms( + y=indata, frame_length=frame_length, hop_length=hop_length + ) + if self.config.threhold > -60: + db_threhold = ( + librosa.amplitude_to_db(rms, ref=1.0)[0] < self.config.threhold + ) + for i in range(db_threhold.shape[0]): + if db_threhold[i]: + indata[i * hop_length : (i + 1) * hop_length] = 0 + self.input_wav[:] = np.append(self.input_wav[self.block_frame :], indata) + # infer + inp = torch.from_numpy(self.input_wav).to(device) + ##0 + res1 = self.resampler(inp) + ###55% + rate1 = self.block_frame / ( + self.extra_frame + + self.crossfade_frame + + self.sola_search_frame + + self.block_frame + ) + rate2 = ( + self.crossfade_frame + self.sola_search_frame + self.block_frame + ) / ( + self.extra_frame + + self.crossfade_frame + + self.sola_search_frame + + self.block_frame + ) + res2 = self.rvc.infer( + res1, + res1[-self.block_frame :].cpu().numpy(), + rate1, + rate2, + self.pitch, + self.pitchf, + self.config.f0method, + ) + self.output_wav_cache[-res2.shape[0] :] = res2 + infer_wav = self.output_wav_cache[ + -self.crossfade_frame - self.sola_search_frame - self.block_frame : + ] + # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC + cor_nom = F.conv1d( + infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame], + self.sola_buffer[None, None, :], + ) + cor_den = torch.sqrt( + F.conv1d( + infer_wav[ + None, None, : self.crossfade_frame + self.sola_search_frame + ] + ** 2, + torch.ones(1, 1, self.crossfade_frame, device=device), + ) + + 1e-8 + ) + if sys.platform == "darwin": + _, sola_offset = torch.max(cor_nom[0, 0] / cor_den[0, 0]) + sola_offset = sola_offset.item() + else: + sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0]) + print("sola offset: " + str(int(sola_offset))) + self.output_wav[:] = infer_wav[sola_offset : sola_offset + self.block_frame] + self.output_wav[: self.crossfade_frame] *= self.fade_in_window + self.output_wav[: self.crossfade_frame] += self.sola_buffer[:] + # crossfade + if sola_offset < self.sola_search_frame: + self.sola_buffer[:] = 
( + infer_wav[ + -self.sola_search_frame + - self.crossfade_frame + + sola_offset : -self.sola_search_frame + + sola_offset + ] + * self.fade_out_window + ) + else: + self.sola_buffer[:] = ( + infer_wav[-self.crossfade_frame :] * self.fade_out_window + ) + if self.config.O_noise_reduce: + if sys.platform == "darwin": + noise_reduced_signal = nr.reduce_noise( + y=self.output_wav[:].cpu().numpy(), sr=self.config.samplerate + ) + outdata[:] = noise_reduced_signal[:, np.newaxis] + else: + outdata[:] = np.tile( + nr.reduce_noise( + y=self.output_wav[:].cpu().numpy(), + sr=self.config.samplerate, + ), + (2, 1), + ).T + else: + if sys.platform == "darwin": + outdata[:] = self.output_wav[:].cpu().numpy()[:, np.newaxis] + else: + outdata[:] = self.output_wav[:].repeat(2, 1).t().cpu().numpy() + total_time = time.perf_counter() - start_time + self.window["infer_time"].update(int(total_time * 1000)) + print("infer time:" + str(total_time)) + + def get_devices(self, update: bool = True): + """获取设备列表""" + if update: + sd._terminate() + sd._initialize() + devices = sd.query_devices() + hostapis = sd.query_hostapis() + for hostapi in hostapis: + for device_idx in hostapi["devices"]: + devices[device_idx]["hostapi_name"] = hostapi["name"] + input_devices = [ + f"{d['name']} ({d['hostapi_name']})" + for d in devices + if d["max_input_channels"] > 0 + ] + output_devices = [ + f"{d['name']} ({d['hostapi_name']})" + for d in devices + if d["max_output_channels"] > 0 + ] + input_devices_indices = [ + d["index"] if "index" in d else d["name"] + for d in devices + if d["max_input_channels"] > 0 + ] + output_devices_indices = [ + d["index"] if "index" in d else d["name"] + for d in devices + if d["max_output_channels"] > 0 + ] + return ( + input_devices, + output_devices, + input_devices_indices, + output_devices_indices, + ) + + def set_devices(self, input_device, output_device): + """设置输出设备""" + ( + input_devices, + output_devices, + input_device_indices, + output_device_indices, + ) = self.get_devices() + sd.default.device[0] = input_device_indices[ + input_devices.index(input_device) + ] + sd.default.device[1] = output_device_indices[ + output_devices.index(output_device) + ] + print("input device:" + str(sd.default.device[0]) + ":" + str(input_device)) + print( + "output device:" + str(sd.default.device[1]) + ":" + str(output_device) + ) + + gui = GUI() diff --git a/AIMeiSheng/guidml.py b/AIMeiSheng/guidml.py new file mode 100644 index 0000000..aadf22d --- /dev/null +++ b/AIMeiSheng/guidml.py @@ -0,0 +1,710 @@ +""" +0416后的更新: + 引入config中half + 重建npy而不用填写 + v2支持 + 无f0模型支持 + 修复 + + int16: + 增加无索引支持 + f0算法改harvest(怎么看就只有这个会影响CPU占用),但是不这么改效果不好 +""" +import os, sys, traceback, re + +import json + +now_dir = os.getcwd() +sys.path.append(now_dir) +from config import Config + +Config = Config() + +import torch_directml +import PySimpleGUI as sg +import sounddevice as sd +import noisereduce as nr +import numpy as np +from fairseq import checkpoint_utils +import librosa, torch, pyworld, faiss, time, threading +import torch.nn.functional as F +import torchaudio.transforms as tat +import scipy.signal as signal + + +# import matplotlib.pyplot as plt +from lib.infer_pack.models import ( + SynthesizerTrnMs256NSFsid, + SynthesizerTrnMs256NSFsid_nono, + SynthesizerTrnMs768NSFsid, + SynthesizerTrnMs768NSFsid_nono, +) +from i18n import I18nAuto + +i18n = I18nAuto() +device = torch_directml.device(torch_directml.default_device()) +current_dir = os.getcwd() + + +class RVC: + def __init__( + self, key, hubert_path, pth_path, 
index_path, npy_path, index_rate + ) -> None: + """ + 初始化 + """ + try: + self.f0_up_key = key + self.time_step = 160 / 16000 * 1000 + self.f0_min = 50 + self.f0_max = 1100 + self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700) + self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700) + self.sr = 16000 + self.window = 160 + if index_rate != 0: + self.index = faiss.read_index(index_path) + # self.big_npy = np.load(npy_path) + self.big_npy = self.index.reconstruct_n(0, self.index.ntotal) + print("index search enabled") + self.index_rate = index_rate + model_path = hubert_path + print("load model(s) from {}".format(model_path)) + models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + [model_path], + suffix="", + ) + self.model = models[0] + self.model = self.model.to(device) + if Config.is_half: + self.model = self.model.half() + else: + self.model = self.model.float() + self.model.eval() + cpt = torch.load(pth_path, map_location="cpu") + self.tgt_sr = cpt["config"][-1] + cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk + self.if_f0 = cpt.get("f0", 1) + self.version = cpt.get("version", "v1") + if self.version == "v1": + if self.if_f0 == 1: + self.net_g = SynthesizerTrnMs256NSFsid( + *cpt["config"], is_half=Config.is_half + ) + else: + self.net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) + elif self.version == "v2": + if self.if_f0 == 1: + self.net_g = SynthesizerTrnMs768NSFsid( + *cpt["config"], is_half=Config.is_half + ) + else: + self.net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) + del self.net_g.enc_q + print(self.net_g.load_state_dict(cpt["weight"], strict=False)) + self.net_g.eval().to(device) + if Config.is_half: + self.net_g = self.net_g.half() + else: + self.net_g = self.net_g.float() + except: + print(traceback.format_exc()) + + def get_f0(self, x, f0_up_key, inp_f0=None): + x_pad = 1 + f0_min = 50 + f0_max = 1100 + f0_mel_min = 1127 * np.log(1 + f0_min / 700) + f0_mel_max = 1127 * np.log(1 + f0_max / 700) + f0, t = pyworld.harvest( + x.astype(np.double), + fs=self.sr, + f0_ceil=f0_max, + f0_floor=f0_min, + frame_period=10, + ) + f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr) + f0 = signal.medfilt(f0, 3) + f0 *= pow(2, f0_up_key / 12) + # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + tf0 = self.sr // self.window # 每秒f0点数 + if inp_f0 is not None: + delta_t = np.round( + (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1 + ).astype("int16") + replace_f0 = np.interp( + list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1] + ) + shape = f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)].shape[0] + f0[x_pad * tf0 : x_pad * tf0 + len(replace_f0)] = replace_f0[:shape] + # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + f0bak = f0.copy() + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / ( + f0_mel_max - f0_mel_min + ) + 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > 255] = 255 + f0_coarse = np.rint(f0_mel).astype(np.int) + return f0_coarse, f0bak # 1-0 + + def infer(self, feats: torch.Tensor) -> np.ndarray: + """ + 推理函数 + """ + audio = feats.clone().cpu().numpy() + assert feats.dim() == 1, feats.dim() + feats = feats.view(1, -1) + padding_mask = torch.BoolTensor(feats.shape).fill_(False) + if Config.is_half: + feats = feats.half() + else: + feats = feats.float() + inputs = { + "source": feats.to(device), + "padding_mask": padding_mask.to(device), + "output_layer": 9 if self.version == "v1" else 12, + } 
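+        # note: this DirectML variant keeps the torch.cuda.synchronize()
+        # barriers from the CUDA path; they are only valid on CUDA devices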
+ torch.cuda.synchronize() + with torch.no_grad(): + logits = self.model.extract_features(**inputs) + feats = ( + self.model.final_proj(logits[0]) if self.version == "v1" else logits[0] + ) + + ####索引优化 + try: + if ( + hasattr(self, "index") + and hasattr(self, "big_npy") + and self.index_rate != 0 + ): + npy = feats[0].cpu().numpy().astype("float32") + score, ix = self.index.search(npy, k=8) + weight = np.square(1 / score) + weight /= weight.sum(axis=1, keepdims=True) + npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1) + if Config.is_half: + npy = npy.astype("float16") + feats = ( + torch.from_numpy(npy).unsqueeze(0).to(device) * self.index_rate + + (1 - self.index_rate) * feats + ) + else: + print("index search FAIL or disabled") + except: + traceback.print_exc() + print("index search FAIL") + feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) + torch.cuda.synchronize() + print(feats.shape) + if self.if_f0 == 1: + pitch, pitchf = self.get_f0(audio, self.f0_up_key) + p_len = min(feats.shape[1], 13000, pitch.shape[0]) # 太大了爆显存 + else: + pitch, pitchf = None, None + p_len = min(feats.shape[1], 13000) # 太大了爆显存 + torch.cuda.synchronize() + # print(feats.shape,pitch.shape) + feats = feats[:, :p_len, :] + if self.if_f0 == 1: + pitch = pitch[:p_len] + pitchf = pitchf[:p_len] + pitch = torch.LongTensor(pitch).unsqueeze(0).to(device) + pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device) + p_len = torch.LongTensor([p_len]).to(device) + ii = 0 # sid + sid = torch.LongTensor([ii]).to(device) + with torch.no_grad(): + if self.if_f0 == 1: + infered_audio = ( + self.net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0] + .data.cpu() + .float() + ) + else: + infered_audio = ( + self.net_g.infer(feats, p_len, sid)[0][0, 0].data.cpu().float() + ) + torch.cuda.synchronize() + return infered_audio + + +class GUIConfig: + def __init__(self) -> None: + self.hubert_path: str = "" + self.pth_path: str = "" + self.index_path: str = "" + self.npy_path: str = "" + self.pitch: int = 12 + self.samplerate: int = 44100 + self.block_time: float = 1.0 # s + self.buffer_num: int = 1 + self.threhold: int = -30 + self.crossfade_time: float = 0.08 + self.extra_time: float = 0.04 + self.I_noise_reduce = False + self.O_noise_reduce = False + self.index_rate = 0.3 + + +class GUI: + def __init__(self) -> None: + self.config = GUIConfig() + self.flag_vc = False + + self.launcher() + + def load(self): + ( + input_devices, + output_devices, + input_devices_indices, + output_devices_indices, + ) = self.get_devices() + try: + with open("values1.json", "r") as j: + data = json.load(j) + except: + with open("values1.json", "w") as j: + data = { + "pth_path": "", + "index_path": "", + "sg_input_device": input_devices[ + input_devices_indices.index(sd.default.device[0]) + ], + "sg_output_device": output_devices[ + output_devices_indices.index(sd.default.device[1]) + ], + "threhold": "-45", + "pitch": "0", + "index_rate": "0", + "block_time": "1", + "crossfade_length": "0.04", + "extra_time": "1", + } + return data + + def launcher(self): + data = self.load() + sg.theme("LightBlue3") + input_devices, output_devices, _, _ = self.get_devices() + layout = [ + [ + sg.Frame( + title=i18n("加载模型"), + layout=[ + [ + sg.Input( + default_text="hubert_base.pt", + key="hubert_path", + disabled=True, + ), + sg.FileBrowse( + i18n("Hubert模型"), + initial_folder=os.path.join(os.getcwd()), + file_types=(("pt files", "*.pt"),), + ), + ], + [ + sg.Input( + default_text=data.get("pth_path", ""), + 
key="pth_path", + ), + sg.FileBrowse( + i18n("选择.pth文件"), + initial_folder=os.path.join(os.getcwd(), "weights"), + file_types=(("weight files", "*.pth"),), + ), + ], + [ + sg.Input( + default_text=data.get("index_path", ""), + key="index_path", + ), + sg.FileBrowse( + i18n("选择.index文件"), + initial_folder=os.path.join(os.getcwd(), "logs"), + file_types=(("index files", "*.index"),), + ), + ], + [ + sg.Input( + default_text="你不需要填写这个You don't need write this.", + key="npy_path", + disabled=True, + ), + sg.FileBrowse( + i18n("选择.npy文件"), + initial_folder=os.path.join(os.getcwd(), "logs"), + file_types=(("feature files", "*.npy"),), + ), + ], + ], + ) + ], + [ + sg.Frame( + layout=[ + [ + sg.Text(i18n("输入设备")), + sg.Combo( + input_devices, + key="sg_input_device", + default_value=data.get("sg_input_device", ""), + ), + ], + [ + sg.Text(i18n("输出设备")), + sg.Combo( + output_devices, + key="sg_output_device", + default_value=data.get("sg_output_device", ""), + ), + ], + ], + title=i18n("音频设备(请使用同种类驱动)"), + ) + ], + [ + sg.Frame( + layout=[ + [ + sg.Text(i18n("响应阈值")), + sg.Slider( + range=(-60, 0), + key="threhold", + resolution=1, + orientation="h", + default_value=data.get("threhold", ""), + ), + ], + [ + sg.Text(i18n("音调设置")), + sg.Slider( + range=(-24, 24), + key="pitch", + resolution=1, + orientation="h", + default_value=data.get("pitch", ""), + ), + ], + [ + sg.Text(i18n("Index Rate")), + sg.Slider( + range=(0.0, 1.0), + key="index_rate", + resolution=0.01, + orientation="h", + default_value=data.get("index_rate", ""), + ), + ], + ], + title=i18n("常规设置"), + ), + sg.Frame( + layout=[ + [ + sg.Text(i18n("采样长度")), + sg.Slider( + range=(0.1, 3.0), + key="block_time", + resolution=0.1, + orientation="h", + default_value=data.get("block_time", ""), + ), + ], + [ + sg.Text(i18n("淡入淡出长度")), + sg.Slider( + range=(0.01, 0.15), + key="crossfade_length", + resolution=0.01, + orientation="h", + default_value=data.get("crossfade_length", ""), + ), + ], + [ + sg.Text(i18n("额外推理时长")), + sg.Slider( + range=(0.05, 3.00), + key="extra_time", + resolution=0.01, + orientation="h", + default_value=data.get("extra_time", ""), + ), + ], + [ + sg.Checkbox(i18n("输入降噪"), key="I_noise_reduce"), + sg.Checkbox(i18n("输出降噪"), key="O_noise_reduce"), + ], + ], + title=i18n("性能设置"), + ), + ], + [ + sg.Button(i18n("开始音频转换"), key="start_vc"), + sg.Button(i18n("停止音频转换"), key="stop_vc"), + sg.Text(i18n("推理时间(ms):")), + sg.Text("0", key="infer_time"), + ], + ] + self.window = sg.Window("RVC - GUI", layout=layout) + self.event_handler() + + def event_handler(self): + while True: + event, values = self.window.read() + if event == sg.WINDOW_CLOSED: + self.flag_vc = False + exit() + if event == "start_vc" and self.flag_vc == False: + if self.set_values(values) == True: + print("using_cuda:" + str(torch.cuda.is_available())) + self.start_vc() + settings = { + "pth_path": values["pth_path"], + "index_path": values["index_path"], + "sg_input_device": values["sg_input_device"], + "sg_output_device": values["sg_output_device"], + "threhold": values["threhold"], + "pitch": values["pitch"], + "index_rate": values["index_rate"], + "block_time": values["block_time"], + "crossfade_length": values["crossfade_length"], + "extra_time": values["extra_time"], + } + with open("values1.json", "w") as j: + json.dump(settings, j) + if event == "stop_vc" and self.flag_vc == True: + self.flag_vc = False + + def set_values(self, values): + if len(values["pth_path"].strip()) == 0: + sg.popup(i18n("请选择pth文件")) + return False + if 
len(values["index_path"].strip()) == 0: + sg.popup(i18n("请选择index文件")) + return False + pattern = re.compile("[^\x00-\x7F]+") + if pattern.findall(values["hubert_path"]): + sg.popup(i18n("hubert模型路径不可包含中文")) + return False + if pattern.findall(values["pth_path"]): + sg.popup(i18n("pth文件路径不可包含中文")) + return False + if pattern.findall(values["index_path"]): + sg.popup(i18n("index文件路径不可包含中文")) + return False + self.set_devices(values["sg_input_device"], values["sg_output_device"]) + self.config.hubert_path = os.path.join(current_dir, "hubert_base.pt") + self.config.pth_path = values["pth_path"] + self.config.index_path = values["index_path"] + self.config.npy_path = values["npy_path"] + self.config.threhold = values["threhold"] + self.config.pitch = values["pitch"] + self.config.block_time = values["block_time"] + self.config.crossfade_time = values["crossfade_length"] + self.config.extra_time = values["extra_time"] + self.config.I_noise_reduce = values["I_noise_reduce"] + self.config.O_noise_reduce = values["O_noise_reduce"] + self.config.index_rate = values["index_rate"] + return True + + def start_vc(self): + torch.cuda.empty_cache() + self.flag_vc = True + self.block_frame = int(self.config.block_time * self.config.samplerate) + self.crossfade_frame = int(self.config.crossfade_time * self.config.samplerate) + self.sola_search_frame = int(0.012 * self.config.samplerate) + self.delay_frame = int(0.01 * self.config.samplerate) # 往前预留0.02s + self.extra_frame = int(self.config.extra_time * self.config.samplerate) + self.rvc = None + self.rvc = RVC( + self.config.pitch, + self.config.hubert_path, + self.config.pth_path, + self.config.index_path, + self.config.npy_path, + self.config.index_rate, + ) + self.input_wav: np.ndarray = np.zeros( + self.extra_frame + + self.crossfade_frame + + self.sola_search_frame + + self.block_frame, + dtype="float32", + ) + self.output_wav: torch.Tensor = torch.zeros( + self.block_frame, device=device, dtype=torch.float32 + ) + self.sola_buffer: torch.Tensor = torch.zeros( + self.crossfade_frame, device=device, dtype=torch.float32 + ) + self.fade_in_window: torch.Tensor = torch.linspace( + 0.0, 1.0, steps=self.crossfade_frame, device=device, dtype=torch.float32 + ) + self.fade_out_window: torch.Tensor = 1 - self.fade_in_window + self.resampler1 = tat.Resample( + orig_freq=self.config.samplerate, new_freq=16000, dtype=torch.float32 + ) + self.resampler2 = tat.Resample( + orig_freq=self.rvc.tgt_sr, + new_freq=self.config.samplerate, + dtype=torch.float32, + ) + thread_vc = threading.Thread(target=self.soundinput) + thread_vc.start() + + def soundinput(self): + """ + 接受音频输入 + """ + with sd.Stream( + channels=2, + callback=self.audio_callback, + blocksize=self.block_frame, + samplerate=self.config.samplerate, + dtype="float32", + ): + while self.flag_vc: + time.sleep(self.config.block_time) + print("Audio block passed.") + print("ENDing VC") + + def audio_callback( + self, indata: np.ndarray, outdata: np.ndarray, frames, times, status + ): + """ + 音频处理 + """ + start_time = time.perf_counter() + indata = librosa.to_mono(indata.T) + if self.config.I_noise_reduce: + indata[:] = nr.reduce_noise(y=indata, sr=self.config.samplerate) + + """noise gate""" + frame_length = 2048 + hop_length = 1024 + rms = librosa.feature.rms( + y=indata, frame_length=frame_length, hop_length=hop_length + ) + db_threhold = librosa.amplitude_to_db(rms, ref=1.0)[0] < self.config.threhold + # print(rms.shape,db.shape,db) + for i in range(db_threhold.shape[0]): + if db_threhold[i]: + indata[i * 
hop_length : (i + 1) * hop_length] = 0 + self.input_wav[:] = np.append(self.input_wav[self.block_frame :], indata) + + # infer + print("input_wav:" + str(self.input_wav.shape)) + # print('infered_wav:'+str(infer_wav.shape)) + infer_wav: torch.Tensor = self.resampler2( + self.rvc.infer(self.resampler1(torch.from_numpy(self.input_wav))) + )[-self.crossfade_frame - self.sola_search_frame - self.block_frame :].to( + device + ) + print("infer_wav:" + str(infer_wav.shape)) + + # SOLA algorithm from https://github.com/yxlllc/DDSP-SVC + cor_nom = F.conv1d( + infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame], + self.sola_buffer[None, None, :], + ) + cor_den = torch.sqrt( + F.conv1d( + infer_wav[None, None, : self.crossfade_frame + self.sola_search_frame] + ** 2, + torch.ones(1, 1, self.crossfade_frame, device=device), + ) + + 1e-8 + ) + sola_offset = torch.argmax(cor_nom[0, 0] / cor_den[0, 0]) + print("sola offset: " + str(int(sola_offset))) + + # crossfade + self.output_wav[:] = infer_wav[sola_offset : sola_offset + self.block_frame] + self.output_wav[: self.crossfade_frame] *= self.fade_in_window + self.output_wav[: self.crossfade_frame] += self.sola_buffer[:] + if sola_offset < self.sola_search_frame: + self.sola_buffer[:] = ( + infer_wav[ + -self.sola_search_frame + - self.crossfade_frame + + sola_offset : -self.sola_search_frame + + sola_offset + ] + * self.fade_out_window + ) + else: + self.sola_buffer[:] = ( + infer_wav[-self.crossfade_frame :] * self.fade_out_window + ) + + if self.config.O_noise_reduce: + outdata[:] = np.tile( + nr.reduce_noise( + y=self.output_wav[:].cpu().numpy(), sr=self.config.samplerate + ), + (2, 1), + ).T + else: + outdata[:] = self.output_wav[:].repeat(2, 1).t().cpu().numpy() + total_time = time.perf_counter() - start_time + self.window["infer_time"].update(int(total_time * 1000)) + print("infer time:" + str(total_time)) + + def get_devices(self, update: bool = True): + """获取设备列表""" + if update: + sd._terminate() + sd._initialize() + devices = sd.query_devices() + hostapis = sd.query_hostapis() + for hostapi in hostapis: + for device_idx in hostapi["devices"]: + devices[device_idx]["hostapi_name"] = hostapi["name"] + input_devices = [ + f"{d['name']} ({d['hostapi_name']})" + for d in devices + if d["max_input_channels"] > 0 + ] + output_devices = [ + f"{d['name']} ({d['hostapi_name']})" + for d in devices + if d["max_output_channels"] > 0 + ] + input_devices_indices = [ + d["index"] if "index" in d else d["name"] + for d in devices + if d["max_input_channels"] > 0 + ] + output_devices_indices = [ + d["index"] if "index" in d else d["name"] + for d in devices + if d["max_output_channels"] > 0 + ] + return ( + input_devices, + output_devices, + input_devices_indices, + output_devices_indices, + ) + + def set_devices(self, input_device, output_device): + """设置输出设备""" + ( + input_devices, + output_devices, + input_device_indices, + output_device_indices, + ) = self.get_devices() + sd.default.device[0] = input_device_indices[input_devices.index(input_device)] + sd.default.device[1] = output_device_indices[ + output_devices.index(output_device) + ] + print("input device:" + str(sd.default.device[0]) + ":" + str(input_device)) + print("output device:" + str(sd.default.device[1]) + ":" + str(output_device)) + + +gui = GUI() diff --git a/AIMeiSheng/i18n.py b/AIMeiSheng/i18n.py new file mode 100644 index 0000000..d64f2ea --- /dev/null +++ b/AIMeiSheng/i18n.py @@ -0,0 +1,28 @@ +import locale +import json +import os + + +def load_language_list(language): + 
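+    """Return the key -> translated-string mapping for the given locale."""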
with open(f"./lib/i18n/{language}.json", "r", encoding="utf-8") as f: + language_list = json.load(f) + return language_list + + +class I18nAuto: + def __init__(self, language=None): + if language in ["Auto", None]: + language = locale.getdefaultlocale()[ + 0 + ] # getlocale can't identify the system's language ((None, None)) + if not os.path.exists(f"./lib/i18n/{language}.json"): + language = "en_US" + self.language = language + # print("Use Language:", language) + self.language_map = load_language_list(language) + + def __call__(self, key): + return self.language_map.get(key, key) + + def print(self): + print("Use Language:", self.language) diff --git a/AIMeiSheng/infer-web.py b/AIMeiSheng/infer-web.py new file mode 100644 index 0000000..cd89f96 --- /dev/null +++ b/AIMeiSheng/infer-web.py @@ -0,0 +1,2123 @@ +import os +import shutil +import sys + +now_dir = os.getcwd() +sys.path.append(now_dir) +import traceback, pdb +import warnings + +import numpy as np +import torch + +os.environ["OPENBLAS_NUM_THREADS"] = "1" +os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" +import logging +import threading +from random import shuffle +from subprocess import Popen +from time import sleep + +import faiss +import ffmpeg +import gradio as gr +import soundfile as sf +from config import Config +from fairseq import checkpoint_utils +from i18n import I18nAuto +from lib.infer_pack.models import ( + SynthesizerTrnMs256NSFsid, + SynthesizerTrnMs256NSFsid_nono, + SynthesizerTrnMs768NSFsid, + SynthesizerTrnMs768NSFsid_nono, +) +from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM +from infer_uvr5 import _audio_pre_, _audio_pre_new +from lib.audio import load_audio +from lib.train.process_ckpt import change_info, extract_small_model, merge, show_info +from vc_infer_pipeline import VC +from sklearn.cluster import MiniBatchKMeans + +logging.getLogger("numba").setLevel(logging.WARNING) + +now_dir = os.getcwd() +tmp = os.path.join(now_dir, "TEMP") +shutil.rmtree(tmp, ignore_errors=True) +shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True) +shutil.rmtree("%s/runtime/Lib/site-packages/uvr5_pack" % (now_dir), ignore_errors=True) +os.makedirs(tmp, exist_ok=True) +os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) +os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True) +os.environ["TEMP"] = tmp +warnings.filterwarnings("ignore") +torch.manual_seed(114514) + + +config = Config() +i18n = I18nAuto() +i18n.print() +# 判断是否有能用来训练和加速推理的N卡 +ngpu = torch.cuda.device_count() +gpu_infos = [] +mem = [] +if_gpu_ok = False + +if torch.cuda.is_available() or ngpu != 0: + for i in range(ngpu): + gpu_name = torch.cuda.get_device_name(i) + if any( + value in gpu_name.upper() + for value in [ + "10", + "16", + "20", + "30", + "40", + "A2", + "A3", + "A4", + "P4", + "A50", + "500", + "A60", + "70", + "80", + "90", + "M4", + "T4", + "TITAN", + ] + ): + # A10#A100#V100#A40#P40#M40#K80#A4500 + if_gpu_ok = True # 至少有一张能用的N卡 + gpu_infos.append("%s\t%s" % (i, gpu_name)) + mem.append( + int( + torch.cuda.get_device_properties(i).total_memory + / 1024 + / 1024 + / 1024 + + 0.4 + ) + ) +if if_gpu_ok and len(gpu_infos) > 0: + gpu_info = "\n".join(gpu_infos) + default_batch_size = min(mem) // 2 +else: + gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练") + default_batch_size = 1 +gpus = "-".join([i[0] for i in gpu_infos]) + + +class ToolButton(gr.Button, gr.components.FormComponent): + """Small button with single emoji as text, fits inside gradio forms""" + + def __init__(self, **kwargs): + 
super().__init__(variant="tool", **kwargs) + + def get_block_name(self): + return "button" + + +hubert_model = None + + +def load_hubert(): + global hubert_model + models, _, _ = checkpoint_utils.load_model_ensemble_and_task( + ["hubert_base.pt"], + suffix="", + ) + hubert_model = models[0] + hubert_model = hubert_model.to(config.device) + if config.is_half: + hubert_model = hubert_model.half() + else: + hubert_model = hubert_model.float() + hubert_model.eval() + + +weight_root = "weights" +weight_uvr5_root = "uvr5_weights" +index_root = "logs" +names = [] +for name in os.listdir(weight_root): + if name.endswith(".pth"): + names.append(name) +index_paths = [] +for root, dirs, files in os.walk(index_root, topdown=False): + for name in files: + if name.endswith(".index") and "trained" not in name: + index_paths.append("%s/%s" % (root, name)) +uvr5_names = [] +for name in os.listdir(weight_uvr5_root): + if name.endswith(".pth") or "onnx" in name: + uvr5_names.append(name.replace(".pth", "")) + +cpt = None + + +def vc_single( + sid, + input_audio_path, + f0_up_key, + f0_file, + f0_method, + file_index, + file_index2, + # file_big_npy, + index_rate, + filter_radius, + resample_sr, + rms_mix_rate, + protect, +): # spk_item, input_audio0, vc_transform0,f0_file,f0method0 + global tgt_sr, net_g, vc, hubert_model, version, cpt + if input_audio_path is None: + return "You need to upload an audio", None + f0_up_key = int(f0_up_key) + try: + audio = load_audio(input_audio_path, 16000) + audio_max = np.abs(audio).max() / 0.95 + if audio_max > 1: + audio /= audio_max + times = [0, 0, 0] + if not hubert_model: + load_hubert() + if_f0 = cpt.get("f0", 1) + file_index = ( + ( + file_index.strip(" ") + .strip('"') + .strip("\n") + .strip('"') + .strip(" ") + .replace("trained", "added") + ) + if file_index != "" + else file_index2 + ) # 防止小白写错,自动帮他替换掉 + # file_big_npy = ( + # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + # ) + audio_opt = vc.pipeline( + hubert_model, + net_g, + sid, + audio, + input_audio_path, + times, + f0_up_key, + f0_method, + file_index, + # file_big_npy, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + version, + protect, + f0_file=f0_file, + ) + if tgt_sr != resample_sr >= 16000: + tgt_sr = resample_sr + index_info = ( + "Using index:%s." % file_index + if os.path.exists(file_index) + else "Index not used." 
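+            # Note: `tgt_sr != resample_sr >= 16000` above is a chained comparison,
+            # i.e. resample only when resample_sr >= 16000 and it differs from the
+            # model's native output rate.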
+ ) + return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % ( + index_info, + times[0], + times[1], + times[2], + ), (tgt_sr, audio_opt) + except: + info = traceback.format_exc() + print(info) + return info, (None, None) + + +def vc_multi( + sid, + dir_path, + opt_root, + paths, + f0_up_key, + f0_method, + file_index, + file_index2, + # file_big_npy, + index_rate, + filter_radius, + resample_sr, + rms_mix_rate, + protect, + format1, +): + try: + dir_path = ( + dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + ) # 防止小白拷路径头尾带了空格和"和回车 + opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + os.makedirs(opt_root, exist_ok=True) + try: + if dir_path != "": + paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)] + else: + paths = [path.name for path in paths] + except: + traceback.print_exc() + paths = [path.name for path in paths] + infos = [] + for path in paths: + info, opt = vc_single( + sid, + path, + f0_up_key, + None, + f0_method, + file_index, + file_index2, + # file_big_npy, + index_rate, + filter_radius, + resample_sr, + rms_mix_rate, + protect, + ) + if "Success" in info: + try: + tgt_sr, audio_opt = opt + if format1 in ["wav", "flac"]: + sf.write( + "%s/%s.%s" % (opt_root, os.path.basename(path), format1), + audio_opt, + tgt_sr, + ) + else: + path = "%s/%s.wav" % (opt_root, os.path.basename(path)) + sf.write( + path, + audio_opt, + tgt_sr, + ) + if os.path.exists(path): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4] + ".%s" % format1) + ) + except: + info += traceback.format_exc() + infos.append("%s->%s" % (os.path.basename(path), info)) + yield "\n".join(infos) + yield "\n".join(infos) + except: + yield traceback.format_exc() + + +def uvr(model_name, inp_root, save_root_vocal, paths, save_root_ins, agg, format0): + infos = [] + try: + inp_root = inp_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + save_root_vocal = ( + save_root_vocal.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + ) + save_root_ins = ( + save_root_ins.strip(" ").strip('"').strip("\n").strip('"').strip(" ") + ) + if model_name == "onnx_dereverb_By_FoxJoy": + from MDXNet import MDXNetDereverb + + pre_fun = MDXNetDereverb(15) + else: + func = _audio_pre_ if "DeEcho" not in model_name else _audio_pre_new + pre_fun = func( + agg=int(agg), + model_path=os.path.join(weight_uvr5_root, model_name + ".pth"), + device=config.device, + is_half=config.is_half, + ) + if inp_root != "": + paths = [os.path.join(inp_root, name) for name in os.listdir(inp_root)] + else: + paths = [path.name for path in paths] + for path in paths: + inp_path = os.path.join(inp_root, path) + need_reformat = 1 + done = 0 + try: + info = ffmpeg.probe(inp_path, cmd="ffprobe") + if ( + info["streams"][0]["channels"] == 2 + and info["streams"][0]["sample_rate"] == "44100" + ): + need_reformat = 0 + pre_fun._path_audio_( + inp_path, save_root_ins, save_root_vocal, format0 + ) + done = 1 + except: + need_reformat = 1 + traceback.print_exc() + if need_reformat == 1: + tmp_path = "%s/%s.reformatted.wav" % (tmp, os.path.basename(inp_path)) + os.system( + "ffmpeg -i %s -vn -acodec pcm_s16le -ac 2 -ar 44100 %s -y" + % (inp_path, tmp_path) + ) + inp_path = tmp_path + try: + if done == 0: + pre_fun._path_audio_( + inp_path, save_root_ins, save_root_vocal, format0 + ) + infos.append("%s->Success" % (os.path.basename(inp_path))) + yield "\n".join(infos) + except: + infos.append( + "%s->%s" % (os.path.basename(inp_path), traceback.format_exc()) + ) + yield 
"\n".join(infos) + except: + infos.append(traceback.format_exc()) + yield "\n".join(infos) + finally: + try: + if model_name == "onnx_dereverb_By_FoxJoy": + del pre_fun.pred.model + del pre_fun.pred.model_ + else: + del pre_fun.model + del pre_fun + except: + traceback.print_exc() + print("clean_empty_cache") + if torch.cuda.is_available(): + torch.cuda.empty_cache() + yield "\n".join(infos) + + +def get_index_path_from_model(sid): + sel_index_path = "" + name = os.path.join("logs", sid.split(".")[0], "") + # print(name) + for f in index_paths: + if name in f: + # print("selected index path:", f) + sel_index_path = f + break + return sel_index_path + + +# 一个选项卡全局只能有一个音色 +def get_vc(sid, to_return_protect0, to_return_protect1): + global n_spk, tgt_sr, net_g, vc, cpt, version + if sid == "" or sid == []: + global hubert_model + if hubert_model is not None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 + print("clean_empty_cache") + del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt + hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None + if torch.cuda.is_available(): + torch.cuda.empty_cache() + ###楼下不这么折腾清理不干净 + if_f0 = cpt.get("f0", 1) + version = cpt.get("version", "v1") + if version == "v1": + if if_f0 == 1: + net_g = SynthesizerTrnMs256NSFsid( + *cpt["config"], is_half=config.is_half + ) + else: + net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) + elif version == "v2": + if if_f0 == 1: + net_g = SynthesizerTrnMs768NSFsid( + *cpt["config"], is_half=config.is_half + ) + else: + net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) + del net_g, cpt + if torch.cuda.is_available(): + torch.cuda.empty_cache() + return {"visible": False, "__type__": "update"} + person = "%s/%s" % (weight_root, sid) + print("loading %s" % person) + + cpt = torch.load(person, map_location="cpu") + tgt_sr = cpt["config"][-1] + cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk + if_f0 = cpt.get("f0", 1) + if if_f0 == 0: + to_return_protect0 = to_return_protect1 = { + "visible": False, + "value": 0.5, + "__type__": "update", + } + else: + to_return_protect0 = { + "visible": True, + "value": to_return_protect0, + "__type__": "update", + } + to_return_protect1 = { + "visible": True, + "value": to_return_protect1, + "__type__": "update", + } + version = cpt.get("version", "v1") + if version == "v1": + if if_f0 == 1: + net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half) + else: + net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) + elif version == "v2": + if if_f0 == 1: + net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half) + else: + net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) + del net_g.enc_q + print(net_g.load_state_dict(cpt["weight"], strict=False)) + net_g.eval().to(config.device) + if config.is_half: + net_g = net_g.half() + else: + net_g = net_g.float() + vc = VC(tgt_sr, config) + n_spk = cpt["config"][-3] + return ( + {"visible": True, "maximum": n_spk, "__type__": "update"}, + to_return_protect0, + to_return_protect1, + get_index_path_from_model(sid), + ) + + +def change_choices(): + names = [] + for name in os.listdir(weight_root): + if name.endswith(".pth"): + names.append(name) + index_paths = [] + for root, dirs, files in os.walk(index_root, topdown=False): + for name in files: + if name.endswith(".index") and "trained" not in name: + index_paths.append("%s/%s" % (root, name)) + return {"choices": sorted(names), "__type__": "update"}, { + "choices": sorted(index_paths), + "__type__": "update", + } + + +def clean(): + return 
{"value": "", "__type__": "update"} + + +sr_dict = { + "32k": 32000, + "40k": 40000, + "48k": 48000, +} + + +def if_done(done, p): + while 1: + if p.poll() is None: + sleep(0.5) + else: + break + done[0] = True + + +def if_done_multi(done, ps): + while 1: + # poll==None代表进程未结束 + # 只要有一个进程未结束都不停 + flag = 1 + for p in ps: + if p.poll() is None: + flag = 0 + sleep(0.5) + break + if flag == 1: + break + done[0] = True + + +def preprocess_dataset(trainset_dir, exp_dir, sr, n_p): + sr = sr_dict[sr] + os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) + f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w") + f.close() + cmd = ( + config.python_cmd + + ' trainset_preprocess_pipeline_print.py "%s" %s %s "%s/logs/%s" ' + % (trainset_dir, sr, n_p, now_dir, exp_dir) + + str(config.noparallel) + ) + print(cmd) + p = Popen(cmd, shell=True) # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir + ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 + done = [False] + threading.Thread( + target=if_done, + args=( + done, + p, + ), + ).start() + while 1: + with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: + yield (f.read()) + sleep(1) + if done[0]: + break + with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: + log = f.read() + print(log) + yield log + + +# but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2]) +def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe): + gpus = gpus.split("-") + os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) + f = open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w") + f.close() + if if_f0: + if f0method != "rmvpe_gpu": + cmd = config.python_cmd + ' extract_f0_print.py "%s/logs/%s" %s %s' % ( + now_dir, + exp_dir, + n_p, + f0method, + ) + print(cmd) + p = Popen( + cmd, shell=True, cwd=now_dir + ) # , stdin=PIPE, stdout=PIPE,stderr=PIPE + ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 + done = [False] + threading.Thread( + target=if_done, + args=( + done, + p, + ), + ).start() + while 1: + with open( + "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" + ) as f: + yield (f.read()) + sleep(1) + if done[0]: + break + with open( + "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" + ) as f: + log = f.read() + print(log) + yield log + else: + gpus_rmvpe = gpus_rmvpe.split("-") + leng = len(gpus_rmvpe) + ps = [] + for idx, n_g in enumerate(gpus_rmvpe): + cmd = ( + config.python_cmd + + ' extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s ' + % (leng, idx, n_g, now_dir, exp_dir, config.is_half) + ) + print(cmd) + p = Popen( + cmd, shell=True, cwd=now_dir + ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir + ps.append(p) + ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 + done = [False] + threading.Thread( + target=if_done_multi, + args=( + done, + ps, + ), + ).start() + while 1: + with open( + "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" + ) as f: + yield (f.read()) + sleep(1) + if done[0]: + break + with open( + "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" + ) as f: + log = f.read() + print(log) + yield log + ####对不同part分别开多进程 + """ + n_part=int(sys.argv[1]) + i_part=int(sys.argv[2]) + i_gpu=sys.argv[3] + exp_dir=sys.argv[4] + os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu) + """ + leng = len(gpus) + ps = [] + for idx, n_g in enumerate(gpus): + cmd = ( + config.python_cmd + + ' extract_feature_print.py %s %s %s %s "%s/logs/%s" %s' + % ( + 
config.device, + leng, + idx, + n_g, + now_dir, + exp_dir, + version19, + ) + ) + print(cmd) + p = Popen( + cmd, shell=True, cwd=now_dir + ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir + ps.append(p) + ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 + done = [False] + threading.Thread( + target=if_done_multi, + args=( + done, + ps, + ), + ).start() + while 1: + with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: + yield (f.read()) + sleep(1) + if done[0]: + break + with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: + log = f.read() + print(log) + yield log + + +def change_sr2(sr2, if_f0_3, version19): + path_str = "" if version19 == "v1" else "_v2" + f0_str = "f0" if if_f0_3 else "" + if_pretrained_generator_exist = os.access( + "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK + ) + if_pretrained_discriminator_exist = os.access( + "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK + ) + if not if_pretrained_generator_exist: + print( + "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), + "not exist, will not use pretrained model", + ) + if not if_pretrained_discriminator_exist: + print( + "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), + "not exist, will not use pretrained model", + ) + return ( + "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2) + if if_pretrained_generator_exist + else "", + "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2) + if if_pretrained_discriminator_exist + else "", + ) + + +def change_version19(sr2, if_f0_3, version19): + path_str = "" if version19 == "v1" else "_v2" + if sr2 == "32k" and version19 == "v1": + sr2 = "40k" + to_return_sr2 = ( + {"choices": ["40k", "48k"], "__type__": "update", "value": sr2} + if version19 == "v1" + else {"choices": ["40k", "48k", "32k"], "__type__": "update", "value": sr2} + ) + f0_str = "f0" if if_f0_3 else "" + if_pretrained_generator_exist = os.access( + "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK + ) + if_pretrained_discriminator_exist = os.access( + "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK + ) + if not if_pretrained_generator_exist: + print( + "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), + "not exist, will not use pretrained model", + ) + if not if_pretrained_discriminator_exist: + print( + "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), + "not exist, will not use pretrained model", + ) + return ( + "pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2) + if if_pretrained_generator_exist + else "", + "pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2) + if if_pretrained_discriminator_exist + else "", + to_return_sr2, + ) + + +def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15 + path_str = "" if version19 == "v1" else "_v2" + if_pretrained_generator_exist = os.access( + "pretrained%s/f0G%s.pth" % (path_str, sr2), os.F_OK + ) + if_pretrained_discriminator_exist = os.access( + "pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK + ) + if not if_pretrained_generator_exist: + print( + "pretrained%s/f0G%s.pth" % (path_str, sr2), + "not exist, will not use pretrained model", + ) + if not if_pretrained_discriminator_exist: + print( + "pretrained%s/f0D%s.pth" % (path_str, sr2), + "not exist, will not use pretrained model", + ) + if if_f0_3: + return ( + {"visible": True, "__type__": "update"}, + "pretrained%s/f0G%s.pth" % (path_str, sr2) + if if_pretrained_generator_exist + else "", + "pretrained%s/f0D%s.pth" % (path_str, sr2) + if 
if_pretrained_discriminator_exist + else "", + ) + return ( + {"visible": False, "__type__": "update"}, + ("pretrained%s/G%s.pth" % (path_str, sr2)) + if if_pretrained_generator_exist + else "", + ("pretrained%s/D%s.pth" % (path_str, sr2)) + if if_pretrained_discriminator_exist + else "", + ) + + +# but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16]) +def click_train( + exp_dir1, + sr2, + if_f0_3, + spk_id5, + save_epoch10, + total_epoch11, + batch_size12, + if_save_latest13, + pretrained_G14, + pretrained_D15, + gpus16, + if_cache_gpu17, + if_save_every_weights18, + version19, +): + # 生成filelist + exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) + os.makedirs(exp_dir, exist_ok=True) + gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir) + feature_dir = ( + "%s/3_feature256" % (exp_dir) + if version19 == "v1" + else "%s/3_feature768" % (exp_dir) + ) + if if_f0_3: + f0_dir = "%s/2a_f0" % (exp_dir) + f0nsf_dir = "%s/2b-f0nsf" % (exp_dir) + names = ( + set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) + & set([name.split(".")[0] for name in os.listdir(feature_dir)]) + & set([name.split(".")[0] for name in os.listdir(f0_dir)]) + & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) + ) + else: + names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( + [name.split(".")[0] for name in os.listdir(feature_dir)] + ) + opt = [] + for name in names: + if if_f0_3: + opt.append( + "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s" + % ( + gt_wavs_dir.replace("\\", "\\\\"), + name, + feature_dir.replace("\\", "\\\\"), + name, + f0_dir.replace("\\", "\\\\"), + name, + f0nsf_dir.replace("\\", "\\\\"), + name, + spk_id5, + ) + ) + else: + opt.append( + "%s/%s.wav|%s/%s.npy|%s" + % ( + gt_wavs_dir.replace("\\", "\\\\"), + name, + feature_dir.replace("\\", "\\\\"), + name, + spk_id5, + ) + ) + fea_dim = 256 if version19 == "v1" else 768 + if if_f0_3: + for _ in range(2): + opt.append( + "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" + % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5) + ) + else: + for _ in range(2): + opt.append( + "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" + % (now_dir, sr2, now_dir, fea_dim, spk_id5) + ) + shuffle(opt) + with open("%s/filelist.txt" % exp_dir, "w") as f: + f.write("\n".join(opt)) + print("write filelist done") + # 生成config#无需生成config + # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0" + print("use gpus:", gpus16) + if pretrained_G14 == "": + print("no pretrained Generator") + if pretrained_D15 == "": + print("no pretrained Discriminator") + if gpus16: + cmd = ( + config.python_cmd + + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' + % ( + exp_dir1, + sr2, + 1 if if_f0_3 else 0, + batch_size12, + gpus16, + total_epoch11, + save_epoch10, + "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "", + "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "", + 1 if if_save_latest13 == i18n("是") else 0, + 1 if if_cache_gpu17 == i18n("是") else 0, + 1 if if_save_every_weights18 == i18n("是") else 0, + version19, + ) + ) + else: + cmd = ( + config.python_cmd + + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s 
-te %s -se %s %s %s -l %s -c %s -sw %s -v %s' + % ( + exp_dir1, + sr2, + 1 if if_f0_3 else 0, + batch_size12, + total_epoch11, + save_epoch10, + "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "\b", + "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "\b", + 1 if if_save_latest13 == i18n("是") else 0, + 1 if if_cache_gpu17 == i18n("是") else 0, + 1 if if_save_every_weights18 == i18n("是") else 0, + version19, + ) + ) + print(cmd) + p = Popen(cmd, shell=True, cwd=now_dir) + p.wait() + return "训练结束, 您可查看控制台训练日志或实验文件夹下的train.log" + + +# but4.click(train_index, [exp_dir1], info3) +def train_index(exp_dir1, version19): + exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) + os.makedirs(exp_dir, exist_ok=True) + feature_dir = ( + "%s/3_feature256" % (exp_dir) + if version19 == "v1" + else "%s/3_feature768" % (exp_dir) + ) + if not os.path.exists(feature_dir): + return "请先进行特征提取!" + listdir_res = list(os.listdir(feature_dir)) + if len(listdir_res) == 0: + return "请先进行特征提取!" + infos = [] + npys = [] + for name in sorted(listdir_res): + phone = np.load("%s/%s" % (feature_dir, name)) + npys.append(phone) + big_npy = np.concatenate(npys, 0) + big_npy_idx = np.arange(big_npy.shape[0]) + np.random.shuffle(big_npy_idx) + big_npy = big_npy[big_npy_idx] + if big_npy.shape[0] > 2e5: + # if(1): + infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0]) + yield "\n".join(infos) + try: + big_npy = ( + MiniBatchKMeans( + n_clusters=10000, + verbose=True, + batch_size=256 * config.n_cpu, + compute_labels=False, + init="random", + ) + .fit(big_npy) + .cluster_centers_ + ) + except: + info = traceback.format_exc() + print(info) + infos.append(info) + yield "\n".join(infos) + + np.save("%s/total_fea.npy" % exp_dir, big_npy) + n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) + infos.append("%s,%s" % (big_npy.shape, n_ivf)) + yield "\n".join(infos) + index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf) + # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf) + infos.append("training") + yield "\n".join(infos) + index_ivf = faiss.extract_index_ivf(index) # + index_ivf.nprobe = 1 + index.train(big_npy) + faiss.write_index( + index, + "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" + % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), + ) + # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) + infos.append("adding") + yield "\n".join(infos) + batch_size_add = 8192 + for i in range(0, big_npy.shape[0], batch_size_add): + index.add(big_npy[i : i + batch_size_add]) + faiss.write_index( + index, + "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index" + % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), + ) + infos.append( + "成功构建索引,added_IVF%s_Flat_nprobe_%s_%s_%s.index" + % (n_ivf, index_ivf.nprobe, exp_dir1, version19) + ) + # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) + # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19)) + yield "\n".join(infos) + + +# but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3) +def train1key( + exp_dir1, + sr2, + if_f0_3, + trainset_dir4, + spk_id5, + np7, + f0method8, + save_epoch10, + total_epoch11, + batch_size12, + if_save_latest13, + pretrained_G14, + pretrained_D15, + gpus16, + 
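+    # One-click flow: these parameters mirror the separate training-tab widgets;
+    # the body runs preprocess -> f0/feature extraction -> training -> index build.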
if_cache_gpu17, + if_save_every_weights18, + version19, + gpus_rmvpe, +): + infos = [] + + def get_info_str(strr): + infos.append(strr) + return "\n".join(infos) + + model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1) + preprocess_log_path = "%s/preprocess.log" % model_log_dir + extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir + gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir + feature_dir = ( + "%s/3_feature256" % model_log_dir + if version19 == "v1" + else "%s/3_feature768" % model_log_dir + ) + + os.makedirs(model_log_dir, exist_ok=True) + #########step1:处理数据 + open(preprocess_log_path, "w").close() + cmd = ( + config.python_cmd + + ' trainset_preprocess_pipeline_print.py "%s" %s %s "%s" ' + % (trainset_dir4, sr_dict[sr2], np7, model_log_dir) + + str(config.noparallel) + ) + yield get_info_str(i18n("step1:正在处理数据")) + yield get_info_str(cmd) + p = Popen(cmd, shell=True) + p.wait() + with open(preprocess_log_path, "r") as f: + print(f.read()) + #########step2a:提取音高 + open(extract_f0_feature_log_path, "w") + if if_f0_3: + yield get_info_str("step2a:正在提取音高") + if f0method8 != "rmvpe_gpu": + cmd = config.python_cmd + ' extract_f0_print.py "%s" %s %s' % ( + model_log_dir, + np7, + f0method8, + ) + yield get_info_str(cmd) + p = Popen(cmd, shell=True, cwd=now_dir) + p.wait() + else: + gpus_rmvpe = gpus_rmvpe.split("-") + leng = len(gpus_rmvpe) + ps = [] + for idx, n_g in enumerate(gpus_rmvpe): + cmd = config.python_cmd + ' extract_f0_rmvpe.py %s %s %s "%s" %s ' % ( + leng, + idx, + n_g, + model_log_dir, + config.is_half, + ) + yield get_info_str(cmd) + p = Popen( + cmd, shell=True, cwd=now_dir + ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir + ps.append(p) + for p in ps: + p.wait() + with open(extract_f0_feature_log_path, "r") as f: + print(f.read()) + else: + yield get_info_str(i18n("step2a:无需提取音高")) + #######step2b:提取特征 + yield get_info_str(i18n("step2b:正在提取特征")) + gpus = gpus16.split("-") + leng = len(gpus) + ps = [] + for idx, n_g in enumerate(gpus): + cmd = config.python_cmd + ' extract_feature_print.py %s %s %s %s "%s" %s' % ( + config.device, + leng, + idx, + n_g, + model_log_dir, + version19, + ) + yield get_info_str(cmd) + p = Popen( + cmd, shell=True, cwd=now_dir + ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir + ps.append(p) + for p in ps: + p.wait() + with open(extract_f0_feature_log_path, "r") as f: + print(f.read()) + #######step3a:训练模型 + yield get_info_str(i18n("step3a:正在训练模型")) + # 生成filelist + if if_f0_3: + f0_dir = "%s/2a_f0" % model_log_dir + f0nsf_dir = "%s/2b-f0nsf" % model_log_dir + names = ( + set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) + & set([name.split(".")[0] for name in os.listdir(feature_dir)]) + & set([name.split(".")[0] for name in os.listdir(f0_dir)]) + & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) + ) + else: + names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( + [name.split(".")[0] for name in os.listdir(feature_dir)] + ) + opt = [] + for name in names: + if if_f0_3: + opt.append( + "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s" + % ( + gt_wavs_dir.replace("\\", "\\\\"), + name, + feature_dir.replace("\\", "\\\\"), + name, + f0_dir.replace("\\", "\\\\"), + name, + f0nsf_dir.replace("\\", "\\\\"), + name, + spk_id5, + ) + ) + else: + opt.append( + "%s/%s.wav|%s/%s.npy|%s" + % ( + gt_wavs_dir.replace("\\", "\\\\"), + name, + feature_dir.replace("\\", "\\\\"), + name, + spk_id5, + ) + ) + fea_dim = 256 if version19 == "v1" else 768 + if 
if_f0_3: + for _ in range(2): + opt.append( + "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" + % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5) + ) + else: + for _ in range(2): + opt.append( + "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" + % (now_dir, sr2, now_dir, fea_dim, spk_id5) + ) + shuffle(opt) + with open("%s/filelist.txt" % model_log_dir, "w") as f: + f.write("\n".join(opt)) + yield get_info_str("write filelist done") + if gpus16: + cmd = ( + config.python_cmd + + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' + % ( + exp_dir1, + sr2, + 1 if if_f0_3 else 0, + batch_size12, + gpus16, + total_epoch11, + save_epoch10, + "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "", + "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "", + 1 if if_save_latest13 == i18n("是") else 0, + 1 if if_cache_gpu17 == i18n("是") else 0, + 1 if if_save_every_weights18 == i18n("是") else 0, + version19, + ) + ) + else: + cmd = ( + config.python_cmd + + ' train_nsf_sim_cache_sid_load_pretrain.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s' + % ( + exp_dir1, + sr2, + 1 if if_f0_3 else 0, + batch_size12, + total_epoch11, + save_epoch10, + "-pg %s" % pretrained_G14 if pretrained_G14 != "" else "", + "-pd %s" % pretrained_D15 if pretrained_D15 != "" else "", + 1 if if_save_latest13 == i18n("是") else 0, + 1 if if_cache_gpu17 == i18n("是") else 0, + 1 if if_save_every_weights18 == i18n("是") else 0, + version19, + ) + ) + yield get_info_str(cmd) + p = Popen(cmd, shell=True, cwd=now_dir) + p.wait() + yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log")) + #######step3b:训练索引 + npys = [] + listdir_res = list(os.listdir(feature_dir)) + for name in sorted(listdir_res): + phone = np.load("%s/%s" % (feature_dir, name)) + npys.append(phone) + big_npy = np.concatenate(npys, 0) + + big_npy_idx = np.arange(big_npy.shape[0]) + np.random.shuffle(big_npy_idx) + big_npy = big_npy[big_npy_idx] + + if big_npy.shape[0] > 2e5: + # if(1): + info = "Trying doing kmeans %s shape to 10k centers." 
% big_npy.shape[0] + print(info) + yield get_info_str(info) + try: + big_npy = ( + MiniBatchKMeans( + n_clusters=10000, + verbose=True, + batch_size=256 * config.n_cpu, + compute_labels=False, + init="random", + ) + .fit(big_npy) + .cluster_centers_ + ) + except: + info = traceback.format_exc() + print(info) + yield get_info_str(info) + + np.save("%s/total_fea.npy" % model_log_dir, big_npy) + + # n_ivf = big_npy.shape[0] // 39 + n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) + yield get_info_str("%s,%s" % (big_npy.shape, n_ivf)) + index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf) + yield get_info_str("training index") + index_ivf = faiss.extract_index_ivf(index) # + index_ivf.nprobe = 1 + index.train(big_npy) + faiss.write_index( + index, + "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" + % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), + ) + yield get_info_str("adding index") + batch_size_add = 8192 + for i in range(0, big_npy.shape[0], batch_size_add): + index.add(big_npy[i : i + batch_size_add]) + faiss.write_index( + index, + "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index" + % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), + ) + yield get_info_str( + "成功构建索引, added_IVF%s_Flat_nprobe_%s_%s_%s.index" + % (n_ivf, index_ivf.nprobe, exp_dir1, version19) + ) + yield get_info_str(i18n("全流程结束!")) + + +# ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__]) +def change_info_(ckpt_path): + if not os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")): + return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} + try: + with open( + ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r" + ) as f: + info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1]) + sr, f0 = info["sample_rate"], info["if_f0"] + version = "v2" if ("version" in info and info["version"] == "v2") else "v1" + return sr, str(f0), version + except: + traceback.print_exc() + return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} + + +def change_f0_method(f0method8): + if f0method8 == "rmvpe_gpu": + visible = True + else: + visible = False + return {"visible": visible, "__type__": "update"} + + +def export_onnx(ModelPath, ExportedPath): + global cpt + cpt = torch.load(ModelPath, map_location="cpu") + cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] + vec_channels = 256 if cpt.get("version", "v1") == "v1" else 768 + + test_phone = torch.rand(1, 200, vec_channels) # hidden unit + test_phone_lengths = torch.tensor([200]).long() # hidden unit 长度(貌似没啥用) + test_pitch = torch.randint(size=(1, 200), low=5, high=255) # 基频(单位赫兹) + test_pitchf = torch.rand(1, 200) # nsf基频 + test_ds = torch.LongTensor([0]) # 说话人ID + test_rnd = torch.rand(1, 192, 200) # 噪声(加入随机因子) + + device = "cpu" # 导出时设备(不影响使用模型) + + net_g = SynthesizerTrnMsNSFsidM( + *cpt["config"], is_half=False, version=cpt.get("version", "v1") + ) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16) + net_g.load_state_dict(cpt["weight"], strict=False) + input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"] + output_names = [ + "audio", + ] + # net_g.construct_spkmixmap(n_speaker) 多角色混合轨道导出 + torch.onnx.export( + net_g, + ( + test_phone.to(device), + test_phone_lengths.to(device), + test_pitch.to(device), + test_pitchf.to(device), + test_ds.to(device), + test_rnd.to(device), + ), + ExportedPath, + dynamic_axes={ + "phone": [1], + "pitch": [1], + "pitchf": [1], + "rnd": [2], + }, + 
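+        # dynamic_axes above marks the frame axis (dim 1 for phone/pitch/pitchf,
+        # dim 2 for rnd) as variable-length, so the exported ONNX graph accepts
+        # inputs of arbitrary duration.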
do_constant_folding=False, + opset_version=13, + verbose=False, + input_names=input_names, + output_names=output_names, + ) + return "Finished" + + +with gr.Blocks(title="RVC WebUI") as app: + gr.Markdown( + value=i18n( + "本软件以MIT协议开源, 作者不对软件具备任何控制力, 使用软件者、传播软件导出的声音者自负全责.
如不认可该条款, 则不能使用或引用软件包内任何代码和文件. 详见根目录LICENSE." + ) + ) + with gr.Tabs(): + with gr.TabItem(i18n("模型推理")): + with gr.Row(): + sid0 = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names)) + refresh_button = gr.Button(i18n("刷新音色列表和索引路径"), variant="primary") + clean_button = gr.Button(i18n("卸载音色省显存"), variant="primary") + spk_item = gr.Slider( + minimum=0, + maximum=2333, + step=1, + label=i18n("请选择说话人id"), + value=0, + visible=False, + interactive=True, + ) + clean_button.click( + fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean" + ) + with gr.Group(): + gr.Markdown( + value=i18n("男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. ") + ) + with gr.Row(): + with gr.Column(): + vc_transform0 = gr.Number( + label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0 + ) + input_audio0 = gr.Textbox( + label=i18n("输入待处理音频文件路径(默认是正确格式示例)"), + value="E:\\codes\\py39\\test-20230416b\\todo-songs\\冬之花clip1.wav", + ) + f0method0 = gr.Radio( + label=i18n( + "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" + ), + choices=["pm", "harvest", "crepe", "rmvpe"], + value="pm", + interactive=True, + ) + filter_radius0 = gr.Slider( + minimum=0, + maximum=7, + label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"), + value=3, + step=1, + interactive=True, + ) + with gr.Column(): + file_index1 = gr.Textbox( + label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"), + value="", + interactive=True, + ) + file_index2 = gr.Dropdown( + label=i18n("自动检测index路径,下拉式选择(dropdown)"), + choices=sorted(index_paths), + interactive=True, + ) + refresh_button.click( + fn=change_choices, + inputs=[], + outputs=[sid0, file_index2], + api_name="infer_refresh", + ) + # file_big_npy1 = gr.Textbox( + # label=i18n("特征文件路径"), + # value="E:\\codes\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy", + # interactive=True, + # ) + index_rate1 = gr.Slider( + minimum=0, + maximum=1, + label=i18n("检索特征占比"), + value=0.75, + interactive=True, + ) + with gr.Column(): + resample_sr0 = gr.Slider( + minimum=0, + maximum=48000, + label=i18n("后处理重采样至最终采样率,0为不进行重采样"), + value=0, + step=1, + interactive=True, + ) + rms_mix_rate0 = gr.Slider( + minimum=0, + maximum=1, + label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), + value=0.25, + interactive=True, + ) + protect0 = gr.Slider( + minimum=0, + maximum=0.5, + label=i18n( + "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果" + ), + value=0.33, + step=0.01, + interactive=True, + ) + f0_file = gr.File(label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调")) + but0 = gr.Button(i18n("转换"), variant="primary") + with gr.Row(): + vc_output1 = gr.Textbox(label=i18n("输出信息")) + vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)")) + but0.click( + vc_single, + [ + spk_item, + input_audio0, + vc_transform0, + f0_file, + f0method0, + file_index1, + file_index2, + # file_big_npy1, + index_rate1, + filter_radius0, + resample_sr0, + rms_mix_rate0, + protect0, + ], + [vc_output1, vc_output2], + api_name="infer_convert", + ) + with gr.Group(): + gr.Markdown( + value=i18n("批量转换, 输入待转换音频文件夹, 或上传多个音频文件, 在指定文件夹(默认opt)下输出转换的音频. 
") + ) + with gr.Row(): + with gr.Column(): + vc_transform1 = gr.Number( + label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"), value=0 + ) + opt_input = gr.Textbox(label=i18n("指定输出文件夹"), value="opt") + f0method1 = gr.Radio( + label=i18n( + "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU,rmvpe效果最好且微吃GPU" + ), + choices=["pm", "harvest", "crepe", "rmvpe"], + value="pm", + interactive=True, + ) + filter_radius1 = gr.Slider( + minimum=0, + maximum=7, + label=i18n(">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"), + value=3, + step=1, + interactive=True, + ) + with gr.Column(): + file_index3 = gr.Textbox( + label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"), + value="", + interactive=True, + ) + file_index4 = gr.Dropdown( + label=i18n("自动检测index路径,下拉式选择(dropdown)"), + choices=sorted(index_paths), + interactive=True, + ) + refresh_button.click( + fn=lambda: change_choices()[1], + inputs=[], + outputs=file_index4, + api_name="infer_refresh_batch", + ) + # file_big_npy2 = gr.Textbox( + # label=i18n("特征文件路径"), + # value="E:\\codes\\py39\\vits_vc_gpu_train\\logs\\mi-test-1key\\total_fea.npy", + # interactive=True, + # ) + index_rate2 = gr.Slider( + minimum=0, + maximum=1, + label=i18n("检索特征占比"), + value=1, + interactive=True, + ) + with gr.Column(): + resample_sr1 = gr.Slider( + minimum=0, + maximum=48000, + label=i18n("后处理重采样至最终采样率,0为不进行重采样"), + value=0, + step=1, + interactive=True, + ) + rms_mix_rate1 = gr.Slider( + minimum=0, + maximum=1, + label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"), + value=1, + interactive=True, + ) + protect1 = gr.Slider( + minimum=0, + maximum=0.5, + label=i18n( + "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果" + ), + value=0.33, + step=0.01, + interactive=True, + ) + with gr.Column(): + dir_input = gr.Textbox( + label=i18n("输入待处理音频文件夹路径(去文件管理器地址栏拷就行了)"), + value="E:\codes\py39\\test-20230416b\\todo-songs", + ) + inputs = gr.File( + file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹") + ) + with gr.Row(): + format1 = gr.Radio( + label=i18n("导出文件格式"), + choices=["wav", "flac", "mp3", "m4a"], + value="flac", + interactive=True, + ) + but1 = gr.Button(i18n("转换"), variant="primary") + vc_output3 = gr.Textbox(label=i18n("输出信息")) + but1.click( + vc_multi, + [ + spk_item, + dir_input, + opt_input, + inputs, + vc_transform1, + f0method1, + file_index3, + file_index4, + # file_big_npy2, + index_rate2, + filter_radius1, + resample_sr1, + rms_mix_rate1, + protect1, + format1, + ], + [vc_output3], + api_name="infer_convert_batch", + ) + sid0.change( + fn=get_vc, + inputs=[sid0, protect0, protect1], + outputs=[spk_item, protect0, protect1, file_index2], + ) + with gr.TabItem(i18n("伴奏人声分离&去混响&去回声")): + with gr.Group(): + gr.Markdown( + value=i18n( + "人声伴奏分离批量处理, 使用UVR5模型。
合格的文件夹路径格式举例: E:\\codes\\py39\\vits_vc_gpu\\白鹭霜华测试样例(去文件管理器地址栏拷就行了)。
模型分为三类:
1、保留人声:不带和声的音频选这个,对主人声保留比HP5更好。内置HP2和HP3两个模型,HP3可能轻微漏伴奏但对主人声保留比HP2稍微好一丁点;
2、仅保留主人声:带和声的音频选这个,对主人声可能有削弱。内置HP5一个模型;
3、去混响、去延迟模型(by FoxJoy):
  (1)MDX-Net(onnx_dereverb):对于双通道混响是最好的选择,不能去除单通道混响;
 (234)DeEcho:去除延迟效果。Aggressive比Normal去除得更彻底,DeReverb额外去除混响,可去除单声道混响,但是对高频重的板式混响去不干净。
去混响/去延迟,附:
1、DeEcho-DeReverb模型的耗时是另外2个DeEcho模型的接近2倍;
2、MDX-Net-Dereverb模型挺慢的;
3、个人推荐的最干净的配置是先MDX-Net再DeEcho-Aggressive。" + ) + ) + with gr.Row(): + with gr.Column(): + dir_wav_input = gr.Textbox( + label=i18n("输入待处理音频文件夹路径"), + value="E:\\codes\\py39\\test-20230416b\\todo-songs\\todo-songs", + ) + wav_inputs = gr.File( + file_count="multiple", label=i18n("也可批量输入音频文件, 二选一, 优先读文件夹") + ) + with gr.Column(): + model_choose = gr.Dropdown(label=i18n("模型"), choices=uvr5_names) + agg = gr.Slider( + minimum=0, + maximum=20, + step=1, + label="人声提取激进程度", + value=10, + interactive=True, + visible=False, # 先不开放调整 + ) + opt_vocal_root = gr.Textbox( + label=i18n("指定输出主人声文件夹"), value="opt" + ) + opt_ins_root = gr.Textbox( + label=i18n("指定输出非主人声文件夹"), value="opt" + ) + format0 = gr.Radio( + label=i18n("导出文件格式"), + choices=["wav", "flac", "mp3", "m4a"], + value="flac", + interactive=True, + ) + but2 = gr.Button(i18n("转换"), variant="primary") + vc_output4 = gr.Textbox(label=i18n("输出信息")) + but2.click( + uvr, + [ + model_choose, + dir_wav_input, + opt_vocal_root, + wav_inputs, + opt_ins_root, + agg, + format0, + ], + [vc_output4], + api_name="uvr_convert", + ) + with gr.TabItem(i18n("训练")): + gr.Markdown( + value=i18n( + "step1: 填写实验配置. 实验数据放在logs下, 每个实验一个文件夹, 需手工输入实验名路径, 内含实验配置, 日志, 训练得到的模型文件. " + ) + ) + with gr.Row(): + exp_dir1 = gr.Textbox(label=i18n("输入实验名"), value="mi-test") + sr2 = gr.Radio( + label=i18n("目标采样率"), + choices=["40k", "48k"], + value="40k", + interactive=True, + ) + if_f0_3 = gr.Radio( + label=i18n("模型是否带音高指导(唱歌一定要, 语音可以不要)"), + choices=[True, False], + value=True, + interactive=True, + ) + version19 = gr.Radio( + label=i18n("版本"), + choices=["v1", "v2"], + value="v2", + interactive=True, + visible=True, + ) + np7 = gr.Slider( + minimum=0, + maximum=config.n_cpu, + step=1, + label=i18n("提取音高和处理数据使用的CPU进程数"), + value=int(np.ceil(config.n_cpu / 1.5)), + interactive=True, + ) + with gr.Group(): # 暂时单人的, 后面支持最多4人的#数据处理 + gr.Markdown( + value=i18n( + "step2a: 自动遍历训练文件夹下所有可解码成音频的文件并进行切片归一化, 在实验目录下生成2个wav文件夹; 暂时只支持单人训练. 
" + ) + ) + with gr.Row(): + trainset_dir4 = gr.Textbox( + label=i18n("输入训练文件夹路径"), value="E:\\语音音频+标注\\米津玄师\\src" + ) + spk_id5 = gr.Slider( + minimum=0, + maximum=4, + step=1, + label=i18n("请指定说话人id"), + value=0, + interactive=True, + ) + but1 = gr.Button(i18n("处理数据"), variant="primary") + info1 = gr.Textbox(label=i18n("输出信息"), value="") + but1.click( + preprocess_dataset, + [trainset_dir4, exp_dir1, sr2, np7], + [info1], + api_name="train_preprocess", + ) + with gr.Group(): + gr.Markdown(value=i18n("step2b: 使用CPU提取音高(如果模型带音高), 使用GPU提取特征(选择卡号)")) + with gr.Row(): + with gr.Column(): + gpus6 = gr.Textbox( + label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"), + value=gpus, + interactive=True, + ) + gpu_info9 = gr.Textbox(label=i18n("显卡信息"), value=gpu_info) + with gr.Column(): + f0method8 = gr.Radio( + label=i18n( + "选择音高提取算法:输入歌声可用pm提速,高质量语音但CPU差可用dio提速,harvest质量更好但慢" + ), + choices=["pm", "harvest", "dio", "rmvpe", "rmvpe_gpu"], + value="rmvpe_gpu", + interactive=True, + ) + gpus_rmvpe = gr.Textbox( + label=i18n( + "rmvpe卡号配置:以-分隔输入使用的不同进程卡号,例如0-0-1使用在卡0上跑2个进程并在卡1上跑1个进程" + ), + value="%s-%s" % (gpus, gpus), + interactive=True, + visible=True, + ) + but2 = gr.Button(i18n("特征提取"), variant="primary") + info2 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) + f0method8.change( + fn=change_f0_method, + inputs=[f0method8], + outputs=[gpus_rmvpe], + ) + but2.click( + extract_f0_feature, + [ + gpus6, + np7, + f0method8, + if_f0_3, + exp_dir1, + version19, + gpus_rmvpe, + ], + [info2], + api_name="train_extract_f0_feature", + ) + with gr.Group(): + gr.Markdown(value=i18n("step3: 填写训练设置, 开始训练模型和索引")) + with gr.Row(): + save_epoch10 = gr.Slider( + minimum=0, + maximum=50, + step=1, + label=i18n("保存频率save_every_epoch"), + value=5, + interactive=True, + ) + total_epoch11 = gr.Slider( + minimum=0, + maximum=1000, + step=1, + label=i18n("总训练轮数total_epoch"), + value=20, + interactive=True, + ) + batch_size12 = gr.Slider( + minimum=1, + maximum=40, + step=1, + label=i18n("每张显卡的batch_size"), + value=default_batch_size, + interactive=True, + ) + if_save_latest13 = gr.Radio( + label=i18n("是否仅保存最新的ckpt文件以节省硬盘空间"), + choices=[i18n("是"), i18n("否")], + value=i18n("否"), + interactive=True, + ) + if_cache_gpu17 = gr.Radio( + label=i18n( + "是否缓存所有训练集至显存. 
10min以下小数据可缓存以加速训练, 大数据缓存会炸显存也加不了多少速" + ), + choices=[i18n("是"), i18n("否")], + value=i18n("否"), + interactive=True, + ) + if_save_every_weights18 = gr.Radio( + label=i18n("是否在每次保存时间点将最终小模型保存至weights文件夹"), + choices=[i18n("是"), i18n("否")], + value=i18n("否"), + interactive=True, + ) + with gr.Row(): + pretrained_G14 = gr.Textbox( + label=i18n("加载预训练底模G路径"), + value="pretrained_v2/f0G40k.pth", + interactive=True, + ) + pretrained_D15 = gr.Textbox( + label=i18n("加载预训练底模D路径"), + value="pretrained_v2/f0D40k.pth", + interactive=True, + ) + sr2.change( + change_sr2, + [sr2, if_f0_3, version19], + [pretrained_G14, pretrained_D15], + ) + version19.change( + change_version19, + [sr2, if_f0_3, version19], + [pretrained_G14, pretrained_D15, sr2], + ) + if_f0_3.change( + change_f0, + [if_f0_3, sr2, version19], + [f0method8, pretrained_G14, pretrained_D15], + ) + gpus16 = gr.Textbox( + label=i18n("以-分隔输入使用的卡号, 例如 0-1-2 使用卡0和卡1和卡2"), + value=gpus, + interactive=True, + ) + but3 = gr.Button(i18n("训练模型"), variant="primary") + but4 = gr.Button(i18n("训练特征索引"), variant="primary") + but5 = gr.Button(i18n("一键训练"), variant="primary") + info3 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=10) + but3.click( + click_train, + [ + exp_dir1, + sr2, + if_f0_3, + spk_id5, + save_epoch10, + total_epoch11, + batch_size12, + if_save_latest13, + pretrained_G14, + pretrained_D15, + gpus16, + if_cache_gpu17, + if_save_every_weights18, + version19, + ], + info3, + api_name="train_start", + ) + but4.click(train_index, [exp_dir1, version19], info3) + but5.click( + train1key, + [ + exp_dir1, + sr2, + if_f0_3, + trainset_dir4, + spk_id5, + np7, + f0method8, + save_epoch10, + total_epoch11, + batch_size12, + if_save_latest13, + pretrained_G14, + pretrained_D15, + gpus16, + if_cache_gpu17, + if_save_every_weights18, + version19, + gpus_rmvpe, + ], + info3, + api_name="train_start_all", + ) + + with gr.TabItem(i18n("ckpt处理")): + with gr.Group(): + gr.Markdown(value=i18n("模型融合, 可用于测试音色融合")) + with gr.Row(): + ckpt_a = gr.Textbox(label=i18n("A模型路径"), value="", interactive=True) + ckpt_b = gr.Textbox(label=i18n("B模型路径"), value="", interactive=True) + alpha_a = gr.Slider( + minimum=0, + maximum=1, + label=i18n("A模型权重"), + value=0.5, + interactive=True, + ) + with gr.Row(): + sr_ = gr.Radio( + label=i18n("目标采样率"), + choices=["40k", "48k"], + value="40k", + interactive=True, + ) + if_f0_ = gr.Radio( + label=i18n("模型是否带音高指导"), + choices=[i18n("是"), i18n("否")], + value=i18n("是"), + interactive=True, + ) + info__ = gr.Textbox( + label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True + ) + name_to_save0 = gr.Textbox( + label=i18n("保存的模型名不带后缀"), + value="", + max_lines=1, + interactive=True, + ) + version_2 = gr.Radio( + label=i18n("模型版本型号"), + choices=["v1", "v2"], + value="v1", + interactive=True, + ) + with gr.Row(): + but6 = gr.Button(i18n("融合"), variant="primary") + info4 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) + but6.click( + merge, + [ + ckpt_a, + ckpt_b, + alpha_a, + sr_, + if_f0_, + info__, + name_to_save0, + version_2, + ], + info4, + api_name="ckpt_merge", + ) # def merge(path1,path2,alpha1,sr,f0,info): + with gr.Group(): + gr.Markdown(value=i18n("修改模型信息(仅支持weights文件夹下提取的小模型文件)")) + with gr.Row(): + ckpt_path0 = gr.Textbox( + label=i18n("模型路径"), value="", interactive=True + ) + info_ = gr.Textbox( + label=i18n("要改的模型信息"), value="", max_lines=8, interactive=True + ) + name_to_save1 = gr.Textbox( + label=i18n("保存的文件名, 默认空为和源文件同名"), + value="", + max_lines=8, + interactive=True, + ) + with gr.Row(): + but7 
= gr.Button(i18n("修改"), variant="primary") + info5 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) + but7.click( + change_info, + [ckpt_path0, info_, name_to_save1], + info5, + api_name="ckpt_modify", + ) + with gr.Group(): + gr.Markdown(value=i18n("查看模型信息(仅支持weights文件夹下提取的小模型文件)")) + with gr.Row(): + ckpt_path1 = gr.Textbox( + label=i18n("模型路径"), value="", interactive=True + ) + but8 = gr.Button(i18n("查看"), variant="primary") + info6 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) + but8.click(show_info, [ckpt_path1], info6, api_name="ckpt_show") + with gr.Group(): + gr.Markdown( + value=i18n( + "模型提取(输入logs文件夹下大文件模型路径),适用于训一半不想训了模型没有自动提取保存小文件模型,或者想测试中间模型的情况" + ) + ) + with gr.Row(): + ckpt_path2 = gr.Textbox( + label=i18n("模型路径"), + value="E:\\codes\\py39\\logs\\mi-test_f0_48k\\G_23333.pth", + interactive=True, + ) + save_name = gr.Textbox( + label=i18n("保存名"), value="", interactive=True + ) + sr__ = gr.Radio( + label=i18n("目标采样率"), + choices=["32k", "40k", "48k"], + value="40k", + interactive=True, + ) + if_f0__ = gr.Radio( + label=i18n("模型是否带音高指导,1是0否"), + choices=["1", "0"], + value="1", + interactive=True, + ) + version_1 = gr.Radio( + label=i18n("模型版本型号"), + choices=["v1", "v2"], + value="v2", + interactive=True, + ) + info___ = gr.Textbox( + label=i18n("要置入的模型信息"), value="", max_lines=8, interactive=True + ) + but9 = gr.Button(i18n("提取"), variant="primary") + info7 = gr.Textbox(label=i18n("输出信息"), value="", max_lines=8) + ckpt_path2.change( + change_info_, [ckpt_path2], [sr__, if_f0__, version_1] + ) + but9.click( + extract_small_model, + [ckpt_path2, save_name, sr__, if_f0__, info___, version_1], + info7, + api_name="ckpt_extract", + ) + + with gr.TabItem(i18n("Onnx导出")): + with gr.Row(): + ckpt_dir = gr.Textbox(label=i18n("RVC模型路径"), value="", interactive=True) + with gr.Row(): + onnx_dir = gr.Textbox( + label=i18n("Onnx输出路径"), value="", interactive=True + ) + with gr.Row(): + infoOnnx = gr.Label(label="info") + with gr.Row(): + butOnnx = gr.Button(i18n("导出Onnx模型"), variant="primary") + butOnnx.click( + export_onnx, [ckpt_dir, onnx_dir], infoOnnx, api_name="export_onnx" + ) + + tab_faq = i18n("常见问题解答") + with gr.TabItem(tab_faq): + try: + if tab_faq == "常见问题解答": + with open("docs/faq.md", "r", encoding="utf8") as f: + info = f.read() + else: + with open("docs/faq_en.md", "r", encoding="utf8") as f: + info = f.read() + gr.Markdown(value=info) + except: + gr.Markdown(traceback.format_exc()) + + # with gr.TabItem(i18n("招募音高曲线前端编辑器")): + # gr.Markdown(value=i18n("加开发群联系我xxxxx")) + # with gr.TabItem(i18n("点击查看交流、问题反馈群号")): + # gr.Markdown(value=i18n("xxxxx")) + + if config.iscolab: + app.queue(concurrency_count=511, max_size=1022).launch(share=True) + else: + app.queue(concurrency_count=511, max_size=1022).launch( + server_name="0.0.0.0", + inbrowser=not config.noautoopen, + server_port=config.listen_port, + quiet=True, + ) diff --git a/AIMeiSheng/infer_batch_rvc.py b/AIMeiSheng/infer_batch_rvc.py new file mode 100644 index 0000000..604853f --- /dev/null +++ b/AIMeiSheng/infer_batch_rvc.py @@ -0,0 +1,215 @@ +""" +v1 +runtime\python.exe myinfer-v2-0528.py 0 "E:\codes\py39\RVC-beta\todo-songs" "E:\codes\py39\logs\mi-test\added_IVF677_Flat_nprobe_7.index" harvest "E:\codes\py39\RVC-beta\output" "E:\codes\py39\test-20230416b\weights\mi-test.pth" 0.66 cuda:0 True 3 0 1 0.33 +v2 +runtime\python.exe myinfer-v2-0528.py 0 "E:\codes\py39\RVC-beta\todo-songs" "E:\codes\py39\test-20230416b\logs\mi-test-v2\aadded_IVF677_Flat_nprobe_1_v2.index" harvest 
"E:\codes\py39\RVC-beta\output_v2" "E:\codes\py39\test-20230416b\weights\mi-test-v2.pth" 0.66 cuda:0 True 3 0 1 0.33 +""" +import os, sys, pdb, torch + +now_dir = os.getcwd() +sys.path.append(now_dir) +import sys +import torch +import tqdm as tq +from multiprocessing import cpu_count + + +class Config: + def __init__(self, device, is_half): + self.device = device + self.is_half = is_half + self.n_cpu = 0 + self.gpu_name = None + self.gpu_mem = None + self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() + + def device_config(self) -> tuple: + if torch.cuda.is_available(): + i_device = int(self.device.split(":")[-1]) + self.gpu_name = torch.cuda.get_device_name(i_device) + if ( + ("16" in self.gpu_name and "V100" not in self.gpu_name.upper()) + or "P40" in self.gpu_name.upper() + or "1060" in self.gpu_name + or "1070" in self.gpu_name + or "1080" in self.gpu_name + ): + print("16系/10系显卡和P40强制单精度") + self.is_half = False + for config_file in ["32k.json", "40k.json", "48k.json"]: + with open(f"configs/{config_file}", "r") as f: + strr = f.read().replace("true", "false") + with open(f"configs/{config_file}", "w") as f: + f.write(strr) + with open("trainset_preprocess_pipeline_print.py", "r") as f: + strr = f.read().replace("3.7", "3.0") + with open("trainset_preprocess_pipeline_print.py", "w") as f: + f.write(strr) + else: + self.gpu_name = None + self.gpu_mem = int( + torch.cuda.get_device_properties(i_device).total_memory + / 1024 + / 1024 + / 1024 + + 0.4 + ) + if self.gpu_mem <= 4: + with open("trainset_preprocess_pipeline_print.py", "r") as f: + strr = f.read().replace("3.7", "3.0") + with open("trainset_preprocess_pipeline_print.py", "w") as f: + f.write(strr) + elif torch.backends.mps.is_available(): + print("没有发现支持的N卡, 使用MPS进行推理") + self.device = "mps" + else: + print("没有发现支持的N卡, 使用CPU进行推理") + self.device = "cpu" + self.is_half = True + + if self.n_cpu == 0: + self.n_cpu = cpu_count() + + if self.is_half: + # 6G显存配置 + x_pad = 3 + x_query = 10 + x_center = 60 + x_max = 65 + else: + # 5G显存配置 + x_pad = 1 + x_query = 6 + x_center = 38 + x_max = 41 + + if self.gpu_mem != None and self.gpu_mem <= 4: + x_pad = 1 + x_query = 5 + x_center = 30 + x_max = 32 + + return x_pad, x_query, x_center, x_max + + +f0up_key = sys.argv[1] +input_path = sys.argv[2] +index_path = sys.argv[3] +f0method = sys.argv[4] # harvest or pm +opt_path = sys.argv[5] +model_path = sys.argv[6] +index_rate = float(sys.argv[7]) +device = sys.argv[8] +is_half = sys.argv[9].lower() != "false" +filter_radius = int(sys.argv[10]) +resample_sr = int(sys.argv[11]) +rms_mix_rate = float(sys.argv[12]) +protect = float(sys.argv[13]) +print(sys.argv) +config = Config(device, is_half) +now_dir = os.getcwd() +sys.path.append(now_dir) +from vc_infer_pipeline import VC +from lib.infer_pack.models import ( + SynthesizerTrnMs256NSFsid, + SynthesizerTrnMs256NSFsid_nono, + SynthesizerTrnMs768NSFsid, + SynthesizerTrnMs768NSFsid_nono, +) +from lib.audio import load_audio +from fairseq import checkpoint_utils +from scipy.io import wavfile + +hubert_model = None + + +def load_hubert(): + global hubert_model + models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task( + ["hubert_base.pt"], + suffix="", + ) + hubert_model = models[0] + hubert_model = hubert_model.to(device) + if is_half: + hubert_model = hubert_model.half() + else: + hubert_model = hubert_model.float() + hubert_model.eval() + + +def vc_single(sid, input_audio, f0_up_key, f0_file, f0_method, file_index, index_rate): + global tgt_sr, net_g, vc, 
hubert_model, version + if input_audio is None: + return "You need to upload an audio", None + f0_up_key = int(f0_up_key) + audio = load_audio(input_audio, 16000) + times = [0, 0, 0] + if hubert_model == None: + load_hubert() + if_f0 = cpt.get("f0", 1) + # audio_opt=vc.pipeline(hubert_model,net_g,sid,audio,times,f0_up_key,f0_method,file_index,file_big_npy,index_rate,if_f0,f0_file=f0_file) + audio_opt = vc.pipeline( + hubert_model, + net_g, + sid, + audio, + input_audio, + times, + f0_up_key, + f0_method, + file_index, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + version, + protect, + f0_file=f0_file, + ) + print(times) + return audio_opt + + +def get_vc(model_path): + global n_spk, tgt_sr, net_g, vc, cpt, device, is_half, version + print("loading pth %s" % model_path) + cpt = torch.load(model_path, map_location="cpu") + tgt_sr = cpt["config"][-1] + cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk + if_f0 = cpt.get("f0", 1) + version = cpt.get("version", "v1") + if version == "v1": + if if_f0 == 1: + net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half) + else: + net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) + elif version == "v2": + if if_f0 == 1: # + net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=is_half) + else: + net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) + del net_g.enc_q + print(net_g.load_state_dict(cpt["weight"], strict=False)) # 不加这一行清不干净,真奇葩 + net_g.eval().to(device) + if is_half: + net_g = net_g.half() + else: + net_g = net_g.float() + vc = VC(tgt_sr, config) + n_spk = cpt["config"][-3] + # return {"visible": True,"maximum": n_spk, "__type__": "update"} + + +get_vc(model_path) +audios = os.listdir(input_path) +for file in tq.tqdm(audios): + if file.endswith(".wav"): + file_path = input_path + "/" + file + wav_opt = vc_single( + 0, file_path, f0up_key, None, f0method, index_path, index_rate + ) + out_path = opt_path + "/" + file + wavfile.write(out_path, tgt_sr, wav_opt) diff --git a/AIMeiSheng/infer_cli.py b/AIMeiSheng/infer_cli.py new file mode 100644 index 0000000..63170ae --- /dev/null +++ b/AIMeiSheng/infer_cli.py @@ -0,0 +1,272 @@ +from scipy.io import wavfile +from fairseq import checkpoint_utils +from lib.audio import load_audio +from lib.infer_pack.models import ( + SynthesizerTrnMs256NSFsid, + SynthesizerTrnMs256NSFsid_nono, + SynthesizerTrnMs768NSFsid, + SynthesizerTrnMs768NSFsid_nono, +) +from vc_infer_pipeline import VC +from multiprocessing import cpu_count +import numpy as np +import torch +import sys +import glob +import argparse +import os +import sys +import pdb +import torch + +now_dir = os.getcwd() +sys.path.append(now_dir) + +#### +# USAGE +# +# In your Terminal or CMD or whatever +# python infer_cli.py [TRANSPOSE_VALUE] "[INPUT_PATH]" "[OUTPUT_PATH]" "[MODEL_PATH]" "[INDEX_FILE_PATH]" "[INFERENCE_DEVICE]" "[METHOD]" + +using_cli = False +device = "cuda:0" +is_half = False + +if len(sys.argv) > 0: + f0_up_key = int(sys.argv[1]) # transpose value + input_path = sys.argv[2] + output_path = sys.argv[3] + model_path = sys.argv[4] + file_index = sys.argv[5] # .index file + device = sys.argv[6] + f0_method = sys.argv[7] # pm or harvest or crepe + + using_cli = True + + # file_index2=sys.argv[8] + # index_rate=float(sys.argv[10]) #search feature ratio + # filter_radius=float(sys.argv[11]) #median filter + # resample_sr=float(sys.argv[12]) #resample audio in post processing + # rms_mix_rate=float(sys.argv[13]) #search feature + print(sys.argv) + + +class Config: + def 
__init__(self, device, is_half): + self.device = device + self.is_half = is_half + self.n_cpu = 0 + self.gpu_name = None + self.gpu_mem = None + self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config() + + def device_config(self) -> tuple: + if torch.cuda.is_available() and device != "cpu": + i_device = int(self.device.split(":")[-1]) + self.gpu_name = torch.cuda.get_device_name(i_device) + if ( + ("16" in self.gpu_name and "V100" not in self.gpu_name.upper()) + or "P40" in self.gpu_name.upper() + or "1060" in self.gpu_name + or "1070" in self.gpu_name + or "1080" in self.gpu_name + ): + print("16系/10系显卡和P40强制单精度") + self.is_half = False + for config_file in ["32k.json", "40k.json", "48k.json"]: + with open(f"configs/{config_file}", "r") as f: + strr = f.read().replace("true", "false") + with open(f"configs/{config_file}", "w") as f: + f.write(strr) + with open("trainset_preprocess_pipeline_print.py", "r") as f: + strr = f.read().replace("3.7", "3.0") + with open("trainset_preprocess_pipeline_print.py", "w") as f: + f.write(strr) + else: + self.gpu_name = None + self.gpu_mem = int( + torch.cuda.get_device_properties(i_device).total_memory + / 1024 + / 1024 + / 1024 + + 0.4 + ) + if self.gpu_mem <= 4: + with open("trainset_preprocess_pipeline_print.py", "r") as f: + strr = f.read().replace("3.7", "3.0") + with open("trainset_preprocess_pipeline_print.py", "w") as f: + f.write(strr) + elif torch.backends.mps.is_available(): + print("没有发现支持的N卡, 使用MPS进行推理") + self.device = "mps" + else: + print("没有发现支持的N卡, 使用CPU进行推理") + self.device = "cpu" + self.is_half = False + + if self.n_cpu == 0: + self.n_cpu = cpu_count() + + if self.is_half: + # 6G显存配置 + x_pad = 3 + x_query = 10 + x_center = 60 + x_max = 65 + else: + # 5G显存配置 + x_pad = 1 + x_query = 6 + x_center = 38 + x_max = 41 + + if self.gpu_mem != None and self.gpu_mem <= 4: + x_pad = 1 + x_query = 5 + x_center = 30 + x_max = 32 + + return x_pad, x_query, x_center, x_max + + +config = Config(device, is_half) +now_dir = os.getcwd() +sys.path.append(now_dir) + +hubert_model = None + + +def load_hubert(): + global hubert_model + models, _, _ = checkpoint_utils.load_model_ensemble_and_task( + ["hubert_base.pt"], + suffix="", + ) + hubert_model = models[0] + hubert_model = hubert_model.to(config.device) + if config.is_half: + hubert_model = hubert_model.half() + else: + hubert_model = hubert_model.float() + hubert_model.eval() + + +def vc_single( + sid=0, + input_audio_path=None, + f0_up_key=0, + f0_file=None, + f0_method="pm", + file_index="", # .index file + file_index2="", + # file_big_npy, + index_rate=1.0, + filter_radius=3, + resample_sr=0, + rms_mix_rate=1.0, + model_path="", + output_path="", + protect=0.33, +): + global tgt_sr, net_g, vc, hubert_model, version + get_vc(model_path) + if input_audio_path is None: + return "You need to upload an audio file", None + + f0_up_key = int(f0_up_key) + audio = load_audio(input_audio_path, 16000) + audio_max = np.abs(audio).max() / 0.95 + + if audio_max > 1: + audio /= audio_max + times = [0, 0, 0] + + if hubert_model == None: + load_hubert() + + if_f0 = cpt.get("f0", 1) + + file_index = ( + ( + file_index.strip(" ") + .strip('"') + .strip("\n") + .strip('"') + .strip(" ") + .replace("trained", "added") + ) + if file_index != "" + else file_index2 + ) + + audio_opt = vc.pipeline( + hubert_model, + net_g, + sid, + audio, + input_audio_path, + times, + f0_up_key, + f0_method, + file_index, + # file_big_npy, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + 
version, + f0_file=f0_file, + protect=protect, + ) + wavfile.write(output_path, tgt_sr, audio_opt) + return "processed" + + +def get_vc(model_path): + global n_spk, tgt_sr, net_g, vc, cpt, device, is_half, version + print("loading pth %s" % model_path) + cpt = torch.load(model_path, map_location="cpu") + tgt_sr = cpt["config"][-1] + cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk + if_f0 = cpt.get("f0", 1) + version = cpt.get("version", "v1") + if version == "v1": + if if_f0 == 1: + net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half) + else: + net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) + elif version == "v2": + if if_f0 == 1: + net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=is_half) + else: + net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) + del net_g.enc_q + print(net_g.load_state_dict(cpt["weight"], strict=False)) + net_g.eval().to(device) + if is_half: + net_g = net_g.half() + else: + net_g = net_g.float() + vc = VC(tgt_sr, config) + n_spk = cpt["config"][-3] + # return {"visible": True,"maximum": n_spk, "__type__": "update"} + + +if using_cli: + vc_single( + sid=0, + input_audio_path=input_path, + f0_up_key=f0_up_key, + f0_file=None, + f0_method=f0_method, + file_index=file_index, + file_index2="", + index_rate=1, + filter_radius=3, + resample_sr=0, + rms_mix_rate=0, + model_path=model_path, + output_path=output_path, + ) diff --git a/AIMeiSheng/infer_uvr5.py b/AIMeiSheng/infer_uvr5.py new file mode 100644 index 0000000..0ffdb5d --- /dev/null +++ b/AIMeiSheng/infer_uvr5.py @@ -0,0 +1,363 @@ +import os, sys, torch, warnings, pdb + +now_dir = os.getcwd() +sys.path.append(now_dir) +from json import load as ll + +warnings.filterwarnings("ignore") +import librosa +import importlib +import numpy as np +import hashlib, math +from tqdm import tqdm +from lib.uvr5_pack.lib_v5 import spec_utils +from lib.uvr5_pack.utils import _get_name_params, inference +from lib.uvr5_pack.lib_v5.model_param_init import ModelParameters +import soundfile as sf +from lib.uvr5_pack.lib_v5.nets_new import CascadedNet +from lib.uvr5_pack.lib_v5 import nets_61968KB as nets + + +class _audio_pre_: + def __init__(self, agg, model_path, device, is_half): + self.model_path = model_path + self.device = device + self.data = { + # Processing Options + "postprocess": False, + "tta": False, + # Constants + "window_size": 512, + "agg": agg, + "high_end_process": "mirroring", + } + mp = ModelParameters("lib/uvr5_pack/lib_v5/modelparams/4band_v2.json") + model = nets.CascadedASPPNet(mp.param["bins"] * 2) + cpk = torch.load(model_path, map_location="cpu") + model.load_state_dict(cpk) + model.eval() + if is_half: + model = model.half().to(device) + else: + model = model.to(device) + + self.mp = mp + self.model = model + + def _path_audio_(self, music_file, ins_root=None, vocal_root=None, format="flac"): + if ins_root is None and vocal_root is None: + return "No save root." 
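Stepping back to the CLI entry point in infer_cli.py above: it reads seven positional sys.argv values with no validation, so a wrong argument order only fails deep inside inference. A minimal argparse equivalent, as a sketch only; option names and help strings are illustrative, not part of the original script:

```python
# Hedged argparse sketch mirroring the positional arguments of infer_cli.py;
# names and help text are assumptions, not the script's actual interface.
import argparse

def parse_cli_args():
    p = argparse.ArgumentParser(description="RVC single-file inference (sketch)")
    p.add_argument("f0_up_key", type=int, help="transpose value in semitones")
    p.add_argument("input_path", help="input audio file")
    p.add_argument("output_path", help="where to write the converted wav")
    p.add_argument("model_path", help=".pth voice model")
    p.add_argument("file_index", help="faiss .index file")
    p.add_argument("device", help='e.g. "cuda:0" or "cpu"')
    p.add_argument("f0_method", choices=["pm", "harvest", "crepe"])
    return p.parse_args()
```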
+ name = os.path.basename(music_file) + if ins_root is not None: + os.makedirs(ins_root, exist_ok=True) + if vocal_root is not None: + os.makedirs(vocal_root, exist_ok=True) + X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {} + bands_n = len(self.mp.param["band"]) + # print(bands_n) + for d in range(bands_n, 0, -1): + bp = self.mp.param["band"][d] + if d == bands_n: # high-end band + ( + X_wave[d], + _, + ) = librosa.core.load( # 理论上librosa读取可能对某些音频有bug,应该上ffmpeg读取,但是太麻烦了弃坑 + music_file, + bp["sr"], + False, + dtype=np.float32, + res_type=bp["res_type"], + ) + if X_wave[d].ndim == 1: + X_wave[d] = np.asfortranarray([X_wave[d], X_wave[d]]) + else: # lower bands + X_wave[d] = librosa.core.resample( + X_wave[d + 1], + self.mp.param["band"][d + 1]["sr"], + bp["sr"], + res_type=bp["res_type"], + ) + # Stft of wave source + X_spec_s[d] = spec_utils.wave_to_spectrogram_mt( + X_wave[d], + bp["hl"], + bp["n_fft"], + self.mp.param["mid_side"], + self.mp.param["mid_side_b2"], + self.mp.param["reverse"], + ) + # pdb.set_trace() + if d == bands_n and self.data["high_end_process"] != "none": + input_high_end_h = (bp["n_fft"] // 2 - bp["crop_stop"]) + ( + self.mp.param["pre_filter_stop"] - self.mp.param["pre_filter_start"] + ) + input_high_end = X_spec_s[d][ + :, bp["n_fft"] // 2 - input_high_end_h : bp["n_fft"] // 2, : + ] + + X_spec_m = spec_utils.combine_spectrograms(X_spec_s, self.mp) + aggresive_set = float(self.data["agg"] / 100) + aggressiveness = { + "value": aggresive_set, + "split_bin": self.mp.param["band"][1]["crop_stop"], + } + with torch.no_grad(): + pred, X_mag, X_phase = inference( + X_spec_m, self.device, self.model, aggressiveness, self.data + ) + # Postprocess + if self.data["postprocess"]: + pred_inv = np.clip(X_mag - pred, 0, np.inf) + pred = spec_utils.mask_silence(pred, pred_inv) + y_spec_m = pred * X_phase + v_spec_m = X_spec_m - y_spec_m + + if ins_root is not None: + if self.data["high_end_process"].startswith("mirroring"): + input_high_end_ = spec_utils.mirroring( + self.data["high_end_process"], y_spec_m, input_high_end, self.mp + ) + wav_instrument = spec_utils.cmb_spectrogram_to_wave( + y_spec_m, self.mp, input_high_end_h, input_high_end_ + ) + else: + wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp) + print("%s instruments done" % name) + if format in ["wav", "flac"]: + sf.write( + os.path.join( + ins_root, + "instrument_{}_{}.{}".format(name, self.data["agg"], format), + ), + (np.array(wav_instrument) * 32768).astype("int16"), + self.mp.param["sr"], + ) # + else: + path = os.path.join( + ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"]) + ) + sf.write( + path, + (np.array(wav_instrument) * 32768).astype("int16"), + self.mp.param["sr"], + ) + if os.path.exists(path): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4] + ".%s" % format) + ) + if vocal_root is not None: + if self.data["high_end_process"].startswith("mirroring"): + input_high_end_ = spec_utils.mirroring( + self.data["high_end_process"], v_spec_m, input_high_end, self.mp + ) + wav_vocals = spec_utils.cmb_spectrogram_to_wave( + v_spec_m, self.mp, input_high_end_h, input_high_end_ + ) + else: + wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp) + print("%s vocals done" % name) + if format in ["wav", "flac"]: + sf.write( + os.path.join( + vocal_root, + "vocal_{}_{}.{}".format(name, self.data["agg"], format), + ), + (np.array(wav_vocals) * 32768).astype("int16"), + self.mp.param["sr"], + ) + else: + path = os.path.join( + vocal_root, 
"vocal_{}_{}.wav".format(name, self.data["agg"]) + ) + sf.write( + path, + (np.array(wav_vocals) * 32768).astype("int16"), + self.mp.param["sr"], + ) + if os.path.exists(path): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4] + ".%s" % format) + ) + + +class _audio_pre_new: + def __init__(self, agg, model_path, device, is_half): + self.model_path = model_path + self.device = device + self.data = { + # Processing Options + "postprocess": False, + "tta": False, + # Constants + "window_size": 512, + "agg": agg, + "high_end_process": "mirroring", + } + mp = ModelParameters("lib/uvr5_pack/lib_v5/modelparams/4band_v3.json") + nout = 64 if "DeReverb" in model_path else 48 + model = CascadedNet(mp.param["bins"] * 2, nout) + cpk = torch.load(model_path, map_location="cpu") + model.load_state_dict(cpk) + model.eval() + if is_half: + model = model.half().to(device) + else: + model = model.to(device) + + self.mp = mp + self.model = model + + def _path_audio_( + self, music_file, vocal_root=None, ins_root=None, format="flac" + ): # 3个VR模型vocal和ins是反的 + if ins_root is None and vocal_root is None: + return "No save root." + name = os.path.basename(music_file) + if ins_root is not None: + os.makedirs(ins_root, exist_ok=True) + if vocal_root is not None: + os.makedirs(vocal_root, exist_ok=True) + X_wave, y_wave, X_spec_s, y_spec_s = {}, {}, {}, {} + bands_n = len(self.mp.param["band"]) + # print(bands_n) + for d in range(bands_n, 0, -1): + bp = self.mp.param["band"][d] + if d == bands_n: # high-end band + ( + X_wave[d], + _, + ) = librosa.core.load( # 理论上librosa读取可能对某些音频有bug,应该上ffmpeg读取,但是太麻烦了弃坑 + music_file, + bp["sr"], + False, + dtype=np.float32, + res_type=bp["res_type"], + ) + if X_wave[d].ndim == 1: + X_wave[d] = np.asfortranarray([X_wave[d], X_wave[d]]) + else: # lower bands + X_wave[d] = librosa.core.resample( + X_wave[d + 1], + self.mp.param["band"][d + 1]["sr"], + bp["sr"], + res_type=bp["res_type"], + ) + # Stft of wave source + X_spec_s[d] = spec_utils.wave_to_spectrogram_mt( + X_wave[d], + bp["hl"], + bp["n_fft"], + self.mp.param["mid_side"], + self.mp.param["mid_side_b2"], + self.mp.param["reverse"], + ) + # pdb.set_trace() + if d == bands_n and self.data["high_end_process"] != "none": + input_high_end_h = (bp["n_fft"] // 2 - bp["crop_stop"]) + ( + self.mp.param["pre_filter_stop"] - self.mp.param["pre_filter_start"] + ) + input_high_end = X_spec_s[d][ + :, bp["n_fft"] // 2 - input_high_end_h : bp["n_fft"] // 2, : + ] + + X_spec_m = spec_utils.combine_spectrograms(X_spec_s, self.mp) + aggresive_set = float(self.data["agg"] / 100) + aggressiveness = { + "value": aggresive_set, + "split_bin": self.mp.param["band"][1]["crop_stop"], + } + with torch.no_grad(): + pred, X_mag, X_phase = inference( + X_spec_m, self.device, self.model, aggressiveness, self.data + ) + # Postprocess + if self.data["postprocess"]: + pred_inv = np.clip(X_mag - pred, 0, np.inf) + pred = spec_utils.mask_silence(pred, pred_inv) + y_spec_m = pred * X_phase + v_spec_m = X_spec_m - y_spec_m + + if ins_root is not None: + if self.data["high_end_process"].startswith("mirroring"): + input_high_end_ = spec_utils.mirroring( + self.data["high_end_process"], y_spec_m, input_high_end, self.mp + ) + wav_instrument = spec_utils.cmb_spectrogram_to_wave( + y_spec_m, self.mp, input_high_end_h, input_high_end_ + ) + else: + wav_instrument = spec_utils.cmb_spectrogram_to_wave(y_spec_m, self.mp) + print("%s instruments done" % name) + if format in ["wav", "flac"]: + sf.write( + os.path.join( + ins_root, + 
"instrument_{}_{}.{}".format(name, self.data["agg"], format), + ), + (np.array(wav_instrument) * 32768).astype("int16"), + self.mp.param["sr"], + ) # + else: + path = os.path.join( + ins_root, "instrument_{}_{}.wav".format(name, self.data["agg"]) + ) + sf.write( + path, + (np.array(wav_instrument) * 32768).astype("int16"), + self.mp.param["sr"], + ) + if os.path.exists(path): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4] + ".%s" % format) + ) + if vocal_root is not None: + if self.data["high_end_process"].startswith("mirroring"): + input_high_end_ = spec_utils.mirroring( + self.data["high_end_process"], v_spec_m, input_high_end, self.mp + ) + wav_vocals = spec_utils.cmb_spectrogram_to_wave( + v_spec_m, self.mp, input_high_end_h, input_high_end_ + ) + else: + wav_vocals = spec_utils.cmb_spectrogram_to_wave(v_spec_m, self.mp) + print("%s vocals done" % name) + if format in ["wav", "flac"]: + sf.write( + os.path.join( + vocal_root, + "vocal_{}_{}.{}".format(name, self.data["agg"], format), + ), + (np.array(wav_vocals) * 32768).astype("int16"), + self.mp.param["sr"], + ) + else: + path = os.path.join( + vocal_root, "vocal_{}_{}.wav".format(name, self.data["agg"]) + ) + sf.write( + path, + (np.array(wav_vocals) * 32768).astype("int16"), + self.mp.param["sr"], + ) + if os.path.exists(path): + os.system( + "ffmpeg -i %s -vn %s -q:a 2 -y" + % (path, path[:-4] + ".%s" % format) + ) + + +if __name__ == "__main__": + device = "cuda" + is_half = True + # model_path = "uvr5_weights/2_HP-UVR.pth" + # model_path = "uvr5_weights/VR-DeEchoDeReverb.pth" + # model_path = "uvr5_weights/VR-DeEchoNormal.pth" + model_path = "uvr5_weights/DeEchoNormal.pth" + # pre_fun = _audio_pre_(model_path=model_path, device=device, is_half=True,agg=10) + pre_fun = _audio_pre_new(model_path=model_path, device=device, is_half=True, agg=10) + audio_path = "雪雪伴奏对消HP5.wav" + save_path = "opt" + pre_fun._path_audio_(audio_path, save_path, save_path) diff --git a/AIMeiSheng/losses.py b/AIMeiSheng/losses.py new file mode 100644 index 0000000..dc087c1 --- /dev/null +++ b/AIMeiSheng/losses.py @@ -0,0 +1,64 @@ +import torch + + +def feature_loss(fmap_r, fmap_g): + loss = 0 + for dr, dg in zip(fmap_r, fmap_g): + for rl, gl in zip(dr, dg): + rl = rl.float().detach() + gl = gl.float() + loss += torch.mean(torch.abs(rl - gl)) + + return loss * 2 + + +def discriminator_loss(disc_real_outputs, disc_generated_outputs): + loss = 0 + r_losses = [] + g_losses = [] + for dr, dg in zip(disc_real_outputs, disc_generated_outputs): + dr = dr.float() + dg = dg.float() + r_loss = torch.mean((1 - dr) ** 2) + g_loss = torch.mean(dg**2) + loss += r_loss + g_loss + r_losses.append(r_loss.item()) + g_losses.append(g_loss.item()) + + return loss, r_losses, g_losses + + +def generator_loss(disc_outputs): + loss = 0 + gen_losses = [] + for dg in disc_outputs: + dg = dg.float() + l = torch.mean((1 - dg) ** 2) + gen_losses.append(l) + loss += l + + return loss, gen_losses + + +def kl_loss(z_p, logs_q, m_p, logs_p, z_mask): + """ + z_p, logs_q: [b, h, t_t] + m_p, logs_p: [b, h, t_t] + """ + z_p = z_p.float() + logs_q = logs_q.float() + m_p = m_p.float() + logs_p = logs_p.float() + z_mask = z_mask.float() + + kl = logs_p - logs_q - 0.5 + kl += 0.5 * ((z_p - m_p) ** 2) * torch.exp(-2.0 * logs_p) + kl = torch.sum(kl * z_mask) + l = kl / torch.sum(z_mask) + #if torch.isnan(l).any(): + # l = torch.zeros_like(l) + #l = kl / torch.clip(torch.sum(z_mask),1e-10) + #l = torch.clip(l,-9,9) + loss_kl = l + loss_kl = torch.where(torch.isnan(loss_kl), 
torch.full_like(loss_kl, 1e-10), loss_kl) + return loss_kl diff --git a/AIMeiSheng/meisheng_svc.py b/AIMeiSheng/meisheng_svc.py new file mode 100644 index 0000000..7201a51 --- /dev/null +++ b/AIMeiSheng/meisheng_svc.py @@ -0,0 +1,166 @@ + + +import os,sys +import time +import shutil +import glob +import hashlib +import librosa +import soundfile +import gradio as gr + +gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer" ##混音执行文件 + +song_folder="../data_meisheng/" ##song folder +gs_work_dir = "../data_meisheng/tmp" #工作空间 +gs_out_audio_dir = "../data_meisheng/tmp_svc" ##svc存储位置 +pth_model_path ="./weights/xusong_v2_org_version_aishellVctkYuanshen_embed1_enzx_diff_e21_s310947.pth" +#pth_model_path ="./weights/xusong_v2_org_version_aishellVctkYuanshen_embed1_enzx_diff_e46_s775146.pth" + +def mix(in_path, acc_path, dst_path): + # svc转码到442 + svc_442_file = in_path + "_442.wav" + st = time.time() + cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(in_path, svc_442_file) + os.system(cmd) + if not os.path.exists(svc_442_file): + return -1 + print("transcode,{},sp={}".format(in_path, time.time() - st)) + + # 混合 + st = time.time() + cmd = "{} {} {} {} 1".format(gs_simple_mixer_path, svc_442_file, acc_path, dst_path) + os.system(cmd) + print("mixer,{},sp={}".format(in_path, time.time() - st)) + +from cal_cos_distance_folder import load_and_cal_distance +from pitch_histogram_sever import pyin_process_single +from infererence_fang_meisheng import get_embed, get_embed_model + +from myinfer_multi_spk_embed_in_dec_diff_meisheng import svc_main,load_hubert, get_vc +def load_model(): + print("load embed_model...") + embed_model = get_embed_model() + print("load hubert_model...") + hubert_model = load_hubert() + get_vc(pth_model_path) + print("load finish") + return embed_model, hubert_model#,svc_model + +embed_model, hubert_model = load_model() ##提前加载模型 + + + +def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy): + + #embed_npy = target_wav[:-4] + '.npy' + ##计算pitch + f0up_key = pyin_process_single(target_wav) + print("@@f0up_key:",f0up_key) + ## get embed + get_embed(target_wav, embed_npy, embed_model) + print("@@get embed") + + #pth_model_path ="./weights/xusong_v2_org_version_aishellVctkYuanshen_embed1_enzx_diff_e21_s310947.pth" + print("@@@@song_wav:",song_wav) + print("@@@svc_out_path:",svc_out_path) + print("@@@@embed_npy:",embed_npy) + svc_main(song_wav,svc_out_path,pth_model_path,embed_npy,f0up_key,hubert_model) + print("svc main finished!!") + + svc_embed_npy = embed_npy[:-4] + '_svc.npy' + get_embed(svc_out_path, svc_embed_npy, embed_model) + similar =load_and_cal_distance(embed_npy,svc_embed_npy) + print("target_npy:",embed_npy,"svc_npy:",svc_embed_npy) + print("######similar:",similar.numpy()) + return + +def process_svc(song_wav, target_wav, svc_out_path): + embed_npy = target_wav[:-4] + '.npy' ##npy存储位置 + + abs_path = "/data/bingxiao.fang/voice_conversion/Retrieval-based-Voice-Conversion-WebUIx/content/data_meisheng/tmp/" + song_wav1, target_wav, svc_out_path = os.path.basename(song_wav), os.path.basename( + target_wav), os.path.basename(svc_out_path) + song_wav, target_wav, svc_out_path = song_wav, abs_path + target_wav, abs_path + svc_out_path + embed_npy = target_wav[:-4] + '.npy' ##npy存储位置 + + #cmd = f"sh meisheng_svc.sh {song_wav} {target_wav} {svc_out_path} {embed_npy}" + #print("cmd:", cmd) + #os.system(cmd) + print("@@@@@@@@@@@@@@@@@@@@@@@@") + meisheng_svc(song_wav,target_wav,svc_out_path,embed_npy) + + print("svc finished!!") + return + + +def 
get_svc(target_yinse_wav, gender, song_name): + ''' + :param target_yinse_wav: 目标音色 + :param gender: 性别选择 + :param song_name: 歌曲名字 + :return: svc路径名 + ''' + + ##清空临时路径 + if os.path.exists(gs_work_dir): + shutil.rmtree(gs_work_dir) + os.makedirs(gs_work_dir) + + + + ##目标音色读取 + f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav)) + print("dir :", f_dst,"target_yinse_wav:",target_yinse_wav) + #shutil.move(target_yinse_wav, f_dst) ##放在工作目录 + shutil.copy(target_yinse_wav, f_dst) + target_yinse_wav = f_dst + + # sr, data = train_audio_data + # data, sr = librosa.load(train_audio_data) + ## soundfile.write(train_audio_path, data, samplerate=sr, format='wav')###写入训练路径,名字train_audio_path + + ##歌曲/伴奏 读取 + song_wav = os.path.join("{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)) # 人声 + inf_acc_path = os.path.join("{}{}/{}/acc.wav".format(song_folder, gender, song_name)) # 伴奏 + svc_out_path = os.path.join(gs_work_dir, "svc.wav") ###svc结果名字 + # print("svc: {}".format(train_audio_path)) + print("svc out: {}".format(svc_out_path)) + + ##process + st = time.time() + + ##保存对应 vocal 到workspace + cmd = f"cp {song_wav} {gs_work_dir}" + os.system(cmd) + + + print("start inference...") + print("inputMsg:", song_wav, target_yinse_wav, svc_out_path) + process_svc(song_wav, target_yinse_wav, svc_out_path) + + ##加混响 + print("add reverbration...") + svc_out_path_effect = svc_out_path[:-4] + '_effect.wav' + cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}" + print("cmd :", cmd) + os.system(cmd) + + + # 人声伴奏合并 + print("add acc...") + out_path = svc_out_path_effect[:-4] + '_music.wav' + mix(svc_out_path_effect, inf_acc_path, out_path) + + print("time cost = {}".format(time.time() - st)) + + + return out_path + + + +if __name__=='__main__': + target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav" #需要完整路径 + song_name = "drivers_license" #"Levitating" ##路径会自动添加 + gender = 'female' + get_svc(target_yinse_wav, gender, song_name) diff --git a/AIMeiSheng/meisheng_svc_final.py b/AIMeiSheng/meisheng_svc_final.py new file mode 100644 index 0000000..6359fb9 --- /dev/null +++ b/AIMeiSheng/meisheng_svc_final.py @@ -0,0 +1,212 @@ + + +import os,sys +import time +import shutil +import glob +import hashlib +import librosa +import soundfile +import gradio as gr +import pandas as pd +import numpy as np +sys.path.append('./RawNet3/') +from infererence_fang_meisheng import get_embed, get_embed_model +from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main,load_hubert, get_vc, get_rmvpe +from gender_classify import load_gender_model + +gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer" ##混音执行文件 +tmp_workspace_name = "batch_test_ocean_fi"#工作空间名 +song_folder = "./data_meisheng/" ##song folder +gs_work_dir = f"./data_meisheng/{tmp_workspace_name}" #工作空间路径 +pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth" ##模型文件 + + +cur_dir = os.path.abspath(os.path.dirname(__file__)) +abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/' + + + +def mix(in_path, acc_path, dst_path): + # svc转码到442 + svc_442_file = in_path + "_442.wav" + st = time.time() + cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(in_path, svc_442_file) + os.system(cmd) + if not os.path.exists(svc_442_file): + return -1 + print("transcode,{},sp={}".format(in_path, time.time() - st)) + + # 混合 + st = time.time() + cmd = "{} {} {} {} 1".format(gs_simple_mixer_path, svc_442_file, acc_path, dst_path) + os.system(cmd) + 
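mix() shells out twice via os.system and only verifies the first step by checking that the 44.1 kHz file exists; the mixer call's exit status is never inspected. A sketch of the same flow with return codes checked, assuming only that the gs_simple_mixer_path binary takes the same four arguments used above:

```python
# Same transcode-then-mix flow with exit codes checked; subprocess.run
# replaces os.system, behavior is otherwise identical to mix() above.
import subprocess

def mix_checked(in_path: str, acc_path: str, dst_path: str) -> int:
    svc_442 = in_path + "_442.wav"
    r = subprocess.run(["ffmpeg", "-loglevel", "fatal", "-i", in_path,
                        "-ar", "44100", "-ac", "2", "-y", svc_442])
    if r.returncode != 0:
        return -1  # transcode failed
    r = subprocess.run([gs_simple_mixer_path, svc_442, acc_path, dst_path, "1"])
    return 0 if r.returncode == 0 else -1
```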
print("mixer,{},sp={}".format(in_path, time.time() - st)) + + +def load_model(): + global f0_method + embed_model = get_embed_model() + hubert_model = load_hubert() + get_vc(pth_model_path) + f0_method = get_rmvpe() + print("model preload finish!!!") + return embed_model, hubert_model#,svc_model + +embed_model, hubert_model = load_model() ##提前加载模型 +gender_model = load_gender_model() + +def pyin_process_single_rmvpe(input_file): + global f0_method + rate = 16000 #44100 + # 读取音频文件 + y, sr = librosa.load(input_file, sr=rate) + + len_s = len(y)/sr + lim_s = 15 #10 + if(len_s > lim_s): + y1 = y[:sr*lim_s] + y2 = y[-sr*lim_s:] + f0 = f0_method.infer_from_audio(y1, thred=0.03) + f0 = f0[f0 < 600] + valid_f0 = f0[f0 > 50] + mean_pitch1 = np.mean(valid_f0) + f0 = f0_method.infer_from_audio(y2, thred=0.03) + f0 = f0[f0 < 600] + valid_f0 = f0[f0 > 50] + mean_pitch2 = np.mean(valid_f0) + + if abs(mean_pitch1 - mean_pitch2) > 55: + mean_pitch_cur = min(mean_pitch1, mean_pitch2) + else: + mean_pitch_cur = (mean_pitch1 + mean_pitch2) / 2 + + else: + f0 = f0_method.infer_from_audio(y, thred=0.03) + f0 = f0[f0 < 600] + valid_f0 = f0[f0 > 50] + mean_pitch_cur = np.mean(valid_f0) + + + return mean_pitch_cur + +def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras): + + ##计算pitch + f0up_key = pyin_process_single_rmvpe(target_wav) + ## get embed + get_embed(target_wav, embed_npy, embed_model) + + print("svc main start...") + svc_main(song_wav,svc_out_path,pth_model_path,embed_npy,f0up_key,hubert_model,paras) + print("svc main finished!!") + + return 0 +def process_svc(song_wav, target_wav, svc_out_path,paras): + + song_wav1, target_wav, svc_out_path = os.path.basename(song_wav), os.path.basename( + target_wav), os.path.basename(svc_out_path) #绝对路径 + song_wav, target_wav, svc_out_path = song_wav, abs_path + target_wav, abs_path + svc_out_path + embed_npy = target_wav[:-4] + '.npy' ##embd npy存储位置 + + similar = meisheng_svc(song_wav,target_wav,svc_out_path,embed_npy,paras) + + + return similar + + +def get_svc(target_yinse_wav, song_name, paras): + ''' + :param target_yinse_wav: 目标音色 + :param song_name: 歌曲名字 + ;param paras: 其他参数 + :return: svc路径名 + ''' + + ##清空工作空间临时路径 + if os.path.exists(gs_work_dir): + #shutil.rmtree(gs_work_dir) + cmd = f"rm -rf {gs_work_dir}/*" + os.system(cmd) + else: + os.makedirs(gs_work_dir) + + gender = paras['gender']##为了确定歌曲 + + ##目标音色读取 + f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav)) + #print("dir :", f_dst,"target_yinse_wav:",target_yinse_wav) + #shutil.move(target_yinse_wav, f_dst) ##放在工作目录 + shutil.copy(target_yinse_wav, f_dst) + target_yinse_wav = f_dst + + ##歌曲/伴奏 读取(路径需要修改) + song_wav = os.path.join("{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)) # 歌曲vocal + inf_acc_path = os.path.join("{}{}/{}/acc.wav".format(song_folder, gender, song_name)) + #song_wav = './xusong_long.wav' + svc_out_path = os.path.join(gs_work_dir, "svc.wav") ###svc结果名字 + print("inputMsg:", song_wav, target_yinse_wav, svc_out_path) + + ## svc process + st = time.time() + print("start inference...") + similar = process_svc(song_wav, target_yinse_wav, svc_out_path,paras) + print("svc finished!!") + print("time cost = {}".format(time.time() - st)) + print("out path name {} ".format(svc_out_path)) + + #''' + ##加混响 + print("add reverbration...") + svc_out_path_effect = svc_out_path[:-4] + '_effect.wav' + cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}" + print("cmd :", cmd) + os.system(cmd) + # # 人声伴奏合并 + print("add acc...") + 
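pyin_process_single_rmvpe above returns the target's mean f0 in Hz (band-limited to 50–600 Hz, averaged over the head and tail of long clips), and that raw value is handed straight to svc_main as f0up_key. If one instead wanted an explicit semitone shift between two mean pitches, the standard relation is 12·log2(target/source); this is a hypothetical helper, not something the pipeline above does:

```python
import math

# Hypothetical: semitone distance between two mean pitches in Hz.
# The pipeline above passes the raw mean f0 to svc_main instead.
def semitone_shift(src_hz: float, tgt_hz: float) -> int:
    return round(12 * math.log2(tgt_hz / src_hz))

assert semitone_shift(220.0, 440.0) == 12   # one octave up
assert semitone_shift(440.0, 392.0) == -2   # A4 down to G4
```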
out_path = svc_out_path_effect[:-4] + '_music.wav' + mix(svc_out_path_effect, inf_acc_path, out_path) + + print("time cost = {}".format(time.time() - st)) + print("out path name {} ".format(out_path)) + #''' + + + return svc_out_path + + +if __name__=='__main__': + + ###gender predict + target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a" + gender, female_rate, is_pure = gender_model.process(target_yinse_wav) + print('=====================') + print("gender:{}, female_rate:{},is_pure:{}".format(gender,female_rate,is_pure)) + if gender == 0: + gender = 'female' + elif gender == 1: + gender = 'male' + elif female_rate > 0.5: + gender = 'female' + else: + gender = 'male' + print("modified gender:{} ".format(gender)) + print('=====================') + + ###接口函数 + ''' + target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav" #需要完整路径 + song_name = "drivers_license" #"Levitating" ##路径会自动添加(要更改) + paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None} ##单位都是ms + #paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0} ###片段svc测试 + #''' + + #''' + #target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a" + song_name = "lost_stars" + #paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None} + paras = {'gender': gender, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None } + get_svc(target_yinse_wav, song_name, paras) + #''' + + diff --git a/AIMeiSheng/meisheng_svc_final_test.py b/AIMeiSheng/meisheng_svc_final_test.py new file mode 100644 index 0000000..04853e4 --- /dev/null +++ b/AIMeiSheng/meisheng_svc_final_test.py @@ -0,0 +1,215 @@ + + +import os,sys +import time +import shutil +import glob +import hashlib +import librosa +import soundfile +import gradio as gr +import pandas as pd +import numpy as np +sys.path.append('./RawNet3/') +from cal_cos_distance_folder import load_and_cal_distance #del +from infererence_fang_meisheng import get_embed, get_embed_model + +times_st = 0 +times_sum = 0 +gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer" ##混音执行文件 +tmp_workspace_name = "batch_test_ocean_fi"#工作空间名 +song_folder = "./data_meisheng/" ##song folder +gs_work_dir = f"./data_meisheng/{tmp_workspace_name}" #工作空间路径 +#abs_path = f"/data/bingxiao.fang/voice_conversion/SVC_MEISHENG/svc_vits-diff/Retrieval-based-Voice-Conversion-WebUI/data_meisheng/{tmp_workspace_name}/" +cur_dir = os.path.abspath(os.path.dirname(__file__)) +#par_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) +abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/' + +pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth" + +from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main,load_hubert, get_vc, get_rmvpe + + +def mix(in_path, acc_path, dst_path): + # svc转码到442 + svc_442_file = in_path + "_442.wav" + st = time.time() + cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(in_path, svc_442_file) + os.system(cmd) + if not os.path.exists(svc_442_file): + return -1 + print("transcode,{},sp={}".format(in_path, time.time() - st)) + + # 混合 + st = time.time() + cmd = "{} {} {} {} 1".format(gs_simple_mixer_path, svc_442_file, acc_path, dst_path) + os.system(cmd) + print("mixer,{},sp={}".format(in_path, time.time() - st)) + + +def load_model(): + global f0_method + print("load embed_model...") + embed_model = get_embed_model() + print("load hubert_model...") + hubert_model = load_hubert() + print("load vc_model...") + 
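The __main__ block of meisheng_svc_final.py above maps the gender classifier's output to a song folder: class id 0 means female, 1 means male, and anything else falls back to the soft female_rate score. Extracted as a function (the 0/1 ids and the 0.5 threshold are taken directly from that block):

```python
# Gender-id mapping as used in __main__ above; gender_id and female_rate
# are the outputs of gender_model.process(wav).
def map_gender(gender_id: int, female_rate: float) -> str:
    if gender_id == 0:
        return "female"
    if gender_id == 1:
        return "male"
    return "female" if female_rate > 0.5 else "male"
```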
get_vc(pth_model_path) + print("load rmvpe...") + f0_method = get_rmvpe() + print("load finish") + return embed_model, hubert_model#,svc_model + +embed_model, hubert_model = load_model() ##提前加载模型 + +def pyin_process_single_rmvpe(input_file): + global f0_method + rate = 16000 #44100 + # 读取音频文件 + y, sr = librosa.load(input_file, sr=rate) + ''' #方法1 + f0 = f0_method.infer_from_audio(y, thred=0.03) + + valid_f0 = f0[f0 > 50] + mean_pitch_cur = np.mean(valid_f0[:min(len(valid_f0),500)]) + #''' + + len_s = len(y)/sr + lim_s = 15 #10 + if(len_s > lim_s): + y1 = y[:sr*lim_s] + y2 = y[-sr*lim_s:] + f0 = f0_method.infer_from_audio(y1, thred=0.03) + f0 = f0[f0 < 600] + valid_f0 = f0[f0 > 50] + mean_pitch1 = np.mean(valid_f0) + f0 = f0_method.infer_from_audio(y2, thred=0.03) + f0 = f0[f0 < 600] + valid_f0 = f0[f0 > 50] + mean_pitch2 = np.mean(valid_f0) + + if abs(mean_pitch1 - mean_pitch2) > 55: + mean_pitch_cur = min(mean_pitch1, mean_pitch2) + else: + mean_pitch_cur = (mean_pitch1 + mean_pitch2) / 2 + + print("mean_pitch1:",mean_pitch1,"mean_pitch2:",mean_pitch2) + else: + f0 = f0_method.infer_from_audio(y, thred=0.03) + f0 = f0[f0 < 600] + valid_f0 = f0[f0 > 50] + mean_pitch_cur = np.mean(valid_f0) + + print("final mean_pitch:",mean_pitch_cur) + + return mean_pitch_cur + +def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras): + + ##计算pitch + f0up_key = pyin_process_single_rmvpe(target_wav) + print("@@f0up_key:",f0up_key) + ## get embed + get_embed(target_wav, embed_npy, embed_model) + print("@@get embed") + + global times_st,times_sum + times_sum += (time.time() - times_st) + print("@@@@song_wav:",song_wav) + print("@@@svc_out_path:",svc_out_path) + print("@@@@embed_npy:",embed_npy) + svc_main(song_wav,svc_out_path,pth_model_path,embed_npy,f0up_key,hubert_model,paras) + print("svc main finished!!") + + ##计算相似度 + ''' + svc_embed_npy = embed_npy[:-4] + '_svc.npy' + get_embed(svc_out_path, svc_embed_npy, embed_model) + similar = load_and_cal_distance(embed_npy,svc_embed_npy) + print("target_npy:",embed_npy,"svc_npy:",svc_embed_npy) + print("######similar:",similar.numpy()) + + return similar.numpy()[0] + #''' + return 0 +def process_svc(song_wav, target_wav, svc_out_path,paras): + + song_wav1, target_wav, svc_out_path = os.path.basename(song_wav), os.path.basename( + target_wav), os.path.basename(svc_out_path) + song_wav, target_wav, svc_out_path = song_wav, abs_path + target_wav, abs_path + svc_out_path + embed_npy = target_wav[:-4] + '.npy' ##npy存储位置 + + similar = meisheng_svc(song_wav,target_wav,svc_out_path,embed_npy,paras) + + + return similar + + +def get_svc(target_yinse_wav, song_name, paras): + ''' + :param target_yinse_wav: 目标音色 + :param gender: 性别选择 + :param song_name: 歌曲名字 + :return: svc路径名 + ''' + + ##清空工作空间临时路径 + if os.path.exists(gs_work_dir): + #shutil.rmtree(gs_work_dir) + cmd = f"rm -rf {gs_work_dir}/*" + os.system(cmd) + else: + os.makedirs(gs_work_dir) + + gender = paras['gender']##为了确定歌曲 + + ##目标音色读取 + f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav)) + print("dir :", f_dst,"target_yinse_wav:",target_yinse_wav) + #shutil.move(target_yinse_wav, f_dst) ##放在工作目录 + shutil.copy(target_yinse_wav, f_dst) + target_yinse_wav = f_dst + + ##歌曲/伴奏 读取(路径需要修改) + song_wav = os.path.join("{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)) # 歌曲vocal + # inf_acc_path = os.path.join("{}{}/{}/acc.wav".format(song_folder, gender, song_name)) # 伴奏 + song_wav = './xusong_long.wav' + svc_out_path = os.path.join(gs_work_dir, "svc.wav") ###svc结果名字 + 
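meisheng_svc above keeps a commented-out quality check that embeds the SVC output and compares it against the target embedding via load_and_cal_distance. That helper is repo-specific; under the assumption that each .npy file holds a single speaker-embedding vector and that "similar" means cosine similarity, a NumPy stand-in would look like:

```python
import numpy as np

# Hedged stand-in for load_and_cal_distance: cosine similarity between two
# saved embedding vectors (assumes one vector per .npy file).
def cosine_similarity_npy(a_path: str, b_path: str) -> float:
    a = np.load(a_path).reshape(-1)
    b = np.load(b_path).reshape(-1)
    denom = np.linalg.norm(a) * np.linalg.norm(b) + 1e-8
    return float(np.dot(a, b) / denom)
```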
print("svc out: {}".format(svc_out_path)) + + ##process + st = time.time() + + ##保存对应 vocal 到workspace + cmd = f"cp {song_wav} {gs_work_dir}" + os.system(cmd) + global times_st,times_sum + + print("start inference...") + print("inputMsg:", song_wav, target_yinse_wav, svc_out_path) + test_num = 20 + for idx in range(test_num): + times_st = time.time() + similar = process_svc(song_wav, target_yinse_wav, svc_out_path,paras) + print("svc finished!!") + print("time cost = {}".format(time.time() - st)) + print("mean time cost = {}".format((time.time() - st)/test_num)) + print("process before svc = {}, mean time cost {}".format(times_sum, times_sum/test_num) ) + + return similar + + + +if __name__=='__main__': + + ###接口函数 + target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav" #需要完整路径 + song_name = "drivers_license" #"Levitating" ##路径会自动添加 + # gender = 'female' + #song_path是预留的预处理文件存放位置 + paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None} ##单位都是ms + #paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0} + similar = get_svc(target_yinse_wav, song_name, paras) + + + diff --git a/AIMeiSheng/models_embed_in_dec_diff_fi.py b/AIMeiSheng/models_embed_in_dec_diff_fi.py new file mode 100644 index 0000000..9fa483c --- /dev/null +++ b/AIMeiSheng/models_embed_in_dec_diff_fi.py @@ -0,0 +1,1233 @@ +import math, pdb, os +from time import time as ttime +import torch +from torch import nn +from torch.nn import functional as F +from lib.infer_pack import modules +from lib.infer_pack import attentions_in_dec as attentions +from lib.infer_pack import commons +from lib.infer_pack.commons import init_weights, get_padding +from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d +from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm +from lib.infer_pack.commons import init_weights +import numpy as np +from lib.infer_pack import commons +from thop import profile +from diffuse_fang.diffUse_wraper import diff_decoder,ddpm_para +ddpm_dp = ddpm_para() + +class TextEncoder256(nn.Module): + def __init__( + self, + out_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + f0=True, + ): + super().__init__() + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.emb_phone = nn.Linear(256, hidden_channels) + self.lrelu = nn.LeakyReLU(0.1, inplace=True) + if f0 == True: + self.emb_pitch = nn.Embedding(256, hidden_channels) # pitch 256 + self.encoder = attentions.Encoder( + hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout + ) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + + def forward(self, phone, pitch, lengths): + if pitch == None: + x = self.emb_phone(phone) + else: + x = self.emb_phone(phone) + self.emb_pitch(pitch) + x = x * math.sqrt(self.hidden_channels) # [b, t, h] + x = self.lrelu(x) + x = torch.transpose(x, 1, -1) # [b, h, t] + x_mask = torch.unsqueeze(commons.sequence_mask(lengths, x.size(2)), 1).to( + x.dtype + ) + x = self.encoder(x * x_mask, x_mask) + stats = self.proj(x) * x_mask + + m, logs = torch.split(stats, self.out_channels, dim=1) + return m, logs, x_mask + + +class TextEncoder768(nn.Module): + def __init__( + self, + out_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + f0=True, + ): + super().__init__() + 
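TextEncoder256 above builds its time mask with commons.sequence_mask before running the attention encoder. A minimal equivalent, assuming the standard VITS semantics (True wherever the frame index falls inside the utterance length):

```python
import torch

# Minimal sequence_mask sketch (assumed equivalent to commons.sequence_mask):
# returns a [batch, max_len] boolean mask, True for valid frames.
def sequence_mask(lengths, max_len=None):
    if max_len is None:
        max_len = int(lengths.max())
    t = torch.arange(max_len, device=lengths.device)
    return t.unsqueeze(0) < lengths.unsqueeze(1)

print(sequence_mask(torch.tensor([2, 4])))  # [[T, T, F, F], [T, T, T, T]]
```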
self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.emb_phone = nn.Linear(768, hidden_channels) + self.lrelu = nn.LeakyReLU(0.1, inplace=True) + if f0 == True: + self.emb_pitch = nn.Embedding(256, hidden_channels) # pitch 256 + self.encoder = attentions.Encoder( + hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout + ) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + + def forward(self, phone, pitch, lengths,g):#fang add + if pitch == None: + x = self.emb_phone(phone) + else: + x = self.emb_phone(phone) + self.emb_pitch(pitch) + x = x * math.sqrt(self.hidden_channels) # [b, t, h] + x = self.lrelu(x) + x = torch.transpose(x, 1, -1) # [b, h, t] + x_mask = torch.unsqueeze(commons.sequence_mask(lengths, x.size(2)), 1).to( + x.dtype + ) + #x = self.encoder(x * x_mask, x_mask,g) + x = self.encoder(x * x_mask, x_mask,g)#fang add + stats = self.proj(x) * x_mask + + m, logs = torch.split(stats, self.out_channels, dim=1) + return m, logs, x_mask + + +class ResidualCouplingBlock(nn.Module): + def __init__( + self, + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + n_flows=4, + gin_channels=0, + ): + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.n_flows = n_flows + self.gin_channels = gin_channels + + self.flows = nn.ModuleList() + for i in range(n_flows): + self.flows.append( + modules.ResidualCouplingLayer( + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=gin_channels, + mean_only=True, + ) + ) + self.flows.append(modules.Flip()) + + def forward(self, x, x_mask, g=None, reverse=False): + if not reverse: + for flow in self.flows: + x, _ = flow(x, x_mask, g=g, reverse=reverse) + else: + for flow in reversed(self.flows): + x = flow(x, x_mask, g=g, reverse=reverse) + return x + + def remove_weight_norm(self): + for i in range(self.n_flows): + self.flows[i * 2].remove_weight_norm() + + +class PosteriorEncoder(nn.Module): + def __init__( + self, + in_channels, + out_channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + + self.pre = nn.Conv1d(in_channels, hidden_channels, 1) + self.enc = modules.WN( + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=gin_channels, + ) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + + def forward(self, x, x_lengths, g=None): + x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to( + x.dtype + ) + x = self.pre(x) * x_mask + x = self.enc(x, x_mask, g=g) + stats = self.proj(x) * x_mask + m, logs = torch.split(stats, self.out_channels, dim=1)#均值和方差 fang + z = (m + torch.randn_like(m) * torch.exp(logs)) * x_mask ##随机采样 fang + return z, m, logs, x_mask + + def remove_weight_norm(self): + self.enc.remove_weight_norm() + + +class Generator(torch.nn.Module): + def __init__( + self, + initial_channel, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + 
upsample_initial_channel, + upsample_kernel_sizes, + gin_channels=0, + ): + super(Generator, self).__init__() + self.num_kernels = len(resblock_kernel_sizes) + self.num_upsamples = len(upsample_rates) + self.conv_pre = Conv1d( + initial_channel, upsample_initial_channel, 7, 1, padding=3 + ) + resblock = modules.ResBlock1 if resblock == "1" else modules.ResBlock2 + + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): + self.ups.append( + weight_norm( + ConvTranspose1d( + upsample_initial_channel // (2**i), + upsample_initial_channel // (2 ** (i + 1)), + k, + u, + padding=(k - u) // 2, + ) + ) + ) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = upsample_initial_channel // (2 ** (i + 1)) + for j, (k, d) in enumerate( + zip(resblock_kernel_sizes, resblock_dilation_sizes) + ): + self.resblocks.append(resblock(ch, k, d)) + + self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False) + self.ups.apply(init_weights) + + if gin_channels != 0: + self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1) + + def forward(self, x, g=None): + x = self.conv_pre(x) + if g is not None: + x = x + self.cond(g) + + for i in range(self.num_upsamples): + x = F.leaky_relu(x, modules.LRELU_SLOPE) + x = self.ups[i](x) + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i * self.num_kernels + j](x) + else: + xs += self.resblocks[i * self.num_kernels + j](x) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + x = torch.tanh(x) + + return x + + def remove_weight_norm(self): + for l in self.ups: + remove_weight_norm(l) + for l in self.resblocks: + l.remove_weight_norm() + + +class SineGen(torch.nn.Module): + """Definition of sine generator + SineGen(samp_rate, harmonic_num = 0, + sine_amp = 0.1, noise_std = 0.003, + voiced_threshold = 0, + flag_for_pulse=False) + samp_rate: sampling rate in Hz + harmonic_num: number of harmonic overtones (default 0) + sine_amp: amplitude of sine-wavefrom (default 0.1) + noise_std: std of Gaussian noise (default 0.003) + voiced_thoreshold: F0 threshold for U/V classification (default 0) + flag_for_pulse: this SinGen is used inside PulseGen (default False) + Note: when flag_for_pulse is True, the first time step of a voiced + segment is always sin(np.pi) or cos(0) + """ + + def __init__( + self, + samp_rate, + harmonic_num=0, + sine_amp=0.1, + noise_std=0.003, + voiced_threshold=0, + flag_for_pulse=False, + ): + super(SineGen, self).__init__() + self.sine_amp = sine_amp + self.noise_std = noise_std + self.harmonic_num = harmonic_num + self.dim = self.harmonic_num + 1 + self.sampling_rate = samp_rate + self.voiced_threshold = voiced_threshold + + def _f02uv(self, f0): + # generate uv signal + uv = torch.ones_like(f0) + uv = uv * (f0 > self.voiced_threshold) + return uv + + def forward(self, f0, upp): + """sine_tensor, uv = forward(f0) + input F0: tensor(batchsize=1, length, dim=1) + f0 for unvoiced steps should be 0 + output sine_tensor: tensor(batchsize=1, length, dim) + output uv: tensor(batchsize=1, length, 1) + """ + with torch.no_grad(): + f0 = f0[:, None].transpose(1, 2) + f0_buf = torch.zeros(f0.shape[0], f0.shape[1], self.dim, device=f0.device) + # fundamental component + f0_buf[:, :, 0] = f0[:, :, 0] + for idx in np.arange(self.harmonic_num): + f0_buf[:, :, idx + 1] = f0_buf[:, :, 0] * ( + idx + 2 + ) # idx + 2: the (idx+1)-th overtone, (idx+2)-th harmonic + rad_values = (f0_buf / self.sampling_rate) % 1 ###%1意味着n_har的乘积无法后处理优化 + 
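SineGen's forward pass, which continues below, works by accumulating per-sample phase increments (f0 / sampling_rate, kept modulo 1) and taking the sine of the running phase; the comment above notes that the % 1 blocks later algebraic simplification of the harmonic products. Stripped of the harmonic stack, random initial phase, upsampling, and voiced/unvoiced noise, the core idea is:

```python
import math
import torch

# Core of the NSF sine source: integrate instantaneous frequency into phase
# and take sin(); unvoiced samples (f0 == 0) are zeroed. Everything else in
# SineGen (harmonics, phase randomization, noise mixing) builds on this.
def simple_sine_source(f0: torch.Tensor, sr: int, amp: float = 0.1) -> torch.Tensor:
    phase = torch.cumsum(f0 / sr, dim=-1)         # phase in cycles
    sine = amp * torch.sin(2 * math.pi * phase)   # sine waveform
    return sine * (f0 > 0)                        # mask unvoiced regions
```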
rand_ini = torch.rand( + f0_buf.shape[0], f0_buf.shape[2], device=f0_buf.device + ) + rand_ini[:, 0] = 0 + rad_values[:, 0, :] = rad_values[:, 0, :] + rand_ini + tmp_over_one = torch.cumsum(rad_values, 1) # % 1 #####%1意味着后面的cumsum无法再优化 + tmp_over_one *= upp + tmp_over_one = F.interpolate( + tmp_over_one.transpose(2, 1), + scale_factor=upp, + mode="linear", + align_corners=True, + ).transpose(2, 1) + rad_values = F.interpolate( + rad_values.transpose(2, 1), scale_factor=upp, mode="nearest" + ).transpose( + 2, 1 + ) ####### + tmp_over_one %= 1 + tmp_over_one_idx = (tmp_over_one[:, 1:, :] - tmp_over_one[:, :-1, :]) < 0 + cumsum_shift = torch.zeros_like(rad_values) + cumsum_shift[:, 1:, :] = tmp_over_one_idx * -1.0 + sine_waves = torch.sin( + torch.cumsum(rad_values + cumsum_shift, dim=1) * 2 * np.pi + ) + sine_waves = sine_waves * self.sine_amp + uv = self._f02uv(f0) + uv = F.interpolate( + uv.transpose(2, 1), scale_factor=upp, mode="nearest" + ).transpose(2, 1) + noise_amp = uv * self.noise_std + (1 - uv) * self.sine_amp / 3 + noise = noise_amp * torch.randn_like(sine_waves) + sine_waves = sine_waves * uv + noise + return sine_waves, uv, noise + + +class SourceModuleHnNSF(torch.nn.Module): + """SourceModule for hn-nsf + SourceModule(sampling_rate, harmonic_num=0, sine_amp=0.1, + add_noise_std=0.003, voiced_threshod=0) + sampling_rate: sampling_rate in Hz + harmonic_num: number of harmonic above F0 (default: 0) + sine_amp: amplitude of sine source signal (default: 0.1) + add_noise_std: std of additive Gaussian noise (default: 0.003) + note that amplitude of noise in unvoiced is decided + by sine_amp + voiced_threshold: threhold to set U/V given F0 (default: 0) + Sine_source, noise_source = SourceModuleHnNSF(F0_sampled) + F0_sampled (batchsize, length, 1) + Sine_source (batchsize, length, 1) + noise_source (batchsize, length 1) + uv (batchsize, length, 1) + """ + + def __init__( + self, + sampling_rate, + harmonic_num=0, + sine_amp=0.1, + add_noise_std=0.003, + voiced_threshod=0, + is_half=True, + ): + super(SourceModuleHnNSF, self).__init__() + + self.sine_amp = sine_amp + self.noise_std = add_noise_std + self.is_half = is_half + # to produce sine waveforms + self.l_sin_gen = SineGen( + sampling_rate, harmonic_num, sine_amp, add_noise_std, voiced_threshod + ) + + # to merge source harmonics into a single excitation + self.l_linear = torch.nn.Linear(harmonic_num + 1, 1) + self.l_tanh = torch.nn.Tanh() + + def forward(self, x, upp=None): + sine_wavs, uv, _ = self.l_sin_gen(x, upp) + if self.is_half: + sine_wavs = sine_wavs.half() + sine_merge = self.l_tanh(self.l_linear(sine_wavs)) + return sine_merge, None, None # noise, uv + + +class GeneratorNSF(torch.nn.Module): + def __init__( + self, + initial_channel, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels, + sr, + is_half=False, + ): + super(GeneratorNSF, self).__init__() + self.num_kernels = len(resblock_kernel_sizes) + self.num_upsamples = len(upsample_rates) + + self.f0_upsamp = torch.nn.Upsample(scale_factor=np.prod(upsample_rates)) + self.m_source = SourceModuleHnNSF( + sampling_rate=sr, harmonic_num=0, is_half=is_half + ) + self.noise_convs = nn.ModuleList() + self.conv_pre = Conv1d( + initial_channel, upsample_initial_channel, 7, 1, padding=3 + ) + resblock = modules.ResBlock1 if resblock == "1" else modules.ResBlock2 + + self.ups = nn.ModuleList() + self.ups_g = nn.ModuleList()# fang add + for i, (u, k) in enumerate(zip(upsample_rates, 
upsample_kernel_sizes)): + c_cur = upsample_initial_channel // (2 ** (i + 1)) + self.ups.append( + weight_norm( + ConvTranspose1d( + upsample_initial_channel // (2**i), + upsample_initial_channel // (2 ** (i + 1)), + k, + u, + padding=(k - u) // 2, + ) + ) + ) + self.ups_g.append( + nn.Conv1d(upsample_initial_channel,upsample_initial_channel // (2 ** (i + 1) ), 1) + #F.interpolate(input, scale_factor=2, mode='nearest') + )# fang add + if i + 1 < len(upsample_rates): + stride_f0 = np.prod(upsample_rates[i + 1 :]) + self.noise_convs.append( + Conv1d( + 1, + c_cur, + kernel_size=stride_f0 * 2, + stride=stride_f0, + padding=stride_f0 // 2, + ) + ) + else: + self.noise_convs.append(Conv1d(1, c_cur, kernel_size=1)) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = upsample_initial_channel // (2 ** (i + 1)) + for j, (k, d) in enumerate( + zip(resblock_kernel_sizes, resblock_dilation_sizes) + ): + self.resblocks.append(resblock(ch, k, d)) + + self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False) + self.ups.apply(init_weights) + + if gin_channels != 0: + self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1) + + self.upp = np.prod(upsample_rates) + + def forward(self, x, f0, g=None): + har_source, noi_source, uv = self.m_source(f0, self.upp) + har_source = har_source.transpose(1, 2) + x = self.conv_pre(x) + if g is not None: + #x = x + self.cond(g) ##org + tmp_g = self.cond(g) ##fang add + x = x + tmp_g ##fang add + #print('###@@@@##x:',x.shape ) + for i in range(self.num_upsamples): + x = F.leaky_relu(x, modules.LRELU_SLOPE) + x = self.ups[i](x) + x_source = self.noise_convs[i](har_source) + x = x + x_source + xg = self.ups_g[i](tmp_g) #fang add + x = x + xg #fang add + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i * self.num_kernels + j](x) + else: + xs += self.resblocks[i * self.num_kernels + j](x) + x = xs / self.num_kernels + #print('@@@@##x:',x.shape) + x = F.leaky_relu(x) + x = self.conv_post(x) + x = torch.tanh(x) + return x + + def remove_weight_norm(self): + for l in self.ups: + remove_weight_norm(l) + for l in self.resblocks: + l.remove_weight_norm() + + +sr2sr = { + "32k": 32000, + "40k": 40000, + "48k": 48000, + "24k": 24000, +} + + +class SynthesizerTrnMs256NSFsid(nn.Module): + def __init__( + self, + spec_channels, + segment_size, + inter_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + spk_embed_dim, + gin_channels, + sr, + **kwargs + ): + super().__init__() + if type(sr) == type("strr"): + sr = sr2sr[sr] + self.spec_channels = spec_channels + self.inter_channels = inter_channels + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.resblock = resblock + self.resblock_kernel_sizes = resblock_kernel_sizes + self.resblock_dilation_sizes = resblock_dilation_sizes + self.upsample_rates = upsample_rates + self.upsample_initial_channel = upsample_initial_channel + self.upsample_kernel_sizes = upsample_kernel_sizes + self.segment_size = segment_size + self.gin_channels = gin_channels + # self.hop_length = hop_length# + self.spk_embed_dim = spk_embed_dim + self.enc_p = TextEncoder256( + inter_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + 
p_dropout, + ) + self.dec = GeneratorNSF( + inter_channels, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels=gin_channels, + sr=sr, + is_half=kwargs["is_half"], + ) + self.enc_q = PosteriorEncoder( + spec_channels, + inter_channels, + hidden_channels, + 5, + 1, + 16, + gin_channels=gin_channels, + ) + self.flow = ResidualCouplingBlock( + inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels + ) + self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels) + print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim) + + def remove_weight_norm(self): + self.dec.remove_weight_norm() + self.flow.remove_weight_norm() + self.enc_q.remove_weight_norm() + + def forward( + self, phone, phone_lengths, pitch, pitchf, y, y_lengths, ds + ): # 这里ds是id,[bs,1] + # print(1,pitch.shape)#[bs,t] + g = self.emb_g(ds).unsqueeze(-1) # [b, 256, 1]##1是t,广播的 + #print("@@@pitch.shape: ",pitch.shape) + #g = ds.unsqueeze(-1) + m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths) + z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g) + z_p = self.flow(z, y_mask, g=g) + z_slice, ids_slice = commons.rand_slice_segments( + z, y_lengths, self.segment_size + ) #按照self.segment_size这个长度,进行随机切割z,长度固定,开始位置不同存在ids_slice中,z_slice是切割的结果, fang + # print(-1,pitchf.shape,ids_slice,self.segment_size,self.hop_length,self.segment_size//self.hop_length) + pitchf = commons.slice_segments2(pitchf, ids_slice, self.segment_size) + # print(-2,pitchf.shape,z_slice.shape) + o = self.dec(z_slice, pitchf, g=g) + return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q) + + def infer(self, phone, phone_lengths, pitch, nsff0, sid, rate=None): + g = self.emb_g(sid).unsqueeze(-1) + m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths) + z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask + if rate: + head = int(z_p.shape[2] * rate) + z_p = z_p[:, :, -head:] + x_mask = x_mask[:, :, -head:] + nsff0 = nsff0[:, -head:] + z = self.flow(z_p, x_mask, g=g, reverse=True) + print('z shape: ',z.shape) + print('x_mask shape: ',x_mask.shape) + z_x_mask = z * x_mask + print('z_x_mask shape: ',z_x_mask.shape) + print('nsff0 shape:p', nsff0.shape) + print('g shape: ',g.shape) + o = self.dec(z * x_mask, nsff0, g=g) + + self.get_floats() + return o, x_mask, (z, z_p, m_p, logs_p) + + def get_floats(self,): + T = 21.4 #郭宇_但愿人长久_40k.wav + z = torch.randn(1,192 ,2740)# 2s data(同时用2s数据验证,整数倍就对了,防止干扰) + x_mask = torch.randn(1,1 ,2740) + g = torch.randn(1,256 ,1) + + inputs_bfcc = z #z * x_mask + nsff0 = torch.randn(1, 2740) + devices = 'cuda' #'cpu' + self.dec = self.dec.to(devices).half() + inputs_bfcc , nsff0, g = inputs_bfcc.to(devices).half(), nsff0.to(devices).half(), g.to(devices).half() + flops, params = profile(self.dec, (inputs_bfcc, nsff0, g)) + print(f'@@@hifi-gan nsf decflops: {flops/(T*pow(10,9))} GFLOPS, params: { params/pow(10,6)} M') + return 0 + +class SynthesizerTrnMs768NSFsid(nn.Module): + def __init__( + self, + spec_channels, + segment_size, + inter_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + spk_embed_dim, + gin_channels, + sr, + **kwargs + ): + super().__init__() + if type(sr) == type("strr"): + sr = sr2sr[sr] + self.spec_channels = spec_channels + self.inter_channels = 
+class SynthesizerTrnMs768NSFsid(nn.Module):
+    def __init__(
+        self,
+        spec_channels,
+        segment_size,
+        inter_channels,
+        hidden_channels,
+        filter_channels,
+        n_heads,
+        n_layers,
+        kernel_size,
+        p_dropout,
+        resblock,
+        resblock_kernel_sizes,
+        resblock_dilation_sizes,
+        upsample_rates,
+        upsample_initial_channel,
+        upsample_kernel_sizes,
+        spk_embed_dim,
+        gin_channels,
+        sr,
+        **kwargs
+    ):
+        super().__init__()
+        if isinstance(sr, str):
+            sr = sr2sr[sr]
+        self.spec_channels = spec_channels
+        self.inter_channels = inter_channels
+        self.hidden_channels = hidden_channels
+        self.filter_channels = filter_channels
+        self.n_heads = n_heads
+        self.n_layers = n_layers
+        self.kernel_size = kernel_size
+        self.p_dropout = p_dropout
+        self.resblock = resblock
+        self.resblock_kernel_sizes = resblock_kernel_sizes
+        self.resblock_dilation_sizes = resblock_dilation_sizes
+        self.upsample_rates = upsample_rates
+        self.upsample_initial_channel = upsample_initial_channel
+        self.upsample_kernel_sizes = upsample_kernel_sizes
+        self.segment_size = segment_size
+        self.gin_channels = gin_channels
+        # self.hop_length = hop_length#
+        self.spk_embed_dim = spk_embed_dim
+        self.enc_p = TextEncoder768(
+            inter_channels,
+            hidden_channels,
+            filter_channels,
+            n_heads,
+            n_layers,
+            kernel_size,
+            p_dropout,
+        )
+        self.dec = GeneratorNSF(
+            inter_channels,
+            resblock,
+            resblock_kernel_sizes,
+            resblock_dilation_sizes,
+            upsample_rates,
+            upsample_initial_channel,
+            upsample_kernel_sizes,
+            gin_channels=gin_channels,
+            sr=sr,
+            is_half=kwargs["is_half"],
+        )
+        self.enc_q = PosteriorEncoder(
+            spec_channels,
+            inter_channels,
+            hidden_channels,
+            5,
+            1,
+            16,
+            gin_channels=gin_channels,
+        )
+        self.flow = ResidualCouplingBlock(
+            inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
+        )
+        self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
+        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
+
+        self.diff_decoder = diff_decoder
+        self.diff_cond_g = nn.Conv1d(256, 192, 1)
+
+    def remove_weight_norm(self):
+        self.dec.remove_weight_norm()
+        self.flow.remove_weight_norm()
+        self.enc_q.remove_weight_norm()
+
+    def forward(
+        self, phone, phone_lengths, pitch, pitchf, y, y_lengths, ds
+    ):  # ds is the speaker id, shape [bs, 1]
+        # print(1, pitch.shape)  # [bs, t]
+        # g = self.emb_g(ds).unsqueeze(-1)  # [b, 256, 1]; the trailing 1 is the time axis, broadcast later
+        # print("@@@@@fang@@@@@")
+        g = ds.unsqueeze(-1)  # fang: here ds is already a speaker embedding, not an id
+        # print("g:", g.size())
+        # print("phone_lengths: ", phone_lengths.size())
+        # print("pitch: ", pitch.size())
+        # m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)
+        m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths, g)  # fang add
+        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)  # the posterior encoder samples the latent z; m_q is its mean, logs_q its log-variance, y_mask the valid-frame mask (fang)
+
+        z_p = self.flow(z, y_mask, g=g)
+
+        ### diff start
+        z_p_sample = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * y_mask
+        z_flow = self.flow(z_p_sample, y_mask, g=g, reverse=True)
+        diff_loss = torch.zeros_like(z_flow)
+
+        '''
+        z_res = z - z_flow
+        lzp = 1e-2
+        g_z_p = self.diff_cond_g(g)
+        z_p_sample = z_p_sample * lzp + g_z_p
+        z_p_diff = z_p_sample.transpose(1, 2).float()  # b, frames, feat
+        z_res_diff = z_res.transpose(1, 2)  # b, frames, feat
+        self.diff_decoder = self.diff_decoder.float()
+        diff_loss, _ = self.diff_decoder(z_p_diff, gt_spec=z_res_diff, infer=False, infer_speedup=ddpm_dp.infer_speedup, method=ddpm_dp.method, use_tqdm=ddpm_dp.use_tqdm)
+        '''
+        # z_flow_diff = z_flow.transpose(1, 2)
+        # z_x = self.diff_decoder(z_p_diff, gt_spec=z_flow_diff, infer=True, infer_speedup=ddpm_dp.infer_speedup, method=ddpm_dp.method, k_step=200, use_tqdm=False)
+        # z_x_rec = z_x.transpose(1, 2)
+        z = z_flow  # + z_x_rec
+
+        ### diff end
+
+        z_slice, ids_slice = commons.rand_slice_segments(
+            z, y_lengths, self.segment_size
+        )
+        # print(-1, pitchf.shape, ids_slice, self.segment_size, self.hop_length, self.segment_size // self.hop_length)
+        pitchf = commons.slice_segments2(pitchf, ids_slice, self.segment_size)
+        # print(-2, pitchf.shape, z_slice.shape)
+        o = self.dec(z_slice, pitchf, g=g)
+        return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q), diff_loss
+
+    def infer(self, phone, phone_lengths, pitch, nsff0, sid, rate=None):
+        # g = self.emb_g(sid).unsqueeze(-1)
+        g = sid.unsqueeze(-1).unsqueeze(0)  # sid is a speaker embedding vector here
+        # m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths)  # org
+        m_p, logs_p, x_mask = self.enc_p(phone, pitch, phone_lengths, g)  # fang add
+        z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
+        if rate:
+            head = int(z_p.shape[2] * rate)
+            z_p = z_p[:, :, -head:]
+            x_mask = x_mask[:, :, -head:]
+            nsff0 = nsff0[:, -head:]
+        z_flow = self.flow(z_p, x_mask, g=g, reverse=True)
+        '''
+        # if is_half:
+        # self.diff_decoder = self.diff_decoder.float()
+        z_p_diff = z_p.transpose(1, 2).float()  # b, frames, feat
+        z_diff = z_flow.transpose(1, 2)  # b, frames, feat
+        print("@@z_p_diff", z_p_diff[0, 0, :])
+        self.diff_decoder = self.diff_decoder.float()
+        z_x = self.diff_decoder(z_p_diff, gt_spec=None, infer=True, infer_speedup=ddpm_dp.infer_speedup, method=ddpm_dp.method, k_step=200, use_tqdm=ddpm_dp.use_tqdm)
+        # print("@@z_x", z_x[0, 0, :])
+        z_x_rec = z_x.transpose(1, 2).half()
+        #'''
+        z = z_flow  # + z_x_rec
+        o = self.dec(z * x_mask, nsff0, g=g)
+        # self.get_floats()
+        return o, x_mask, (z, z_p, m_p, logs_p)
+
+
+class SynthesizerTrnMs256NSFsid_nono(nn.Module):
+    def __init__(
+        self,
+        spec_channels,
+        segment_size,
+        inter_channels,
+        hidden_channels,
+        filter_channels,
+        n_heads,
+        n_layers,
+        kernel_size,
+        p_dropout,
+        resblock,
+        resblock_kernel_sizes,
+        resblock_dilation_sizes,
+        upsample_rates,
+        upsample_initial_channel,
+        upsample_kernel_sizes,
+        spk_embed_dim,
+        gin_channels,
+        sr=None,
+        **kwargs
+    ):
+        super().__init__()
+        self.spec_channels = spec_channels
+        self.inter_channels = inter_channels
+        self.hidden_channels = hidden_channels
+        self.filter_channels = filter_channels
+        self.n_heads = n_heads
+        self.n_layers = n_layers
+        self.kernel_size = kernel_size
+        self.p_dropout = p_dropout
+        self.resblock = resblock
+        self.resblock_kernel_sizes = resblock_kernel_sizes
+        self.resblock_dilation_sizes = resblock_dilation_sizes
+        self.upsample_rates = upsample_rates
+        self.upsample_initial_channel = upsample_initial_channel
+        self.upsample_kernel_sizes = upsample_kernel_sizes
+        self.segment_size = segment_size
+        self.gin_channels = gin_channels
+        # self.hop_length = hop_length#
+        self.spk_embed_dim = spk_embed_dim
+        self.enc_p = TextEncoder256(
+            inter_channels,
+            hidden_channels,
+            filter_channels,
+            n_heads,
+            n_layers,
+            kernel_size,
+            p_dropout,
+            f0=False,
+        )
+        self.dec = Generator(
+            inter_channels,
+            resblock,
+            resblock_kernel_sizes,
+            resblock_dilation_sizes,
+            upsample_rates,
+            upsample_initial_channel,
+            upsample_kernel_sizes,
+            gin_channels=gin_channels,
+        )
+        self.enc_q = PosteriorEncoder(
+            spec_channels,
+            inter_channels,
+            hidden_channels,
+            5,
+            1,
+            16,
+            gin_channels=gin_channels,
+        )
+        self.flow = ResidualCouplingBlock(
+            inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
+        )
+        self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
+        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
+
+    def remove_weight_norm(self):
+        self.dec.remove_weight_norm()
+        self.flow.remove_weight_norm()
+        self.enc_q.remove_weight_norm()
+
+    def forward(self, phone, phone_lengths, y, y_lengths, ds):  # ds is the speaker id, shape [bs, 1]
+        g = self.emb_g(ds).unsqueeze(-1)  # [b, 256, 1]; the trailing 1 is the time axis, broadcast later
+        m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
+        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
+        z_p = self.flow(z, y_mask, g=g)
+        z_slice, ids_slice = commons.rand_slice_segments(
+            z, y_lengths, self.segment_size
+        )
+        o = self.dec(z_slice, g=g)
+        return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
+
+    def infer(self, phone, phone_lengths, sid, rate=None):
+        g = self.emb_g(sid).unsqueeze(-1)
+        m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
+        z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
+        if rate:
+            head = int(z_p.shape[2] * rate)
+            z_p = z_p[:, :, -head:]
+            x_mask = x_mask[:, :, -head:]
+        z = self.flow(z_p, x_mask, g=g, reverse=True)
+        o = self.dec(z * x_mask, g=g)
+        return o, x_mask, (z, z_p, m_p, logs_p)
+
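+
+# The rate argument of the infer() methods in these synthesizer classes enables
+# partial synthesis: only the trailing fraction of frames is decoded, e.g. so a
+# caller can re-synthesize just the newest part of a long utterance. A worked
+# example (numbers are illustrative):
+#
+#   z_p.shape == (1, 192, 1000), rate == 0.5
+#   head = int(1000 * 0.5)       # 500
+#   z_p = z_p[:, :, -500:]       # keep only the last 500 frames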
+
+
+class SynthesizerTrnMs768NSFsid_nono(nn.Module):
+    def __init__(
+        self,
+        spec_channels,
+        segment_size,
+        inter_channels,
+        hidden_channels,
+        filter_channels,
+        n_heads,
+        n_layers,
+        kernel_size,
+        p_dropout,
+        resblock,
+        resblock_kernel_sizes,
+        resblock_dilation_sizes,
+        upsample_rates,
+        upsample_initial_channel,
+        upsample_kernel_sizes,
+        spk_embed_dim,
+        gin_channels,
+        sr=None,
+        **kwargs
+    ):
+        super().__init__()
+        self.spec_channels = spec_channels
+        self.inter_channels = inter_channels
+        self.hidden_channels = hidden_channels
+        self.filter_channels = filter_channels
+        self.n_heads = n_heads
+        self.n_layers = n_layers
+        self.kernel_size = kernel_size
+        self.p_dropout = p_dropout
+        self.resblock = resblock
+        self.resblock_kernel_sizes = resblock_kernel_sizes
+        self.resblock_dilation_sizes = resblock_dilation_sizes
+        self.upsample_rates = upsample_rates
+        self.upsample_initial_channel = upsample_initial_channel
+        self.upsample_kernel_sizes = upsample_kernel_sizes
+        self.segment_size = segment_size
+        self.gin_channels = gin_channels
+        # self.hop_length = hop_length#
+        self.spk_embed_dim = spk_embed_dim
+        self.enc_p = TextEncoder768(
+            inter_channels,
+            hidden_channels,
+            filter_channels,
+            n_heads,
+            n_layers,
+            kernel_size,
+            p_dropout,
+            f0=False,
+        )
+        self.dec = Generator(
+            inter_channels,
+            resblock,
+            resblock_kernel_sizes,
+            resblock_dilation_sizes,
+            upsample_rates,
+            upsample_initial_channel,
+            upsample_kernel_sizes,
+            gin_channels=gin_channels,
+        )
+        self.enc_q = PosteriorEncoder(
+            spec_channels,
+            inter_channels,
+            hidden_channels,
+            5,
+            1,
+            16,
+            gin_channels=gin_channels,
+        )
+        self.flow = ResidualCouplingBlock(
+            inter_channels, hidden_channels, 5, 1, 3, gin_channels=gin_channels
+        )
+        self.emb_g = nn.Embedding(self.spk_embed_dim, gin_channels)
+        print("gin_channels:", gin_channels, "self.spk_embed_dim:", self.spk_embed_dim)
+
+    def remove_weight_norm(self):
+        self.dec.remove_weight_norm()
+        self.flow.remove_weight_norm()
+        self.enc_q.remove_weight_norm()
+
+    def forward(self, phone, phone_lengths, y, y_lengths, ds):  # ds is the speaker id, shape [bs, 1]
+        # g = self.emb_g(ds).unsqueeze(-1)  # [b, 256, 1]; the trailing 1 is the time axis, broadcast later
+        g = ds.unsqueeze(-1)
+        # m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)  # org
+        m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths, g=g)  # fang add
+        z, m_q, logs_q, y_mask = self.enc_q(y, y_lengths, g=g)
+        z_p = self.flow(z, y_mask, g=g)
+        z_slice, ids_slice = commons.rand_slice_segments(
+            z, y_lengths, self.segment_size
+        )
+        o = self.dec(z_slice, g=g)
+        return o, ids_slice, x_mask, y_mask, (z, z_p, m_p, logs_p, m_q, logs_q)
+
+    def infer(self, phone, phone_lengths, sid, rate=None):
+        # g = self.emb_g(sid).unsqueeze(-1)
+        g = sid.unsqueeze(-1).unsqueeze(0)
+        # m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths)
+        m_p, logs_p, x_mask = self.enc_p(phone, None, phone_lengths, g=g)  # fang add
+        z_p = (m_p + torch.exp(logs_p) * torch.randn_like(m_p) * 0.66666) * x_mask
+        if rate:
+            head = int(z_p.shape[2] * rate)
+            z_p = z_p[:, :, -head:]
+            x_mask = x_mask[:, :, -head:]
+        z = self.flow(z_p, x_mask, g=g, reverse=True)
+        o = self.dec(z * x_mask, g=g)
+        return o, x_mask, (z, z_p, m_p, logs_p)
+
+
+class MultiPeriodDiscriminator(torch.nn.Module):
+    def __init__(self, use_spectral_norm=False):
+        super(MultiPeriodDiscriminator, self).__init__()
+        periods = [2, 3, 5, 7, 11, 17]
+        # periods = [3, 5, 7, 11, 17, 23, 37]
+
+        discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)]
+        discs = discs + [
+            DiscriminatorP(i, use_spectral_norm=use_spectral_norm) for i in periods
+        ]
+        self.discriminators = nn.ModuleList(discs)
+
+    def forward(self, y, y_hat):
+        y_d_rs = []
+        y_d_gs = []
+        fmap_rs = []
+        fmap_gs = []
+        for i, d in enumerate(self.discriminators):
+            y_d_r, fmap_r = d(y)
+            y_d_g, fmap_g = d(y_hat)
+            # for j in range(len(fmap_r)):
+            #     print(i, j, y.shape, y_hat.shape, fmap_r[j].shape, fmap_g[j].shape)
+            y_d_rs.append(y_d_r)
+            y_d_gs.append(y_d_g)
+            fmap_rs.append(fmap_r)
+            fmap_gs.append(fmap_g)
+
+        return y_d_rs, y_d_gs, fmap_rs, fmap_gs
+
+
+class MultiPeriodDiscriminatorV2(torch.nn.Module):
+    def __init__(self, use_spectral_norm=False):
+        super(MultiPeriodDiscriminatorV2, self).__init__()
+        # periods = [2, 3, 5, 7, 11, 17]
+        periods = [2, 3, 5, 7, 11, 17, 23, 37]
+
+        discs = [DiscriminatorS(use_spectral_norm=use_spectral_norm)]
+        discs = discs + [
+            DiscriminatorP(i, use_spectral_norm=use_spectral_norm) for i in periods
+        ]
+        self.discriminators = nn.ModuleList(discs)
+
+    def forward(self, y, y_hat):
+        y_d_rs = []
+        y_d_gs = []
+        fmap_rs = []
+        fmap_gs = []
+        for i, d in enumerate(self.discriminators):
+            y_d_r, fmap_r = d(y)
+            y_d_g, fmap_g = d(y_hat)
+            # for j in range(len(fmap_r)):
+            #     print(i, j, y.shape, y_hat.shape, fmap_r[j].shape, fmap_g[j].shape)
+            y_d_rs.append(y_d_r)
+            y_d_gs.append(y_d_g)
+            fmap_rs.append(fmap_r)
+            fmap_gs.append(fmap_g)
+
+        return y_d_rs, y_d_gs, fmap_rs, fmap_gs
+
+
+class DiscriminatorS(torch.nn.Module):
+    def __init__(self, use_spectral_norm=False):
+        super(DiscriminatorS, self).__init__()
+        norm_f = spectral_norm if use_spectral_norm else weight_norm
+        self.convs = nn.ModuleList(
+            [
+                norm_f(Conv1d(1, 16, 15, 1, padding=7)),
+                norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)),
+                norm_f(Conv1d(64, 256, 41, 4, groups=16, padding=20)),
+                norm_f(Conv1d(256, 1024, 41, 4, groups=64, padding=20)),
+                norm_f(Conv1d(1024, 1024, 41, 4, groups=256, padding=20)),
+                norm_f(Conv1d(1024, 1024, 5, 1, padding=2)),
+            ]
+        )
+        self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1))
+
+    def forward(self, x):
+        fmap = []
+
+        for l in self.convs:
+            x = l(x)
+            x = F.leaky_relu(x, modules.LRELU_SLOPE)
+            fmap.append(x)
+        x = self.conv_post(x)
+        fmap.append(x)
+        x = torch.flatten(x, 1, -1)
+
+        return x, fmap
+
+
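+# DiscriminatorP below folds the 1-D waveform into a 2-D map of shape
+# (b, c, t // period, period), so its 2-D convolutions compare samples that are
+# exactly one period apart. Sketch of the reshape (values are illustrative):
+#
+#   import torch
+#   import torch.nn.functional as F
+#   x = torch.randn(1, 1, 16000)
+#   period = 3
+#   n_pad = period - (16000 % period)          # 2 -> pad 16000 to 16002
+#   x = F.pad(x, (0, n_pad), "reflect")
+#   x = x.view(1, 1, 16002 // period, period)  # (1, 1, 5334, 3)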
+class DiscriminatorP(torch.nn.Module):
+    def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
+        super(DiscriminatorP, self).__init__()
+        self.period = period
+        self.use_spectral_norm = use_spectral_norm
+        norm_f = spectral_norm if use_spectral_norm else weight_norm
+        self.convs = nn.ModuleList(
+            [
+                norm_f(
+                    Conv2d(
+                        1,
+                        32,
+                        (kernel_size, 1),
+                        (stride, 1),
+                        padding=(get_padding(kernel_size, 1), 0),
+                    )
+                ),
+                norm_f(
+                    Conv2d(
+                        32,
+                        128,
+                        (kernel_size, 1),
+                        (stride, 1),
+                        padding=(get_padding(kernel_size, 1), 0),
+                    )
+                ),
+                norm_f(
+                    Conv2d(
+                        128,
+                        512,
+                        (kernel_size, 1),
+                        (stride, 1),
+                        padding=(get_padding(kernel_size, 1), 0),
+                    )
+                ),
+                norm_f(
+                    Conv2d(
+                        512,
+                        1024,
+                        (kernel_size, 1),
+                        (stride, 1),
+                        padding=(get_padding(kernel_size, 1), 0),
+                    )
+                ),
+                norm_f(
+                    Conv2d(
+                        1024,
+                        1024,
+                        (kernel_size, 1),
+                        1,
+                        padding=(get_padding(kernel_size, 1), 0),
+                    )
+                ),
+            ]
+        )
+        self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))
+
+    def forward(self, x):
+        fmap = []
+
+        # 1d to 2d
+        b, c, t = x.shape
+        if t % self.period != 0:  # pad first
+            n_pad = self.period - (t % self.period)
+            x = F.pad(x, (0, n_pad), "reflect")
+            t = t + n_pad
+        x = x.view(b, c, t // self.period, self.period)
+
+        for l in self.convs:
+            x = l(x)
+            x = F.leaky_relu(x, modules.LRELU_SLOPE)
+            fmap.append(x)
+        x = self.conv_post(x)
+        fmap.append(x)
+        x = torch.flatten(x, 1, -1)
+
+        return x, fmap
diff --git a/AIMeiSheng/myinfer_multi_spk_embed_in_dec_diff_fi_meisheng.py b/AIMeiSheng/myinfer_multi_spk_embed_in_dec_diff_fi_meisheng.py
new file mode 100644
index 0000000..b14397a
--- /dev/null
+++ b/AIMeiSheng/myinfer_multi_spk_embed_in_dec_diff_fi_meisheng.py
@@ -0,0 +1,217 @@
+
+import os, sys, pdb, torch
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+import argparse
+import glob
+import sys
+import torch
+from multiprocessing import cpu_count
+
+
+class Config:
+    def __init__(self, device, is_half):
+        self.device = device
+        self.is_half = is_half
+        self.n_cpu = 0
+        self.gpu_name = None
+        self.gpu_mem = None
+        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
+
+    def device_config(self) -> tuple:
+        if torch.cuda.is_available():
+            i_device = int(self.device.split(":")[-1])
+            self.gpu_name = torch.cuda.get_device_name(i_device)
+            if (
+                ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
+                or "P40" in self.gpu_name.upper()
+                or "1060" in self.gpu_name
+                or "1070" in self.gpu_name
+                or "1080" in self.gpu_name
+            ):
+                print("16-series/10-series GPUs and the P40 are forced to single precision")
+                self.is_half = False
+                for config_file in ["32k.json", "40k.json", "48k.json"]:
+                    with open(f"configs/{config_file}", "r") as f:
+                        strr = f.read().replace("true", "false")
+                    with open(f"configs/{config_file}", "w") as f:
+                        f.write(strr)
+                with open("trainset_preprocess_pipeline_print.py", "r") as f:
+                    strr = f.read().replace("3.7", "3.0")
+                with open("trainset_preprocess_pipeline_print.py", "w") as f:
+                    f.write(strr)
+            else:
+                self.gpu_name = None
+            self.gpu_mem = int(
+                torch.cuda.get_device_properties(i_device).total_memory
+                / 1024
+                / 1024
+                / 1024
+                + 0.4
+            )
+            if self.gpu_mem <= 4:
+                with open("trainset_preprocess_pipeline_print.py", "r") as f:
+                    strr = f.read().replace("3.7", "3.0")
+                with open("trainset_preprocess_pipeline_print.py", "w") as f:
+                    f.write(strr)
+        elif torch.backends.mps.is_available():
+            print("No supported NVIDIA GPU found, using MPS for inference")
+            self.device = "mps"
+        else:
+            print("No supported NVIDIA GPU found, using CPU for inference")
+            self.device = "cpu"
+            self.is_half = True
+
+        if self.n_cpu == 0:
+            self.n_cpu = cpu_count()
+
+        if self.is_half:
+            # settings for 6 GB VRAM
+            x_pad = 3
+            x_query = 10
+            x_center = 80  # 60
+            x_max = 85  # 65
+        else:
+            # settings for 5 GB VRAM
+            x_pad = 1
+            x_query = 6
+            x_center = 38
+            x_max = 41
+
+        if self.gpu_mem is not None and self.gpu_mem <= 4:
+            x_pad = 1
+            x_query = 5
+            x_center = 30
+            x_max = 32
+
+        return x_pad, x_query, x_center, x_max
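+
+# A minimal usage sketch for Config (these are this script's own defaults; the
+# x_* values are the chunking parameters consumed by the VC pipeline below):
+#
+#   config = Config("cuda:0", is_half=True)
+#   print(config.x_pad, config.x_query, config.x_center, config.x_max)
+#   # on a half-precision GPU with more than 4 GB this prints: 3 10 80 85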
+
+
+index_path = "./logs/xusong_v2_org_version_multispk_charlie_puth_embed_in_dec_muloss_show/added_IVF614_Flat_nprobe_1_xusong_v2_org_version_multispk_charlie_puth_embed_in_dec_show_v2.index"
+# f0method="rmvpe"  # harvest or pm
+index_rate = float("0.0")  # index rate
+device = "cuda:0"
+is_half = True
+filter_radius = int(3)  # 3
+resample_sr = int(0)  # 0
+rms_mix_rate = float(1)  # RMS mix ratio; 1 means no mixing, other values blend
+protect = float(0.33)  # ??? 0.33 fang
+
+
+# print(sys.argv)
+config = Config(device, is_half)
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from vc_infer_pipeline_org_embed import VC
+from lib.infer_pack.models_embed_in_dec_diff_fi import (
+    SynthesizerTrnMs256NSFsid,
+    SynthesizerTrnMs256NSFsid_nono,
+    SynthesizerTrnMs768NSFsid,
+    SynthesizerTrnMs768NSFsid_nono,
+)
+from lib.audio import load_audio
+from fairseq import checkpoint_utils
+from scipy.io import wavfile
+
+
+# hubert_model=None
+def load_hubert():
+    # global hubert_model
+    models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"], suffix="")
+    # models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(["checkpoint_best_legacy_500.pt"], suffix="")
+    hubert_model = models[0]
+    hubert_model = hubert_model.to(device)
+    if is_half:
+        hubert_model = hubert_model.half()
+    else:
+        hubert_model = hubert_model.float()
+    hubert_model.eval()
+    return hubert_model
+
+
+def vc_single(sid, input_audio, f0_up_key, f0_file, f0_method, file_index, index_rate, hubert_model, paras):
+    global tgt_sr, net_g, vc, version
+    if input_audio is None:
+        return "You need to upload an audio", None
+    f0_up_key = int(f0_up_key)
+    # print("@@xxxf0_up_key:", f0_up_key)
+    audio = load_audio(input_audio, 16000)
+    if paras is not None:
+        st = int(paras['tst'] * 16000 / 1000)
+        en = len(audio)
+        if paras['tnd'] is not None:
+            en = min(en, int(paras['tnd'] * 16000 / 1000))
+        audio = audio[st:en]
+
+    times = [0, 0, 0]
+    if hubert_model is None:
+        hubert_model = load_hubert()
+    if_f0 = cpt.get("f0", 1)
+    audio_opt = vc.pipeline_mulprocess(hubert_model, net_g, sid, audio, input_audio, times, f0_up_key, f0_method, file_index, index_rate, if_f0, filter_radius, tgt_sr, resample_sr, rms_mix_rate, version, protect, f0_file=f0_file)
+
+    # print(times)
+    # print("@@using multi process")
+    return audio_opt
+
+
+def get_vc_core(model_path, is_half):
+    # print("loading pth %s" % model_path)
+    cpt = torch.load(model_path, map_location="cpu")
+    tgt_sr = cpt["config"][-1]
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
+    if_f0 = cpt.get("f0", 1)
+    version = cpt.get("version", "v1")
+    if version == "v1":
+        if if_f0 == 1:
+            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
+        else:
+            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+    elif version == "v2":
+        if if_f0 == 1:
+            net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=is_half)
+        else:
+            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+    # print("load model finished")
+    del net_g.enc_q
+    net_g.load_state_dict(cpt["weight"], strict=False)
+    # print("load net_g finished")
+
+    return tgt_sr, net_g, cpt, version
+
+
+def get_vc1(model_path, is_half):
+    tgt_sr, net_g, cpt, version = get_vc_core(model_path, is_half)
+
+    net_g.eval().to(device)
+    if is_half:
+        net_g = net_g.half()
+    else:
+        net_g = net_g.float()
+    vc = VC(tgt_sr, config)
+    n_spk = cpt["config"][-3]
+    return
+
+
+def get_rmvpe():
+    from lib.rmvpe import RMVPE
+    global f0_method
+    # print("loading rmvpe model")
+    f0_method = RMVPE(
+        "rmvpe.pt", is_half=True, device='cuda:0'
+    )
+    return f0_method
+
+
+def get_vc(model_path):
+    global n_spk, tgt_sr, net_g, vc, cpt, device, is_half, version
+    tgt_sr, net_g, cpt, version = get_vc_core(model_path, is_half)
+
+    net_g.eval().to(device)
+    if is_half:
+        net_g = net_g.half()
+    else:
+        net_g = net_g.float()
+    vc = VC(tgt_sr, config)
+    n_spk = cpt["config"][-3]
+    # return {"visible": True, "maximum": n_spk, "__type__": "update"}
+    # return net_g
+
+
+def svc_main(input_path, opt_path, model_path, sid_embed, f0up_key=0, hubert_model=None, paras=None):
+    # print("sid_embed: ", sid_embed)
+    wav_opt = vc_single(sid_embed, input_path, f0up_key, None, f0_method, index_path, index_rate, hubert_model, paras)
+    # print("out_path: ", opt_path)
+    wavfile.write(opt_path, tgt_sr, wav_opt)
+
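+
+# End-to-end usage sketch for this module (all functions are defined above; the
+# model path is a placeholder, and sid_embed is the speaker embedding that
+# vc_single forwards as sid in this embed-in-dec variant):
+#
+#   get_vc("weights/some_model.pth")   # fills the net_g / vc / tgt_sr globals
+#   get_rmvpe()                        # fills the global f0_method with RMVPE
+#   hubert = load_hubert()
+#   svc_main("vocal_in.wav", "vocal_out.wav", "weights/some_model.pth",
+#            sid_embed, f0up_key=0, hubert_model=hubert)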
+
+
diff --git a/AIMeiSheng/myinfer_multi_spk_embed_in_dec_diff_meisheng.py b/AIMeiSheng/myinfer_multi_spk_embed_in_dec_diff_meisheng.py
new file mode 100644
index 0000000..2b0633e
--- /dev/null
+++ b/AIMeiSheng/myinfer_multi_spk_embed_in_dec_diff_meisheng.py
@@ -0,0 +1,244 @@
+'''
+v1
+runtime\python.exe myinfer-v2-0528.py 0 "E:\codes\py39\RVC-beta\todo-songs\1111.wav" "E:\codes\py39\logs\mi-test\added_IVF677_Flat_nprobe_7.index" harvest "test.wav" "E:\codes\py39\test-20230416b\weights\mi-test.pth" 0.66 cuda:0 True 3 0 1 0.33
+v2
+runtime\python.exe myinfer-v2-0528.py 0 "E:\codes\py39\RVC-beta\todo-songs\1111.wav" "E:\codes\py39\test-20230416b\logs\mi-test-v2\aadded_IVF677_Flat_nprobe_1_v2.index" harvest "test_v2.wav" "E:\codes\py39\test-20230416b\weights\mi-test-v2.pth" 0.66 cuda:0 True 3 0 1 0.33
+'''
+import os, sys, pdb, torch
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+import argparse
+import glob
+import sys
+import torch
+from multiprocessing import cpu_count
+
+
+class Config:
+    def __init__(self, device, is_half):
+        self.device = device
+        self.is_half = is_half
+        self.n_cpu = 0
+        self.gpu_name = None
+        self.gpu_mem = None
+        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
+
+    def device_config(self) -> tuple:
+        if torch.cuda.is_available():
+            i_device = int(self.device.split(":")[-1])
+            self.gpu_name = torch.cuda.get_device_name(i_device)
+            if (
+                ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
+                or "P40" in self.gpu_name.upper()
+                or "1060" in self.gpu_name
+                or "1070" in self.gpu_name
+                or "1080" in self.gpu_name
+            ):
+                print("16-series/10-series GPUs and the P40 are forced to single precision")
+                self.is_half = False
+                for config_file in ["32k.json", "40k.json", "48k.json"]:
+                    with open(f"configs/{config_file}", "r") as f:
+                        strr = f.read().replace("true", "false")
+                    with open(f"configs/{config_file}", "w") as f:
+                        f.write(strr)
+                with open("trainset_preprocess_pipeline_print.py", "r") as f:
+                    strr = f.read().replace("3.7", "3.0")
+                with open("trainset_preprocess_pipeline_print.py", "w") as f:
+                    f.write(strr)
+            else:
+                self.gpu_name = None
+            self.gpu_mem = int(
+                torch.cuda.get_device_properties(i_device).total_memory
+                / 1024
+                / 1024
+                / 1024
+                + 0.4
+            )
+            if self.gpu_mem <= 4:
+                with open("trainset_preprocess_pipeline_print.py", "r") as f:
+                    strr = f.read().replace("3.7", "3.0")
+                with open("trainset_preprocess_pipeline_print.py", "w") as f:
+                    f.write(strr)
+        elif torch.backends.mps.is_available():
+            print("No supported NVIDIA GPU found, using MPS for inference")
+            self.device = "mps"
+        else:
+            print("No supported NVIDIA GPU found, using CPU for inference")
+            self.device = "cpu"
+            self.is_half = True
+
+        if self.n_cpu == 0:
+            self.n_cpu = cpu_count()
+
+        if self.is_half:
+            # settings for 6 GB VRAM
+            x_pad = 3
+            x_query = 10
+            x_center = 60
+            x_max = 65
+        else:
+            # settings for 5 GB VRAM
+            x_pad = 1
+            x_query = 6
+            x_center = 38
+            x_max = 41
+
+        if self.gpu_mem is not None and self.gpu_mem <= 4:
+            x_pad = 1
+            x_query = 5
+            x_center = 30
+            x_max = 32
+
+        return x_pad, x_query, x_center, x_max
+#
+# f0up_key=sys.argv[1] #speaker id
+# input_path=sys.argv[2] #wav input
+# index_path=sys.argv[3] #index file
+# f0method=sys.argv[4] #harvest or pm
+# opt_path=sys.argv[5] #out wav
+# model_path=sys.argv[6] #model net_g
+# index_rate=float(sys.argv[7]) #index rate
+# device=sys.argv[8]
+# is_half=bool(sys.argv[9])
+# filter_radius=int(sys.argv[10]) ##3
+# resample_sr=int(sys.argv[11]) # 0
+# rms_mix_rate=float(sys.argv[12]) # RMS mix ratio; 1 means no mixing
+# protect=float(sys.argv[13]) ## ??? 0.33 fang
+# #sid=float(sys.argv[14])
+# sid_embed=sys.argv[14]
+
+
+index_path = "./logs/xusong_v2_org_version_multispk_charlie_puth_embed_in_dec_muloss_show/added_IVF614_Flat_nprobe_1_xusong_v2_org_version_multispk_charlie_puth_embed_in_dec_show_v2.index"
+f0method = "rmvpe"  # harvest or pm
+index_rate = float("0.0")  # index rate
+device = "cuda:0"
+is_half = True
+filter_radius = int(3)  # 3
+resample_sr = int(0)  # 0
+rms_mix_rate = float(1)  # RMS mix ratio; 1 means no mixing, other values blend
+protect = float(0.33)  # ??? 0.33 fang
+
+
+# f0up_key=sys.argv[1] #speaker id
+# input_path=sys.argv[2] #wav input
+# opt_path=sys.argv[5] #out wav
+# model_path=sys.argv[6] #model net_g
+# sid_embed=sys.argv[14]
+
+# print(sys.argv)
+config = Config(device, is_half)
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+from vc_infer_pipeline_org_embed import VC
+from lib.infer_pack.models_embed_in_dec_diff import (
+    SynthesizerTrnMs256NSFsid,
+    SynthesizerTrnMs256NSFsid_nono,
+    SynthesizerTrnMs768NSFsid,
+    SynthesizerTrnMs768NSFsid_nono,
+)
+from lib.audio import load_audio
+from fairseq import checkpoint_utils
+from scipy.io import wavfile
+
+
+# hubert_model=None
+def load_hubert():
+    # global hubert_model
+    models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(["hubert_base.pt"], suffix="")
+    # models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(["checkpoint_best_legacy_500.pt"], suffix="")
+    hubert_model = models[0]
+    hubert_model = hubert_model.to(device)
+    if is_half:
+        hubert_model = hubert_model.half()
+    else:
+        hubert_model = hubert_model.float()
+    hubert_model.eval()
+    return hubert_model
+
+
+def vc_single(sid, input_audio, f0_up_key, f0_file, f0_method, file_index, index_rate, hubert_model):
+    global tgt_sr, net_g, vc, version
+    if input_audio is None:
+        return "You need to upload an audio", None
+    f0_up_key = int(f0_up_key)
+    print("@@xxxf0_up_key:", f0_up_key)
+    audio = load_audio(input_audio, 16000)
+    times = [0, 0, 0]
+    if hubert_model is None:
+        hubert_model = load_hubert()
+    if_f0 = cpt.get("f0", 1)
+    # audio_opt = vc.pipeline(hubert_model, net_g, sid, audio, times, f0_up_key, f0_method, file_index, file_big_npy, index_rate, if_f0, f0_file=f0_file)
+    # net_g.summary()
+    # hubert_model.summary()
+    audio_opt = vc.pipeline(hubert_model, net_g, sid, audio, input_audio, times, f0_up_key, f0_method, file_index, index_rate, if_f0, filter_radius, tgt_sr, resample_sr, rms_mix_rate, version, protect, f0_file=f0_file)
+    print(times)
+    return audio_opt
+
+
+def get_vc_core(model_path, is_half):
+    # print("loading pth %s" % model_path)
+    cpt = torch.load(model_path, map_location="cpu")
+    tgt_sr = cpt["config"][-1]
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
+    if_f0 = cpt.get("f0", 1)
+    print("if_f0: ", if_f0)
+    version = cpt.get("version", "v1")
+    print("@@@version:", version)
+    if version == "v1":
+        if if_f0 == 1:
+            net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=is_half)
+        else:
+            net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+    elif version == "v2":
+        if if_f0 == 1:
+            net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=is_half)
+        else:
+            net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+    print("load model finished")
+    del net_g.enc_q
+    print(net_g.load_state_dict(cpt["weight"], strict=False))  # without this line the state dict does not load cleanly -- bizarre
+    print("load net_g finished")
+
+    return tgt_sr, net_g, cpt, version
+
+
+def
get_vc1(model_path, is_half):
+    tgt_sr, net_g, cpt, version = get_vc_core(model_path, is_half)
+
+    net_g.eval().to(device)
+    if is_half:
+        net_g = net_g.half()
+    else:
+        net_g = net_g.float()
+    vc = VC(tgt_sr, config)
+    n_spk = cpt["config"][-3]
+    return
+
+
+def get_rmvpe():
+    from lib.rmvpe import RMVPE
+    global f0_method
+    print("loading rmvpe model")
+    f0_method = RMVPE(
+        "rmvpe.pt", is_half=True, device='cuda:0'
+    )
+    return f0_method
+
+
+def get_vc(model_path):
+    global n_spk, tgt_sr, net_g, vc, cpt, device, is_half, version
+    tgt_sr, net_g, cpt, version = get_vc_core(model_path, is_half)
+
+    net_g.eval().to(device)
+    if is_half:
+        net_g = net_g.half()
+    else:
+        net_g = net_g.float()
+    vc = VC(tgt_sr, config)
+    n_spk = cpt["config"][-3]
+    # return {"visible": True, "maximum": n_spk, "__type__": "update"}
+    # return net_g
+
+
+# f0up_key=sys.argv[1] #speaker id
+# input_path=sys.argv[2] #wav input
+# opt_path=sys.argv[5] #out wav
+# model_path=sys.argv[6] #model net_g
+def svc_main(input_path, opt_path, model_path, sid_embed, f0up_key=0, hubert_model=None):
+    # get_vc(model_path)
+    print("sid_embed: ", sid_embed)
+    # wav_opt = vc_single(sid_embed, input_path, f0up_key, None, f0method, index_path, index_rate, hubert_model)
+    wav_opt = vc_single(sid_embed, input_path, f0up_key, None, f0_method, index_path, index_rate, hubert_model)
+    wavfile.write(opt_path, tgt_sr, wav_opt)
+
diff --git a/AIMeiSheng/poetry.lock b/AIMeiSheng/poetry.lock
new file mode 100644
index 0000000..021e7ee
--- /dev/null
+++ b/AIMeiSheng/poetry.lock
@@ -0,0 +1,3871 @@
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+
+[[package]]
+name = "absl-py"
+version = "1.4.0"
+description = "Abseil Python Common Libraries, see https://github.com/abseil/abseil-py."
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "absl-py-1.4.0.tar.gz", hash = "sha256:d2c244d01048ba476e7c080bd2c6df5e141d211de80223460d5b3b8a2a58433d"},
+    {file = "absl_py-1.4.0-py3-none-any.whl", hash = "sha256:0d3fe606adfa4f7db64792dd4c7aee4ee0c38ab75dfd353b7a83ed3e957fcb47"},
+]
+
+[[package]]
+name = "aiofiles"
+version = "23.1.0"
+description = "File support for asyncio."
+optional = false +python-versions = ">=3.7,<4.0" +files = [ + {file = "aiofiles-23.1.0-py3-none-any.whl", hash = "sha256:9312414ae06472eb6f1d163f555e466a23aed1c8f60c30cccf7121dba2e53eb2"}, + {file = "aiofiles-23.1.0.tar.gz", hash = "sha256:edd247df9a19e0db16534d4baaf536d6609a43e1de5401d7a4c1c148753a1635"}, +] + +[[package]] +name = "aiohttp" +version = "3.8.5" +description = "Async http client/server framework (asyncio)" +optional = false +python-versions = ">=3.6" +files = [ + {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a94159871304770da4dd371f4291b20cac04e8c94f11bdea1c3478e557fbe0d8"}, + {file = "aiohttp-3.8.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:13bf85afc99ce6f9ee3567b04501f18f9f8dbbb2ea11ed1a2e079670403a7c84"}, + {file = "aiohttp-3.8.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2ce2ac5708501afc4847221a521f7e4b245abf5178cf5ddae9d5b3856ddb2f3a"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96943e5dcc37a6529d18766597c491798b7eb7a61d48878611298afc1fca946c"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ad5c3c4590bb3cc28b4382f031f3783f25ec223557124c68754a2231d989e2b"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0c413c633d0512df4dc7fd2373ec06cc6a815b7b6d6c2f208ada7e9e93a5061d"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df72ac063b97837a80d80dec8d54c241af059cc9bb42c4de68bd5b61ceb37caa"}, + {file = "aiohttp-3.8.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c48c5c0271149cfe467c0ff8eb941279fd6e3f65c9a388c984e0e6cf57538e14"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:368a42363c4d70ab52c2c6420a57f190ed3dfaca6a1b19afda8165ee16416a82"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7607ec3ce4993464368505888af5beb446845a014bc676d349efec0e05085905"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:0d21c684808288a98914e5aaf2a7c6a3179d4df11d249799c32d1808e79503b5"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:312fcfbacc7880a8da0ae8b6abc6cc7d752e9caa0051a53d217a650b25e9a691"}, + {file = "aiohttp-3.8.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ad093e823df03bb3fd37e7dec9d4670c34f9e24aeace76808fc20a507cace825"}, + {file = "aiohttp-3.8.5-cp310-cp310-win32.whl", hash = "sha256:33279701c04351a2914e1100b62b2a7fdb9a25995c4a104259f9a5ead7ed4802"}, + {file = "aiohttp-3.8.5-cp310-cp310-win_amd64.whl", hash = "sha256:6e4a280e4b975a2e7745573e3fc9c9ba0d1194a3738ce1cbaa80626cc9b4f4df"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ae871a964e1987a943d83d6709d20ec6103ca1eaf52f7e0d36ee1b5bebb8b9b9"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:461908b2578955045efde733719d62f2b649c404189a09a632d245b445c9c975"}, + {file = "aiohttp-3.8.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:72a860c215e26192379f57cae5ab12b168b75db8271f111019509a1196dfc780"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc14be025665dba6202b6a71cfcdb53210cc498e50068bc088076624471f8bb9"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:8af740fc2711ad85f1a5c034a435782fbd5b5f8314c9a3ef071424a8158d7f6b"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:841cd8233cbd2111a0ef0a522ce016357c5e3aff8a8ce92bcfa14cef890d698f"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ed1c46fb119f1b59304b5ec89f834f07124cd23ae5b74288e364477641060ff"}, + {file = "aiohttp-3.8.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:84f8ae3e09a34f35c18fa57f015cc394bd1389bce02503fb30c394d04ee6b938"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62360cb771707cb70a6fd114b9871d20d7dd2163a0feafe43fd115cfe4fe845e"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:23fb25a9f0a1ca1f24c0a371523546366bb642397c94ab45ad3aedf2941cec6a"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:b0ba0d15164eae3d878260d4c4df859bbdc6466e9e6689c344a13334f988bb53"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5d20003b635fc6ae3f96d7260281dfaf1894fc3aa24d1888a9b2628e97c241e5"}, + {file = "aiohttp-3.8.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0175d745d9e85c40dcc51c8f88c74bfbaef9e7afeeeb9d03c37977270303064c"}, + {file = "aiohttp-3.8.5-cp311-cp311-win32.whl", hash = "sha256:2e1b1e51b0774408f091d268648e3d57f7260c1682e7d3a63cb00d22d71bb945"}, + {file = "aiohttp-3.8.5-cp311-cp311-win_amd64.whl", hash = "sha256:043d2299f6dfdc92f0ac5e995dfc56668e1587cea7f9aa9d8a78a1b6554e5755"}, + {file = "aiohttp-3.8.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cae533195e8122584ec87531d6df000ad07737eaa3c81209e85c928854d2195c"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f21e83f355643c345177a5d1d8079f9f28b5133bcd154193b799d380331d5d3"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a7a75ef35f2df54ad55dbf4b73fe1da96f370e51b10c91f08b19603c64004acc"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e2e9839e14dd5308ee773c97115f1e0a1cb1d75cbeeee9f33824fa5144c7634"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c44e65da1de4403d0576473e2344828ef9c4c6244d65cf4b75549bb46d40b8dd"}, + {file = "aiohttp-3.8.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78d847e4cde6ecc19125ccbc9bfac4a7ab37c234dd88fbb3c5c524e8e14da543"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:c7a815258e5895d8900aec4454f38dca9aed71085f227537208057853f9d13f2"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:8b929b9bd7cd7c3939f8bcfffa92fae7480bd1aa425279d51a89327d600c704d"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:5db3a5b833764280ed7618393832e0853e40f3d3e9aa128ac0ba0f8278d08649"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:a0215ce6041d501f3155dc219712bc41252d0ab76474615b9700d63d4d9292af"}, + {file = "aiohttp-3.8.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:fd1ed388ea7fbed22c4968dd64bab0198de60750a25fe8c0c9d4bef5abe13824"}, + {file = "aiohttp-3.8.5-cp36-cp36m-win32.whl", hash = "sha256:6e6783bcc45f397fdebc118d772103d751b54cddf5b60fbcc958382d7dd64f3e"}, + {file = "aiohttp-3.8.5-cp36-cp36m-win_amd64.whl", hash = 
"sha256:b5411d82cddd212644cf9360879eb5080f0d5f7d809d03262c50dad02f01421a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:01d4c0c874aa4ddfb8098e85d10b5e875a70adc63db91f1ae65a4b04d3344cda"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5980a746d547a6ba173fd5ee85ce9077e72d118758db05d229044b469d9029a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a482e6da906d5e6e653be079b29bc173a48e381600161c9932d89dfae5942ef"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80bd372b8d0715c66c974cf57fe363621a02f359f1ec81cba97366948c7fc873"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1161b345c0a444ebcf46bf0a740ba5dcf50612fd3d0528883fdc0eff578006a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cd56db019015b6acfaaf92e1ac40eb8434847d9bf88b4be4efe5bfd260aee692"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:153c2549f6c004d2754cc60603d4668899c9895b8a89397444a9c4efa282aaf4"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:4a01951fabc4ce26ab791da5f3f24dca6d9a6f24121746eb19756416ff2d881b"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bfb9162dcf01f615462b995a516ba03e769de0789de1cadc0f916265c257e5d8"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:7dde0009408969a43b04c16cbbe252c4f5ef4574ac226bc8815cd7342d2028b6"}, + {file = "aiohttp-3.8.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:4149d34c32f9638f38f544b3977a4c24052042affa895352d3636fa8bffd030a"}, + {file = "aiohttp-3.8.5-cp37-cp37m-win32.whl", hash = "sha256:68c5a82c8779bdfc6367c967a4a1b2aa52cd3595388bf5961a62158ee8a59e22"}, + {file = "aiohttp-3.8.5-cp37-cp37m-win_amd64.whl", hash = "sha256:2cf57fb50be5f52bda004b8893e63b48530ed9f0d6c96c84620dc92fe3cd9b9d"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:eca4bf3734c541dc4f374ad6010a68ff6c6748f00451707f39857f429ca36ced"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1274477e4c71ce8cfe6c1ec2f806d57c015ebf84d83373676036e256bc55d690"}, + {file = "aiohttp-3.8.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c543e54710d6158fc6f439296c7865b29e0b616629767e685a7185fab4a6b9"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:910bec0c49637d213f5d9877105d26e0c4a4de2f8b1b29405ff37e9fc0ad52b8"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5443910d662db951b2e58eb70b0fbe6b6e2ae613477129a5805d0b66c54b6cb7"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2e460be6978fc24e3df83193dc0cc4de46c9909ed92dd47d349a452ef49325b7"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb1558def481d84f03b45888473fc5a1f35747b5f334ef4e7a571bc0dfcb11f8"}, + {file = "aiohttp-3.8.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34dd0c107799dcbbf7d48b53be761a013c0adf5571bf50c4ecad5643fe9cfcd0"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:aa1990247f02a54185dc0dff92a6904521172a22664c863a03ff64c42f9b5410"}, + {file = 
"aiohttp-3.8.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0e584a10f204a617d71d359fe383406305a4b595b333721fa50b867b4a0a1548"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:a3cf433f127efa43fee6b90ea4c6edf6c4a17109d1d037d1a52abec84d8f2e42"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:c11f5b099adafb18e65c2c997d57108b5bbeaa9eeee64a84302c0978b1ec948b"}, + {file = "aiohttp-3.8.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:84de26ddf621d7ac4c975dbea4c945860e08cccde492269db4e1538a6a6f3c35"}, + {file = "aiohttp-3.8.5-cp38-cp38-win32.whl", hash = "sha256:ab88bafedc57dd0aab55fa728ea10c1911f7e4d8b43e1d838a1739f33712921c"}, + {file = "aiohttp-3.8.5-cp38-cp38-win_amd64.whl", hash = "sha256:5798a9aad1879f626589f3df0f8b79b3608a92e9beab10e5fda02c8a2c60db2e"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a6ce61195c6a19c785df04e71a4537e29eaa2c50fe745b732aa937c0c77169f3"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:773dd01706d4db536335fcfae6ea2440a70ceb03dd3e7378f3e815b03c97ab51"}, + {file = "aiohttp-3.8.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f83a552443a526ea38d064588613aca983d0ee0038801bc93c0c916428310c28"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f7372f7341fcc16f57b2caded43e81ddd18df53320b6f9f042acad41f8e049a"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea353162f249c8097ea63c2169dd1aa55de1e8fecbe63412a9bc50816e87b761"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d47ae48db0b2dcf70bc8a3bc72b3de86e2a590fc299fdbbb15af320d2659de"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d827176898a2b0b09694fbd1088c7a31836d1a505c243811c87ae53a3f6273c1"}, + {file = "aiohttp-3.8.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3562b06567c06439d8b447037bb655ef69786c590b1de86c7ab81efe1c9c15d8"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4e874cbf8caf8959d2adf572a78bba17cb0e9d7e51bb83d86a3697b686a0ab4d"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:6809a00deaf3810e38c628e9a33271892f815b853605a936e2e9e5129762356c"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:33776e945d89b29251b33a7e7d006ce86447b2cfd66db5e5ded4e5cd0340585c"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:eaeed7abfb5d64c539e2db173f63631455f1196c37d9d8d873fc316470dfbacd"}, + {file = "aiohttp-3.8.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e91d635961bec2d8f19dfeb41a539eb94bd073f075ca6dae6c8dc0ee89ad6f91"}, + {file = "aiohttp-3.8.5-cp39-cp39-win32.whl", hash = "sha256:00ad4b6f185ec67f3e6562e8a1d2b69660be43070bd0ef6fcec5211154c7df67"}, + {file = "aiohttp-3.8.5-cp39-cp39-win_amd64.whl", hash = "sha256:c0a9034379a37ae42dea7ac1e048352d96286626251862e448933c0f59cbd79c"}, + {file = "aiohttp-3.8.5.tar.gz", hash = "sha256:b9552ec52cc147dbf1944ac7ac98af7602e51ea2dcd076ed194ca3c0d1c7d0bc"}, +] + +[package.dependencies] +aiosignal = ">=1.1.2" +async-timeout = ">=4.0.0a3,<5.0" +attrs = ">=17.3.0" +charset-normalizer = ">=2.0,<4.0" +frozenlist = ">=1.1.1" +multidict = ">=4.5,<7.0" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["Brotli", "aiodns", "cchardet"] + +[[package]] +name = "aiosignal" 
+version = "1.3.1" +description = "aiosignal: a list of registered asynchronous callbacks" +optional = false +python-versions = ">=3.7" +files = [ + {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, + {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, +] + +[package.dependencies] +frozenlist = ">=1.1.0" + +[[package]] +name = "altair" +version = "5.0.1" +description = "Vega-Altair: A declarative statistical visualization library for Python." +optional = false +python-versions = ">=3.7" +files = [ + {file = "altair-5.0.1-py3-none-any.whl", hash = "sha256:9f3552ed5497d4dfc14cf48a76141d8c29ee56eae2873481b4b28134268c9bbe"}, + {file = "altair-5.0.1.tar.gz", hash = "sha256:087d7033cb2d6c228493a053e12613058a5d47faf6a36aea3ff60305fd8b4cb0"}, +] + +[package.dependencies] +jinja2 = "*" +jsonschema = ">=3.0" +numpy = "*" +pandas = ">=0.18" +toolz = "*" +typing-extensions = {version = ">=4.0.1", markers = "python_version < \"3.11\""} + +[package.extras] +dev = ["black (<24)", "hatch", "ipython", "m2r", "mypy", "pandas-stubs", "pytest", "pytest-cov", "ruff", "types-jsonschema", "types-setuptools", "vega-datasets", "vl-convert-python"] +doc = ["docutils", "geopandas", "jinja2", "myst-parser", "numpydoc", "pillow", "pydata-sphinx-theme", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinxext-altair"] + +[[package]] +name = "annotated-types" +version = "0.5.0" +description = "Reusable constraint types to use with typing.Annotated" +optional = false +python-versions = ">=3.7" +files = [ + {file = "annotated_types-0.5.0-py3-none-any.whl", hash = "sha256:58da39888f92c276ad970249761ebea80ba544b77acddaa1a4d6cf78287d45fd"}, + {file = "annotated_types-0.5.0.tar.gz", hash = "sha256:47cdc3490d9ac1506ce92c7aaa76c579dc3509ff11e098fc867e5130ab7be802"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""} + +[[package]] +name = "antlr4-python3-runtime" +version = "4.8" +description = "ANTLR 4.8 runtime for Python 3.7" +optional = false +python-versions = "*" +files = [ + {file = "antlr4-python3-runtime-4.8.tar.gz", hash = "sha256:15793f5d0512a372b4e7d2284058ad32ce7dd27126b105fb0b2245130445db33"}, +] + +[[package]] +name = "anyio" +version = "3.7.1" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.7" +files = [ + {file = "anyio-3.7.1-py3-none-any.whl", hash = "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5"}, + {file = "anyio-3.7.1.tar.gz", hash = "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780"}, +] + +[package.dependencies] +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["Sphinx", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-jquery"] +test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (<0.22)"] + +[[package]] +name = "async-timeout" +version = "4.0.2" +description = "Timeout context manager for asyncio programs" +optional = false +python-versions = ">=3.6" +files = [ + {file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"}, + {file = 
"async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"}, +] + +[[package]] +name = "attrs" +version = "23.1.0" +description = "Classes Without Boilerplate" +optional = false +python-versions = ">=3.7" +files = [ + {file = "attrs-23.1.0-py3-none-any.whl", hash = "sha256:1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04"}, + {file = "attrs-23.1.0.tar.gz", hash = "sha256:6279836d581513a26f1bf235f9acd333bc9115683f14f7e8fae46c98fc50e015"}, +] + +[package.extras] +cov = ["attrs[tests]", "coverage[toml] (>=5.3)"] +dev = ["attrs[docs,tests]", "pre-commit"] +docs = ["furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier", "zope-interface"] +tests = ["attrs[tests-no-zope]", "zope-interface"] +tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] + +[[package]] +name = "audioread" +version = "3.0.0" +description = "multi-library, cross-platform audio decoding" +optional = false +python-versions = ">=3.6" +files = [ + {file = "audioread-3.0.0.tar.gz", hash = "sha256:121995bd207eb1fda3d566beb851d3534275925bc35a4fb6da0cb11de0f7251a"}, +] + +[[package]] +name = "bitarray" +version = "2.8.0" +description = "efficient arrays of booleans -- C extension" +optional = false +python-versions = "*" +files = [ + {file = "bitarray-2.8.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8d59ddee615c64a8c37c5bfd48ceea5b88d8808f90234e9154e1e209981a4683"}, + {file = "bitarray-2.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cd151c59b3756b05d8d616230211e0fb9ee10826b080f51f3e0bf85775027f8c"}, + {file = "bitarray-2.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:16b6144c30aa6661787a25e489335065e44fc4f74518e1e66e4591d669460516"}, + {file = "bitarray-2.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b8c607bfcb43c8230e24c18c368c9773cf37040fb14355ecbc51ad7b7b89be5a"}, + {file = "bitarray-2.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7cd2df3c507ee85219b38e2812174ba8236a77a729f6d9ba3f66faed8661dc3b"}, + {file = "bitarray-2.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:323d1b9710d1ef320c0b6c1f3d422355b8c371f4c898d0a9d9acb46586fd30d4"}, + {file = "bitarray-2.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d4723b41afbd3574d3a72a383f80112aeceaeebbe6204b1e0ac8d4d7f2353b2"}, + {file = "bitarray-2.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:28dced57e7ee905f0a6287b6288d220d35d0c52ea925d2461b4eef5c16a40263"}, + {file = "bitarray-2.8.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:f4916b09f5dafe74133224956ce72399de1be7ca7b4726ce7bf8aac93f9b0ab6"}, + {file = "bitarray-2.8.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:524b5898248b47a1f39cd54ab739e823bb6469d4b3619e84f246b654a2239262"}, + {file = "bitarray-2.8.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:37fe92915561dd688ff450235ce75faa6679940c78f7e002ebc092aa71cadce9"}, + {file = "bitarray-2.8.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:a13d7cfdbcc5604670abb1faaa8e2082b4ce70475922f07bbee3cd999b092698"}, + {file = "bitarray-2.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ba2870bc136b2e76d02a64621e5406daf97b3a333287132344d4029d91ad4197"}, + {file = "bitarray-2.8.0-cp310-cp310-win32.whl", hash = 
"sha256:432ff0eaf79414df582be023748d48c9b3a7d20cead494b7bc70a66cb62fb34f"}, + {file = "bitarray-2.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:fb33df6bbe32d2146229e7ad885f654adc1484c7f734633e6dba2af88000b947"}, + {file = "bitarray-2.8.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e1df5bc9768861178632dab044725ad305170161c08e9aa1d70b074287d5cbd3"}, + {file = "bitarray-2.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5ff04386b9868cc5961d95c84a8389f5fc4e3a2cbea52499a907deea13f16ae4"}, + {file = "bitarray-2.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:cd0a807a04e69aa9e4ea3314b43beb120dad231fce55c718aa00691595df628f"}, + {file = "bitarray-2.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ddb75bd9bfbdff5231f0218e7cd4fd72653dc0c7baa782c3a95ff3dac4d5556"}, + {file = "bitarray-2.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:599a57c5f0082311bccf7b35a3eaa4fdca7bf59179cb45958a6a418a9b8339d1"}, + {file = "bitarray-2.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:86a563fa4d2bfb2394ac21f71f8e8bb1d606d030b003398efe37c5323df664aa"}, + {file = "bitarray-2.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:561e6b5a8f4498240f34de67dc672f7a6867c6f28681574a41dc73bb4451b0cb"}, + {file = "bitarray-2.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8d5fc3e73f189daf8f351fefdbad77a6f4edc5ad001aca4a541615322dbe8ee9"}, + {file = "bitarray-2.8.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:84137be7d55bed08e3ef507b0bde8311290bf92fba5a9d05069b0d1910217f16"}, + {file = "bitarray-2.8.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d6b0ce7a00a1b886e2410c20e089f3c701bc179429c681060419bbbf6ea263b7"}, + {file = "bitarray-2.8.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:f06680947298dca47437a79660c69db6442570dd492e8066ab3bf7166246dee1"}, + {file = "bitarray-2.8.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:b101a770d11b4fb0493e649cf3160d8de582e32e517ff3a7d024fad2e6ffe9e1"}, + {file = "bitarray-2.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3a83eedc91f88d31e1e7e386bd7bf65eacd5064af95d5b1ccd512bef3d516a4b"}, + {file = "bitarray-2.8.0-cp311-cp311-win32.whl", hash = "sha256:1f90c59309f7208792f46d84adac58d8fdf6db3b1479b40e6386dd39a12950eb"}, + {file = "bitarray-2.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:b70caaec1eece68411dfeded34466ad259e852ac4be8ee4001ee7dea4b37a5b2"}, + {file = "bitarray-2.8.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:181394e0da1817d7a72a9b6cad6a77f6cfac5aa70007e21aadfa702fcf0d89eb"}, + {file = "bitarray-2.8.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e3636c073b501029256fda1546020b60e0af572a9a5b11f5c50c855113b1fbc"}, + {file = "bitarray-2.8.0-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:40e6047a049595147518e6fe40759e609559799402efade093a3b67cda9e7ea9"}, + {file = "bitarray-2.8.0-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:74dd172224a2e9fea2818a0d8c892b273fa6de434b953b97a2252572fcf01fb3"}, + {file = "bitarray-2.8.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03425503093f28445b7e8c7df5faf2a704e32ee69c80e6dc5518ccea0b876ac9"}, + {file = "bitarray-2.8.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:089c707a4997b49cd3a4fb9a4239a9b0aaac59cc937dfa84c9a6862f08634d6f"}, + {file = "bitarray-2.8.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:1dfa4b66779ea4bba23ca655edbdd7e8c839daea160c6a1f1c1e6587fb8c79af"}, + {file = "bitarray-2.8.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:8a6593023d03dc71f015efba1ce9319982a49add363050a3e298904ca19b60ef"}, + {file = "bitarray-2.8.0-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:93c5937df1bfbfb17ee17c7717b49cbe04d88fa5d9dcfc1846914318dcf0135b"}, + {file = "bitarray-2.8.0-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:67af0a5f32ec1de99c6baaa2359c47adac245fda20969c169da9b03dacb48fb7"}, + {file = "bitarray-2.8.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:4b6650d05ebb92379465393bd279d298ff0a13fbf23bacbd1bcb20d202fccc67"}, + {file = "bitarray-2.8.0-cp36-cp36m-win32.whl", hash = "sha256:b3381e75bb34ca0f455c4a0ac3625e5d9472f79914a3fd15ee1230584eab7d00"}, + {file = "bitarray-2.8.0-cp36-cp36m-win_amd64.whl", hash = "sha256:951b39a515ed07487df02f0480617500f87b5e01cb36ec775dd30577633bec44"}, + {file = "bitarray-2.8.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4e5c53500ee060c36303210d34df0e18636584ae1a70eb427e96fed70189896f"}, + {file = "bitarray-2.8.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1deaaebbae83cf7b6fd252c36a4f03bd820bcf209da1ca400dddbf11064e35ec"}, + {file = "bitarray-2.8.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36eb9bdeee9c5988beca491741c4e2611abbea7fbbe3f4ebe35e00d509c40847"}, + {file = "bitarray-2.8.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:143c9ac7a7f7e155f42bbf1fa547feaf9b4b2c226a25f17ae0d0d537ce9a328d"}, + {file = "bitarray-2.8.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:06984d12925e595a26da7855a5e868ce9b19b646e4b130e69a85bfcd6ce9227b"}, + {file = "bitarray-2.8.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:aa54a847ae50050099e23ddc2bf20c7f2792706f95e997095e3551048841fc68"}, + {file = "bitarray-2.8.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:dd5dcc4c26d7ef55934fcecea7ebd765313554d86747282c716fa64954cf103d"}, + {file = "bitarray-2.8.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:706835e0e40b4707894af0ddd193eb8bbfb72835db8e4a8be7f6697ddc63c3eb"}, + {file = "bitarray-2.8.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:216af36c9885a229d493ebdd5aa5648aae8db15b1c79ca6c2ad11b7f9bf4062f"}, + {file = "bitarray-2.8.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:6f45bffd00892afa7e455990a9da0bbe0ac2bee978b4bdbb70439345f61b618a"}, + {file = "bitarray-2.8.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e006e43ee096922cdaca797b313292a7ee29b43361da7d3d85d859455a0b6339"}, + {file = "bitarray-2.8.0-cp37-cp37m-win32.whl", hash = "sha256:f00dc03d1c909712a14edafd7edeccf77aca1590928f02f29901d767153b95ef"}, + {file = "bitarray-2.8.0-cp37-cp37m-win_amd64.whl", hash = "sha256:1fdba2209df0ca379b5276dc48c189f424ec6701158a666876265b2669db9ed7"}, + {file = "bitarray-2.8.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:741fc4eb77847b5f046559f77e0f822b3ce270774098f075bc712ef9f5c5948d"}, + {file = "bitarray-2.8.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:66cf402bc4154a074d95f4dec3260497f637112fb982c2335d3bbc174d8c0a2d"}, + {file = "bitarray-2.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:46fb5fbde325fd0bfcd9efd7ea3c5e2c1fd7117ad06e5cf37ca2c6dab539abc4"}, + {file = 
"bitarray-2.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d6922dffc5e123e09907b79291951655ec0a2fde7c36a5584eb67c3b769d118"}, + {file = "bitarray-2.8.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7885e5c23bb2954d913b4e8bb1486a7d2fbf69d27438ef096178eccf1d9e1e7a"}, + {file = "bitarray-2.8.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:123d3802e7eafada61854d16c20d0df0c5f1d68da98f9e16059a23d200b5057a"}, + {file = "bitarray-2.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6167bf10c3f773612a65b925edb4c8e002f1b826db6d3e91839153d6030fec17"}, + {file = "bitarray-2.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:844e12f06e7167855c7db6838ea4ef08e44621dd4606039a4b5c0c6ca0801edf"}, + {file = "bitarray-2.8.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:117d53e1ada8d7f9b8a350bb78597488311637c036da1a6aeb7071527672fdf7"}, + {file = "bitarray-2.8.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:816510e83e61d1f44ff2f138863068451840314774bad1cc2911a1f86c93eb2f"}, + {file = "bitarray-2.8.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:3619bd30f163a3748325677996d4095b56ab1eb21610797f2b59f30e26ad1a7a"}, + {file = "bitarray-2.8.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:f89cd1a17b57810b640344a559de60039bf50de36e0d577f6f72fab7c23ee023"}, + {file = "bitarray-2.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:639f8ebaad5cec929dd73859d5ab850d4df746272754987720cf52fbbe2ec08e"}, + {file = "bitarray-2.8.0-cp38-cp38-win32.whl", hash = "sha256:991dfaee77ecd82d96ddd85d242836de9471940dd89e943feea26549a9170ecb"}, + {file = "bitarray-2.8.0-cp38-cp38-win_amd64.whl", hash = "sha256:45c5e6d5970ade6f98e91341b47722c3d0d68742bf62e3d47b586897c447e78a"}, + {file = "bitarray-2.8.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:62899c1102b47637757ad3448cb32caa4d4d8070986c29abe091711535644192"}, + {file = "bitarray-2.8.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6897cd0c67c9433faca9023cb5eff25678e056764ce158998e6f30137e9a7f17"}, + {file = "bitarray-2.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d0952c8417c21ea9eb2532475b2927753d6080f346f953a520e28794297d45f3"}, + {file = "bitarray-2.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa6e51062a9eba797d97390a4c1f7941e489dd807b2de01d6a190d1a69eacf0a"}, + {file = "bitarray-2.8.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8fb89f6b229ef8fa0e70d9206c57118c2f9bd98c54e3d73c4de00ab8147eed1c"}, + {file = "bitarray-2.8.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc6b74eef97dc84acb429bb9c48363f88767f02b7d4a3e6dfd274334e0dc002e"}, + {file = "bitarray-2.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00a7df14e82b0da37b47f51a1e6a053dbdccbad52627ae6ce6f2516e3ca7db13"}, + {file = "bitarray-2.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5557e41cd92a9f05795980d762e9eca4dee3b393b8a005cb5e091d1e5c319181"}, + {file = "bitarray-2.8.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:13dde9b590e27e9b8be9b96b1d697dbb19ca5c790b7d45a5ed310049fe9221b5"}, + {file = "bitarray-2.8.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ebe2a6a8e714e5845fba173c05e26ca50616a7a7845c304f5c3ffccecda98c11"}, + {file = "bitarray-2.8.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:0cd43f0943af45a1056f5dbdd10dc07f513d80ede72cac0306a342db6bf87d1d"}, + {file = "bitarray-2.8.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:9a89b32c81e3e8a5f3fe9b458881ef03c1ba60829ae97999a15e86ea476489c6"}, + {file = "bitarray-2.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b7bf3667e4cb9330b5dc5ae3753e833f398d12cbe14db1baf55cfd6a3ff0052d"}, + {file = "bitarray-2.8.0-cp39-cp39-win32.whl", hash = "sha256:e28b9af8ebeeb19396b7836a06fc1b375a5867cff6a558f7d35420d428a3e2ad"}, + {file = "bitarray-2.8.0-cp39-cp39-win_amd64.whl", hash = "sha256:aabceebde1a450eb363a7ad7a531ab54992520f0a7386844bac7f700d00bb2d3"}, + {file = "bitarray-2.8.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:90f3c63e44eb11424745453da1798ed6abcf6f467a92b75fda7b182cb1fb3e01"}, + {file = "bitarray-2.8.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dd7aa632610fe03272e01fd006c9db2c102340344b034c9bd63e2ed9e3f895cc"}, + {file = "bitarray-2.8.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11447698f2ae9ac6417d25222ab1e6ec087c32d603a9131b2c09ce0911766002"}, + {file = "bitarray-2.8.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:83f80d6f752d40d633c99c12d24d11774a6c3c3fd02dfd038a0496892fb15ed3"}, + {file = "bitarray-2.8.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ee6df5243fcab8bb2bd14396556f1a28eebf94862bf14c1333ff309177ac62ba"}, + {file = "bitarray-2.8.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0d19fd86aa02dbbec68ffb961a237a0bd2ecfbd92a6815fea9f20e9a3536bd92"}, + {file = "bitarray-2.8.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40997802289d647952449b8bf0ee5c56f1f767e65ab33c63e8f756ba463343a7"}, + {file = "bitarray-2.8.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6bd66672c9695e75cf54d1f3f143a85e6b57078a7b86faf0de2c0c97736dfbb4"}, + {file = "bitarray-2.8.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ae79e0ed10cf221845e036bc7c3501e467a3bf288768941da1d8d6aaf12fec34"}, + {file = "bitarray-2.8.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:18f7a8d4ebb8c8750e9aafbcfa1b2bfa9b6291baec6d4a31186762956f88cada"}, + {file = "bitarray-2.8.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:eb45c7170c84c14d67978ccae74def18076a7e07cece0fc514078f4d5f8d0b71"}, + {file = "bitarray-2.8.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d47baae8d5618cce60c20111a4ceafd6ed155e5501e0dc9fb9db55408e63e4a"}, + {file = "bitarray-2.8.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc347f9a869a9c2b224bae65f9ed12bd1f7f97c0cbdfe47e520d6a7ba5aeec52"}, + {file = "bitarray-2.8.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a5618e50873f8a5ba96facbf61c5f342ee3212fee4b64c21061a89cb09df4428"}, + {file = "bitarray-2.8.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f59f189ed38ad6fc3ef77a038eae75757b2fe0e3e869085c5db7472f59eaefb3"}, + {file = "bitarray-2.8.0.tar.gz", hash = "sha256:cd69a926a3363e25e94a64408303283c59085be96d71524bdbe6bfc8da2e34e0"}, +] + +[[package]] +name = "cachetools" +version = "5.3.1" +description = "Extensible memoizing collections and decorators" +optional = false +python-versions = ">=3.7" +files = [ + {file = "cachetools-5.3.1-py3-none-any.whl", hash = 
"sha256:95ef631eeaea14ba2e36f06437f36463aac3a096799e876ee55e5cdccb102590"}, + {file = "cachetools-5.3.1.tar.gz", hash = "sha256:dce83f2d9b4e1f732a8cd44af8e8fab2dbe46201467fc98b3ef8f269092bf62b"}, +] + +[[package]] +name = "certifi" +version = "2023.7.22" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2023.7.22-py3-none-any.whl", hash = "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9"}, + {file = "certifi-2023.7.22.tar.gz", hash = "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082"}, +] + +[[package]] +name = "cffi" +version = "1.15.1" +description = "Foreign Function Interface for Python calling C code." +optional = false +python-versions = "*" +files = [ + {file = "cffi-1.15.1-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_i686.whl", hash = "sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2"}, + {file = "cffi-1.15.1-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914"}, + {file = "cffi-1.15.1-cp27-cp27m-win32.whl", hash = "sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3"}, + {file = "cffi-1.15.1-cp27-cp27m-win_amd64.whl", hash = "sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_i686.whl", hash = "sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162"}, + {file = "cffi-1.15.1-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21"}, + {file = "cffi-1.15.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e"}, + {file = "cffi-1.15.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01"}, + {file = "cffi-1.15.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e"}, + {file = "cffi-1.15.1-cp310-cp310-win32.whl", hash = "sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2"}, + {file = "cffi-1.15.1-cp310-cp310-win_amd64.whl", hash = "sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac"}, + {file = "cffi-1.15.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325"}, + {file = "cffi-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef"}, + {file = "cffi-1.15.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8"}, + {file = "cffi-1.15.1-cp311-cp311-win32.whl", hash = "sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d"}, + {file = "cffi-1.15.1-cp311-cp311-win_amd64.whl", hash = "sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104"}, + {file = "cffi-1.15.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405"}, + {file = "cffi-1.15.1-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e"}, + {file = "cffi-1.15.1-cp36-cp36m-win32.whl", hash = "sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf"}, + {file = "cffi-1.15.1-cp36-cp36m-win_amd64.whl", hash = "sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497"}, + {file = "cffi-1.15.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b"}, + {file = "cffi-1.15.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c"}, + {file = 
"cffi-1.15.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426"}, + {file = "cffi-1.15.1-cp37-cp37m-win32.whl", hash = "sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9"}, + {file = "cffi-1.15.1-cp37-cp37m-win_amd64.whl", hash = "sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045"}, + {file = "cffi-1.15.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02"}, + {file = "cffi-1.15.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192"}, + {file = "cffi-1.15.1-cp38-cp38-win32.whl", hash = "sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314"}, + {file = "cffi-1.15.1-cp38-cp38-win_amd64.whl", hash = "sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585"}, + {file = "cffi-1.15.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35"}, + {file = "cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27"}, + {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76"}, + {file = "cffi-1.15.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3"}, + {file = "cffi-1.15.1-cp39-cp39-win32.whl", hash = "sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee"}, + {file = "cffi-1.15.1-cp39-cp39-win_amd64.whl", hash = "sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c"}, + {file = "cffi-1.15.1.tar.gz", hash = "sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9"}, +] + +[package.dependencies] +pycparser = "*" + +[[package]] +name = 
"charset-normalizer" +version = "3.2.0" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.2.0.tar.gz", hash = "sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win32.whl", hash = "sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96"}, + {file = "charset_normalizer-3.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918"}, + {file = 
"charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win32.whl", hash = "sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1"}, + {file = "charset_normalizer-3.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3"}, + {file = 
"charset_normalizer-3.2.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win32.whl", hash = "sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1"}, + {file = "charset_normalizer-3.2.0-cp37-cp37m-win_amd64.whl", hash = "sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win32.whl", hash = "sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706"}, + {file = "charset_normalizer-3.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5"}, + {file = 
"charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win32.whl", hash = "sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9"}, + {file = "charset_normalizer-3.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80"}, + {file = "charset_normalizer-3.2.0-py3-none-any.whl", hash = "sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6"}, +] + +[[package]] +name = "click" +version = "8.1.6" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.6-py3-none-any.whl", hash = "sha256:fa244bb30b3b5ee2cae3da8f55c9e5e0c0e86093306301fb418eb9dc40fbded5"}, + {file = "click-8.1.6.tar.gz", hash = "sha256:48ee849951919527a045bfe3bf7baa8a959c423134e1a5b98c05c20ba75a1cbd"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." 
+optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "contourpy" +version = "1.1.0" +description = "Python library for calculating contours of 2D quadrilateral grids" +optional = false +python-versions = ">=3.8" +files = [ + {file = "contourpy-1.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:89f06eff3ce2f4b3eb24c1055a26981bffe4e7264acd86f15b97e40530b794bc"}, + {file = "contourpy-1.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dffcc2ddec1782dd2f2ce1ef16f070861af4fb78c69862ce0aab801495dda6a3"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25ae46595e22f93592d39a7eac3d638cda552c3e1160255258b695f7b58e5655"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17cfaf5ec9862bc93af1ec1f302457371c34e688fbd381f4035a06cd47324f48"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18a64814ae7bce73925131381603fff0116e2df25230dfc80d6d690aa6e20b37"}, + {file = "contourpy-1.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90c81f22b4f572f8a2110b0b741bb64e5a6427e0a198b2cdc1fbaf85f352a3aa"}, + {file = "contourpy-1.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:53cc3a40635abedbec7f1bde60f8c189c49e84ac180c665f2cd7c162cc454baa"}, + {file = "contourpy-1.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:1f795597073b09d631782e7245016a4323cf1cf0b4e06eef7ea6627e06a37ff2"}, + {file = "contourpy-1.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0b7b04ed0961647691cfe5d82115dd072af7ce8846d31a5fac6c142dcce8b882"}, + {file = "contourpy-1.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:27bc79200c742f9746d7dd51a734ee326a292d77e7d94c8af6e08d1e6c15d545"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:052cc634bf903c604ef1a00a5aa093c54f81a2612faedaa43295809ffdde885e"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9382a1c0bc46230fb881c36229bfa23d8c303b889b788b939365578d762b5c18"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5cec36c5090e75a9ac9dbd0ff4a8cf7cecd60f1b6dc23a374c7d980a1cd710e"}, + {file = "contourpy-1.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f0cbd657e9bde94cd0e33aa7df94fb73c1ab7799378d3b3f902eb8eb2e04a3a"}, + {file = "contourpy-1.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:181cbace49874f4358e2929aaf7ba84006acb76694102e88dd15af861996c16e"}, + {file = "contourpy-1.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:fb3b7d9e6243bfa1efb93ccfe64ec610d85cfe5aec2c25f97fbbd2e58b531256"}, + {file = "contourpy-1.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:bcb41692aa09aeb19c7c213411854402f29f6613845ad2453d30bf421fe68fed"}, + {file = "contourpy-1.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5d123a5bc63cd34c27ff9c7ac1cd978909e9c71da12e05be0231c608048bb2ae"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62013a2cf68abc80dadfd2307299bfa8f5aa0dcaec5b2954caeb5fa094171103"}, + 
{file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0b6616375d7de55797d7a66ee7d087efe27f03d336c27cf1f32c02b8c1a5ac70"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:317267d915490d1e84577924bd61ba71bf8681a30e0d6c545f577363157e5e94"}, + {file = "contourpy-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d551f3a442655f3dcc1285723f9acd646ca5858834efeab4598d706206b09c9f"}, + {file = "contourpy-1.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:e7a117ce7df5a938fe035cad481b0189049e8d92433b4b33aa7fc609344aafa1"}, + {file = "contourpy-1.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:d4f26b25b4f86087e7d75e63212756c38546e70f2a92d2be44f80114826e1cd4"}, + {file = "contourpy-1.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc00bb4225d57bff7ebb634646c0ee2a1298402ec10a5fe7af79df9a51c1bfd9"}, + {file = "contourpy-1.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:189ceb1525eb0655ab8487a9a9c41f42a73ba52d6789754788d1883fb06b2d8a"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f2931ed4741f98f74b410b16e5213f71dcccee67518970c42f64153ea9313b9"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30f511c05fab7f12e0b1b7730ebdc2ec8deedcfb505bc27eb570ff47c51a8f15"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:143dde50520a9f90e4a2703f367cf8ec96a73042b72e68fcd184e1279962eb6f"}, + {file = "contourpy-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e94bef2580e25b5fdb183bf98a2faa2adc5b638736b2c0a4da98691da641316a"}, + {file = "contourpy-1.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ed614aea8462735e7d70141374bd7650afd1c3f3cb0c2dbbcbe44e14331bf002"}, + {file = "contourpy-1.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:438ba416d02f82b692e371858143970ed2eb6337d9cdbbede0d8ad9f3d7dd17d"}, + {file = "contourpy-1.1.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:a698c6a7a432789e587168573a864a7ea374c6be8d4f31f9d87c001d5a843493"}, + {file = "contourpy-1.1.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:397b0ac8a12880412da3551a8cb5a187d3298a72802b45a3bd1805e204ad8439"}, + {file = "contourpy-1.1.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:a67259c2b493b00e5a4d0f7bfae51fb4b3371395e47d079a4446e9b0f4d70e76"}, + {file = "contourpy-1.1.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2b836d22bd2c7bb2700348e4521b25e077255ebb6ab68e351ab5aa91ca27e027"}, + {file = "contourpy-1.1.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:084eaa568400cfaf7179b847ac871582199b1b44d5699198e9602ecbbb5f6104"}, + {file = "contourpy-1.1.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:911ff4fd53e26b019f898f32db0d4956c9d227d51338fb3b03ec72ff0084ee5f"}, + {file = "contourpy-1.1.0.tar.gz", hash = "sha256:e53046c3863828d21d531cc3b53786e6580eb1ba02477e8681009b6aa0870b21"}, +] + +[package.dependencies] +numpy = ">=1.16" + +[package.extras] +bokeh = ["bokeh", "selenium"] +docs = ["furo", "sphinx-copybutton"] +mypy = ["contourpy[bokeh,docs]", "docutils-stubs", "mypy (==1.2.0)", "types-Pillow"] +test = ["Pillow", "contourpy[test-no-images]", "matplotlib"] +test-no-images = ["pytest", "pytest-cov", "wurlitzer"] + +[[package]] +name = "cycler" +version = "0.11.0" +description = "Composable style cycles" +optional = false 
+python-versions = ">=3.6" +files = [ + {file = "cycler-0.11.0-py3-none-any.whl", hash = "sha256:3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3"}, + {file = "cycler-0.11.0.tar.gz", hash = "sha256:9c87405839a19696e837b3b818fed3f5f69f16f1eec1a1ad77e043dcea9c772f"}, +] + +[[package]] +name = "cython" +version = "0.29.36" +description = "The Cython compiler for writing C extensions for the Python language." +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "Cython-0.29.36-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1ea33c1c57f331f5653baa1313e445fbe80d1da56dd9a42c8611037887897b9d"}, + {file = "Cython-0.29.36-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2fe34615c13ace29e77bf9d21c26188d23eff7ad8b3e248da70404e5f5436b95"}, + {file = "Cython-0.29.36-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ae75eac4f13cbbcb50b2097470dcea570182446a3ebd0f7e95dd425c2017a2d7"}, + {file = "Cython-0.29.36-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:847d07fc02978c4433d01b4f5ee489b75fd42fd32ccf9cc4b5fd887e8cffe822"}, + {file = "Cython-0.29.36-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:7cb44aeaf6c5c25bd6a7562ece4eadf50d606fc9b5f624fa95bd0281e8bf0a97"}, + {file = "Cython-0.29.36-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:28fb10aabd56a2e4d399273b48e106abe5a0d271728fd5eed3d36e7171000045"}, + {file = "Cython-0.29.36-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:86b7a13c6b23ab6471d40a320f573fbc8a4e39833947eebed96661145dc34771"}, + {file = "Cython-0.29.36-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:19ccf7fc527cf556e2e6a3dfeffcadfbcabd24a59a988289117795dfed8a25ad"}, + {file = "Cython-0.29.36-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:74bddfc7dc8958526b2018d3adc1aa6dc9cf2a24095c972e5ad06758c360b261"}, + {file = "Cython-0.29.36-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:6c4d7e36fe0211e394adffd296382b435ac22762d14f2fe45c506c230f91cf2d"}, + {file = "Cython-0.29.36-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:0bca6a7504e8cfc63a4d3c7c9b9a04e5d05501942a6c8cee177363b61a32c2d4"}, + {file = "Cython-0.29.36-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:17c74f80b06e2fa8ffc8acd41925f4f9922da8a219cd25c6901beab2f7c56cc5"}, + {file = "Cython-0.29.36-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:25ff471a459aad82146973b0b8c177175ab896051080713d3035ad4418739f66"}, + {file = "Cython-0.29.36-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a9738f23d00d99481797b155ad58f8fc1c72096926ea2554b8ccc46e1d356c27"}, + {file = "Cython-0.29.36-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:af2f333f08c4c279f3480532341bf70ec8010bcbc7d8a6daa5ca0bf4513af295"}, + {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:cd77cedbcc13cb67aef39b8615fd50a67fc42b0c6defea6fc0a21e19d3a062ec"}, + {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50d506d73a46c4a522ef9fdafcbf7a827ba13907b18ff58f61a8fa0887d0bd8d"}, + {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", 
hash = "sha256:6a571d7c7b52ee12d73bc65b4855779c069545da3bac26bec06a1389ad17ade5"}, + {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a216b2801c7d9c3babe0a10cc25da3bc92494d7047d1f732d3c47b0cceaf0941"}, + {file = "Cython-0.29.36-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:68abee3be27f21c9642a07a93f8333d491f4c52bc70068e42f51685df9ac1a57"}, + {file = "Cython-0.29.36-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:1ef90023da8a9bf84cf16f06186db0906d2ce52a09f751e2cb9d3da9d54eae46"}, + {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:9deef0761e8c798043dbb728a1c6df97b26e5edc65b8d6c7608b3c07af3eb722"}, + {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:69af2365de2343b4e5a61c567e7611ddf2575ae6f6e5c01968f7d4f2747324eb"}, + {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:fdf377b0f6e9325b73ad88933136023184afdc795caeeaaf3dca13494cffd15e"}, + {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1ff2cc5518558c598028ae8d9a43401e0e734b74b6e598156b005328c9da3472"}, + {file = "Cython-0.29.36-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7ca921068242cd8b52544870c807fe285c1f248b12df7b6dfae25cc9957b965e"}, + {file = "Cython-0.29.36-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:6058a6d04e04d790cda530e1ff675e9352359eb4b777920df3cac2b62a9a030f"}, + {file = "Cython-0.29.36-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:de2045ceae1857e56a72f08e0acfa48c994277a353b7bdab1f097db9f8803f19"}, + {file = "Cython-0.29.36-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9f2a4b4587aaef08815410dc20653613ca04a120a2954a92c39e37c6b5fdf6be"}, + {file = "Cython-0.29.36-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:2edd9f8edca69178d74cbbbc180bc3e848433c9b7dc80374a11a0bb0076c926d"}, + {file = "Cython-0.29.36-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c6c0aea8491a70f98b7496b5057c9523740e02cec21cd678eef609d2aa6c1257"}, + {file = "Cython-0.29.36-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:768f65b16d23c630d8829ce1f95520ef1531a9c0489fa872d87c8c3813f65aee"}, + {file = "Cython-0.29.36-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:568625e8274ee7288ad87b0f615ec36ab446ca9b35e77481ed010027d99c7020"}, + {file = "Cython-0.29.36-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:bdc0a4cb99f55e6878d4b67a4bfee23823484915cb6b7e9c9dd01002dd3592ea"}, + {file = "Cython-0.29.36-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:f0df6552be39853b10dfb5a10dbd08f5c49023d6b390d7ce92d4792a8b6e73ee"}, + {file = "Cython-0.29.36-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_24_i686.whl", hash = "sha256:8894db6f5b6479a3c164e0454e13083ebffeaa9a0822668bb2319bdf1b783df1"}, + {file = "Cython-0.29.36-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:53f93a8c342e9445a8f0cb7039775294f2dbbe5241936573daeaf0afe30397e4"}, + {file = "Cython-0.29.36-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:ee317f9bcab901a3db39c34ee5a27716f7132e5c0de150125342694d18b30f51"}, + {file = 
"Cython-0.29.36-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e4b8269e5a5d127a2191b02b9df3636c0dac73f14f1ff8a831f39cb5197c4f38"}, + {file = "Cython-0.29.36-py2.py3-none-any.whl", hash = "sha256:95bb13d8be507425d03ebe051f90d4b2a9fdccc64e4f30b35645fdb7542742eb"}, + {file = "Cython-0.29.36.tar.gz", hash = "sha256:41c0cfd2d754e383c9eeb95effc9aa4ab847d0c9747077ddd7c0dcb68c3bc01f"}, +] + +[[package]] +name = "decorator" +version = "5.1.1" +description = "Decorators for Humans" +optional = false +python-versions = ">=3.5" +files = [ + {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, + {file = "decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.1.2" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash = "sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"}, + {file = "exceptiongroup-1.1.2.tar.gz", hash = "sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "fairseq" +version = "0.12.2" +description = "Facebook AI Research Sequence-to-Sequence Toolkit" +optional = false +python-versions = "*" +files = [ + {file = "fairseq-0.12.2-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:fe65b07c5121b7cda0c7a17166994a6b0059259ce37881b6daa117b8c209b662"}, + {file = "fairseq-0.12.2-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:0543905012e39f00bd8c3f3781d9f49e76ab309801eb2eb7de250f5984df0de3"}, + {file = "fairseq-0.12.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c4877d65346797fc580a3a7e6e2364d2331a0026ef099c22eb8311441e49c2c6"}, + {file = "fairseq-0.12.2-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:26454f334ca705c67f898846dff34e14c148fcdaf53b4f52d64209773b509347"}, + {file = "fairseq-0.12.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:3b8c8b6dc368d2fd23a06ff613a2af05959eee275fe90846d7cffef4a43c522a"}, + {file = "fairseq-0.12.2-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:08fa308c760f995cdc13d9c385e2b9d923a78b48275d8b4d78f3a854c71a8f29"}, + {file = "fairseq-0.12.2.tar.gz", hash = "sha256:34f1b18426bf3844714534162f065ab733e049597476daa35fffb4d06a92b524"}, +] + +[package.dependencies] +bitarray = "*" +cffi = "*" +cython = "*" +hydra-core = ">=1.0.7,<1.1" +numpy = {version = "*", markers = "python_version >= \"3.7\""} +omegaconf = "<2.1" +regex = "*" +sacrebleu = ">=1.4.12" +torch = "*" +torchaudio = ">=0.8.0" +tqdm = "*" + +[[package]] +name = "faiss-cpu" +version = "1.7.4" +description = "A library for efficient similarity search and clustering of dense vectors." 
+optional = false +python-versions = "*" +files = [ + {file = "faiss-cpu-1.7.4.tar.gz", hash = "sha256:265dc31b0c079bf4433303bf6010f73922490adff9188b915e2d3f5e9c82dd0a"}, + {file = "faiss_cpu-1.7.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:50d4ebe7f1869483751c558558504f818980292a9b55be36f9a1ee1009d9a686"}, + {file = "faiss_cpu-1.7.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7b1db7fae7bd8312aeedd0c41536bcd19a6e297229e1dce526bde3a73ab8c0b5"}, + {file = "faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17b7fa7194a228a84929d9e6619d0e7dbf00cc0f717e3462253766f5e3d07de8"}, + {file = "faiss_cpu-1.7.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dca531952a2e3eac56f479ff22951af4715ee44788a3fe991d208d766d3f95f3"}, + {file = "faiss_cpu-1.7.4-cp310-cp310-win_amd64.whl", hash = "sha256:7173081d605e74766f950f2e3d6568a6f00c53f32fd9318063e96728c6c62821"}, + {file = "faiss_cpu-1.7.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d0bbd6f55d7940cc0692f79e32a58c66106c3c950cee2341b05722de9da23ea3"}, + {file = "faiss_cpu-1.7.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e13c14280376100f143767d0efe47dcb32618f69e62bbd3ea5cd38c2e1755926"}, + {file = "faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c521cb8462f3b00c0c7dfb11caff492bb67816528b947be28a3b76373952c41d"}, + {file = "faiss_cpu-1.7.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afdd9fe1141117fed85961fd36ee627c83fc3b9fd47bafb52d3c849cc2f088b7"}, + {file = "faiss_cpu-1.7.4-cp311-cp311-win_amd64.whl", hash = "sha256:2ff7f57889ea31d945e3b87275be3cad5d55b6261a4e3f51c7aba304d76b81fb"}, + {file = "faiss_cpu-1.7.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:eeaf92f27d76249fb53c1adafe617b0f217ab65837acf7b4ec818511caf6e3d8"}, + {file = "faiss_cpu-1.7.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:102b1bd763e9b0c281ac312590af3eaf1c8b663ccbc1145821fe6a9f92b8eaaf"}, + {file = "faiss_cpu-1.7.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5512da6707c967310c46ff712b00418b7ae28e93cb609726136e826e9f2f14fa"}, + {file = "faiss_cpu-1.7.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0c2e5b9d8c28c99f990e87379d5bbcc6c914da91ebb4250166864fd12db5755b"}, + {file = "faiss_cpu-1.7.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:43f67f325393145d360171cd98786fcea6120ce50397319afd3bb78be409fb8a"}, + {file = "faiss_cpu-1.7.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6a4e4af194b8fce74c4b770cad67ad1dd1b4673677fc169723e4c50ba5bd97a8"}, + {file = "faiss_cpu-1.7.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31bfb7b9cffc36897ae02a983e04c09fe3b8c053110a287134751a115334a1df"}, + {file = "faiss_cpu-1.7.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52d7de96abef2340c0d373c1f5cbc78026a3cebb0f8f3a5920920a00210ead1f"}, + {file = "faiss_cpu-1.7.4-cp38-cp38-win_amd64.whl", hash = "sha256:699feef85b23c2c729d794e26ca69bebc0bee920d676028c06fd0e0becc15c7e"}, + {file = "faiss_cpu-1.7.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:559a0133f5ed44422acb09ee1ac0acffd90c6666d1bc0d671c18f6e93ad603e2"}, + {file = "faiss_cpu-1.7.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ea1d71539fe3dc0f1bed41ef954ca701678776f231046bf0ca22ccea5cf5bef6"}, + {file = "faiss_cpu-1.7.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:12d45e0157024eb3249842163162983a1ac8b458f1a8b17bbf86f01be4585a99"}, + {file = "faiss_cpu-1.7.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f0eab359e066d32c874f51a7d4bf6440edeec068b7fe47e6d803c73605a8b4c"}, + {file = "faiss_cpu-1.7.4-cp39-cp39-win_amd64.whl", hash = "sha256:98459ceeeb735b9df1a5b94572106ffe0a6ce740eb7e4626715dd218657bb4dc"}, +] + +[[package]] +name = "fastapi" +version = "0.100.0" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +optional = false +python-versions = ">=3.7" +files = [ + {file = "fastapi-0.100.0-py3-none-any.whl", hash = "sha256:271662daf986da8fa98dc2b7c7f61c4abdfdccfb4786d79ed8b2878f172c6d5f"}, + {file = "fastapi-0.100.0.tar.gz", hash = "sha256:acb5f941ea8215663283c10018323ba7ea737c571b67fc7e88e9469c7eb1d12e"}, +] + +[package.dependencies] +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<3.0.0" +starlette = ">=0.27.0,<0.28.0" +typing-extensions = ">=4.5.0" + +[package.extras] +all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] + +[[package]] +name = "ffmpeg-python" +version = "0.2.0" +description = "Python bindings for FFmpeg - with complex filtering support" +optional = false +python-versions = "*" +files = [ + {file = "ffmpeg-python-0.2.0.tar.gz", hash = "sha256:65225db34627c578ef0e11c8b1eb528bb35e024752f6f10b78c011f6f64c4127"}, + {file = "ffmpeg_python-0.2.0-py3-none-any.whl", hash = "sha256:ac441a0404e053f8b6a1113a77c0f452f1cfc62f6344a769475ffdc0f56c23c5"}, +] + +[package.dependencies] +future = "*" + +[package.extras] +dev = ["Sphinx (==2.1.0)", "future (==0.17.1)", "numpy (==1.16.4)", "pytest (==4.6.1)", "pytest-mock (==1.10.4)", "tox (==3.12.1)"] + +[[package]] +name = "ffmpy" +version = "0.3.1" +description = "A simple Python wrapper for ffmpeg" +optional = false +python-versions = "*" +files = [ + {file = "ffmpy-0.3.1.tar.gz", hash = "sha256:a173b8f42c7c669ff722df7fb31e1e870067713697f745224fa6e621b82f0004"}, +] + +[[package]] +name = "filelock" +version = "3.12.2" +description = "A platform independent file lock." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "filelock-3.12.2-py3-none-any.whl", hash = "sha256:cbb791cdea2a72f23da6ac5b5269ab0a0d161e9ef0100e653b69049a7706d1ec"}, + {file = "filelock-3.12.2.tar.gz", hash = "sha256:002740518d8aa59a26b0c76e10fb8c6e15eae825d34b6fdf670333fd7b938d81"}, +] + +[package.extras] +docs = ["furo (>=2023.5.20)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "diff-cover (>=7.5)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)", "pytest-timeout (>=2.1)"] + +[[package]] +name = "fonttools" +version = "4.41.1" +description = "Tools to manipulate font files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fonttools-4.41.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a7bbb290d13c6dd718ec2c3db46fe6c5f6811e7ea1e07f145fd8468176398224"}, + {file = "fonttools-4.41.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ec453a45778524f925a8f20fd26a3326f398bfc55d534e37bab470c5e415caa1"}, + {file = "fonttools-4.41.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2071267deaa6d93cb16288613419679c77220543551cbe61da02c93d92df72f"}, + {file = "fonttools-4.41.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e3334d51f0e37e2c6056e67141b2adabc92613a968797e2571ca8a03bd64773"}, + {file = "fonttools-4.41.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:cac73bbef7734e78c60949da11c4903ee5837168e58772371bd42a75872f4f82"}, + {file = "fonttools-4.41.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:edee0900cf0eedb29d17c7876102d6e5a91ee333882b1f5abc83e85b934cadb5"}, + {file = "fonttools-4.41.1-cp310-cp310-win32.whl", hash = "sha256:2a22b2c425c698dcd5d6b0ff0b566e8e9663172118db6fd5f1941f9b8063da9b"}, + {file = "fonttools-4.41.1-cp310-cp310-win_amd64.whl", hash = "sha256:547ab36a799dded58a46fa647266c24d0ed43a66028cd1cd4370b246ad426cac"}, + {file = "fonttools-4.41.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:849ec722bbf7d3501a0e879e57dec1fc54919d31bff3f690af30bb87970f9784"}, + {file = "fonttools-4.41.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:38cdecd8f1fd4bf4daae7fed1b3170dfc1b523388d6664b2204b351820aa78a7"}, + {file = "fonttools-4.41.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ae64303ba670f8959fdaaa30ba0c2dabe75364fdec1caeee596c45d51ca3425"}, + {file = "fonttools-4.41.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f14f3ccea4cc7dd1b277385adf3c3bf18f9860f87eab9c2fb650b0af16800f55"}, + {file = "fonttools-4.41.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:33191f062549e6bb1a4782c22a04ebd37009c09360e2d6686ac5083774d06d95"}, + {file = "fonttools-4.41.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:704bccd69b0abb6fab9f5e4d2b75896afa48b427caa2c7988792a2ffce35b441"}, + {file = "fonttools-4.41.1-cp311-cp311-win32.whl", hash = "sha256:4edc795533421e98f60acee7d28fc8d941ff5ac10f44668c9c3635ad72ae9045"}, + {file = "fonttools-4.41.1-cp311-cp311-win_amd64.whl", hash = "sha256:aaaef294d8e411f0ecb778a0aefd11bb5884c9b8333cc1011bdaf3b58ca4bd75"}, + {file = "fonttools-4.41.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3d1f9471134affc1e3b1b806db6e3e2ad3fa99439e332f1881a474c825101096"}, + {file = "fonttools-4.41.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:59eba8b2e749a1de85760da22333f3d17c42b66e03758855a12a2a542723c6e7"}, + {file = 
"fonttools-4.41.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9b3cc10dc9e0834b6665fd63ae0c6964c6bc3d7166e9bc84772e0edd09f9fa2"}, + {file = "fonttools-4.41.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:da2c2964bdc827ba6b8a91dc6de792620be4da3922c4cf0599f36a488c07e2b2"}, + {file = "fonttools-4.41.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7763316111df7b5165529f4183a334aa24c13cdb5375ffa1dc8ce309c8bf4e5c"}, + {file = "fonttools-4.41.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b2d1ee95be42b80d1f002d1ee0a51d7a435ea90d36f1a5ae331be9962ee5a3f1"}, + {file = "fonttools-4.41.1-cp38-cp38-win32.whl", hash = "sha256:f48602c0b3fd79cd83a34c40af565fe6db7ac9085c8823b552e6e751e3a5b8be"}, + {file = "fonttools-4.41.1-cp38-cp38-win_amd64.whl", hash = "sha256:b0938ebbeccf7c80bb9a15e31645cf831572c3a33d5cc69abe436e7000c61b14"}, + {file = "fonttools-4.41.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e5c2b0a95a221838991e2f0e455dec1ca3a8cc9cd54febd68cc64d40fdb83669"}, + {file = "fonttools-4.41.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:891cfc5a83b0307688f78b9bb446f03a7a1ad981690ac8362f50518bc6153975"}, + {file = "fonttools-4.41.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:73ef0bb5d60eb02ba4d3a7d23ada32184bd86007cb2de3657cfcb1175325fc83"}, + {file = "fonttools-4.41.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f240d9adf0583ac8fc1646afe7f4ac039022b6f8fa4f1575a2cfa53675360b69"}, + {file = "fonttools-4.41.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bdd729744ae7ecd7f7311ad25d99da4999003dcfe43b436cf3c333d4e68de73d"}, + {file = "fonttools-4.41.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b927e5f466d99c03e6e20961946314b81d6e3490d95865ef88061144d9f62e38"}, + {file = "fonttools-4.41.1-cp39-cp39-win32.whl", hash = "sha256:afce2aeb80be72b4da7dd114f10f04873ff512793d13ce0b19d12b2a4c44c0f0"}, + {file = "fonttools-4.41.1-cp39-cp39-win_amd64.whl", hash = "sha256:1df1b6f4c7c4bc8201eb47f3b268adbf2539943aa43c400f84556557e3e109c0"}, + {file = "fonttools-4.41.1-py3-none-any.whl", hash = "sha256:952cb405f78734cf6466252fec42e206450d1a6715746013f64df9cbd4f896fa"}, + {file = "fonttools-4.41.1.tar.gz", hash = "sha256:e16a9449f21a93909c5be2f5ed5246420f2316e94195dbfccb5238aaa38f9751"}, +] + +[package.extras] +all = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "fs (>=2.2.0,<3)", "lxml (>=4.0,<5)", "lz4 (>=1.7.4.2)", "matplotlib", "munkres", "scipy", "skia-pathops (>=0.5.0)", "sympy", "uharfbuzz (>=0.23.0)", "unicodedata2 (>=15.0.0)", "xattr", "zopfli (>=0.1.4)"] +graphite = ["lz4 (>=1.7.4.2)"] +interpolatable = ["munkres", "scipy"] +lxml = ["lxml (>=4.0,<5)"] +pathops = ["skia-pathops (>=0.5.0)"] +plot = ["matplotlib"] +repacker = ["uharfbuzz (>=0.23.0)"] +symfont = ["sympy"] +type1 = ["xattr"] +ufo = ["fs (>=2.2.0,<3)"] +unicode = ["unicodedata2 (>=15.0.0)"] +woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] + +[[package]] +name = "frozenlist" +version = "1.4.0" +description = "A list-like structure which implements collections.abc.MutableSequence" +optional = false +python-versions = ">=3.8" +files = [ + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:764226ceef3125e53ea2cb275000e309c0aa5464d43bd72abd661e27fffc26ab"}, + {file = "frozenlist-1.4.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d6484756b12f40003c6128bfcc3fa9f0d49a687e171186c2d85ec82e3758c559"}, + {file = 
"frozenlist-1.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9ac08e601308e41eb533f232dbf6b7e4cea762f9f84f6357136eed926c15d12c"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d081f13b095d74b67d550de04df1c756831f3b83dc9881c38985834387487f1b"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:71932b597f9895f011f47f17d6428252fc728ba2ae6024e13c3398a087c2cdea"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:981b9ab5a0a3178ff413bca62526bb784249421c24ad7381e39d67981be2c326"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e41f3de4df3e80de75845d3e743b3f1c4c8613c3997a912dbf0229fc61a8b963"}, + {file = "frozenlist-1.4.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6918d49b1f90821e93069682c06ffde41829c346c66b721e65a5c62b4bab0300"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e5c8764c7829343d919cc2dfc587a8db01c4f70a4ebbc49abde5d4b158b007b"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:8d0edd6b1c7fb94922bf569c9b092ee187a83f03fb1a63076e7774b60f9481a8"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:e29cda763f752553fa14c68fb2195150bfab22b352572cb36c43c47bedba70eb"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:0c7c1b47859ee2cac3846fde1c1dc0f15da6cec5a0e5c72d101e0f83dcb67ff9"}, + {file = "frozenlist-1.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:901289d524fdd571be1c7be054f48b1f88ce8dddcbdf1ec698b27d4b8b9e5d62"}, + {file = "frozenlist-1.4.0-cp310-cp310-win32.whl", hash = "sha256:1a0848b52815006ea6596c395f87449f693dc419061cc21e970f139d466dc0a0"}, + {file = "frozenlist-1.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:b206646d176a007466358aa21d85cd8600a415c67c9bd15403336c331a10d956"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:de343e75f40e972bae1ef6090267f8260c1446a1695e77096db6cfa25e759a95"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad2a9eb6d9839ae241701d0918f54c51365a51407fd80f6b8289e2dfca977cc3"}, + {file = "frozenlist-1.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:bd7bd3b3830247580de99c99ea2a01416dfc3c34471ca1298bccabf86d0ff4dc"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bdf1847068c362f16b353163391210269e4f0569a3c166bc6a9f74ccbfc7e839"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:38461d02d66de17455072c9ba981d35f1d2a73024bee7790ac2f9e361ef1cd0c"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5a32087d720c608f42caed0ef36d2b3ea61a9d09ee59a5142d6070da9041b8f"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dd65632acaf0d47608190a71bfe46b209719bf2beb59507db08ccdbe712f969b"}, + {file = "frozenlist-1.4.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:261b9f5d17cac914531331ff1b1d452125bf5daa05faf73b71d935485b0c510b"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = 
"sha256:b89ac9768b82205936771f8d2eb3ce88503b1556324c9f903e7156669f521472"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:008eb8b31b3ea6896da16c38c1b136cb9fec9e249e77f6211d479db79a4eaf01"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e74b0506fa5aa5598ac6a975a12aa8928cbb58e1f5ac8360792ef15de1aa848f"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:490132667476f6781b4c9458298b0c1cddf237488abd228b0b3650e5ecba7467"}, + {file = "frozenlist-1.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:76d4711f6f6d08551a7e9ef28c722f4a50dd0fc204c56b4bcd95c6cc05ce6fbb"}, + {file = "frozenlist-1.4.0-cp311-cp311-win32.whl", hash = "sha256:a02eb8ab2b8f200179b5f62b59757685ae9987996ae549ccf30f983f40602431"}, + {file = "frozenlist-1.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:515e1abc578dd3b275d6a5114030b1330ba044ffba03f94091842852f806f1c1"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:f0ed05f5079c708fe74bf9027e95125334b6978bf07fd5ab923e9e55e5fbb9d3"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ca265542ca427bf97aed183c1676e2a9c66942e822b14dc6e5f42e038f92a503"}, + {file = "frozenlist-1.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:491e014f5c43656da08958808588cc6c016847b4360e327a62cb308c791bd2d9"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:17ae5cd0f333f94f2e03aaf140bb762c64783935cc764ff9c82dff626089bebf"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e78fb68cf9c1a6aa4a9a12e960a5c9dfbdb89b3695197aa7064705662515de2"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d5655a942f5f5d2c9ed93d72148226d75369b4f6952680211972a33e59b1dfdc"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c11b0746f5d946fecf750428a95f3e9ebe792c1ee3b1e96eeba145dc631a9672"}, + {file = "frozenlist-1.4.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e66d2a64d44d50d2543405fb183a21f76b3b5fd16f130f5c99187c3fb4e64919"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:88f7bc0fcca81f985f78dd0fa68d2c75abf8272b1f5c323ea4a01a4d7a614efc"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5833593c25ac59ede40ed4de6d67eb42928cca97f26feea219f21d0ed0959b79"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:fec520865f42e5c7f050c2a79038897b1c7d1595e907a9e08e3353293ffc948e"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:b826d97e4276750beca7c8f0f1a4938892697a6bcd8ec8217b3312dad6982781"}, + {file = "frozenlist-1.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:ceb6ec0a10c65540421e20ebd29083c50e6d1143278746a4ef6bcf6153171eb8"}, + {file = "frozenlist-1.4.0-cp38-cp38-win32.whl", hash = "sha256:2b8bcf994563466db019fab287ff390fffbfdb4f905fc77bc1c1d604b1c689cc"}, + {file = "frozenlist-1.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:a6c8097e01886188e5be3e6b14e94ab365f384736aa1fca6a0b9e35bd4a30bc7"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6c38721585f285203e4b4132a352eb3daa19121a035f3182e08e437cface44bf"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:a0c6da9aee33ff0b1a451e867da0c1f47408112b3391dd43133838339e410963"}, + {file = "frozenlist-1.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:93ea75c050c5bb3d98016b4ba2497851eadf0ac154d88a67d7a6816206f6fa7f"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f61e2dc5ad442c52b4887f1fdc112f97caeff4d9e6ebe78879364ac59f1663e1"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa384489fefeb62321b238e64c07ef48398fe80f9e1e6afeff22e140e0850eef"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10ff5faaa22786315ef57097a279b833ecab1a0bfb07d604c9cbb1c4cdc2ed87"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:007df07a6e3eb3e33e9a1fe6a9db7af152bbd8a185f9aaa6ece10a3529e3e1c6"}, + {file = "frozenlist-1.4.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4f399d28478d1f604c2ff9119907af9726aed73680e5ed1ca634d377abb087"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:c5374b80521d3d3f2ec5572e05adc94601985cc526fb276d0c8574a6d749f1b3"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:ce31ae3e19f3c902de379cf1323d90c649425b86de7bbdf82871b8a2a0615f3d"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7211ef110a9194b6042449431e08c4d80c0481e5891e58d429df5899690511c2"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:556de4430ce324c836789fa4560ca62d1591d2538b8ceb0b4f68fb7b2384a27a"}, + {file = "frozenlist-1.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7645a8e814a3ee34a89c4a372011dcd817964ce8cb273c8ed6119d706e9613e3"}, + {file = "frozenlist-1.4.0-cp39-cp39-win32.whl", hash = "sha256:19488c57c12d4e8095a922f328df3f179c820c212940a498623ed39160bc3c2f"}, + {file = "frozenlist-1.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:6221d84d463fb110bdd7619b69cb43878a11d51cbb9394ae3105d082d5199167"}, + {file = "frozenlist-1.4.0.tar.gz", hash = "sha256:09163bdf0b2907454042edb19f887c6d33806adc71fbd54afc14908bfdc22251"}, +] + +[[package]] +name = "fsspec" +version = "2023.6.0" +description = "File-system specification" +optional = false +python-versions = ">=3.8" +files = [ + {file = "fsspec-2023.6.0-py3-none-any.whl", hash = "sha256:1cbad1faef3e391fba6dc005ae9b5bdcbf43005c9167ce78c915549c352c869a"}, + {file = "fsspec-2023.6.0.tar.gz", hash = "sha256:d0b2f935446169753e7a5c5c55681c54ea91996cc67be93c39a154fb3a2742af"}, +] + +[package.extras] +abfs = ["adlfs"] +adl = ["adlfs"] +arrow = ["pyarrow (>=1)"] +dask = ["dask", "distributed"] +devel = ["pytest", "pytest-cov"] +dropbox = ["dropbox", "dropboxdrivefs", "requests"] +full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"] +fuse = ["fusepy"] +gcs = ["gcsfs"] +git = ["pygit2"] +github = ["requests"] +gs = ["gcsfs"] +gui = ["panel"] +hdfs = ["pyarrow (>=1)"] +http = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "requests"] +libarchive = ["libarchive-c"] +oci = ["ocifs"] +s3 = ["s3fs"] +sftp = ["paramiko"] +smb = ["smbprotocol"] +ssh = ["paramiko"] +tqdm = ["tqdm"] + +[[package]] +name = "functorch" +version = "2.0.0" +description = "JAX-like composable function 
transforms for PyTorch" +optional = false +python-versions = "*" +files = [ + {file = "functorch-2.0.0-py2.py3-none-any.whl", hash = "sha256:ca21ace6b9048e2ec6d132fa0fd18c776eb165ca1c91ef7e3584fdc668eaa4ea"}, +] + +[package.dependencies] +torch = ">=2.0,<2.1" + +[package.extras] +aot = ["networkx"] + +[[package]] +name = "future" +version = "0.18.3" +description = "Clean single-source support for Python 3 and 2" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "future-0.18.3.tar.gz", hash = "sha256:34a17436ed1e96697a86f9de3d15a3b0be01d8bc8de9c1dffd59fb8234ed5307"}, +] + +[[package]] +name = "google-auth" +version = "2.22.0" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "google-auth-2.22.0.tar.gz", hash = "sha256:164cba9af4e6e4e40c3a4f90a1a6c12ee56f14c0b4868d1ca91b32826ab334ce"}, + {file = "google_auth-2.22.0-py2.py3-none-any.whl", hash = "sha256:d61d1b40897407b574da67da1a833bdc10d5a11642566e506565d1b1a46ba873"}, +] + +[package.dependencies] +cachetools = ">=2.0.0,<6.0" +pyasn1-modules = ">=0.2.1" +rsa = ">=3.1.4,<5" +six = ">=1.9.0" +urllib3 = "<2.0" + +[package.extras] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0.dev0)", "requests (>=2.20.0,<3.0.0.dev0)"] +enterprise-cert = ["cryptography (==36.0.2)", "pyopenssl (==22.0.0)"] +pyopenssl = ["cryptography (>=38.0.3)", "pyopenssl (>=20.0.0)"] +reauth = ["pyu2f (>=0.1.5)"] +requests = ["requests (>=2.20.0,<3.0.0.dev0)"] + +[[package]] +name = "google-auth-oauthlib" +version = "1.0.0" +description = "Google Authentication Library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "google-auth-oauthlib-1.0.0.tar.gz", hash = "sha256:e375064964820b47221a7e1b7ee1fd77051b6323c3f9e3e19785f78ab67ecfc5"}, + {file = "google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb"}, +] + +[package.dependencies] +google-auth = ">=2.15.0" +requests-oauthlib = ">=0.7.0" + +[package.extras] +tool = ["click (>=6.0.0)"] + +[[package]] +name = "gradio" +version = "3.38.0" +description = "Python library for easily interacting with trained machine learning models" +optional = false +python-versions = ">=3.8" +files = [ + {file = "gradio-3.38.0-py3-none-any.whl", hash = "sha256:3ac954349ab00e1d78c6b87940e49853e5fb800a1ef35ff0a24796eb2f4803f5"}, + {file = "gradio-3.38.0.tar.gz", hash = "sha256:cc08db00efd8ab00fef2d655f80ee18d4330fe08d0d412914cb0606b4a05881b"}, +] + +[package.dependencies] +aiofiles = ">=22.0,<24.0" +aiohttp = ">=3.0,<4.0" +altair = ">=4.2.0,<6.0" +fastapi = "*" +ffmpy = "*" +gradio-client = ">=0.2.10" +httpx = "*" +huggingface-hub = ">=0.14.0" +jinja2 = "<4.0" +markdown-it-py = {version = ">=2.0.0", extras = ["linkify"]} +markupsafe = ">=2.0,<3.0" +matplotlib = ">=3.0,<4.0" +mdit-py-plugins = "<=0.3.3" +numpy = ">=1.0,<2.0" +orjson = ">=3.0,<4.0" +packaging = "*" +pandas = ">=1.0,<3.0" +pillow = ">=8.0,<11.0" +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<3.0.0" +pydub = "*" +python-multipart = "*" +pyyaml = ">=5.0,<7.0" +requests = ">=2.0,<3.0" +semantic-version = ">=2.0,<3.0" +typing-extensions = ">=4.0,<5.0" +uvicorn = ">=0.14.0" +websockets = ">=10.0,<12.0" + +[[package]] +name = "gradio-client" +version = "0.2.10" +description = "Python library for easily interacting with trained machine learning models" +optional = false +python-versions = ">=3.8" +files = [ + {file = "gradio_client-0.2.10-py3-none-any.whl", hash = 
"sha256:8b1e32093f766812b91c65756e85ad8bbe6d60b885b87b47908a72359c8d72a0"}, + {file = "gradio_client-0.2.10.tar.gz", hash = "sha256:d4f93c86649f7662ec16861506ae864d18667422e8c8ecc227360f2aedcffdc9"}, +] + +[package.dependencies] +fsspec = "*" +httpx = "*" +huggingface-hub = ">=0.13.0" +packaging = "*" +requests = ">=2.0,<3.0" +typing-extensions = ">=4.0,<5.0" +websockets = ">=10.0,<12.0" + +[[package]] +name = "grpcio" +version = "1.56.2" +description = "HTTP/2-based RPC framework" +optional = false +python-versions = ">=3.7" +files = [ + {file = "grpcio-1.56.2-cp310-cp310-linux_armv7l.whl", hash = "sha256:bf0b9959e673505ee5869950642428046edb91f99942607c2ecf635f8a4b31c9"}, + {file = "grpcio-1.56.2-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:5144feb20fe76e73e60c7d73ec3bf54f320247d1ebe737d10672480371878b48"}, + {file = "grpcio-1.56.2-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:a72797549935c9e0b9bc1def1768c8b5a709538fa6ab0678e671aec47ebfd55e"}, + {file = "grpcio-1.56.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c3f3237a57e42f79f1e560726576aedb3a7ef931f4e3accb84ebf6acc485d316"}, + {file = "grpcio-1.56.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:900bc0096c2ca2d53f2e5cebf98293a7c32f532c4aeb926345e9747452233950"}, + {file = "grpcio-1.56.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:97e0efaebbfd222bcaac2f1735c010c1d3b167112d9d237daebbeedaaccf3d1d"}, + {file = "grpcio-1.56.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c0c85c5cbe8b30a32fa6d802588d55ffabf720e985abe9590c7c886919d875d4"}, + {file = "grpcio-1.56.2-cp310-cp310-win32.whl", hash = "sha256:06e84ad9ae7668a109e970c7411e7992751a116494cba7c4fb877656527f9a57"}, + {file = "grpcio-1.56.2-cp310-cp310-win_amd64.whl", hash = "sha256:10954662f77dc36c9a1fb5cc4a537f746580d6b5734803be1e587252682cda8d"}, + {file = "grpcio-1.56.2-cp311-cp311-linux_armv7l.whl", hash = "sha256:c435f5ce1705de48e08fcbcfaf8aee660d199c90536e3e06f2016af7d6a938dd"}, + {file = "grpcio-1.56.2-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:6108e5933eb8c22cd3646e72d5b54772c29f57482fd4c41a0640aab99eb5071d"}, + {file = "grpcio-1.56.2-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:8391cea5ce72f4a12368afd17799474015d5d3dc00c936a907eb7c7eaaea98a5"}, + {file = "grpcio-1.56.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:750de923b456ca8c0f1354d6befca45d1f3b3a789e76efc16741bd4132752d95"}, + {file = "grpcio-1.56.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fda2783c12f553cdca11c08e5af6eecbd717280dc8fbe28a110897af1c15a88c"}, + {file = "grpcio-1.56.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:9e04d4e4cfafa7c5264e535b5d28e786f0571bea609c3f0aaab13e891e933e9c"}, + {file = "grpcio-1.56.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:89a49cc5ad08a38b6141af17e00d1dd482dc927c7605bc77af457b5a0fca807c"}, + {file = "grpcio-1.56.2-cp311-cp311-win32.whl", hash = "sha256:6a007a541dff984264981fbafeb052bfe361db63578948d857907df9488d8774"}, + {file = "grpcio-1.56.2-cp311-cp311-win_amd64.whl", hash = "sha256:af4063ef2b11b96d949dccbc5a987272f38d55c23c4c01841ea65a517906397f"}, + {file = "grpcio-1.56.2-cp37-cp37m-linux_armv7l.whl", hash = "sha256:a6ff459dac39541e6a2763a4439c4ca6bc9ecb4acc05a99b79246751f9894756"}, + {file = "grpcio-1.56.2-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:f20fd21f7538f8107451156dd1fe203300b79a9ddceba1ee0ac8132521a008ed"}, + {file = 
"grpcio-1.56.2-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:d1fbad1f9077372b6587ec589c1fc120b417b6c8ad72d3e3cc86bbbd0a3cee93"}, + {file = "grpcio-1.56.2-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ee26e9dfb3996aff7c870f09dc7ad44a5f6732b8bdb5a5f9905737ac6fd4ef1"}, + {file = "grpcio-1.56.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4c60abd950d6de3e4f1ddbc318075654d275c29c846ab6a043d6ed2c52e4c8c"}, + {file = "grpcio-1.56.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:1c31e52a04e62c8577a7bf772b3e7bed4df9c9e0dd90f92b6ffa07c16cab63c9"}, + {file = "grpcio-1.56.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:345356b307cce5d14355e8e055b4ca5f99bc857c33a3dc1ddbc544fca9cd0475"}, + {file = "grpcio-1.56.2-cp37-cp37m-win_amd64.whl", hash = "sha256:42e63904ee37ae46aa23de50dac8b145b3596f43598fa33fe1098ab2cbda6ff5"}, + {file = "grpcio-1.56.2-cp38-cp38-linux_armv7l.whl", hash = "sha256:7c5ede2e2558f088c49a1ddda19080e4c23fb5d171de80a726b61b567e3766ed"}, + {file = "grpcio-1.56.2-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:33971197c47965cc1d97d78d842163c283e998223b151bab0499b951fd2c0b12"}, + {file = "grpcio-1.56.2-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:d39f5d4af48c138cb146763eda14eb7d8b3ccbbec9fe86fb724cd16e0e914c64"}, + {file = "grpcio-1.56.2-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ded637176addc1d3eef35331c39acc598bac550d213f0a1bedabfceaa2244c87"}, + {file = "grpcio-1.56.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c90da4b124647547a68cf2f197174ada30c7bb9523cb976665dfd26a9963d328"}, + {file = "grpcio-1.56.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:3ccb621749a81dc7755243665a70ce45536ec413ef5818e013fe8dfbf5aa497b"}, + {file = "grpcio-1.56.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4eb37dd8dd1aa40d601212afa27ca5be255ba792e2e0b24d67b8af5e012cdb7d"}, + {file = "grpcio-1.56.2-cp38-cp38-win32.whl", hash = "sha256:ddb4a6061933bd9332b74eac0da25f17f32afa7145a33a0f9711ad74f924b1b8"}, + {file = "grpcio-1.56.2-cp38-cp38-win_amd64.whl", hash = "sha256:8940d6de7068af018dfa9a959a3510e9b7b543f4c405e88463a1cbaa3b2b379a"}, + {file = "grpcio-1.56.2-cp39-cp39-linux_armv7l.whl", hash = "sha256:51173e8fa6d9a2d85c14426bdee5f5c4a0654fd5fddcc21fe9d09ab0f6eb8b35"}, + {file = "grpcio-1.56.2-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:373b48f210f43327a41e397391715cd11cfce9ded2fe76a5068f9bacf91cc226"}, + {file = "grpcio-1.56.2-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:42a3bbb2bc07aef72a7d97e71aabecaf3e4eb616d39e5211e2cfe3689de860ca"}, + {file = "grpcio-1.56.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5344be476ac37eb9c9ad09c22f4ea193c1316bf074f1daf85bddb1b31fda5116"}, + {file = "grpcio-1.56.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3fa3ab0fb200a2c66493828ed06ccd1a94b12eddbfb985e7fd3e5723ff156c6"}, + {file = "grpcio-1.56.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b975b85d1d5efc36cf8b237c5f3849b64d1ba33d6282f5e991f28751317504a1"}, + {file = "grpcio-1.56.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cbdf2c498e077282cd427cfd88bdce4668019791deef0be8155385ab2ba7837f"}, + {file = "grpcio-1.56.2-cp39-cp39-win32.whl", hash = "sha256:139f66656a762572ae718fa0d1f2dce47c05e9fbf7a16acd704c354405b97df9"}, + {file = "grpcio-1.56.2-cp39-cp39-win_amd64.whl", hash = "sha256:830215173ad45d670140ff99aac3b461f9be9a6b11bee1a17265aaaa746a641a"}, + {file = 
"grpcio-1.56.2.tar.gz", hash = "sha256:0ff789ae7d8ddd76d2ac02e7d13bfef6fc4928ac01e1dcaa182be51b6bcc0aaa"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.56.2)"] + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "httpcore" +version = "0.17.3" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.7" +files = [ + {file = "httpcore-0.17.3-py3-none-any.whl", hash = "sha256:c2789b767ddddfa2a5782e3199b2b7f6894540b17b16ec26b2c4d8e103510b87"}, + {file = "httpcore-0.17.3.tar.gz", hash = "sha256:a6f30213335e34c1ade7be6ec7c47f19f50c56db36abef1a9dfa3815b1cb3888"}, +] + +[package.dependencies] +anyio = ">=3.0,<5.0" +certifi = "*" +h11 = ">=0.13,<0.15" +sniffio = "==1.*" + +[package.extras] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "httpx" +version = "0.24.1" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.7" +files = [ + {file = "httpx-0.24.1-py3-none-any.whl", hash = "sha256:06781eb9ac53cde990577af654bd990a4949de37a28bdb4a230d434f3a30b9bd"}, + {file = "httpx-0.24.1.tar.gz", hash = "sha256:5853a43053df830c20f8110c5e69fe44d035d850b2dfe795e196f00fdb774bdd"}, +] + +[package.dependencies] +certifi = "*" +httpcore = ">=0.15.0,<0.18.0" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "huggingface-hub" +version = "0.16.4" +description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "huggingface_hub-0.16.4-py3-none-any.whl", hash = "sha256:0d3df29932f334fead024afc7cb4cc5149d955238b8b5e42dcf9740d6995a349"}, + {file = "huggingface_hub-0.16.4.tar.gz", hash = "sha256:608c7d4f3d368b326d1747f91523dbd1f692871e8e2e7a4750314a2dd8b63e14"}, +] + +[package.dependencies] +filelock = "*" +fsspec = "*" +packaging = ">=20.9" +pyyaml = ">=5.1" +requests = "*" +tqdm = ">=4.42.1" +typing-extensions = ">=3.7.4.3" + +[package.extras] +all = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +cli = ["InquirerPy (==0.3.4)"] +dev = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "black (>=23.1,<24.0)", "gradio", "jedi", "mypy (==0.982)", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "ruff (>=0.0.241)", "soundfile", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3", "urllib3 (<2.0)"] +fastai = ["fastai (>=2.4)", "fastcore (>=1.3.27)", "toml"] +inference = ["aiohttp", "pydantic"] +quality = ["black (>=23.1,<24.0)", "mypy (==0.982)", "ruff (>=0.0.241)"] +tensorflow = ["graphviz", "pydot", "tensorflow"] 
+testing = ["InquirerPy (==0.3.4)", "Jinja2", "Pillow", "aiohttp", "gradio", "jedi", "numpy", "pydantic", "pytest", "pytest-asyncio", "pytest-cov", "pytest-env", "pytest-vcr", "pytest-xdist", "soundfile", "urllib3 (<2.0)"] +torch = ["torch"] +typing = ["pydantic", "types-PyYAML", "types-requests", "types-simplejson", "types-toml", "types-tqdm", "types-urllib3"] + +[[package]] +name = "hydra-core" +version = "1.0.7" +description = "A framework for elegantly configuring complex applications" +optional = false +python-versions = "*" +files = [ + {file = "hydra-core-1.0.7.tar.gz", hash = "sha256:58cc3f7531995b6d8de162ca21f936e17bdaebd4d1e8614d63c32e17c2e41e45"}, + {file = "hydra_core-1.0.7-py3-none-any.whl", hash = "sha256:e800c6deb8309395508094851fa93bc13408f2285261eb97e626d37193b58a9f"}, +] + +[package.dependencies] +antlr4-python3-runtime = "4.8" +importlib-resources = {version = "*", markers = "python_version < \"3.9\""} +omegaconf = ">=2.0.5,<2.1" + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "importlib-metadata" +version = "6.8.0" +description = "Read metadata from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_metadata-6.8.0-py3-none-any.whl", hash = "sha256:3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb"}, + {file = "importlib_metadata-6.8.0.tar.gz", hash = "sha256:dbace7892d8c0c4ac1ad096662232f831d4e64f4c4545bd53016a3e9d4654743"}, +] + +[package.dependencies] +zipp = ">=0.5" + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +perf = ["ipython"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf (>=0.9.2)", "pytest-ruff"] + +[[package]] +name = "importlib-resources" +version = "6.0.0" +description = "Read resources from Python packages" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_resources-6.0.0-py3-none-any.whl", hash = "sha256:d952faee11004c045f785bb5636e8f885bed30dc3c940d5d42798a2a4541c185"}, + {file = "importlib_resources-6.0.0.tar.gz", hash = "sha256:4cf94875a8368bd89531a756df9a9ebe1f150e0f885030b461237bc7f2d905f2"}, +] + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + +[[package]] +name = "jinja2" +version = "3.1.2" +description = "A very fast and expressive template engine." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "Jinja2-3.1.2-py3-none-any.whl", hash = "sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61"}, + {file = "Jinja2-3.1.2.tar.gz", hash = "sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852"}, +] + +[package.dependencies] +MarkupSafe = ">=2.0" + +[package.extras] +i18n = ["Babel (>=2.7)"] + +[[package]] +name = "joblib" +version = "1.3.1" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "joblib-1.3.1-py3-none-any.whl", hash = "sha256:89cf0529520e01b3de7ac7b74a8102c90d16d54c64b5dd98cafcd14307fdf915"}, + {file = "joblib-1.3.1.tar.gz", hash = "sha256:1f937906df65329ba98013dc9692fe22a4c5e4a648112de500508b18a21b41e3"}, +] + +[[package]] +name = "json5" +version = "0.9.14" +description = "A Python implementation of the JSON5 data format." +optional = false +python-versions = "*" +files = [ + {file = "json5-0.9.14-py2.py3-none-any.whl", hash = "sha256:740c7f1b9e584a468dbb2939d8d458db3427f2c93ae2139d05f47e453eae964f"}, + {file = "json5-0.9.14.tar.gz", hash = "sha256:9ed66c3a6ca3510a976a9ef9b8c0787de24802724ab1860bc0153c7fdd589b02"}, +] + +[package.extras] +dev = ["hypothesis"] + +[[package]] +name = "jsonschema" +version = "4.18.4" +description = "An implementation of JSON Schema validation for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonschema-4.18.4-py3-none-any.whl", hash = "sha256:971be834317c22daaa9132340a51c01b50910724082c2c1a2ac87eeec153a3fe"}, + {file = "jsonschema-4.18.4.tar.gz", hash = "sha256:fb3642735399fa958c0d2aad7057901554596c63349f4f6b283c493cf692a25d"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} +jsonschema-specifications = ">=2023.03.6" +pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} +referencing = ">=0.28.4" +rpds-py = ">=0.7.1" + +[package.extras] +format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] +format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] + +[[package]] +name = "jsonschema-specifications" +version = "2023.7.1" +description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +optional = false +python-versions = ">=3.8" +files = [ + {file = "jsonschema_specifications-2023.7.1-py3-none-any.whl", hash = "sha256:05adf340b659828a004220a9613be00fa3f223f2b82002e273dee62fd50524b1"}, + {file = "jsonschema_specifications-2023.7.1.tar.gz", hash = "sha256:c91a50404e88a1f6ba40636778e2ee08f6e24c5613fe4c53ac24578a5a7f72bb"}, +] + +[package.dependencies] +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} +referencing = ">=0.28.0" + +[[package]] +name = "kiwisolver" +version = "1.4.4" +description = "A fast implementation of the Cassowary constraint solver" +optional = false +python-versions = ">=3.7" +files = [ + {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2f5e60fabb7343a836360c4f0919b8cd0d6dbf08ad2ca6b9cf90bf0c76a3c4f6"}, + {file = "kiwisolver-1.4.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:10ee06759482c78bdb864f4109886dff7b8a56529bc1609d4f1112b93fe6423c"}, + {file = "kiwisolver-1.4.4-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:c79ebe8f3676a4c6630fd3f777f3cfecf9289666c84e775a67d1d358578dc2e3"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:abbe9fa13da955feb8202e215c4018f4bb57469b1b78c7a4c5c7b93001699938"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7577c1987baa3adc4b3c62c33bd1118c3ef5c8ddef36f0f2c950ae0b199e100d"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ad8285b01b0d4695102546b342b493b3ccc6781fc28c8c6a1bb63e95d22f09"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8ed58b8acf29798b036d347791141767ccf65eee7f26bde03a71c944449e53de"}, + {file = "kiwisolver-1.4.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a68b62a02953b9841730db7797422f983935aeefceb1679f0fc85cbfbd311c32"}, + {file = "kiwisolver-1.4.4-cp310-cp310-win32.whl", hash = "sha256:e92a513161077b53447160b9bd8f522edfbed4bd9759e4c18ab05d7ef7e49408"}, + {file = "kiwisolver-1.4.4-cp310-cp310-win_amd64.whl", hash = "sha256:3fe20f63c9ecee44560d0e7f116b3a747a5d7203376abeea292ab3152334d004"}, + {file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e0ea21f66820452a3f5d1655f8704a60d66ba1191359b96541eaf457710a5fc6"}, + {file = "kiwisolver-1.4.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bc9db8a3efb3e403e4ecc6cd9489ea2bac94244f80c78e27c31dcc00d2790ac2"}, + {file = "kiwisolver-1.4.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d5b61785a9ce44e5a4b880272baa7cf6c8f48a5180c3e81c59553ba0cb0821ca"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c2dbb44c3f7e6c4d3487b31037b1bdbf424d97687c1747ce4ff2895795c9bf69"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6295ecd49304dcf3bfbfa45d9a081c96509e95f4b9d0eb7ee4ec0530c4a96514"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4bd472dbe5e136f96a4b18f295d159d7f26fd399136f5b17b08c4e5f498cd494"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf7d9fce9bcc4752ca4a1b80aabd38f6d19009ea5cbda0e0856983cf6d0023f5"}, + {file = "kiwisolver-1.4.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d6601aed50c74e0ef02f4204da1816147a6d3fbdc8b3872d263338a9052c51"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:877272cf6b4b7e94c9614f9b10140e198d2186363728ed0f701c6eee1baec1da"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:db608a6757adabb32f1cfe6066e39b3706d8c3aa69bbc353a5b61edad36a5cb4"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:5853eb494c71e267912275e5586fe281444eb5e722de4e131cddf9d442615626"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:f0a1dbdb5ecbef0d34eb77e56fcb3e95bbd7e50835d9782a45df81cc46949750"}, + {file = "kiwisolver-1.4.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:283dffbf061a4ec60391d51e6155e372a1f7a4f5b15d59c8505339454f8989e4"}, + {file = "kiwisolver-1.4.4-cp311-cp311-win32.whl", hash = "sha256:d06adcfa62a4431d404c31216f0f8ac97397d799cd53800e9d3efc2fbb3cf14e"}, + {file = "kiwisolver-1.4.4-cp311-cp311-win_amd64.whl", hash = 
"sha256:e7da3fec7408813a7cebc9e4ec55afed2d0fd65c4754bc376bf03498d4e92686"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:62ac9cc684da4cf1778d07a89bf5f81b35834cb96ca523d3a7fb32509380cbf6"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41dae968a94b1ef1897cb322b39360a0812661dba7c682aa45098eb8e193dbdf"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02f79693ec433cb4b5f51694e8477ae83b3205768a6fb48ffba60549080e295b"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d0611a0a2a518464c05ddd5a3a1a0e856ccc10e67079bb17f265ad19ab3c7597"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:db5283d90da4174865d520e7366801a93777201e91e79bacbac6e6927cbceede"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1041feb4cda8708ce73bb4dcb9ce1ccf49d553bf87c3954bdfa46f0c3f77252c"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-win32.whl", hash = "sha256:a553dadda40fef6bfa1456dc4be49b113aa92c2a9a9e8711e955618cd69622e3"}, + {file = "kiwisolver-1.4.4-cp37-cp37m-win_amd64.whl", hash = "sha256:03baab2d6b4a54ddbb43bba1a3a2d1627e82d205c5cf8f4c924dc49284b87166"}, + {file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:841293b17ad704d70c578f1f0013c890e219952169ce8a24ebc063eecf775454"}, + {file = "kiwisolver-1.4.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f4f270de01dd3e129a72efad823da90cc4d6aafb64c410c9033aba70db9f1ff0"}, + {file = "kiwisolver-1.4.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f9f39e2f049db33a908319cf46624a569b36983c7c78318e9726a4cb8923b26c"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c97528e64cb9ebeff9701e7938653a9951922f2a38bd847787d4a8e498cc83ae"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d1573129aa0fd901076e2bfb4275a35f5b7aa60fbfb984499d661ec950320b0"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ad881edc7ccb9d65b0224f4e4d05a1e85cf62d73aab798943df6d48ab0cd79a1"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b428ef021242344340460fa4c9185d0b1f66fbdbfecc6c63eff4b7c29fad429d"}, + {file = "kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2e407cb4bd5a13984a6c2c0fe1845e4e41e96f183e5e5cd4d77a857d9693494c"}, + {file = "kiwisolver-1.4.4-cp38-cp38-win32.whl", hash = "sha256:75facbe9606748f43428fc91a43edb46c7ff68889b91fa31f53b58894503a191"}, + {file = "kiwisolver-1.4.4-cp38-cp38-win_amd64.whl", hash = "sha256:5bce61af018b0cb2055e0e72e7d65290d822d3feee430b7b8203d8a855e78766"}, + {file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8c808594c88a025d4e322d5bb549282c93c8e1ba71b790f539567932722d7bd8"}, + {file = "kiwisolver-1.4.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f0a71d85ecdd570ded8ac3d1c0f480842f49a40beb423bb8014539a9f32a5897"}, + {file = "kiwisolver-1.4.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b533558eae785e33e8c148a8d9921692a9fe5aa516efbdff8606e7d87b9d5824"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:efda5fc8cc1c61e4f639b8067d118e742b812c930f708e6667a5ce0d13499e29"}, + {file = 
"kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bc8d3bd6c72b2dd9decf16ce70e20abcb3274ba01b4e1c96031e0c4067d1e7cd"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4ea39b0ccc4f5d803e3337dd46bcce60b702be4d86fd0b3d7531ef10fd99a1ac"}, + {file = "kiwisolver-1.4.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:968f44fdbf6dd757d12920d63b566eeb4d5b395fd2d00d29d7ef00a00582aac9"}, + {file = "kiwisolver-1.4.4-cp39-cp39-win32.whl", hash = "sha256:da7e547706e69e45d95e116e6939488d62174e033b763ab1496b4c29b76fabea"}, + {file = "kiwisolver-1.4.4-cp39-cp39-win_amd64.whl", hash = "sha256:ba59c92039ec0a66103b1d5fe588fa546373587a7d68f5c96f743c3396afc04b"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:91672bacaa030f92fc2f43b620d7b337fd9a5af28b0d6ed3f77afc43c4a64b5a"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:787518a6789009c159453da4d6b683f468ef7a65bbde796bcea803ccf191058d"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da152d8cdcab0e56e4f45eb08b9aea6455845ec83172092f09b0e077ece2cf7a"}, + {file = "kiwisolver-1.4.4-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ecb1fa0db7bf4cff9dac752abb19505a233c7f16684c5826d1f11ebd9472b871"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:28bc5b299f48150b5f822ce68624e445040595a4ac3d59251703779836eceff9"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:81e38381b782cc7e1e46c4e14cd997ee6040768101aefc8fa3c24a4cc58e98f8"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2a66fdfb34e05b705620dd567f5a03f239a088d5a3f321e7b6ac3239d22aa286"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:872b8ca05c40d309ed13eb2e582cab0c5a05e81e987ab9c521bf05ad1d5cf5cb"}, + {file = "kiwisolver-1.4.4-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:70e7c2e7b750585569564e2e5ca9845acfaa5da56ac46df68414f29fea97be9f"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:9f85003f5dfa867e86d53fac6f7e6f30c045673fa27b603c397753bebadc3008"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e307eb9bd99801f82789b44bb45e9f541961831c7311521b13a6c85afc09767"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1792d939ec70abe76f5054d3f36ed5656021dcad1322d1cc996d4e54165cef9"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6cb459eea32a4e2cf18ba5fcece2dbdf496384413bc1bae15583f19e567f3b2"}, + {file = "kiwisolver-1.4.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:36dafec3d6d6088d34e2de6b85f9d8e2324eb734162fba59d2ba9ed7a2043d5b"}, + {file = "kiwisolver-1.4.4.tar.gz", hash = "sha256:d41997519fcba4a1e46eb4a2fe31bc12f0ff957b2b81bac28db24744f333e955"}, +] + +[[package]] +name = "librosa" +version = "0.9.1" +description = "Python module for audio and music processing" +optional = 
false +python-versions = ">=3.6" +files = [ + {file = "librosa-0.9.1-py3-none-any.whl", hash = "sha256:c2bb61a8008367cca89a3f1dad352d8e55fe5ca5f7414fb5d5258eb52765db33"}, + {file = "librosa-0.9.1.tar.gz", hash = "sha256:7ed5d6e3f4546e5e3c2840691f9ddc56878f914a35a50060df5fca2b26d4b614"}, +] + +[package.dependencies] +audioread = ">=2.1.5" +decorator = ">=4.0.10" +joblib = ">=0.14" +numba = ">=0.45.1" +numpy = ">=1.17.0" +packaging = ">=20.0" +pooch = ">=1.0" +resampy = ">=0.2.2" +scikit-learn = ">=0.19.1" +scipy = ">=1.2.0" +soundfile = ">=0.10.2" + +[package.extras] +display = ["matplotlib (>=3.3.0)"] +docs = ["ipython (>=7.0)", "matplotlib (>=3.3.0)", "mir-eval (>=0.5)", "numba (<0.50)", "numpydoc", "presets", "sphinx (!=1.3.1)", "sphinx-gallery (>=0.7)", "sphinx-multiversion (>=0.2.3)", "sphinx-rtd-theme (==0.5.*)", "sphinxcontrib-svg2pdfconverter"] +tests = ["contextlib2", "matplotlib (>=3.3.0)", "pytest", "pytest-cov", "pytest-mpl", "samplerate", "soxr"] + +[[package]] +name = "linkify-it-py" +version = "2.0.2" +description = "Links recognition library with FULL unicode support." +optional = false +python-versions = ">=3.7" +files = [ + {file = "linkify-it-py-2.0.2.tar.gz", hash = "sha256:19f3060727842c254c808e99d465c80c49d2c7306788140987a1a7a29b0d6ad2"}, + {file = "linkify_it_py-2.0.2-py3-none-any.whl", hash = "sha256:a3a24428f6c96f27370d7fe61d2ac0be09017be5190d68d8658233171f1b6541"}, +] + +[package.dependencies] +uc-micro-py = "*" + +[package.extras] +benchmark = ["pytest", "pytest-benchmark"] +dev = ["black", "flake8", "isort", "pre-commit", "pyproject-flake8"] +doc = ["myst-parser", "sphinx", "sphinx-book-theme"] +test = ["coverage", "pytest", "pytest-cov"] + +[[package]] +name = "llvmlite" +version = "0.39.0" +description = "lightweight wrapper around basic LLVM functionality" +optional = false +python-versions = ">=3.7" +files = [ + {file = "llvmlite-0.39.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:589f08a1b1920e6004735819ce9aafdd85d030d4a231c1e7adaca9360724b1ed"}, + {file = "llvmlite-0.39.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:44a9a5cbe76db8ba01a5f6fa21649d91aa8a2634cc6f3a60291797e42e67d79e"}, + {file = "llvmlite-0.39.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:74d89f2ec4734d3e200fb90ea0b3ca5e9be40f3b3e50eb368ca9002ed5b3e4f8"}, + {file = "llvmlite-0.39.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8b4cb4f433b48792f02ec4ab619b86b145689302a3088a3f3853f50df6c2559d"}, + {file = "llvmlite-0.39.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:35db4122182cc5112912a3ec94a3c18eab9a990bd588bfda8445087c1b748563"}, + {file = "llvmlite-0.39.0-cp310-cp310-win32.whl", hash = "sha256:c00bf7a8dc56b4b3618c65b67e75046410f751512871d9e23919cf1feb1007b2"}, + {file = "llvmlite-0.39.0-cp310-cp310-win_amd64.whl", hash = "sha256:72bd2e5db9790344ec39cef77098486635853829ecb0e66e6fa516488ff6dd9e"}, + {file = "llvmlite-0.39.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:53c7c608baffdcdc2213926f4e3600036d4048aed08d6209b9f76a5439e529d6"}, + {file = "llvmlite-0.39.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3bbd23e42593f85a842614d8ddb2b2943630e4c4c8418ea0d8cf1dce9f2fa7a"}, + {file = "llvmlite-0.39.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d733eb9c02bb8b01373228a1339901b1e50be4581105239c6052b9573ddb9298"}, + {file = "llvmlite-0.39.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f575fcb9bebe5bcbe20373c56ad3ebf63bae0e27d3c22c1a4dc27fa4666d0324"}, + {file = "llvmlite-0.39.0-cp37-cp37m-win32.whl", hash = "sha256:5ca4ea962da6ec3b007bedab17065781803d71159b03435f24ce6845cf3d1c66"}, + {file = "llvmlite-0.39.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8e461608135859ac40e39211d9c63a1ce35176513f6b8be87efb554d4af3a388"}, + {file = "llvmlite-0.39.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:62a11b8e9e5fc4783d94da45d94c5a047ce6ccc4c112ae5f764109e9405fcc2c"}, + {file = "llvmlite-0.39.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9272b7e344d12b36dafeb6911054eff32d2a9be7256a2866f0c09d08f945e17f"}, + {file = "llvmlite-0.39.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3df59a7c2b60764fb9eeaf9c442d757eca1f3e87298d4f88849203667528581e"}, + {file = "llvmlite-0.39.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4cfd6688efd0f551168dd8626f386464aef25663268a2400c0f6a089b97a73dc"}, + {file = "llvmlite-0.39.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c7292b394956749e51ae3d51a2085932a0e3261108b35eda61d702c1b977102c"}, + {file = "llvmlite-0.39.0-cp38-cp38-win32.whl", hash = "sha256:f8e9463a7d0152994b6f7d630012297bb160db237ad9ca8e75c8dceef7a747cf"}, + {file = "llvmlite-0.39.0-cp38-cp38-win_amd64.whl", hash = "sha256:8d8149fdaab40ae48ea4ec816ae2ae5d36d664795e1b1dfb911fc2c62bc73184"}, + {file = "llvmlite-0.39.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0929e3c26bcafb53545c77bcf7020b943dcefcf8d7d3010f414384458f805cc1"}, + {file = "llvmlite-0.39.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:56ea23c6bbcd25a7c050a26b6effe836a575a33183744cbc28fb21358b3801f8"}, + {file = "llvmlite-0.39.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f82d605c5d6c8df96fe19bc3a61c934580e24cafa694cbf79cb227cdc0e426a"}, + {file = "llvmlite-0.39.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7f7a7278ba6d75533be46abc3d9e242030ab017f0016dd081b55f821cc03be9"}, + {file = "llvmlite-0.39.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56ccfe736a12aef2e39450a22e4c027eee4f488c5786c81d0b18ff8a6cf52531"}, + {file = "llvmlite-0.39.0-cp39-cp39-win32.whl", hash = "sha256:0706abf522dc510ddc818f5c9e1cdae521a1416d3c399bbfc4827813379f0164"}, + {file = "llvmlite-0.39.0-cp39-cp39-win_amd64.whl", hash = "sha256:d4a8199263859b97f174035e39297e770617d3497fac44fe738f74ce9c51d22b"}, + {file = "llvmlite-0.39.0.tar.gz", hash = "sha256:01098be54f1aa25e391cebba8ea71cd1533f8cd1f50e34c7dd7540c2560a93af"}, +] + +[[package]] +name = "lxml" +version = "4.9.3" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" +files = [ + {file = "lxml-4.9.3-cp27-cp27m-macosx_11_0_x86_64.whl", hash = "sha256:b0a545b46b526d418eb91754565ba5b63b1c0b12f9bd2f808c852d9b4b2f9b5c"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:075b731ddd9e7f68ad24c635374211376aa05a281673ede86cbe1d1b3455279d"}, + {file = "lxml-4.9.3-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1e224d5755dba2f4a9498e150c43792392ac9b5380aa1b845f98a1618c94eeef"}, + {file = "lxml-4.9.3-cp27-cp27m-win32.whl", hash = "sha256:2c74524e179f2ad6d2a4f7caf70e2d96639c0954c943ad601a9e146c76408ed7"}, + {file = "lxml-4.9.3-cp27-cp27m-win_amd64.whl", hash = "sha256:4f1026bc732b6a7f96369f7bfe1a4f2290fb34dce00d8644bc3036fb351a4ca1"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c0781a98ff5e6586926293e59480b64ddd46282953203c76ae15dbbbf302e8bb"}, + {file = "lxml-4.9.3-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:cef2502e7e8a96fe5ad686d60b49e1ab03e438bd9123987994528febd569868e"}, + {file = "lxml-4.9.3-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b86164d2cff4d3aaa1f04a14685cbc072efd0b4f99ca5708b2ad1b9b5988a991"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:42871176e7896d5d45138f6d28751053c711ed4d48d8e30b498da155af39aebd"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:ae8b9c6deb1e634ba4f1930eb67ef6e6bf6a44b6eb5ad605642b2d6d5ed9ce3c"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:411007c0d88188d9f621b11d252cce90c4a2d1a49db6c068e3c16422f306eab8"}, + {file = "lxml-4.9.3-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:cd47b4a0d41d2afa3e58e5bf1f62069255aa2fd6ff5ee41604418ca925911d76"}, + {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:0e2cb47860da1f7e9a5256254b74ae331687b9672dfa780eed355c4c9c3dbd23"}, + {file = "lxml-4.9.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1247694b26342a7bf47c02e513d32225ededd18045264d40758abeb3c838a51f"}, + {file = "lxml-4.9.3-cp310-cp310-win32.whl", hash = "sha256:cdb650fc86227eba20de1a29d4b2c1bfe139dc75a0669270033cb2ea3d391b85"}, + {file = "lxml-4.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:97047f0d25cd4bcae81f9ec9dc290ca3e15927c192df17331b53bebe0e3ff96d"}, + {file = "lxml-4.9.3-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:1f447ea5429b54f9582d4b955f5f1985f278ce5cf169f72eea8afd9502973dd5"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:57d6ba0ca2b0c462f339640d22882acc711de224d769edf29962b09f77129cbf"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:9767e79108424fb6c3edf8f81e6730666a50feb01a328f4a016464a5893f835a"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:71c52db65e4b56b8ddc5bb89fb2e66c558ed9d1a74a45ceb7dcb20c191c3df2f"}, + {file = "lxml-4.9.3-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:d73d8ecf8ecf10a3bd007f2192725a34bd62898e8da27eb9d32a58084f93962b"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:0a3d3487f07c1d7f150894c238299934a2a074ef590b583103a45002035be120"}, + {file = "lxml-4.9.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:9e28c51fa0ce5674be9f560c6761c1b441631901993f76700b1b30ca6c8378d6"}, + {file = "lxml-4.9.3-cp311-cp311-win32.whl", hash = "sha256:0bfd0767c5c1de2551a120673b72e5d4b628737cb05414f03c3277bf9bed3305"}, + {file = "lxml-4.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:25f32acefac14ef7bd53e4218fe93b804ef6f6b92ffdb4322bb6d49d94cad2bc"}, + {file = "lxml-4.9.3-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:d3ff32724f98fbbbfa9f49d82852b159e9784d6094983d9a8b7f2ddaebb063d4"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:48d6ed886b343d11493129e019da91d4039826794a3e3027321c56d9e71505be"}, + {file = "lxml-4.9.3-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:9a92d3faef50658dd2c5470af249985782bf754c4e18e15afb67d3ab06233f13"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b4e4bc18382088514ebde9328da057775055940a1f2e18f6ad2d78aa0f3ec5b9"}, + {file = "lxml-4.9.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:fc9b106a1bf918db68619fdcd6d5ad4f972fdd19c01d19bdb6bf63f3589a9ec5"}, + {file = "lxml-4.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d37017287a7adb6ab77e1c5bee9bcf9660f90ff445042b790402a654d2ad81d8"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56dc1f1ebccc656d1b3ed288f11e27172a01503fc016bcabdcbc0978b19352b7"}, + {file = "lxml-4.9.3-cp35-cp35m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:578695735c5a3f51569810dfebd05dd6f888147a34f0f98d4bb27e92b76e05c2"}, + {file = "lxml-4.9.3-cp35-cp35m-win32.whl", hash = "sha256:704f61ba8c1283c71b16135caf697557f5ecf3e74d9e453233e4771d68a1f42d"}, + {file = "lxml-4.9.3-cp35-cp35m-win_amd64.whl", hash = "sha256:c41bfca0bd3532d53d16fd34d20806d5c2b1ace22a2f2e4c0008570bf2c58833"}, + {file = "lxml-4.9.3-cp36-cp36m-macosx_11_0_x86_64.whl", hash = "sha256:64f479d719dc9f4c813ad9bb6b28f8390360660b73b2e4beb4cb0ae7104f1c12"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:dd708cf4ee4408cf46a48b108fb9427bfa00b9b85812a9262b5c668af2533ea5"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c31c7462abdf8f2ac0577d9f05279727e698f97ecbb02f17939ea99ae8daa98"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e3cd95e10c2610c360154afdc2f1480aea394f4a4f1ea0a5eacce49640c9b190"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:4930be26af26ac545c3dffb662521d4e6268352866956672231887d18f0eaab2"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4aec80cde9197340bc353d2768e2a75f5f60bacda2bab72ab1dc499589b3878c"}, + {file = "lxml-4.9.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:14e019fd83b831b2e61baed40cab76222139926b1fb5ed0e79225bc0cae14584"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0c0850c8b02c298d3c7006b23e98249515ac57430e16a166873fc47a5d549287"}, + {file = "lxml-4.9.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:aca086dc5f9ef98c512bac8efea4483eb84abbf926eaeedf7b91479feb092458"}, + {file = "lxml-4.9.3-cp36-cp36m-win32.whl", hash = "sha256:50baa9c1c47efcaef189f31e3d00d697c6d4afda5c3cde0302d063492ff9b477"}, + {file = "lxml-4.9.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bef4e656f7d98aaa3486d2627e7d2df1157d7e88e7efd43a65aa5dd4714916cf"}, + {file = 
"lxml-4.9.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:46f409a2d60f634fe550f7133ed30ad5321ae2e6630f13657fb9479506b00601"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:4c28a9144688aef80d6ea666c809b4b0e50010a2aca784c97f5e6bf143d9f129"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:141f1d1a9b663c679dc524af3ea1773e618907e96075262726c7612c02b149a4"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:53ace1c1fd5a74ef662f844a0413446c0629d151055340e9893da958a374f70d"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17a753023436a18e27dd7769e798ce302963c236bc4114ceee5b25c18c52c693"}, + {file = "lxml-4.9.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:7d298a1bd60c067ea75d9f684f5f3992c9d6766fadbc0bcedd39750bf344c2f4"}, + {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:081d32421db5df44c41b7f08a334a090a545c54ba977e47fd7cc2deece78809a"}, + {file = "lxml-4.9.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:23eed6d7b1a3336ad92d8e39d4bfe09073c31bfe502f20ca5116b2a334f8ec02"}, + {file = "lxml-4.9.3-cp37-cp37m-win32.whl", hash = "sha256:1509dd12b773c02acd154582088820893109f6ca27ef7291b003d0e81666109f"}, + {file = "lxml-4.9.3-cp37-cp37m-win_amd64.whl", hash = "sha256:120fa9349a24c7043854c53cae8cec227e1f79195a7493e09e0c12e29f918e52"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4d2d1edbca80b510443f51afd8496be95529db04a509bc8faee49c7b0fb6d2cc"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_24_aarch64.whl", hash = "sha256:8d7e43bd40f65f7d97ad8ef5c9b1778943d02f04febef12def25f7583d19baac"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:71d66ee82e7417828af6ecd7db817913cb0cf9d4e61aa0ac1fde0583d84358db"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:6fc3c450eaa0b56f815c7b62f2b7fba7266c4779adcf1cece9e6deb1de7305ce"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:65299ea57d82fb91c7f019300d24050c4ddeb7c5a190e076b5f48a2b43d19c42"}, + {file = "lxml-4.9.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:eadfbbbfb41b44034a4c757fd5d70baccd43296fb894dba0295606a7cf3124aa"}, + {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3e9bdd30efde2b9ccfa9cb5768ba04fe71b018a25ea093379c857c9dad262c40"}, + {file = "lxml-4.9.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:fcdd00edfd0a3001e0181eab3e63bd5c74ad3e67152c84f93f13769a40e073a7"}, + {file = "lxml-4.9.3-cp38-cp38-win32.whl", hash = "sha256:57aba1bbdf450b726d58b2aea5fe47c7875f5afb2c4a23784ed78f19a0462574"}, + {file = "lxml-4.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:92af161ecbdb2883c4593d5ed4815ea71b31fafd7fd05789b23100d081ecac96"}, + {file = "lxml-4.9.3-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:9bb6ad405121241e99a86efff22d3ef469024ce22875a7ae045896ad23ba2340"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8ed74706b26ad100433da4b9d807eae371efaa266ffc3e9191ea436087a9d6a7"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = 
"sha256:fbf521479bcac1e25a663df882c46a641a9bff6b56dc8b0fafaebd2f66fb231b"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:303bf1edce6ced16bf67a18a1cf8339d0db79577eec5d9a6d4a80f0fb10aa2da"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:5515edd2a6d1a5a70bfcdee23b42ec33425e405c5b351478ab7dc9347228f96e"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:690dafd0b187ed38583a648076865d8c229661ed20e48f2335d68e2cf7dc829d"}, + {file = "lxml-4.9.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b6420a005548ad52154c8ceab4a1290ff78d757f9e5cbc68f8c77089acd3c432"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:bb3bb49c7a6ad9d981d734ef7c7193bc349ac338776a0360cc671eaee89bcf69"}, + {file = "lxml-4.9.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d27be7405547d1f958b60837dc4c1007da90b8b23f54ba1f8b728c78fdb19d50"}, + {file = "lxml-4.9.3-cp39-cp39-win32.whl", hash = "sha256:8df133a2ea5e74eef5e8fc6f19b9e085f758768a16e9877a60aec455ed2609b2"}, + {file = "lxml-4.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:4dd9a263e845a72eacb60d12401e37c616438ea2e5442885f65082c276dfb2b2"}, + {file = "lxml-4.9.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6689a3d7fd13dc687e9102a27e98ef33730ac4fe37795d5036d18b4d527abd35"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:f6bdac493b949141b733c5345b6ba8f87a226029cbabc7e9e121a413e49441e0"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:05186a0f1346ae12553d66df1cfce6f251589fea3ad3da4f3ef4e34b2d58c6a3"}, + {file = "lxml-4.9.3-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c2006f5c8d28dee289f7020f721354362fa304acbaaf9745751ac4006650254b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-macosx_11_0_x86_64.whl", hash = "sha256:5c245b783db29c4e4fbbbfc9c5a78be496c9fea25517f90606aa1f6b2b3d5f7b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:4fb960a632a49f2f089d522f70496640fdf1218f1243889da3822e0a9f5f3ba7"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:50670615eaf97227d5dc60de2dc99fb134a7130d310d783314e7724bf163f75d"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:9719fe17307a9e814580af1f5c6e05ca593b12fb7e44fe62450a5384dbf61b4b"}, + {file = "lxml-4.9.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:3331bece23c9ee066e0fb3f96c61322b9e0f54d775fccefff4c38ca488de283a"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-macosx_11_0_x86_64.whl", hash = "sha256:ed667f49b11360951e201453fc3967344d0d0263aa415e1619e85ae7fd17b4e0"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_24_i686.whl", hash = "sha256:8b77946fd508cbf0fccd8e400a7f71d4ac0e1595812e66025bac475a8e811694"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:e4da8ca0c0c0aea88fd46be8e44bd49716772358d648cce45fe387f7b92374a7"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fe4bda6bd4340caa6e5cf95e73f8fea5c4bfc55763dd42f1b50a94c1b4a2fbd4"}, + {file = "lxml-4.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f3df3db1d336b9356dd3112eae5f5c2b8b377f3bc826848567f10bfddfee77e9"}, + {file = 
"lxml-4.9.3.tar.gz", hash = "sha256:48628bd53a426c9eb9bc066a923acaa0878d1e86129fd5359aee99285f4eed9c"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=0.29.35)"] + +[[package]] +name = "markdown" +version = "3.4.3" +description = "Python implementation of John Gruber's Markdown." +optional = false +python-versions = ">=3.7" +files = [ + {file = "Markdown-3.4.3-py3-none-any.whl", hash = "sha256:065fd4df22da73a625f14890dd77eb8040edcbd68794bcd35943be14490608b2"}, + {file = "Markdown-3.4.3.tar.gz", hash = "sha256:8bf101198e004dc93e84a12a7395e31aac6a9c9942848ae1d99b9d72cf9b3520"}, +] + +[package.dependencies] +importlib-metadata = {version = ">=4.4", markers = "python_version < \"3.10\""} + +[package.extras] +testing = ["coverage", "pyyaml"] + +[[package]] +name = "markdown-it-py" +version = "2.2.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.7" +files = [ + {file = "markdown-it-py-2.2.0.tar.gz", hash = "sha256:7c9a5e412688bc771c67432cbfebcdd686c93ce6484913dccf06cb5a0bea35a1"}, + {file = "markdown_it_py-2.2.0-py3-none-any.whl", hash = "sha256:5a35f8d1870171d9acc47b99612dc146129b631baf04970128b568f190d0cc30"}, +] + +[package.dependencies] +linkify-it-py = {version = ">=1,<3", optional = true, markers = "extra == \"linkify\""} +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["attrs", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "markupsafe" +version = "2.1.3" +description = "Safely add untrusted strings to HTML/XML markup." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cd0f502fe016460680cd20aaa5a76d241d6f35a1c3350c474bac1273803893fa"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e09031c87a1e51556fdcb46e5bd4f59dfb743061cf93c4d6831bf894f125eb57"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68e78619a61ecf91e76aa3e6e8e33fc4894a2bebe93410754bd28fce0a8a4f9f"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65c1a9bcdadc6c28eecee2c119465aebff8f7a584dd719facdd9e825ec61ab52"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:525808b8019e36eb524b8c68acdd63a37e75714eac50e988180b169d64480a00"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:962f82a3086483f5e5f64dbad880d31038b698494799b097bc59c2edf392fce6"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa7bd130efab1c280bed0f45501b7c8795f9fdbeb02e965371bbef3523627779"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:c9c804664ebe8f83a211cace637506669e7890fec1b4195b505c214e50dd4eb7"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win32.whl", hash = "sha256:10bbfe99883db80bdbaff2dcf681dfc6533a614f700da1287707e8a5d78a8431"}, + {file = "MarkupSafe-2.1.3-cp310-cp310-win_amd64.whl", hash = "sha256:1577735524cdad32f9f694208aa75e422adba74f1baee7551620e43a3141f559"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ad9e82fb8f09ade1c3e1b996a6337afac2b8b9e365f926f5a61aacc71adc5b3c"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3c0fae6c3be832a0a0473ac912810b2877c8cb9d76ca48de1ed31e1c68386575"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b076b6226fb84157e3f7c971a47ff3a679d837cf338547532ab866c57930dbee"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:338ae27d6b8745585f87218a3f23f1512dbf52c26c28e322dbe54bcede54ccb9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e4dd52d80b8c83fdce44e12478ad2e85c64ea965e75d66dbeafb0a3e77308fcc"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:df0be2b576a7abbf737b1575f048c23fb1d769f267ec4358296f31c2479db8f9"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5bbe06f8eeafd38e5d0a4894ffec89378b6c6a625ff57e3028921f8ff59318ac"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win32.whl", hash = "sha256:dd15ff04ffd7e05ffcb7fe79f1b98041b8ea30ae9234aed2a9168b5797c3effb"}, + {file = "MarkupSafe-2.1.3-cp311-cp311-win_amd64.whl", hash = "sha256:134da1eca9ec0ae528110ccc9e48041e0828d79f24121a1a146161103c76e686"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8e254ae696c88d98da6555f5ace2279cf7cd5b3f52be2b5cf97feafe883b58d2"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb0932dc158471523c9637e807d9bfb93e06a95cbf010f1a38b98623b929ef2b"}, + {file = 
"MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9402b03f1a1b4dc4c19845e5c749e3ab82d5078d16a2a4c2cd2df62d57bb0707"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ca379055a47383d02a5400cb0d110cef0a776fc644cda797db0c5696cfd7e18e"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:b7ff0f54cb4ff66dd38bebd335a38e2c22c41a8ee45aa608efc890ac3e3931bc"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:c011a4149cfbcf9f03994ec2edffcb8b1dc2d2aede7ca243746df97a5d41ce48"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:56d9f2ecac662ca1611d183feb03a3fa4406469dafe241673d521dd5ae92a155"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win32.whl", hash = "sha256:8758846a7e80910096950b67071243da3e5a20ed2546e6392603c096778d48e0"}, + {file = "MarkupSafe-2.1.3-cp37-cp37m-win_amd64.whl", hash = "sha256:787003c0ddb00500e49a10f2844fac87aa6ce977b90b0feaaf9de23c22508b24"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:2ef12179d3a291be237280175b542c07a36e7f60718296278d8593d21ca937d4"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2c1b19b3aaacc6e57b7e25710ff571c24d6c3613a45e905b1fde04d691b98ee0"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8afafd99945ead6e075b973fefa56379c5b5c53fd8937dad92c662da5d8fd5ee"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c41976a29d078bb235fea9b2ecd3da465df42a562910f9022f1a03107bd02be"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d080e0a5eb2529460b30190fcfcc4199bd7f827663f858a226a81bc27beaa97e"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:69c0f17e9f5a7afdf2cc9fb2d1ce6aabdb3bafb7f38017c0b77862bcec2bbad8"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:504b320cd4b7eff6f968eddf81127112db685e81f7e36e75f9f84f0df46041c3"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:42de32b22b6b804f42c5d98be4f7e5e977ecdd9ee9b660fda1a3edf03b11792d"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win32.whl", hash = "sha256:ceb01949af7121f9fc39f7d27f91be8546f3fb112c608bc4029aef0bab86a2a5"}, + {file = "MarkupSafe-2.1.3-cp38-cp38-win_amd64.whl", hash = "sha256:1b40069d487e7edb2676d3fbdb2b0829ffa2cd63a2ec26c4938b2d34391b4ecc"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8023faf4e01efadfa183e863fefde0046de576c6f14659e8782065bcece22198"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6b2b56950d93e41f33b4223ead100ea0fe11f8e6ee5f641eb753ce4b77a7042b"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9dcdfd0eaf283af041973bff14a2e143b8bd64e069f4c383416ecd79a81aab58"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:282c2cb35b5b673bbcadb33a585408104df04f14b2d9b01d4c345a3b92861c2c"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:ab4a0df41e7c16a1392727727e7998a467472d0ad65f3ad5e6e765015df08636"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:7ef3cb2ebbf91e330e3bb937efada0edd9003683db6b57bb108c4001f37a02ea"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win32.whl", hash = "sha256:fec21693218efe39aa7f8599346e90c705afa52c5b31ae019b2e57e8f6542bb2"}, + {file = "MarkupSafe-2.1.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fd4abcb888d15a94f32b75d8fd18ee162ca0c064f35b11134be77050296d6ba"}, + {file = "MarkupSafe-2.1.3.tar.gz", hash = "sha256:af598ed32d6ae86f1b747b82783958b1a4ab8f617b06fe68795c7f026abbdcad"}, +] + +[[package]] +name = "matplotlib" +version = "3.7.2" +description = "Python plotting package" +optional = false +python-versions = ">=3.8" +files = [ + {file = "matplotlib-3.7.2-cp310-cp310-macosx_10_12_universal2.whl", hash = "sha256:2699f7e73a76d4c110f4f25be9d2496d6ab4f17345307738557d345f099e07de"}, + {file = "matplotlib-3.7.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a8035ba590658bae7562786c9cc6ea1a84aa49d3afab157e414c9e2ea74f496d"}, + {file = "matplotlib-3.7.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2f8e4a49493add46ad4a8c92f63e19d548b2b6ebbed75c6b4c7f46f57d36cdd1"}, + {file = "matplotlib-3.7.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71667eb2ccca4c3537d9414b1bc00554cb7f91527c17ee4ec38027201f8f1603"}, + {file = "matplotlib-3.7.2-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:152ee0b569a37630d8628534c628456b28686e085d51394da6b71ef84c4da201"}, + {file = "matplotlib-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:070f8dddd1f5939e60aacb8fa08f19551f4b0140fab16a3669d5cd6e9cb28fc8"}, + {file = "matplotlib-3.7.2-cp310-cp310-win32.whl", hash = "sha256:fdbb46fad4fb47443b5b8ac76904b2e7a66556844f33370861b4788db0f8816a"}, + {file = "matplotlib-3.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:23fb1750934e5f0128f9423db27c474aa32534cec21f7b2153262b066a581fd1"}, + {file = "matplotlib-3.7.2-cp311-cp311-macosx_10_12_universal2.whl", hash = "sha256:30e1409b857aa8a747c5d4f85f63a79e479835f8dffc52992ac1f3f25837b544"}, + {file = "matplotlib-3.7.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:50e0a55ec74bf2d7a0ebf50ac580a209582c2dd0f7ab51bc270f1b4a0027454e"}, + {file = "matplotlib-3.7.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ac60daa1dc83e8821eed155796b0f7888b6b916cf61d620a4ddd8200ac70cd64"}, + {file = "matplotlib-3.7.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:305e3da477dc8607336ba10bac96986d6308d614706cae2efe7d3ffa60465b24"}, + {file = "matplotlib-3.7.2-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1c308b255efb9b06b23874236ec0f10f026673ad6515f602027cc8ac7805352d"}, + {file = "matplotlib-3.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60c521e21031632aa0d87ca5ba0c1c05f3daacadb34c093585a0be6780f698e4"}, + {file = "matplotlib-3.7.2-cp311-cp311-win32.whl", hash = "sha256:26bede320d77e469fdf1bde212de0ec889169b04f7f1179b8930d66f82b30cbc"}, + {file = "matplotlib-3.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:af4860132c8c05261a5f5f8467f1b269bf1c7c23902d75f2be57c4a7f2394b3e"}, + {file = "matplotlib-3.7.2-cp38-cp38-macosx_10_12_universal2.whl", hash = "sha256:a1733b8e84e7e40a9853e505fe68cc54339f97273bdfe6f3ed980095f769ddc7"}, + {file = 
"matplotlib-3.7.2-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d9881356dc48e58910c53af82b57183879129fa30492be69058c5b0d9fddf391"}, + {file = "matplotlib-3.7.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f081c03f413f59390a80b3e351cc2b2ea0205839714dbc364519bcf51f4b56ca"}, + {file = "matplotlib-3.7.2-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:1cd120fca3407a225168238b790bd5c528f0fafde6172b140a2f3ab7a4ea63e9"}, + {file = "matplotlib-3.7.2-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a2c1590b90aa7bd741b54c62b78de05d4186271e34e2377e0289d943b3522273"}, + {file = "matplotlib-3.7.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d2ff3c984b8a569bc1383cd468fc06b70d7b59d5c2854ca39f1436ae8394117"}, + {file = "matplotlib-3.7.2-cp38-cp38-win32.whl", hash = "sha256:5dea00b62d28654b71ca92463656d80646675628d0828e08a5f3b57e12869e13"}, + {file = "matplotlib-3.7.2-cp38-cp38-win_amd64.whl", hash = "sha256:0f506a1776ee94f9e131af1ac6efa6e5bc7cb606a3e389b0ccb6e657f60bb676"}, + {file = "matplotlib-3.7.2-cp39-cp39-macosx_10_12_universal2.whl", hash = "sha256:6515e878f91894c2e4340d81f0911857998ccaf04dbc1bba781e3d89cbf70608"}, + {file = "matplotlib-3.7.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:71f7a8c6b124e904db550f5b9fe483d28b896d4135e45c4ea381ad3b8a0e3256"}, + {file = "matplotlib-3.7.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:12f01b92ecd518e0697da4d97d163b2b3aa55eb3eb4e2c98235b3396d7dad55f"}, + {file = "matplotlib-3.7.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7e28d6396563955f7af437894a36bf2b279462239a41028323e04b85179058b"}, + {file = "matplotlib-3.7.2-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbcf59334ff645e6a67cd5f78b4b2cdb76384cdf587fa0d2dc85f634a72e1a3e"}, + {file = "matplotlib-3.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:318c89edde72ff95d8df67d82aca03861240512994a597a435a1011ba18dbc7f"}, + {file = "matplotlib-3.7.2-cp39-cp39-win32.whl", hash = "sha256:ce55289d5659b5b12b3db4dc9b7075b70cef5631e56530f14b2945e8836f2d20"}, + {file = "matplotlib-3.7.2-cp39-cp39-win_amd64.whl", hash = "sha256:2ecb5be2b2815431c81dc115667e33da0f5a1bcf6143980d180d09a717c4a12e"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:fdcd28360dbb6203fb5219b1a5658df226ac9bebc2542a9e8f457de959d713d0"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c3cca3e842b11b55b52c6fb8bd6a4088693829acbfcdb3e815fa9b7d5c92c1b"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ebf577c7a6744e9e1bd3fee45fc74a02710b214f94e2bde344912d85e0c9af7c"}, + {file = "matplotlib-3.7.2-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:936bba394682049919dda062d33435b3be211dc3dcaa011e09634f060ec878b2"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bc221ffbc2150458b1cd71cdd9ddd5bb37962b036e41b8be258280b5b01da1dd"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35d74ebdb3f71f112b36c2629cf32323adfbf42679e2751252acd468f5001c07"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:717157e61b3a71d3d26ad4e1770dc85156c9af435659a25ee6407dc866cb258d"}, + {file = "matplotlib-3.7.2-pp39-pypy39_pp73-win_amd64.whl", hash = 
"sha256:20f844d6be031948148ba49605c8b96dfe7d3711d1b63592830d650622458c11"}, + {file = "matplotlib-3.7.2.tar.gz", hash = "sha256:a8cdb91dddb04436bd2f098b8fdf4b81352e68cf4d2c6756fcc414791076569b"}, +] + +[package.dependencies] +contourpy = ">=1.0.1" +cycler = ">=0.10" +fonttools = ">=4.22.0" +importlib-resources = {version = ">=3.2.0", markers = "python_version < \"3.10\""} +kiwisolver = ">=1.0.1" +numpy = ">=1.20" +packaging = ">=20.0" +pillow = ">=6.2.0" +pyparsing = ">=2.3.1,<3.1" +python-dateutil = ">=2.7" + +[[package]] +name = "matplotlib-inline" +version = "0.1.6" +description = "Inline Matplotlib backend for Jupyter" +optional = false +python-versions = ">=3.5" +files = [ + {file = "matplotlib-inline-0.1.6.tar.gz", hash = "sha256:f887e5f10ba98e8d2b150ddcf4702c1e5f8b3a20005eb0f74bfdbd360ee6f304"}, + {file = "matplotlib_inline-0.1.6-py3-none-any.whl", hash = "sha256:f1f41aab5328aa5aaea9b16d083b128102f8712542f819fe7e6a420ff581b311"}, +] + +[package.dependencies] +traitlets = "*" + +[[package]] +name = "mdit-py-plugins" +version = "0.3.3" +description = "Collection of plugins for markdown-it-py" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdit-py-plugins-0.3.3.tar.gz", hash = "sha256:5cfd7e7ac582a594e23ba6546a2f406e94e42eb33ae596d0734781261c251260"}, + {file = "mdit_py_plugins-0.3.3-py3-none-any.whl", hash = "sha256:36d08a29def19ec43acdcd8ba471d3ebab132e7879d442760d963f19913e04b9"}, +] + +[package.dependencies] +markdown-it-py = ">=1.0.0,<3.0.0" + +[package.extras] +code-style = ["pre-commit"] +rtd = ["attrs", "myst-parser (>=0.16.1,<0.17.0)", "sphinx-book-theme (>=0.1.0,<0.2.0)"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + +[[package]] +name = "mpmath" +version = "1.3.0" +description = "Python library for arbitrary-precision floating-point arithmetic" +optional = false +python-versions = "*" +files = [ + {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, + {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, +] + +[package.extras] +develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] +docs = ["sphinx"] +gmpy = ["gmpy2 (>=2.1.0a4)"] +tests = ["pytest (>=4.6)"] + +[[package]] +name = "multidict" +version = "6.0.4" +description = "multidict implementation" +optional = false +python-versions = ">=3.7" +files = [ + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1a97283e0c85772d613878028fec909f003993e1007eafa715b24b377cb9b8"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:eeb6dcc05e911516ae3d1f207d4b0520d07f54484c49dfc294d6e7d63b734171"}, + {file = "multidict-6.0.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d6d635d5209b82a3492508cf5b365f3446afb65ae7ebd755e70e18f287b0adf7"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c048099e4c9e9d615545e2001d3d8a4380bd403e1a0578734e0d31703d1b0c0b"}, + {file = 
"multidict-6.0.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea20853c6dbbb53ed34cb4d080382169b6f4554d394015f1bef35e881bf83547"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d232d4e5396c2efbbf4f6d4df89bfa905eb0d4dc5b3549d872ab898451f569"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36c63aaa167f6c6b04ef2c85704e93af16c11d20de1d133e39de6a0e84582a93"}, + {file = "multidict-6.0.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:64bdf1086b6043bf519869678f5f2757f473dee970d7abf6da91ec00acb9cb98"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:43644e38f42e3af682690876cff722d301ac585c5b9e1eacc013b7a3f7b696a0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:7582a1d1030e15422262de9f58711774e02fa80df0d1578995c76214f6954988"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:ddff9c4e225a63a5afab9dd15590432c22e8057e1a9a13d28ed128ecf047bbdc"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:ee2a1ece51b9b9e7752e742cfb661d2a29e7bcdba2d27e66e28a99f1890e4fa0"}, + {file = "multidict-6.0.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:a2e4369eb3d47d2034032a26c7a80fcb21a2cb22e1173d761a162f11e562caa5"}, + {file = "multidict-6.0.4-cp310-cp310-win32.whl", hash = "sha256:574b7eae1ab267e5f8285f0fe881f17efe4b98c39a40858247720935b893bba8"}, + {file = "multidict-6.0.4-cp310-cp310-win_amd64.whl", hash = "sha256:4dcbb0906e38440fa3e325df2359ac6cb043df8e58c965bb45f4e406ecb162cc"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0dfad7a5a1e39c53ed00d2dd0c2e36aed4650936dc18fd9a1826a5ae1cad6f03"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:64da238a09d6039e3bd39bb3aee9c21a5e34f28bfa5aa22518581f910ff94af3"}, + {file = "multidict-6.0.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff959bee35038c4624250473988b24f846cbeb2c6639de3602c073f10410ceba"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:01a3a55bd90018c9c080fbb0b9f4891db37d148a0a18722b42f94694f8b6d4c9"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c5cb09abb18c1ea940fb99360ea0396f34d46566f157122c92dfa069d3e0e982"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:666daae833559deb2d609afa4490b85830ab0dfca811a98b70a205621a6109fe"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:11bdf3f5e1518b24530b8241529d2050014c884cf18b6fc69c0c2b30ca248710"}, + {file = "multidict-6.0.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d18748f2d30f94f498e852c67d61261c643b349b9d2a581131725595c45ec6c"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:458f37be2d9e4c95e2d8866a851663cbc76e865b78395090786f6cd9b3bbf4f4"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b1a2eeedcead3a41694130495593a559a668f382eee0727352b9a41e1c45759a"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7d6ae9d593ef8641544d6263c7fa6408cc90370c8cb2bbb65f8d43e5b0351d9c"}, + {file = 
"multidict-6.0.4-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:5979b5632c3e3534e42ca6ff856bb24b2e3071b37861c2c727ce220d80eee9ed"}, + {file = "multidict-6.0.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dcfe792765fab89c365123c81046ad4103fcabbc4f56d1c1997e6715e8015461"}, + {file = "multidict-6.0.4-cp311-cp311-win32.whl", hash = "sha256:3601a3cece3819534b11d4efc1eb76047488fddd0c85a3948099d5da4d504636"}, + {file = "multidict-6.0.4-cp311-cp311-win_amd64.whl", hash = "sha256:81a4f0b34bd92df3da93315c6a59034df95866014ac08535fc819f043bfd51f0"}, + {file = "multidict-6.0.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:67040058f37a2a51ed8ea8f6b0e6ee5bd78ca67f169ce6122f3e2ec80dfe9b78"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:853888594621e6604c978ce2a0444a1e6e70c8d253ab65ba11657659dcc9100f"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39ff62e7d0f26c248b15e364517a72932a611a9b75f35b45be078d81bdb86603"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:af048912e045a2dc732847d33821a9d84ba553f5c5f028adbd364dd4765092ac"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1e8b901e607795ec06c9e42530788c45ac21ef3aaa11dbd0c69de543bfb79a9"}, + {file = "multidict-6.0.4-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:62501642008a8b9871ddfccbf83e4222cf8ac0d5aeedf73da36153ef2ec222d2"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:99b76c052e9f1bc0721f7541e5e8c05db3941eb9ebe7b8553c625ef88d6eefde"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:509eac6cf09c794aa27bcacfd4d62c885cce62bef7b2c3e8b2e49d365b5003fe"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:21a12c4eb6ddc9952c415f24eef97e3e55ba3af61f67c7bc388dcdec1404a067"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:5cad9430ab3e2e4fa4a2ef4450f548768400a2ac635841bc2a56a2052cdbeb87"}, + {file = "multidict-6.0.4-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:ab55edc2e84460694295f401215f4a58597f8f7c9466faec545093045476327d"}, + {file = "multidict-6.0.4-cp37-cp37m-win32.whl", hash = "sha256:5a4dcf02b908c3b8b17a45fb0f15b695bf117a67b76b7ad18b73cf8e92608775"}, + {file = "multidict-6.0.4-cp37-cp37m-win_amd64.whl", hash = "sha256:6ed5f161328b7df384d71b07317f4d8656434e34591f20552c7bcef27b0ab88e"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5fc1b16f586f049820c5c5b17bb4ee7583092fa0d1c4e28b5239181ff9532e0c"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1502e24330eb681bdaa3eb70d6358e818e8e8f908a22a1851dfd4e15bc2f8161"}, + {file = "multidict-6.0.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b692f419760c0e65d060959df05f2a531945af31fda0c8a3b3195d4efd06de11"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45e1ecb0379bfaab5eef059f50115b54571acfbe422a14f668fc8c27ba410e7e"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ddd3915998d93fbcd2566ddf9cf62cdb35c9e093075f862935573d265cf8f65d"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d43b61c59d82f2effb39a93c48b845efe23a3852d201ed2d24ba830d0b4cf2"}, + {file = 
"multidict-6.0.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc8e1d0c705233c5dd0c5e6460fbad7827d5d36f310a0fadfd45cc3029762258"}, + {file = "multidict-6.0.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6aa0418fcc838522256761b3415822626f866758ee0bc6632c9486b179d0b52"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6748717bb10339c4760c1e63da040f5f29f5ed6e59d76daee30305894069a660"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4d1a3d7ef5e96b1c9e92f973e43aa5e5b96c659c9bc3124acbbd81b0b9c8a951"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4372381634485bec7e46718edc71528024fcdc6f835baefe517b34a33c731d60"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:fc35cb4676846ef752816d5be2193a1e8367b4c1397b74a565a9d0389c433a1d"}, + {file = "multidict-6.0.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:4b9d9e4e2b37daddb5c23ea33a3417901fa7c7b3dee2d855f63ee67a0b21e5b1"}, + {file = "multidict-6.0.4-cp38-cp38-win32.whl", hash = "sha256:e41b7e2b59679edfa309e8db64fdf22399eec4b0b24694e1b2104fb789207779"}, + {file = "multidict-6.0.4-cp38-cp38-win_amd64.whl", hash = "sha256:d6c254ba6e45d8e72739281ebc46ea5eb5f101234f3ce171f0e9f5cc86991480"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:16ab77bbeb596e14212e7bab8429f24c1579234a3a462105cda4a66904998664"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc779e9e6f7fda81b3f9aa58e3a6091d49ad528b11ed19f6621408806204ad35"}, + {file = "multidict-6.0.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4ceef517eca3e03c1cceb22030a3e39cb399ac86bff4e426d4fc6ae49052cc60"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:281af09f488903fde97923c7744bb001a9b23b039a909460d0f14edc7bf59706"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:52f2dffc8acaba9a2f27174c41c9e57f60b907bb9f096b36b1a1f3be71c6284d"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b41156839806aecb3641f3208c0dafd3ac7775b9c4c422d82ee2a45c34ba81ca"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d5e3fc56f88cc98ef8139255cf8cd63eb2c586531e43310ff859d6bb3a6b51f1"}, + {file = "multidict-6.0.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8316a77808c501004802f9beebde51c9f857054a0c871bd6da8280e718444449"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:f70b98cd94886b49d91170ef23ec5c0e8ebb6f242d734ed7ed677b24d50c82cf"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:bf6774e60d67a9efe02b3616fee22441d86fab4c6d335f9d2051d19d90a40063"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:e69924bfcdda39b722ef4d9aa762b2dd38e4632b3641b1d9a57ca9cd18f2f83a"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:6b181d8c23da913d4ff585afd1155a0e1194c0b50c54fcfe286f70cdaf2b7176"}, + {file = "multidict-6.0.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52509b5be062d9eafc8170e53026fbc54cf3b32759a23d07fd935fb04fc22d95"}, + {file = "multidict-6.0.4-cp39-cp39-win32.whl", hash = "sha256:27c523fbfbdfd19c6867af7346332b62b586eed663887392cff78d614f9ec313"}, + {file = 
"multidict-6.0.4-cp39-cp39-win_amd64.whl", hash = "sha256:33029f5734336aa0d4c0384525da0387ef89148dc7191aae00ca5fb23d7aafc2"}, + {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, +] + +[[package]] +name = "networkx" +version = "3.1" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.8" +files = [ + {file = "networkx-3.1-py3-none-any.whl", hash = "sha256:4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36"}, + {file = "networkx-3.1.tar.gz", hash = "sha256:de346335408f84de0eada6ff9fafafff9bcda11f0a0dfaa931133debb146ab61"}, +] + +[package.extras] +default = ["matplotlib (>=3.4)", "numpy (>=1.20)", "pandas (>=1.3)", "scipy (>=1.8)"] +developer = ["mypy (>=1.1)", "pre-commit (>=3.2)"] +doc = ["nb2plots (>=0.6)", "numpydoc (>=1.5)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.13)", "sphinx (>=6.1)", "sphinx-gallery (>=0.12)", "texext (>=0.6.7)"] +extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.10)", "sympy (>=1.10)"] +test = ["codecov (>=2.1)", "pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "numba" +version = "0.56.4" +description = "compiling Python code using LLVM" +optional = false +python-versions = ">=3.7" +files = [ + {file = "numba-0.56.4-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:9f62672145f8669ec08762895fe85f4cf0ead08ce3164667f2b94b2f62ab23c3"}, + {file = "numba-0.56.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c602d015478b7958408d788ba00a50272649c5186ea8baa6cf71d4a1c761bba1"}, + {file = "numba-0.56.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:85dbaed7a05ff96492b69a8900c5ba605551afb9b27774f7f10511095451137c"}, + {file = "numba-0.56.4-cp310-cp310-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:f4cfc3a19d1e26448032049c79fc60331b104f694cf570a9e94f4e2c9d0932bb"}, + {file = "numba-0.56.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4e08e203b163ace08bad500b0c16f6092b1eb34fd1fce4feaf31a67a3a5ecf3b"}, + {file = "numba-0.56.4-cp310-cp310-win32.whl", hash = "sha256:0611e6d3eebe4cb903f1a836ffdb2bda8d18482bcd0a0dcc56e79e2aa3fefef5"}, + {file = "numba-0.56.4-cp310-cp310-win_amd64.whl", hash = "sha256:fbfb45e7b297749029cb28694abf437a78695a100e7c2033983d69f0ba2698d4"}, + {file = "numba-0.56.4-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:3cb1a07a082a61df80a468f232e452d818f5ae254b40c26390054e4e868556e0"}, + {file = "numba-0.56.4-cp37-cp37m-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d69ad934e13c15684e7887100a8f5f0f61d7a8e57e0fd29d9993210089a5b531"}, + {file = "numba-0.56.4-cp37-cp37m-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:dbcc847bac2d225265d054993a7f910fda66e73d6662fe7156452cac0325b073"}, + {file = "numba-0.56.4-cp37-cp37m-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8a95ca9cc77ea4571081f6594e08bd272b66060634b8324e99cd1843020364f9"}, + {file = "numba-0.56.4-cp37-cp37m-win32.whl", hash = "sha256:fcdf84ba3ed8124eb7234adfbb8792f311991cbf8aed1cad4b1b1a7ee08380c1"}, + {file = "numba-0.56.4-cp37-cp37m-win_amd64.whl", hash = "sha256:42f9e1be942b215df7e6cc9948cf9c15bb8170acc8286c063a9e57994ef82fd1"}, + {file = "numba-0.56.4-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:553da2ce74e8862e18a72a209ed3b6d2924403bdd0fb341fa891c6455545ba7c"}, + {file = "numba-0.56.4-cp38-cp38-macosx_11_0_arm64.whl", hash = 
"sha256:4373da9757049db7c90591e9ec55a2e97b2b36ba7ae3bf9c956a513374077470"}, + {file = "numba-0.56.4-cp38-cp38-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3a993349b90569518739009d8f4b523dfedd7e0049e6838c0e17435c3e70dcc4"}, + {file = "numba-0.56.4-cp38-cp38-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:720886b852a2d62619ae3900fe71f1852c62db4f287d0c275a60219e1643fc04"}, + {file = "numba-0.56.4-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e64d338b504c9394a4a34942df4627e1e6cb07396ee3b49fe7b8d6420aa5104f"}, + {file = "numba-0.56.4-cp38-cp38-win32.whl", hash = "sha256:03fe94cd31e96185cce2fae005334a8cc712fc2ba7756e52dff8c9400718173f"}, + {file = "numba-0.56.4-cp38-cp38-win_amd64.whl", hash = "sha256:91f021145a8081f881996818474ef737800bcc613ffb1e618a655725a0f9e246"}, + {file = "numba-0.56.4-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:d0ae9270a7a5cc0ede63cd234b4ff1ce166c7a749b91dbbf45e0000c56d3eade"}, + {file = "numba-0.56.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c75e8a5f810ce80a0cfad6e74ee94f9fde9b40c81312949bf356b7304ef20740"}, + {file = "numba-0.56.4-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:a12ef323c0f2101529d455cfde7f4135eaa147bad17afe10b48634f796d96abd"}, + {file = "numba-0.56.4-cp39-cp39-manylinux2014_i686.manylinux_2_17_i686.whl", hash = "sha256:03634579d10a6129181129de293dd6b5eaabee86881369d24d63f8fe352dd6cb"}, + {file = "numba-0.56.4-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0240f9026b015e336069329839208ebd70ec34ae5bfbf402e4fcc8e06197528e"}, + {file = "numba-0.56.4-cp39-cp39-win32.whl", hash = "sha256:14dbbabf6ffcd96ee2ac827389afa59a70ffa9f089576500434c34abf9b054a4"}, + {file = "numba-0.56.4-cp39-cp39-win_amd64.whl", hash = "sha256:0da583c532cd72feefd8e551435747e0e0fbb3c0530357e6845fcc11e38d6aea"}, + {file = "numba-0.56.4.tar.gz", hash = "sha256:32d9fef412c81483d7efe0ceb6cf4d3310fde8b624a9cecca00f790573ac96ee"}, +] + +[package.dependencies] +importlib-metadata = {version = "*", markers = "python_version < \"3.9\""} +llvmlite = "==0.39.*" +numpy = ">=1.18,<1.24" +setuptools = "*" + +[[package]] +name = "numpy" +version = "1.23.5" +description = "NumPy is the fundamental package for array computing with Python." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "numpy-1.23.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9c88793f78fca17da0145455f0d7826bcb9f37da4764af27ac945488116efe63"}, + {file = "numpy-1.23.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e9f4c4e51567b616be64e05d517c79a8a22f3606499941d97bb76f2ca59f982d"}, + {file = "numpy-1.23.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7903ba8ab592b82014713c491f6c5d3a1cde5b4a3bf116404e08f5b52f6daf43"}, + {file = "numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e05b1c973a9f858c74367553e236f287e749465f773328c8ef31abe18f691e1"}, + {file = "numpy-1.23.5-cp310-cp310-win32.whl", hash = "sha256:522e26bbf6377e4d76403826ed689c295b0b238f46c28a7251ab94716da0b280"}, + {file = "numpy-1.23.5-cp310-cp310-win_amd64.whl", hash = "sha256:dbee87b469018961d1ad79b1a5d50c0ae850000b639bcb1b694e9981083243b6"}, + {file = "numpy-1.23.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ce571367b6dfe60af04e04a1834ca2dc5f46004ac1cc756fb95319f64c095a96"}, + {file = "numpy-1.23.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:56e454c7833e94ec9769fa0f86e6ff8e42ee38ce0ce1fa4cbb747ea7e06d56aa"}, + {file = "numpy-1.23.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5039f55555e1eab31124a5768898c9e22c25a65c1e0037f4d7c495a45778c9f2"}, + {file = "numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58f545efd1108e647604a1b5aa809591ccd2540f468a880bedb97247e72db387"}, + {file = "numpy-1.23.5-cp311-cp311-win32.whl", hash = "sha256:b2a9ab7c279c91974f756c84c365a669a887efa287365a8e2c418f8b3ba73fb0"}, + {file = "numpy-1.23.5-cp311-cp311-win_amd64.whl", hash = "sha256:0cbe9848fad08baf71de1a39e12d1b6310f1d5b2d0ea4de051058e6e1076852d"}, + {file = "numpy-1.23.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f063b69b090c9d918f9df0a12116029e274daf0181df392839661c4c7ec9018a"}, + {file = "numpy-1.23.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0aaee12d8883552fadfc41e96b4c82ee7d794949e2a7c3b3a7201e968c7ecab9"}, + {file = "numpy-1.23.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92c8c1e89a1f5028a4c6d9e3ccbe311b6ba53694811269b992c0b224269e2398"}, + {file = "numpy-1.23.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d208a0f8729f3fb790ed18a003f3a57895b989b40ea4dce4717e9cf4af62c6bb"}, + {file = "numpy-1.23.5-cp38-cp38-win32.whl", hash = "sha256:06005a2ef6014e9956c09ba07654f9837d9e26696a0470e42beedadb78c11b07"}, + {file = "numpy-1.23.5-cp38-cp38-win_amd64.whl", hash = "sha256:ca51fcfcc5f9354c45f400059e88bc09215fb71a48d3768fb80e357f3b457e1e"}, + {file = "numpy-1.23.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8969bfd28e85c81f3f94eb4a66bc2cf1dbdc5c18efc320af34bffc54d6b1e38f"}, + {file = "numpy-1.23.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a7ac231a08bb37f852849bbb387a20a57574a97cfc7b6cabb488a4fc8be176de"}, + {file = "numpy-1.23.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf837dc63ba5c06dc8797c398db1e223a466c7ece27a1f7b5232ba3466aafe3d"}, + {file = "numpy-1.23.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33161613d2269025873025b33e879825ec7b1d831317e68f4f2f0f84ed14c719"}, + {file = "numpy-1.23.5-cp39-cp39-win32.whl", hash = "sha256:af1da88f6bc3d2338ebbf0e22fe487821ea4d8e89053e25fa59d1d79786e7481"}, + {file = "numpy-1.23.5-cp39-cp39-win_amd64.whl", hash = 
"sha256:09b7847f7e83ca37c6e627682f145856de331049013853f344f37b0c9690e3df"}, + {file = "numpy-1.23.5-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:abdde9f795cf292fb9651ed48185503a2ff29be87770c3b8e2a14b0cd7aa16f8"}, + {file = "numpy-1.23.5-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9a909a8bae284d46bbfdefbdd4a262ba19d3bc9921b1e76126b1d21c3c34135"}, + {file = "numpy-1.23.5-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:01dd17cbb340bf0fc23981e52e1d18a9d4050792e8fb8363cecbf066a84b827d"}, + {file = "numpy-1.23.5.tar.gz", hash = "sha256:1b1766d6f397c18153d40015ddfc79ddb715cabadc04d2d228d4e5a8bc4ded1a"}, +] + +[[package]] +name = "oauthlib" +version = "3.2.2" +description = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +optional = false +python-versions = ">=3.6" +files = [ + {file = "oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca"}, + {file = "oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918"}, +] + +[package.extras] +rsa = ["cryptography (>=3.0.0)"] +signals = ["blinker (>=1.4.0)"] +signedtoken = ["cryptography (>=3.0.0)", "pyjwt (>=2.0.0,<3)"] + +[[package]] +name = "omegaconf" +version = "2.0.6" +description = "A flexible configuration library" +optional = false +python-versions = ">=3.6" +files = [ + {file = "omegaconf-2.0.6-py3-none-any.whl", hash = "sha256:9e349fd76819b95b47aa628edea1ff83fed5b25108608abdd6c7fdca188e302a"}, + {file = "omegaconf-2.0.6.tar.gz", hash = "sha256:92ca535a788d21651bf4c2eaf5c1ca4c7a8003b2dab4a87cbb09109784268806"}, +] + +[package.dependencies] +PyYAML = ">=5.1" +typing-extensions = "*" + +[[package]] +name = "orjson" +version = "3.9.2" +description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" +optional = false +python-versions = ">=3.7" +files = [ + {file = "orjson-3.9.2-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:7323e4ca8322b1ecb87562f1ec2491831c086d9faa9a6c6503f489dadbed37d7"}, + {file = "orjson-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1272688ea1865f711b01ba479dea2d53e037ea00892fd04196b5875f7021d9d3"}, + {file = "orjson-3.9.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0b9a26f1d1427a9101a1e8910f2e2df1f44d3d18ad5480ba031b15d5c1cb282e"}, + {file = "orjson-3.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6a5ca55b0d8f25f18b471e34abaee4b175924b6cd62f59992945b25963443141"}, + {file = "orjson-3.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:877872db2c0f41fbe21f852ff642ca842a43bc34895b70f71c9d575df31fffb4"}, + {file = "orjson-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a39c2529d75373b7167bf84c814ef9b8f3737a339c225ed6c0df40736df8748"}, + {file = "orjson-3.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:84ebd6fdf138eb0eb4280045442331ee71c0aab5e16397ba6645f32f911bfb37"}, + {file = "orjson-3.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:5a60a1cfcfe310547a1946506dd4f1ed0a7d5bd5b02c8697d9d5dcd8d2e9245e"}, + {file = "orjson-3.9.2-cp310-none-win_amd64.whl", hash = "sha256:c290c4f81e8fd0c1683638802c11610b2f722b540f8e5e858b6914b495cf90c8"}, + {file = "orjson-3.9.2-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = 
"sha256:02ef014f9a605e84b675060785e37ec9c0d2347a04f1307a9d6840ab8ecd6f55"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:992af54265ada1c1579500d6594ed73fe333e726de70d64919cf37f93defdd06"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a40958f7af7c6d992ee67b2da4098dca8b770fc3b4b3834d540477788bfa76d3"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:93864dec3e3dd058a2dbe488d11ac0345214a6a12697f53a63e34de7d28d4257"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16fdf5a82df80c544c3c91516ab3882cd1ac4f1f84eefeafa642e05cef5f6699"}, + {file = "orjson-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:275b5a18fd9ed60b2720543d3ddac170051c43d680e47d04ff5203d2c6d8ebf1"}, + {file = "orjson-3.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b9aea6dcb99fcbc9f6d1dd84fca92322fda261da7fb014514bb4689c7c2097a8"}, + {file = "orjson-3.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7d74ae0e101d17c22ef67b741ba356ab896fc0fa64b301c2bf2bb0a4d874b190"}, + {file = "orjson-3.9.2-cp311-none-win_amd64.whl", hash = "sha256:6320b28e7bdb58c3a3a5efffe04b9edad3318d82409e84670a9b24e8035a249d"}, + {file = "orjson-3.9.2-cp37-cp37m-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:368e9cc91ecb7ac21f2aa475e1901204110cf3e714e98649c2502227d248f947"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58e9e70f0dcd6a802c35887f306b555ff7a214840aad7de24901fc8bd9cf5dde"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:00c983896c2e01c94c0ef72fd7373b2aa06d0c0eed0342c4884559f812a6835b"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ee743e8890b16c87a2f89733f983370672272b61ee77429c0a5899b2c98c1a7"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7b065942d362aad4818ff599d2f104c35a565c2cbcbab8c09ec49edba91da75"}, + {file = "orjson-3.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e46e9c5b404bb9e41d5555762fd410d5466b7eb1ec170ad1b1609cbebe71df21"}, + {file = "orjson-3.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8170157288714678ffd64f5de33039e1164a73fd8b6be40a8a273f80093f5c4f"}, + {file = "orjson-3.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:e3e2f087161947dafe8319ea2cfcb9cea4bb9d2172ecc60ac3c9738f72ef2909"}, + {file = "orjson-3.9.2-cp37-none-win_amd64.whl", hash = "sha256:d7de3dbbe74109ae598692113cec327fd30c5a30ebca819b21dfa4052f7b08ef"}, + {file = "orjson-3.9.2-cp38-cp38-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:8cd4385c59bbc1433cad4a80aca65d2d9039646a9c57f8084897549b55913b17"}, + {file = "orjson-3.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a74036aab1a80c361039290cdbc51aa7adc7ea13f56e5ef94e9be536abd227bd"}, + {file = "orjson-3.9.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1aaa46d7d4ae55335f635eadc9be0bd9bcf742e6757209fc6dc697e390010adc"}, + {file = "orjson-3.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e52c67ed6bb368083aa2078ea3ccbd9721920b93d4b06c43eb4e20c4c860046"}, + {file = 
"orjson-3.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a6cdfcf9c7dd4026b2b01fdff56986251dc0cc1e980c690c79eec3ae07b36e7"}, + {file = "orjson-3.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1882a70bb69595b9ec5aac0040a819e94d2833fe54901e2b32f5e734bc259a8b"}, + {file = "orjson-3.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:fc05e060d452145ab3c0b5420769e7356050ea311fc03cb9d79c481982917cca"}, + {file = "orjson-3.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f8bc2c40d9bb26efefb10949d261a47ca196772c308babc538dd9f4b73e8d386"}, + {file = "orjson-3.9.2-cp38-none-win_amd64.whl", hash = "sha256:3164fc20a585ec30a9aff33ad5de3b20ce85702b2b2a456852c413e3f0d7ab09"}, + {file = "orjson-3.9.2-cp39-cp39-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:7a6ccadf788531595ed4728aa746bc271955448d2460ff0ef8e21eb3f2a281ba"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3245d230370f571c945f69aab823c279a868dc877352817e22e551de155cb06c"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:205925b179550a4ee39b8418dd4c94ad6b777d165d7d22614771c771d44f57bd"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0325fe2d69512187761f7368c8cda1959bcb75fc56b8e7a884e9569112320e57"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:806704cd58708acc66a064a9a58e3be25cf1c3f9f159e8757bd3f515bfabdfa1"}, + {file = "orjson-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03fb36f187a0c19ff38f6289418863df8b9b7880cdbe279e920bef3a09d8dab1"}, + {file = "orjson-3.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:20925d07a97c49c6305bff1635318d9fc1804aa4ccacb5fb0deb8a910e57d97a"}, + {file = "orjson-3.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:eebfed53bec5674e981ebe8ed2cf00b3f7bcda62d634733ff779c264307ea505"}, + {file = "orjson-3.9.2-cp39-none-win_amd64.whl", hash = "sha256:869b961df5fcedf6c79f4096119b35679b63272362e9b745e668f0391a892d39"}, + {file = "orjson-3.9.2.tar.gz", hash = "sha256:24257c8f641979bf25ecd3e27251b5cc194cdd3a6e96004aac8446f5e63d9664"}, +] + +[[package]] +name = "packaging" +version = "23.1" +description = "Core utilities for Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "packaging-23.1-py3-none-any.whl", hash = "sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61"}, + {file = "packaging-23.1.tar.gz", hash = "sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f"}, +] + +[[package]] +name = "pandas" +version = "2.0.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pandas-2.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e4c7c9f27a4185304c7caf96dc7d91bc60bc162221152de697c98eb0b2648dd8"}, + {file = "pandas-2.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f167beed68918d62bffb6ec64f2e1d8a7d297a038f86d4aed056b9493fca407f"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce0c6f76a0f1ba361551f3e6dceaff06bde7514a374aa43e33b588ec10420183"}, + {file = "pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba619e410a21d8c387a1ea6e8a0e49bb42216474436245718d7f2e88a2f8d7c0"}, + {file = 
"pandas-2.0.3-cp310-cp310-win32.whl", hash = "sha256:3ef285093b4fe5058eefd756100a367f27029913760773c8bf1d2d8bebe5d210"}, + {file = "pandas-2.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:9ee1a69328d5c36c98d8e74db06f4ad518a1840e8ccb94a4ba86920986bb617e"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b084b91d8d66ab19f5bb3256cbd5ea661848338301940e17f4492b2ce0801fe8"}, + {file = "pandas-2.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:37673e3bdf1551b95bf5d4ce372b37770f9529743d2498032439371fc7b7eb26"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9cb1e14fdb546396b7e1b923ffaeeac24e4cedd14266c3497216dd4448e4f2d"}, + {file = "pandas-2.0.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9cd88488cceb7635aebb84809d087468eb33551097d600c6dad13602029c2df"}, + {file = "pandas-2.0.3-cp311-cp311-win32.whl", hash = "sha256:694888a81198786f0e164ee3a581df7d505024fbb1f15202fc7db88a71d84ebd"}, + {file = "pandas-2.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:6a21ab5c89dcbd57f78d0ae16630b090eec626360085a4148693def5452d8a6b"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9e4da0d45e7f34c069fe4d522359df7d23badf83abc1d1cef398895822d11061"}, + {file = "pandas-2.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32fca2ee1b0d93dd71d979726b12b61faa06aeb93cf77468776287f41ff8fdc5"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:258d3624b3ae734490e4d63c430256e716f488c4fcb7c8e9bde2d3aa46c29089"}, + {file = "pandas-2.0.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eae3dc34fa1aa7772dd3fc60270d13ced7346fcbcfee017d3132ec625e23bb0"}, + {file = "pandas-2.0.3-cp38-cp38-win32.whl", hash = "sha256:f3421a7afb1a43f7e38e82e844e2bca9a6d793d66c1a7f9f0ff39a795bbc5e02"}, + {file = "pandas-2.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:69d7f3884c95da3a31ef82b7618af5710dba95bb885ffab339aad925c3e8ce78"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5247fb1ba347c1261cbbf0fcfba4a3121fbb4029d95d9ef4dc45406620b25c8b"}, + {file = "pandas-2.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:81af086f4543c9d8bb128328b5d32e9986e0c84d3ee673a2ac6fb57fd14f755e"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1994c789bf12a7c5098277fb43836ce090f1073858c10f9220998ac74f37c69b"}, + {file = "pandas-2.0.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ec591c48e29226bcbb316e0c1e9423622bc7a4eaf1ef7c3c9fa1a3981f89641"}, + {file = "pandas-2.0.3-cp39-cp39-win32.whl", hash = "sha256:04dbdbaf2e4d46ca8da896e1805bc04eb85caa9a82e259e8eed00254d5e0c682"}, + {file = "pandas-2.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:1168574b036cd8b93abc746171c9b4f1b83467438a5e45909fed645cf8692dbc"}, + {file = "pandas-2.0.3.tar.gz", hash = "sha256:c02f372a88e0d17f36d3093a644c73cfc1788e876a7c4bcb4020a77512e2043c"}, +] + +[package.dependencies] +numpy = [ + {version = ">=1.20.3", markers = "python_version < \"3.10\""}, + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, +] +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.1" + +[package.extras] +all = ["PyQt5 (>=5.15.1)", "SQLAlchemy (>=1.4.16)", "beautifulsoup4 (>=4.9.3)", "bottleneck (>=1.3.2)", "brotlipy (>=0.7.0)", "fastparquet (>=0.6.3)", "fsspec (>=2021.07.0)", "gcsfs (>=2021.07.0)", 
"html5lib (>=1.1)", "hypothesis (>=6.34.2)", "jinja2 (>=3.0.0)", "lxml (>=4.6.3)", "matplotlib (>=3.6.1)", "numba (>=0.53.1)", "numexpr (>=2.7.3)", "odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pandas-gbq (>=0.15.0)", "psycopg2 (>=2.8.6)", "pyarrow (>=7.0.0)", "pymysql (>=1.0.2)", "pyreadstat (>=1.1.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)", "python-snappy (>=0.6.0)", "pyxlsb (>=1.0.8)", "qtpy (>=2.2.0)", "s3fs (>=2021.08.0)", "scipy (>=1.7.1)", "tables (>=3.6.1)", "tabulate (>=0.8.9)", "xarray (>=0.21.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)", "zstandard (>=0.15.2)"] +aws = ["s3fs (>=2021.08.0)"] +clipboard = ["PyQt5 (>=5.15.1)", "qtpy (>=2.2.0)"] +compression = ["brotlipy (>=0.7.0)", "python-snappy (>=0.6.0)", "zstandard (>=0.15.2)"] +computation = ["scipy (>=1.7.1)", "xarray (>=0.21.0)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.0.7)", "pyxlsb (>=1.0.8)", "xlrd (>=2.0.1)", "xlsxwriter (>=1.4.3)"] +feather = ["pyarrow (>=7.0.0)"] +fss = ["fsspec (>=2021.07.0)"] +gcp = ["gcsfs (>=2021.07.0)", "pandas-gbq (>=0.15.0)"] +hdf5 = ["tables (>=3.6.1)"] +html = ["beautifulsoup4 (>=4.9.3)", "html5lib (>=1.1)", "lxml (>=4.6.3)"] +mysql = ["SQLAlchemy (>=1.4.16)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.0.0)", "tabulate (>=0.8.9)"] +parquet = ["pyarrow (>=7.0.0)"] +performance = ["bottleneck (>=1.3.2)", "numba (>=0.53.1)", "numexpr (>=2.7.1)"] +plot = ["matplotlib (>=3.6.1)"] +postgresql = ["SQLAlchemy (>=1.4.16)", "psycopg2 (>=2.8.6)"] +spss = ["pyreadstat (>=1.1.2)"] +sql-other = ["SQLAlchemy (>=1.4.16)"] +test = ["hypothesis (>=6.34.2)", "pytest (>=7.3.2)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.6.3)"] + +[[package]] +name = "pillow" +version = "9.3.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "Pillow-9.3.0-1-cp37-cp37m-win32.whl", hash = "sha256:e6ea6b856a74d560d9326c0f5895ef8050126acfdc7ca08ad703eb0081e82b74"}, + {file = "Pillow-9.3.0-1-cp37-cp37m-win_amd64.whl", hash = "sha256:32a44128c4bdca7f31de5be641187367fe2a450ad83b833ef78910397db491aa"}, + {file = "Pillow-9.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:0b7257127d646ff8676ec8a15520013a698d1fdc48bc2a79ba4e53df792526f2"}, + {file = "Pillow-9.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b90f7616ea170e92820775ed47e136208e04c967271c9ef615b6fbd08d9af0e3"}, + {file = "Pillow-9.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:68943d632f1f9e3dce98908e873b3a090f6cba1cbb1b892a9e8d97c938871fbe"}, + {file = "Pillow-9.3.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:be55f8457cd1eac957af0c3f5ece7bc3f033f89b114ef30f710882717670b2a8"}, + {file = "Pillow-9.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d77adcd56a42d00cc1be30843d3426aa4e660cab4a61021dc84467123f7a00c"}, + {file = "Pillow-9.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:829f97c8e258593b9daa80638aee3789b7df9da5cf1336035016d76f03b8860c"}, + {file = "Pillow-9.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:801ec82e4188e935c7f5e22e006d01611d6b41661bba9fe45b60e7ac1a8f84de"}, + {file = "Pillow-9.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:871b72c3643e516db4ecf20efe735deb27fe30ca17800e661d769faab45a18d7"}, + {file = "Pillow-9.3.0-cp310-cp310-win32.whl", hash = "sha256:655a83b0058ba47c7c52e4e2df5ecf484c1b0b0349805896dd350cbc416bdd91"}, + {file = "Pillow-9.3.0-cp310-cp310-win_amd64.whl", hash 
= "sha256:9f47eabcd2ded7698106b05c2c338672d16a6f2a485e74481f524e2a23c2794b"}, + {file = "Pillow-9.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:57751894f6618fd4308ed8e0c36c333e2f5469744c34729a27532b3db106ee20"}, + {file = "Pillow-9.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7db8b751ad307d7cf238f02101e8e36a128a6cb199326e867d1398067381bff4"}, + {file = "Pillow-9.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3033fbe1feb1b59394615a1cafaee85e49d01b51d54de0cbf6aa8e64182518a1"}, + {file = "Pillow-9.3.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:22b012ea2d065fd163ca096f4e37e47cd8b59cf4b0fd47bfca6abb93df70b34c"}, + {file = "Pillow-9.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9a65733d103311331875c1dca05cb4606997fd33d6acfed695b1232ba1df193"}, + {file = "Pillow-9.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:502526a2cbfa431d9fc2a079bdd9061a2397b842bb6bc4239bb176da00993812"}, + {file = "Pillow-9.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:90fb88843d3902fe7c9586d439d1e8c05258f41da473952aa8b328d8b907498c"}, + {file = "Pillow-9.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:89dca0ce00a2b49024df6325925555d406b14aa3efc2f752dbb5940c52c56b11"}, + {file = "Pillow-9.3.0-cp311-cp311-win32.whl", hash = "sha256:3168434d303babf495d4ba58fc22d6604f6e2afb97adc6a423e917dab828939c"}, + {file = "Pillow-9.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:18498994b29e1cf86d505edcb7edbe814d133d2232d256db8c7a8ceb34d18cef"}, + {file = "Pillow-9.3.0-cp37-cp37m-macosx_10_10_x86_64.whl", hash = "sha256:772a91fc0e03eaf922c63badeca75e91baa80fe2f5f87bdaed4280662aad25c9"}, + {file = "Pillow-9.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:afa4107d1b306cdf8953edde0534562607fe8811b6c4d9a486298ad31de733b2"}, + {file = "Pillow-9.3.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4012d06c846dc2b80651b120e2cdd787b013deb39c09f407727ba90015c684f"}, + {file = "Pillow-9.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77ec3e7be99629898c9a6d24a09de089fa5356ee408cdffffe62d67bb75fdd72"}, + {file = "Pillow-9.3.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:6c738585d7a9961d8c2821a1eb3dcb978d14e238be3d70f0a706f7fa9316946b"}, + {file = "Pillow-9.3.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:828989c45c245518065a110434246c44a56a8b2b2f6347d1409c787e6e4651ee"}, + {file = "Pillow-9.3.0-cp37-cp37m-win32.whl", hash = "sha256:82409ffe29d70fd733ff3c1025a602abb3e67405d41b9403b00b01debc4c9a29"}, + {file = "Pillow-9.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:41e0051336807468be450d52b8edd12ac60bebaa97fe10c8b660f116e50b30e4"}, + {file = "Pillow-9.3.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:b03ae6f1a1878233ac620c98f3459f79fd77c7e3c2b20d460284e1fb370557d4"}, + {file = "Pillow-9.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4390e9ce199fc1951fcfa65795f239a8a4944117b5935a9317fb320e7767b40f"}, + {file = "Pillow-9.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40e1ce476a7804b0fb74bcfa80b0a2206ea6a882938eaba917f7a0f004b42502"}, + {file = "Pillow-9.3.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a0a06a052c5f37b4ed81c613a455a81f9a3a69429b4fd7bb913c3fa98abefc20"}, + {file = "Pillow-9.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03150abd92771742d4a8cd6f2fa6246d847dcd2e332a18d0c15cc75bf6703040"}, 
+ {file = "Pillow-9.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:15c42fb9dea42465dfd902fb0ecf584b8848ceb28b41ee2b58f866411be33f07"}, + {file = "Pillow-9.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:51e0e543a33ed92db9f5ef69a0356e0b1a7a6b6a71b80df99f1d181ae5875636"}, + {file = "Pillow-9.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3dd6caf940756101205dffc5367babf288a30043d35f80936f9bfb37f8355b32"}, + {file = "Pillow-9.3.0-cp38-cp38-win32.whl", hash = "sha256:f1ff2ee69f10f13a9596480335f406dd1f70c3650349e2be67ca3139280cade0"}, + {file = "Pillow-9.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:276a5ca930c913f714e372b2591a22c4bd3b81a418c0f6635ba832daec1cbcfc"}, + {file = "Pillow-9.3.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:73bd195e43f3fadecfc50c682f5055ec32ee2c933243cafbfdec69ab1aa87cad"}, + {file = "Pillow-9.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c7c8ae3864846fc95f4611c78129301e203aaa2af813b703c55d10cc1628535"}, + {file = "Pillow-9.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e0918e03aa0c72ea56edbb00d4d664294815aa11291a11504a377ea018330d3"}, + {file = "Pillow-9.3.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0915e734b33a474d76c28e07292f196cdf2a590a0d25bcc06e64e545f2d146c"}, + {file = "Pillow-9.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af0372acb5d3598f36ec0914deed2a63f6bcdb7b606da04dc19a88d31bf0c05b"}, + {file = "Pillow-9.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:ad58d27a5b0262c0c19b47d54c5802db9b34d38bbf886665b626aff83c74bacd"}, + {file = "Pillow-9.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:97aabc5c50312afa5e0a2b07c17d4ac5e865b250986f8afe2b02d772567a380c"}, + {file = "Pillow-9.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9aaa107275d8527e9d6e7670b64aabaaa36e5b6bd71a1015ddd21da0d4e06448"}, + {file = "Pillow-9.3.0-cp39-cp39-win32.whl", hash = "sha256:bac18ab8d2d1e6b4ce25e3424f709aceef668347db8637c2296bcf41acb7cf48"}, + {file = "Pillow-9.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:b472b5ea442148d1c3e2209f20f1e0bb0eb556538690fa70b5e1f79fa0ba8dc2"}, + {file = "Pillow-9.3.0-pp37-pypy37_pp73-macosx_10_10_x86_64.whl", hash = "sha256:ab388aaa3f6ce52ac1cb8e122c4bd46657c15905904b3120a6248b5b8b0bc228"}, + {file = "Pillow-9.3.0-pp37-pypy37_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbb8e7f2abee51cef77673be97760abff1674ed32847ce04b4af90f610144c7b"}, + {file = "Pillow-9.3.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca31dd6014cb8b0b2db1e46081b0ca7d936f856da3b39744aef499db5d84d02"}, + {file = "Pillow-9.3.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c7025dce65566eb6e89f56c9509d4f628fddcedb131d9465cacd3d8bac337e7e"}, + {file = "Pillow-9.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:ebf2029c1f464c59b8bdbe5143c79fa2045a581ac53679733d3a91d400ff9efb"}, + {file = "Pillow-9.3.0-pp38-pypy38_pp73-macosx_10_10_x86_64.whl", hash = "sha256:b59430236b8e58840a0dfb4099a0e8717ffb779c952426a69ae435ca1f57210c"}, + {file = "Pillow-9.3.0-pp38-pypy38_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12ce4932caf2ddf3e41d17fc9c02d67126935a44b86df6a206cf0d7161548627"}, + {file = "Pillow-9.3.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae5331c23ce118c53b172fa64a4c037eb83c9165aba3a7ba9ddd3ec9fa64a699"}, + {file = "Pillow-9.3.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = 
"sha256:0b07fffc13f474264c336298d1b4ce01d9c5a011415b79d4ee5527bb69ae6f65"}, + {file = "Pillow-9.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:073adb2ae23431d3b9bcbcff3fe698b62ed47211d0716b067385538a1b0f28b8"}, + {file = "Pillow-9.3.0.tar.gz", hash = "sha256:c935a22a557a560108d780f9a0fc426dd7459940dc54faa49d83249c8d3e760f"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=2.4)", "sphinx-copybutton", "sphinx-issues (>=3.0.1)", "sphinx-removed-in", "sphinxext-opengraph"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] + +[[package]] +name = "pkgutil-resolve-name" +version = "1.3.10" +description = "Resolve a name to an object." +optional = false +python-versions = ">=3.6" +files = [ + {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, + {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, +] + +[[package]] +name = "platformdirs" +version = "3.9.1" +description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." +optional = false +python-versions = ">=3.7" +files = [ + {file = "platformdirs-3.9.1-py3-none-any.whl", hash = "sha256:ad8291ae0ae5072f66c16945166cb11c63394c7a3ad1b1bc9828ca3162da8c2f"}, + {file = "platformdirs-3.9.1.tar.gz", hash = "sha256:1b42b450ad933e981d56e59f1b97495428c9bd60698baab9f3eb3d00d5822421"}, +] + +[package.extras] +docs = ["furo (>=2023.5.20)", "proselint (>=0.13)", "sphinx (>=7.0.1)", "sphinx-autodoc-typehints (>=1.23,!=1.23.4)"] +test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-cov (>=4.1)", "pytest-mock (>=3.10)"] + +[[package]] +name = "pooch" +version = "1.7.0" +description = "\"Pooch manages your Python library's sample data files: it automatically downloads and stores them in a local directory, with support for versioning and corruption checks.\"" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pooch-1.7.0-py3-none-any.whl", hash = "sha256:74258224fc33d58f53113cf955e8d51bf01386b91492927d0d1b6b341a765ad7"}, + {file = "pooch-1.7.0.tar.gz", hash = "sha256:f174a1041b6447f0eef8860f76d17f60ed2f857dc0efa387a7f08228af05d998"}, +] + +[package.dependencies] +packaging = ">=20.0" +platformdirs = ">=2.5.0" +requests = ">=2.19.0" + +[package.extras] +progress = ["tqdm (>=4.41.0,<5.0.0)"] +sftp = ["paramiko (>=2.7.0)"] +xxhash = ["xxhash (>=1.4.3)"] + +[[package]] +name = "portalocker" +version = "2.7.0" +description = "Wraps the portalocker recipe for easy usage" +optional = false +python-versions = ">=3.5" +files = [ + {file = "portalocker-2.7.0-py2.py3-none-any.whl", hash = "sha256:a07c5b4f3985c3cf4798369631fb7011adb498e2a46d8440efc75a8f29a0f983"}, + {file = "portalocker-2.7.0.tar.gz", hash = "sha256:032e81d534a88ec1736d03f780ba073f047a06c478b06e2937486f334e955c51"}, +] + +[package.dependencies] +pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} + +[package.extras] +docs = ["sphinx (>=1.7.1)"] +redis = ["redis"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)"] + +[[package]] +name = "praat-parselmouth" +version = "0.4.3" +description = "Praat in Python, the Pythonic way" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" +files = [ + {file = 
"praat-parselmouth-0.4.3.tar.gz", hash = "sha256:93538d0ba06444b68d18b793efb436b0d645c62c0397c4977c1d27b679aee168"}, + {file = "praat_parselmouth-0.4.3-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:124925f3e40a6d626d65789d449bdabe43078528efbee6f3a1df6e67db60c971"}, + {file = "praat_parselmouth-0.4.3-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:0d3023d9b625c6b0a3cbe8a4f09cc23f666f9b9df40c59e33c4c9ca5b8ea1dac"}, + {file = "praat_parselmouth-0.4.3-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:6841b9d9d2a614382cf186311610d663f0170ba20824296878eb98905b04899a"}, + {file = "praat_parselmouth-0.4.3-cp27-cp27m-win32.whl", hash = "sha256:4fee56603cb57326457c6af779b89f96e7b2745114baa996659e1d52e5f245a3"}, + {file = "praat_parselmouth-0.4.3-cp27-cp27m-win_amd64.whl", hash = "sha256:dc688749a0db4144936d3ed5180996500eb927bbf321192019ddee535fb97f3d"}, + {file = "praat_parselmouth-0.4.3-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:c0ccf73de16c0f69162952b0d1865d4dbc929de0f9b88a9d7aea57f454de3cb8"}, + {file = "praat_parselmouth-0.4.3-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:87fa2dd7f8b5dd5e3127af82e97b229ae2db8e1656525329224df4c0bffa024c"}, + {file = "praat_parselmouth-0.4.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2bc111055efccf2bb25039a7891ec9ef106b13ddc5680293659ff0b4c5f4353f"}, + {file = "praat_parselmouth-0.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:cd38542210b1f381086b4a9424832b2330c42712e0fb7ea6c28c9200119c294b"}, + {file = "praat_parselmouth-0.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a536b37411c52171500984c97bfd66dc000701a7dc0807e11061b85a653a600a"}, + {file = "praat_parselmouth-0.4.3-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6ea1ab0632eff129516f147041aaf7874e50770561a2e9b9c81913b6de243f2a"}, + {file = "praat_parselmouth-0.4.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:543ba3deb32502e93074b76b1cfb3f09e598e5d9f74a0345fa5b3928fedb5a51"}, + {file = "praat_parselmouth-0.4.3-cp310-cp310-win32.whl", hash = "sha256:e0addf774a57d57a54df2b06de04ad0de34e81a3abfda03f744c732776c779ec"}, + {file = "praat_parselmouth-0.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:fc497357aeea2e3cbca2fb308d66b9de9739dc6b320ca2661ca6250f7a7489bd"}, + {file = "praat_parselmouth-0.4.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:afac52cb7a72cda7fe2ec1d9573d8f402786abcb06bd7a22f2ca240f95e33263"}, + {file = "praat_parselmouth-0.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b2261a79c2dc5387a7a678ec304ef8dd00ed93d9e028148bbb064fd0ac222a3a"}, + {file = "praat_parselmouth-0.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:de31b458d3c1ca7ee45506871a38fdc3aec44526c065552adf8bec2876e816bd"}, + {file = "praat_parselmouth-0.4.3-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:63ff24e045bed7c44f140fb7bab910d89fd3a45b7e8afe5b5e936aa2eea62904"}, + {file = "praat_parselmouth-0.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a40c51c628235c54c8956306fc58fd14cd04127d85359134ef73ef35ff19d651"}, + {file = "praat_parselmouth-0.4.3-cp311-cp311-win32.whl", hash = "sha256:f8ad9ee3be60d33f1ad593ec5f99466b1c266e00d29a5ec5787f969c618a7a9a"}, + {file = "praat_parselmouth-0.4.3-cp311-cp311-win_amd64.whl", hash = "sha256:c32b1f3632e69ed67f501c635fff37ad72e1eae4ddd1c2c0827c4690c06ee990"}, + {file = "praat_parselmouth-0.4.3-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:1dcb6f55376f193c83d123953a55de471bcadd756af3b157c13d455b0c052999"}, + 
{file = "praat_parselmouth-0.4.3-cp35-cp35m-manylinux2010_i686.whl", hash = "sha256:0970facd26b771f5799a396a0e54d12a69fbf8904a4f6ae0442f3831175e4508"}, + {file = "praat_parselmouth-0.4.3-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:5c1104f41d9fef48cd44247738b9c8735e10a12ba0a1860e478e0bd69201813e"}, + {file = "praat_parselmouth-0.4.3-cp35-cp35m-win32.whl", hash = "sha256:3d12469e301d9a25f29f6cb5427aa9a1276e7f2f1edf1a3caede69a84c46170f"}, + {file = "praat_parselmouth-0.4.3-cp35-cp35m-win_amd64.whl", hash = "sha256:c4142faf664dd6c7f1773d04331b278d92e17064eaaef09132954f72a9041ea0"}, + {file = "praat_parselmouth-0.4.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:5ea2079d519e8d42ed8d2de3c4f68803110060a8ae5d1c56df795c600aa1c3be"}, + {file = "praat_parselmouth-0.4.3-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:2e88f00b740548cf3de5768b2d06e296e525164ea71ccc991920f41f2e277ad2"}, + {file = "praat_parselmouth-0.4.3-cp36-cp36m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2da226bccd52fd81223eb94a7ea43a1a7588e4384ea65ce0818329b73ef8df6d"}, + {file = "praat_parselmouth-0.4.3-cp36-cp36m-win32.whl", hash = "sha256:0f3af0413992398ac613b0eefdfbcb8cad064c36a28b972300a2bb760523c109"}, + {file = "praat_parselmouth-0.4.3-cp36-cp36m-win_amd64.whl", hash = "sha256:e0ed79941b6e37a440860511767eedd85ec003060870d10ff1f98773b2a268ae"}, + {file = "praat_parselmouth-0.4.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:10f3113ad4f5f6df5fe81d4080ca3ad46de2fe0fdb8ebbcad1ba884b1cae3b9d"}, + {file = "praat_parselmouth-0.4.3-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:6e9338f7a1b304390014bb2eec619e5a306527a4df438e68439c92aa968627dc"}, + {file = "praat_parselmouth-0.4.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cb3798b2ca8163444662b6ae84a74b1add38b2c04e5af8d07bde55cf0335300a"}, + {file = "praat_parselmouth-0.4.3-cp37-cp37m-win32.whl", hash = "sha256:d947f9d1fb092b91acca1259ce4dd62ff4f456338958fd1fd41ee65efc53ca2c"}, + {file = "praat_parselmouth-0.4.3-cp37-cp37m-win_amd64.whl", hash = "sha256:2f3e026f590aeec8f68921359f56a42efa43076942f271244bee57fd22db8eef"}, + {file = "praat_parselmouth-0.4.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:28844229dab2a9335629b4526188b9540d02208856f48b1a46776279c022f937"}, + {file = "praat_parselmouth-0.4.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:410748af84eb8c2eb69e408e300694a45090ed7c4f31375c4ec75a8c18f87169"}, + {file = "praat_parselmouth-0.4.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:30ff6f17babad25b9d6ab086465a54494eef9d1b4368b0722230c5282be2bf94"}, + {file = "praat_parselmouth-0.4.3-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:ff7096bc3e87a8f719e66f5e16a90e2f6de445612abd234f86837d390b947421"}, + {file = "praat_parselmouth-0.4.3-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f41d121c4d2322ff12808bb2c4490609f750f89064170e327dfd74fca13cc212"}, + {file = "praat_parselmouth-0.4.3-cp38-cp38-win32.whl", hash = "sha256:9af9945db11fab0e1ed29ad20f7c97a3e7a8d016328ad6d7237a0d7819db075e"}, + {file = "praat_parselmouth-0.4.3-cp38-cp38-win_amd64.whl", hash = "sha256:ae0c63c432e8216d7c70da44131f51c845fb81d48ac04eb5f39ebcfae34624be"}, + {file = "praat_parselmouth-0.4.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8e25658af5a87ed502753de6924c51bf3400d4078e67a611b5874ab08b478fdb"}, + {file = "praat_parselmouth-0.4.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:7aa2ffd0c6e47feda35343a9d6722b2558f3677a4a51bf5ec864f27ab80e2f42"}, + {file = "praat_parselmouth-0.4.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3b245d9457ab39f12142da160cda12c4c2a58d9b916e5bb33e6b3ac267882d46"}, + {file = "praat_parselmouth-0.4.3-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:da9779a694941074bc5b199dd3cb41ad4af3306552f06af8dbfdea6ab0a87dec"}, + {file = "praat_parselmouth-0.4.3-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cfa141c04fd8a0477f91c168878112098a25cbac7ac4a20de627bc9293ee4667"}, + {file = "praat_parselmouth-0.4.3-cp39-cp39-win32.whl", hash = "sha256:6941fe602802fd57ecbedecd612b41493b7d1c6bf722ac0cbf3f47f805fbbd43"}, + {file = "praat_parselmouth-0.4.3-cp39-cp39-win_amd64.whl", hash = "sha256:5252496e0391754a642973837670c56ecd39c8e0a1f7ec6e6b60b0cd2cc9f51d"}, + {file = "praat_parselmouth-0.4.3-pp27-pypy_73-macosx_10_9_x86_64.whl", hash = "sha256:fd7c143c6511807b67c92b3ab94733746c0ae3a7b4ba52d6763585c4d459061d"}, + {file = "praat_parselmouth-0.4.3-pp27-pypy_73-manylinux2010_x86_64.whl", hash = "sha256:7ad0739ad6c102817c7d43b67b7270f78cb431eb72b6ecd9a17e354d1b379deb"}, + {file = "praat_parselmouth-0.4.3-pp27-pypy_73-win32.whl", hash = "sha256:f5e98ec1f41efba90bedab358cff8e6a3c6473978e1f42b55d0977e580efe673"}, + {file = "praat_parselmouth-0.4.3-pp36-pypy36_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7b58c1c8fd967446f6d74775b5d9bceadfe35a928fa5f192d4d03d80cb005d92"}, + {file = "praat_parselmouth-0.4.3-pp36-pypy36_pp73-manylinux2010_x86_64.whl", hash = "sha256:d217df07c770156fa284aff3e7a5c11eb43e37f0226730d729d6b45be8a7c4d7"}, + {file = "praat_parselmouth-0.4.3-pp36-pypy36_pp73-win32.whl", hash = "sha256:29cb47438989f8155c3b3dca987afd48999dec71e4b79564aa7e922c3c5c1f9a"}, + {file = "praat_parselmouth-0.4.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:5f772b4a097654883f4bba41efae419f9ebdd5e83ef7a857e547100d26663e2c"}, + {file = "praat_parselmouth-0.4.3-pp37-pypy37_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:bf9634a6986732dc43a88b3a16a0000cff903da1db6556b7959a6a4897f25570"}, + {file = "praat_parselmouth-0.4.3-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:fab1bbb6a88f47cb5d0db07a4fd6d88b9294d2775a7556aeb459e96ac372e29f"}, + {file = "praat_parselmouth-0.4.3-pp37-pypy37_pp73-win32.whl", hash = "sha256:261f03f95f25943da2cf746599e47acfcf79b7fc823c871571901d6c97bad948"}, + {file = "praat_parselmouth-0.4.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:199b8df2659a1e6f30e9ae3064b0a28a661d834d2bccb56d22051c40cc348817"}, + {file = "praat_parselmouth-0.4.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ef1f3f6bd08cc410d0d595f6a9c7dd72558e30ad3bd7949c94ea4e07a2de2605"}, + {file = "praat_parselmouth-0.4.3-pp38-pypy38_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:28a61b7a3cf95a53554dd3ebb4f48e991d4b913ae2d2fbc3868a4e864d69794f"}, + {file = "praat_parselmouth-0.4.3-pp38-pypy38_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:488833ee33690fa1a57a3c429d286e42e6882748f5c3d28dc50889abec12b8c2"}, + {file = "praat_parselmouth-0.4.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:10f181e199c47fa90fe7cad065275f7f3ccda2de6febf86394cf96aa48531079"}, + {file = "praat_parselmouth-0.4.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:52702bc5cdf59b2b4db87448fe9042307e5ebce6b67ee5ea55c2b8627ce803e0"}, + {file = "praat_parselmouth-0.4.3-pp39-pypy39_pp73-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:a7d4f5d7c701517986654365f0a41b8b4a610a2ddc0365da60e48c098774259b"}, + {file = "praat_parselmouth-0.4.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4dc013608a536ad74efdc3242421cabfcb8cb2e9cd1259ec9de9aeaa141c2d14"}, + {file = "praat_parselmouth-0.4.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:d593065ed1500d305d9cf3d20f5ac7e3671061c3c073ef6e94e97817a664d399"}, +] + +[package.dependencies] +numpy = ">=1.7.0" + +[[package]] +name = "protobuf" +version = "4.23.4" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "protobuf-4.23.4-cp310-abi3-win32.whl", hash = "sha256:5fea3c64d41ea5ecf5697b83e41d09b9589e6f20b677ab3c48e5f242d9b7897b"}, + {file = "protobuf-4.23.4-cp310-abi3-win_amd64.whl", hash = "sha256:7b19b6266d92ca6a2a87effa88ecc4af73ebc5cfde194dc737cf8ef23a9a3b12"}, + {file = "protobuf-4.23.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8547bf44fe8cec3c69e3042f5c4fb3e36eb2a7a013bb0a44c018fc1e427aafbd"}, + {file = "protobuf-4.23.4-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:fee88269a090ada09ca63551bf2f573eb2424035bcf2cb1b121895b01a46594a"}, + {file = "protobuf-4.23.4-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:effeac51ab79332d44fba74660d40ae79985901ac21bca408f8dc335a81aa597"}, + {file = "protobuf-4.23.4-cp37-cp37m-win32.whl", hash = "sha256:c3e0939433c40796ca4cfc0fac08af50b00eb66a40bbbc5dee711998fb0bbc1e"}, + {file = "protobuf-4.23.4-cp37-cp37m-win_amd64.whl", hash = "sha256:9053df6df8e5a76c84339ee4a9f5a2661ceee4a0dab019e8663c50ba324208b0"}, + {file = "protobuf-4.23.4-cp38-cp38-win32.whl", hash = "sha256:e1c915778d8ced71e26fcf43c0866d7499891bca14c4368448a82edc61fdbc70"}, + {file = "protobuf-4.23.4-cp38-cp38-win_amd64.whl", hash = "sha256:351cc90f7d10839c480aeb9b870a211e322bf05f6ab3f55fcb2f51331f80a7d2"}, + {file = "protobuf-4.23.4-cp39-cp39-win32.whl", hash = "sha256:6dd9b9940e3f17077e820b75851126615ee38643c2c5332aa7a359988820c720"}, + {file = "protobuf-4.23.4-cp39-cp39-win_amd64.whl", hash = "sha256:0a5759f5696895de8cc913f084e27fd4125e8fb0914bb729a17816a33819f474"}, + {file = "protobuf-4.23.4-py3-none-any.whl", hash = "sha256:e9d0be5bf34b275b9f87ba7407796556abeeba635455d036c7351f7c183ef8ff"}, + {file = "protobuf-4.23.4.tar.gz", hash = "sha256:ccd9430c0719dce806b93f89c91de7977304729e55377f872a92465d548329a9"}, +] + +[[package]] +name = "pyasn1" +version = "0.4.8" +description = "ASN.1 types and codecs" +optional = false +python-versions = "*" +files = [ + {file = "pyasn1-0.4.8-py2.py3-none-any.whl", hash = "sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d"}, + {file = "pyasn1-0.4.8.tar.gz", hash = "sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba"}, +] + +[[package]] +name = "pyasn1-modules" +version = "0.2.8" +description = "A collection of ASN.1-based protocols modules." 
+optional = false +python-versions = "*" +files = [ + {file = "pyasn1-modules-0.2.8.tar.gz", hash = "sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e"}, + {file = "pyasn1_modules-0.2.8-py2.py3-none-any.whl", hash = "sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74"}, +] + +[package.dependencies] +pyasn1 = ">=0.4.6,<0.5.0" + +[[package]] +name = "pycparser" +version = "2.21" +description = "C parser in Python" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "pycparser-2.21-py2.py3-none-any.whl", hash = "sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9"}, + {file = "pycparser-2.21.tar.gz", hash = "sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206"}, +] + +[[package]] +name = "pydantic" +version = "2.0.3" +description = "Data validation using Python type hints" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-2.0.3-py3-none-any.whl", hash = "sha256:614eb3321eb600c81899a88fa9858b008e3c79e0d4f1b49ab1f516b4b0c27cfb"}, + {file = "pydantic-2.0.3.tar.gz", hash = "sha256:94f13e0dcf139a5125e88283fc999788d894e14ed90cf478bcc2ee50bd4fc630"}, +] + +[package.dependencies] +annotated-types = ">=0.4.0" +pydantic-core = "2.3.0" +typing-extensions = ">=4.6.1" + +[package.extras] +email = ["email-validator (>=2.0.0)"] + +[[package]] +name = "pydantic-core" +version = "2.3.0" +description = "" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic_core-2.3.0-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:4542c98b8364b976593703a2dda97377433b102f380b61bc3a2cbc2fbdae1d1f"}, + {file = "pydantic_core-2.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9342de50824b40f55d2600f66c6f9a91a3a24851eca39145a749a3dc804ee599"}, + {file = "pydantic_core-2.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:539432f911686cb80284c30b33eaf9f4fd9a11e1111fe0dc98fdbdce69b49821"}, + {file = "pydantic_core-2.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:38a0e7ee65c8999394d92d9c724434cb629279d19844f2b69d9bbc46dc8b8b61"}, + {file = "pydantic_core-2.3.0-cp310-cp310-manylinux_2_24_armv7l.whl", hash = "sha256:e3ed6834cc005798187a56c248a2240207cb8ffdda1c89e9afda4c3d526c2ea0"}, + {file = "pydantic_core-2.3.0-cp310-cp310-manylinux_2_24_ppc64le.whl", hash = "sha256:e72ac299a6bf732a60852d052acf3999d234686755a02ba111e85e7ebf8155b1"}, + {file = "pydantic_core-2.3.0-cp310-cp310-manylinux_2_24_s390x.whl", hash = "sha256:616b3451b05ca63b8f433c627f68046b39543faeaa4e50d8c6699a2a1e4b85a5"}, + {file = "pydantic_core-2.3.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:adcb9c8848e15c613e483e0b99767ae325af27fe0dbd866df01fe5849d06e6e1"}, + {file = "pydantic_core-2.3.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:464bf799b422be662e5e562e62beeffc9eaa907d381a9d63a2556615bbda286d"}, + {file = "pydantic_core-2.3.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4638ebc17de08c2f3acba557efeb6f195c88b7299d8c55c0bb4e20638bbd4d03"}, + {file = "pydantic_core-2.3.0-cp310-none-win32.whl", hash = "sha256:9ff322c7e1030543d35d83bb521b69114d3d150750528d7757544f639def9ad6"}, + {file = "pydantic_core-2.3.0-cp310-none-win_amd64.whl", hash = "sha256:4824eb018f0a4680b1e434697a9bf3f41c7799b80076d06530cbbd212e040ccc"}, + {file = "pydantic_core-2.3.0-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:0aa429578e23885b3984c49d687cd05ab06f0b908ea1711a8bf7e503b7f97160"}, + {file = 
"pydantic_core-2.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:20d710c1f79af930b8891bcebd84096798e4387ab64023ef41521d58f21277d3"}, + {file = "pydantic_core-2.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:309f45d4d7481d6f09cb9e35c72caa0e50add4a30bb08c04c5fe5956a0158633"}, + {file = "pydantic_core-2.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bcfb7be905aa849bd882262e1df3f75b564e2f708b4b4c7ad2d3deaf5410562"}, + {file = "pydantic_core-2.3.0-cp311-cp311-manylinux_2_24_armv7l.whl", hash = "sha256:85cd9c0af34e371390e3cb2f3a470b0b40cc07568c1e966c638c49062be6352d"}, + {file = "pydantic_core-2.3.0-cp311-cp311-manylinux_2_24_ppc64le.whl", hash = "sha256:37c5028cebdf731298724070838fb3a71ef1fbd201d193d311ac2cbdbca25a23"}, + {file = "pydantic_core-2.3.0-cp311-cp311-manylinux_2_24_s390x.whl", hash = "sha256:e4208f23f12d0ad206a07a489ef4cb15722c10b62774c4460ee4123250be938e"}, + {file = "pydantic_core-2.3.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c24465dd11b65c8510f251b095fc788c7c91481c81840112fe3f76c30793a455"}, + {file = "pydantic_core-2.3.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:3cd7ee8bbfab277ab56e272221886fd33a1b5943fbf45ae9195aa6a48715a8a0"}, + {file = "pydantic_core-2.3.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:0fc7e0b056b66cc536e97ef60f48b3b289f6b3b62ac225afd4b22a42434617bf"}, + {file = "pydantic_core-2.3.0-cp311-none-win32.whl", hash = "sha256:4788135db4bd83a5edc3522b11544b013be7d25b74b155e08dd3b20cd6663bbb"}, + {file = "pydantic_core-2.3.0-cp311-none-win_amd64.whl", hash = "sha256:f93c867e5e85584a28c6a6feb6f2086d717266eb5d1210d096dd717b7f4dec04"}, + {file = "pydantic_core-2.3.0-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:73f62bb7fd862d9bcd886e10612bade6fe042eda8b47e8c129892bcfb7b45e84"}, + {file = "pydantic_core-2.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d889d498fce64bfcd8adf1a78579a7f626f825cbeb2956a24a29b35f9a1df32"}, + {file = "pydantic_core-2.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d55e38a89ec2ae17b2fa7ffeda6b70f63afab1888bd0d57aaa7b7879760acb4"}, + {file = "pydantic_core-2.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1aefebb506bc1fe355d91d25f12bcdea7f4d7c2d9f0f6716dd025543777c99a5"}, + {file = "pydantic_core-2.3.0-cp312-cp312-manylinux_2_24_armv7l.whl", hash = "sha256:6441a29f42585f085db0c04cd0557d4cbbb46fa68a0972409b1cfe9f430280c1"}, + {file = "pydantic_core-2.3.0-cp312-cp312-manylinux_2_24_ppc64le.whl", hash = "sha256:47e8f034be31390a8f525431eb5e803a78ce7e2e11b32abf5361a972e14e6b61"}, + {file = "pydantic_core-2.3.0-cp312-cp312-manylinux_2_24_s390x.whl", hash = "sha256:ad814864aba263be9c83ada44a95f72d10caabbf91589321f95c29c902bdcff0"}, + {file = "pydantic_core-2.3.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9eff3837d447fccf2ac38c259b14ab9cbde700df355a45a1f3ff244d5e78f8b6"}, + {file = "pydantic_core-2.3.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:534f3f63c000f08050c6f7f4378bf2b52d7ba9214e9d35e3f60f7ad24a4d6425"}, + {file = "pydantic_core-2.3.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ef6a222d54f742c24f6b143aab088702db3a827b224e75b9dd28b38597c595fe"}, + {file = "pydantic_core-2.3.0-cp312-none-win32.whl", hash = "sha256:4e26944e64ecc1d7b19db954c0f7b471f3b141ec8e1a9f57cfe27671525cd248"}, + {file = "pydantic_core-2.3.0-cp312-none-win_amd64.whl", hash = 
"sha256:019c5c41941438570dfc7d3f0ae389b2425add1775a357ce1e83ed1434f943d6"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-macosx_10_7_x86_64.whl", hash = "sha256:27c1bbfb9d84a75cf33b7f19b53c29eb7ead99b235fce52aced5507174ab8f98"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-macosx_11_0_arm64.whl", hash = "sha256:7cb496e934b71f1ade844ab91d6ccac78a3520e5df02fdb2357f85a71e541e69"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5af2d43b1978958d91351afbcc9b4d0cfe144c46c61740e82aaac8bb39ab1a4d"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d3097c39d7d4e8dba2ef86de171dcccad876c36d8379415ba18a5a4d0533510"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-manylinux_2_24_armv7l.whl", hash = "sha256:dd3b023f3317dbbbc775e43651ce1a31a9cea46216ad0b5be37afc18a2007699"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-manylinux_2_24_ppc64le.whl", hash = "sha256:27babb9879bf2c45ed655d02639f4c30e2b9ef1b71ce59c2305bbf7287910a18"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-manylinux_2_24_s390x.whl", hash = "sha256:2183a9e18cdc0de53bdaa1675f237259162abeb62d6ac9e527c359c1074dc55d"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c089d8e7f1b4db08b2f8e4107304eec338df046275dad432635a9be9531e2fc8"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:2f10aa5452b865818dd0137f568d443f5e93b60a27080a01aa4b7512c7ba13a3"}, + {file = "pydantic_core-2.3.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f642313d559f9d9a00c4de6820124059cc3342a0d0127b18301de2c680d5ea40"}, + {file = "pydantic_core-2.3.0-cp37-none-win32.whl", hash = "sha256:45327fc57afbe3f2c3d7f54a335d5cecee8a9fdb3906a2fbed8af4092f4926df"}, + {file = "pydantic_core-2.3.0-cp37-none-win_amd64.whl", hash = "sha256:e427b66596a6441a5607dfc0085b47d36073f88da7ac48afd284263b9b99e6ce"}, + {file = "pydantic_core-2.3.0-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:0b3d781c71b8bfb621ef23b9c874933e2cd33237c1a65cc20eeb37437f8e7e18"}, + {file = "pydantic_core-2.3.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ad46027dbd5c1db87dc0b49becbe23093b143a20302028d387dae37ee5ef95f5"}, + {file = "pydantic_core-2.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39aa09ed7ce2a648c904f79032d16dda29e6913112af8465a7bf710eef23c7ca"}, + {file = "pydantic_core-2.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05b4bf8c58409586a7a04c858a86ab10f28c6c1a7c33da65e0326c59d5b0ab16"}, + {file = "pydantic_core-2.3.0-cp38-cp38-manylinux_2_24_armv7l.whl", hash = "sha256:ba2b807d2b62c446120906b8580cddae1d76d3de4efbb95ccc87f5e35c75b4b2"}, + {file = "pydantic_core-2.3.0-cp38-cp38-manylinux_2_24_ppc64le.whl", hash = "sha256:ea955e4ed21f4bbb9b83fea09fc6af0bed82e69ecf6b35ec89237a0a49633033"}, + {file = "pydantic_core-2.3.0-cp38-cp38-manylinux_2_24_s390x.whl", hash = "sha256:06884c07956526ac9ebfef40fe21a11605569b8fc0e2054a375fb39c978bf48f"}, + {file = "pydantic_core-2.3.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f868e731a18b403b88aa434d960489ceeed0ddeb44ebc02389540731a67705e0"}, + {file = "pydantic_core-2.3.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:cb08fab0fc1db15c277b72e33ac74ad9c0c789413da8984a3eacb22a94b42ef4"}, + {file = "pydantic_core-2.3.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6ca34c29fbd6592de5fd39e80c1993634d704c4e7e14ba54c87b2c7c53da68fe"}, + {file = "pydantic_core-2.3.0-cp38-none-win32.whl", 
hash = "sha256:cd782807d35c8a41aaa7d30b5107784420eefd9fdc1c760d86007d43ae00b15d"}, + {file = "pydantic_core-2.3.0-cp38-none-win_amd64.whl", hash = "sha256:01f56d5ee70b1d39c0fd08372cc5142274070ab7181d17c86035f130eebc05b8"}, + {file = "pydantic_core-2.3.0-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:78b1ac0151271ce62bc2b33755f1043eda6a310373143a2f27e2bcd3d5fc8633"}, + {file = "pydantic_core-2.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:64bfd2c35a2c350f73ac52dc134d8775f93359c4c969280a6fe5301b5b6e7431"}, + {file = "pydantic_core-2.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:937c0fe9538f1212b62df6a68f8d78df3572fe3682d9a0dd8851eac8a4e46063"}, + {file = "pydantic_core-2.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d965c7c4b40d1cedec9188782e98bd576f9a04868835604200c3a6e817b824f"}, + {file = "pydantic_core-2.3.0-cp39-cp39-manylinux_2_24_armv7l.whl", hash = "sha256:ad442b8585ed4a3c2d22e4bf7b465d9b7d281e055b09719a8aeb5b576422dc9b"}, + {file = "pydantic_core-2.3.0-cp39-cp39-manylinux_2_24_ppc64le.whl", hash = "sha256:4bf20c9722821fce766e685718e739deeccc60d6bc7be5029281db41f999ee0c"}, + {file = "pydantic_core-2.3.0-cp39-cp39-manylinux_2_24_s390x.whl", hash = "sha256:f3dd5333049b5b3faa739e0f40b77cc8b7a1aded2f2da0e28794c81586d7b08a"}, + {file = "pydantic_core-2.3.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dc5f516b24d24bc9e8dd9305460899f38302b3c4f9752663b396ef9848557bf"}, + {file = "pydantic_core-2.3.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:055f7ea6b1fbb37880d66d70eefd22dd319b09c79d2cb99b1dbfeb34b653b0b2"}, + {file = "pydantic_core-2.3.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:af693a89db6d6ac97dd84dd7769b3f2bd9007b578127d0e7dda03053f4d3b34b"}, + {file = "pydantic_core-2.3.0-cp39-none-win32.whl", hash = "sha256:f60e31e3e15e8c294bf70c60f8ae4d0c3caf3af8f26466e9aa8ea4c01302749b"}, + {file = "pydantic_core-2.3.0-cp39-none-win_amd64.whl", hash = "sha256:2b79f3681481f4424d7845cc7a261d5a4baa810d656b631fa844dc9967b36a7b"}, + {file = "pydantic_core-2.3.0-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:a666134b41712e30a71afaa26deeb4da374179f769fa49784cdf0e7698880fab"}, + {file = "pydantic_core-2.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c119e9227487ad3d7c3c737d896afe548a6be554091f9745da1f4b489c40561"}, + {file = "pydantic_core-2.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73929a2fb600a2333fce2efd92596cff5e6bf8946e20e93c067b220760064862"}, + {file = "pydantic_core-2.3.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:41bbc2678a5b6a19371b2cb51f30ccea71f0c14b26477d2d884fed761cea42c7"}, + {file = "pydantic_core-2.3.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:dcbff997f47d45bf028bda4c3036bb3101e89a3df271281d392b6175f71c71d1"}, + {file = "pydantic_core-2.3.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:afa8808159169368b66e4fbeafac6c6fd8f26246dc4d0dcc2caf94bd9cf1b828"}, + {file = "pydantic_core-2.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:12be3b5f54f8111ca38e6b7277f26c23ba5cb3344fae06f879a0a93dfc8b479e"}, + {file = "pydantic_core-2.3.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl", hash = "sha256:ed5babdcd3d052ba5cf8832561f18df20778c7ccf12587b2d82f7bf3bf259a0e"}, + {file = "pydantic_core-2.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:3d642e5c029e2acfacf6aa0a7a3e822086b3b777c70d364742561f9ca64c1ffc"}, + {file = "pydantic_core-2.3.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ba3073eb38a1294e8c7902989fb80a7a147a69db2396818722bd078476586a0"}, + {file = "pydantic_core-2.3.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5146a6749b1905e04e62e0ad4622f079e5582f8b3abef5fb64516c623127908"}, + {file = "pydantic_core-2.3.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:deeb64335f489c3c11949cbd1d1668b3f1fb2d1c6a5bf40e126ef7bf95f9fa40"}, + {file = "pydantic_core-2.3.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:31acc37288b8e69e4849f618c3d5cf13b58077c1a1ff9ade0b3065ba974cd385"}, + {file = "pydantic_core-2.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:e09d9f6d722de9d4c1c5f122ea9bc6b25a05f975457805af4dcab7b0128aacbf"}, + {file = "pydantic_core-2.3.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:ba6a8cf089222a171b8f84e6ec2d10f7a9d14f26be3a347b14775a8741810676"}, + {file = "pydantic_core-2.3.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ef1fd1b24e9bcddcb168437686677104e205c8e25b066e73ffdf331d3bb8792b"}, + {file = "pydantic_core-2.3.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eda1a89c4526826c0a87d33596a4cd15b8f58e9250f503e39af1699ba9c878e8"}, + {file = "pydantic_core-2.3.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a3e9a18401a28db4358da2e191508702dbf065f2664c710708cdf9552b9fa50c"}, + {file = "pydantic_core-2.3.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:a439fd0d45d51245bbde799726adda5bd18aed3fa2b01ab2e6a64d6d13776fa3"}, + {file = "pydantic_core-2.3.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:bf6a1d2c920cc9528e884850a4b2ee7629e3d362d5c44c66526d4097bbb07a1a"}, + {file = "pydantic_core-2.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:e33fcbea3b63a339dd94de0fc442fefacfe681cc7027ce63f67af9f7ceec7422"}, + {file = "pydantic_core-2.3.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:bf3ed993bdf4754909f175ff348cf8f78d4451215b8aa338633f149ca3b1f37a"}, + {file = "pydantic_core-2.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7584171eb3115acd4aba699bc836634783f5bd5aab131e88d8eeb8a3328a4a72"}, + {file = "pydantic_core-2.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1624baa76d1740711b2048f302ae9a6d73d277c55a8c3e88b53b773ebf73a971"}, + {file = "pydantic_core-2.3.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:06f33f695527f5a86e090f208978f9fd252c9cfc7e869d3b679bd71f7cb2c1fa"}, + {file = "pydantic_core-2.3.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:7ecf0a67b212900e92f328181fed02840d74ed39553cdb38d27314e2b9c89dfa"}, + {file = "pydantic_core-2.3.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:45fa1e8ad6f4367ad73674ca560da8e827cc890eaf371f3ee063d6d7366a207b"}, + {file = "pydantic_core-2.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:8d0dbcc57839831ae79fd24b1b83d42bc9448d79feaf3ed3fb5cbf94ffbf3eb7"}, + {file = "pydantic_core-2.3.0.tar.gz", hash = "sha256:5cfb5ac4e82c47d5dc25b209dd4c3989e284b80109f9e08b33c895080c424b4f"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" + +[[package]] +name = "pydub" +version = "0.25.1" +description = "Manipulate audio with an simple and easy high level interface" +optional = false 
+python-versions = "*" +files = [ + {file = "pydub-0.25.1-py2.py3-none-any.whl", hash = "sha256:65617e33033874b59d87db603aa1ed450633288aefead953b30bded59cb599a6"}, + {file = "pydub-0.25.1.tar.gz", hash = "sha256:980a33ce9949cab2a569606b65674d748ecbca4f0796887fd6f46173a7b0d30f"}, +] + +[[package]] +name = "pyparsing" +version = "3.0.9" +description = "pyparsing module - Classes and methods to define and execute parsing grammars" +optional = false +python-versions = ">=3.6.8" +files = [ + {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, + {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, +] + +[package.extras] +diagrams = ["jinja2", "railroad-diagrams"] + +[[package]] +name = "python-dateutil" +version = "2.8.2" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, + {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, +] + +[package.dependencies] +six = ">=1.5" + +[[package]] +name = "python-multipart" +version = "0.0.6" +description = "A streaming multipart parser for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "python_multipart-0.0.6-py3-none-any.whl", hash = "sha256:ee698bab5ef148b0a760751c261902cd096e57e10558e11aca17646b74ee1c18"}, + {file = "python_multipart-0.0.6.tar.gz", hash = "sha256:e9925a80bb668529f1b67c7fdb0a5dacdd7cbfc6fb0bff3ea443fe22bdd62132"}, +] + +[package.extras] +dev = ["atomicwrites (==1.2.1)", "attrs (==19.2.0)", "coverage (==6.5.0)", "hatch", "invoke (==1.7.3)", "more-itertools (==4.3.0)", "pbr (==4.3.0)", "pluggy (==1.0.0)", "py (==1.11.0)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-timeout (==2.1.0)", "pyyaml (==5.1)"] + +[[package]] +name = "pytz" +version = "2023.3" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, + {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, +] + +[[package]] +name = "pywin32" +version = "306" +description = "Python for Window Extensions" +optional = false +python-versions = "*" +files = [ + {file = "pywin32-306-cp310-cp310-win32.whl", hash = "sha256:06d3420a5155ba65f0b72f2699b5bacf3109f36acbe8923765c22938a69dfc8d"}, + {file = "pywin32-306-cp310-cp310-win_amd64.whl", hash = "sha256:84f4471dbca1887ea3803d8848a1616429ac94a4a8d05f4bc9c5dcfd42ca99c8"}, + {file = "pywin32-306-cp311-cp311-win32.whl", hash = "sha256:e65028133d15b64d2ed8f06dd9fbc268352478d4f9289e69c190ecd6818b6407"}, + {file = "pywin32-306-cp311-cp311-win_amd64.whl", hash = "sha256:a7639f51c184c0272e93f244eb24dafca9b1855707d94c192d4a0b4c01e1100e"}, + {file = "pywin32-306-cp311-cp311-win_arm64.whl", hash = "sha256:70dba0c913d19f942a2db25217d9a1b726c278f483a919f1abfed79c9cf64d3a"}, + {file = "pywin32-306-cp312-cp312-win32.whl", hash = "sha256:383229d515657f4e3ed1343da8be101000562bf514591ff383ae940cad65458b"}, + {file = "pywin32-306-cp312-cp312-win_amd64.whl", hash = "sha256:37257794c1ad39ee9be652da0462dc2e394c8159dfd913a8a4e8eb6fd346da0e"}, + {file 
= "pywin32-306-cp312-cp312-win_arm64.whl", hash = "sha256:5821ec52f6d321aa59e2db7e0a35b997de60c201943557d108af9d4ae1ec7040"}, + {file = "pywin32-306-cp37-cp37m-win32.whl", hash = "sha256:1c73ea9a0d2283d889001998059f5eaaba3b6238f767c9cf2833b13e6a685f65"}, + {file = "pywin32-306-cp37-cp37m-win_amd64.whl", hash = "sha256:72c5f621542d7bdd4fdb716227be0dd3f8565c11b280be6315b06ace35487d36"}, + {file = "pywin32-306-cp38-cp38-win32.whl", hash = "sha256:e4c092e2589b5cf0d365849e73e02c391c1349958c5ac3e9d5ccb9a28e017b3a"}, + {file = "pywin32-306-cp38-cp38-win_amd64.whl", hash = "sha256:e8ac1ae3601bee6ca9f7cb4b5363bf1c0badb935ef243c4733ff9a393b1690c0"}, + {file = "pywin32-306-cp39-cp39-win32.whl", hash = "sha256:e25fd5b485b55ac9c057f67d94bc203f3f6595078d1fb3b458c9c28b7153a802"}, + {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, +] + +[[package]] +name = "pyworld" +version = "0.3.4" +description = "PyWorld: a Python wrapper for WORLD vocoder" +optional = false +python-versions = "*" +files = [ + {file = "pyworld-0.3.4-cp310-cp310-win_amd64.whl", hash = "sha256:699d2478b31215893152f67bb3eef244ed77fd3b8ac6757428fbcf7f8e1a057c"}, + {file = "pyworld-0.3.4-cp311-cp311-win_amd64.whl", hash = "sha256:61292a4dced5f4a0677aa7afdd5c601df79e70771aa63bbdd8279e6a1d0f5d7a"}, + {file = "pyworld-0.3.4-cp36-cp36m-win32.whl", hash = "sha256:ebd9f918f54fb5dd6a16059635d904b56a5165e7e5207ab1b4fa0fb839b4efa7"}, + {file = "pyworld-0.3.4-cp36-cp36m-win_amd64.whl", hash = "sha256:34db25abde77cc231605d0f3748742cf9535156ee239fd861241a66d43c5134c"}, + {file = "pyworld-0.3.4-cp37-cp37m-win32.whl", hash = "sha256:8c0a4c6ef1d3ba7742b4cf773c2733743392ce525d75efda4b832abb623d8689"}, + {file = "pyworld-0.3.4-cp37-cp37m-win_amd64.whl", hash = "sha256:235ce1c9b97986897cf60544c6d46a5eef814688b1aa0ea9d88e9877e6df015f"}, + {file = "pyworld-0.3.4-cp38-cp38-win_amd64.whl", hash = "sha256:a1aa7ec78e4575c231bc374c825a3c5c1fefdff5c83932dcf90b7d333c156c2e"}, + {file = "pyworld-0.3.4-cp39-cp39-win_amd64.whl", hash = "sha256:fddbbb9aec3f2341279237a907b8b625fa099086205b24aa740008becbd7e3d4"}, + {file = "pyworld-0.3.4.tar.gz", hash = "sha256:106c70ee7a7d8c9ba488d8022f2033706929a40f0264256b8e87da5aab98303a"}, +] + +[package.dependencies] +cython = ">=0.24" +numpy = "*" + +[package.extras] +sdist = ["cython (>=0.24)", "numpy"] +test = ["nose"] + +[[package]] +name = "pyyaml" +version = "6.0.1" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.6" +files = [ + {file = "PyYAML-6.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d858aa552c999bc8a8d57426ed01e40bef403cd8ccdd0fc5f6f04a00414cac2a"}, + {file = "PyYAML-6.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd66fc5d0da6d9815ba2cebeb4205f95818ff4b79c3ebe268e75d961704af52f"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, + {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, + {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, + {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = 
"sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, + {file = "PyYAML-6.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f003ed9ad21d6a4713f0a9b5a7a0a79e08dd0f221aff4525a2be4c346ee60aab"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, + {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, + {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, + {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, + {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, + {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afd7e57eddb1a54f0f1a974bc4391af8bcce0b444685d936840f125cf046d5bd"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win32.whl", hash = "sha256:fca0e3a251908a499833aa292323f32437106001d436eca0e6e7833256674585"}, + {file = "PyYAML-6.0.1-cp36-cp36m-win_amd64.whl", hash = "sha256:f22ac1c3cac4dbc50079e965eba2c1058622631e526bd9afd45fedd49ba781fa"}, + {file = "PyYAML-6.0.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b1275ad35a5d18c62a7220633c913e1b42d44b46ee12554e5fd39c70a243d6a3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18aeb1bf9a78867dc38b259769503436b7c72f7a1f1f4c93ff9a17de54319b27"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:596106435fa6ad000c2991a98fa58eeb8656ef2325d7e158344fb33864ed87e3"}, + {file = "PyYAML-6.0.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:baa90d3f661d43131ca170712d903e6295d1f7a0f595074f151c0aed377c9b9c"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win32.whl", hash = "sha256:9046c58c4395dff28dd494285c82ba00b546adfc7ef001486fbf0324bc174fba"}, + {file = "PyYAML-6.0.1-cp37-cp37m-win_amd64.whl", hash = "sha256:4fb147e7a67ef577a588a0e2c17b6db51dda102c71de36f8549b6816a96e1867"}, + {file = "PyYAML-6.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1d4c7e777c441b20e32f52bd377e0c409713e8bb1386e1099c2415f26e479595"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, + {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, + {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, + {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, + {file = "PyYAML-6.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c8098ddcc2a85b61647b2590f825f3db38891662cfc2fc776415143f599bb859"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, + {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, + {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, + {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, +] + +[[package]] +name = "referencing" +version = "0.30.0" +description = "JSON Referencing + Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "referencing-0.30.0-py3-none-any.whl", hash = "sha256:c257b08a399b6c2f5a3510a50d28ab5dbc7bbde049bcaf954d43c446f83ab548"}, + {file = "referencing-0.30.0.tar.gz", hash = "sha256:47237742e990457f7512c7d27486394a9aadaf876cbfaa4be65b27b4f4d47c6b"}, +] + +[package.dependencies] +attrs = ">=22.2.0" +rpds-py = ">=0.7.0" + +[[package]] +name = "regex" +version = "2023.6.3" +description = "Alternative regular expression module, to replace re." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "regex-2023.6.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:824bf3ac11001849aec3fa1d69abcb67aac3e150a933963fb12bda5151fe1bfd"}, + {file = "regex-2023.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:05ed27acdf4465c95826962528f9e8d41dbf9b1aa8531a387dee6ed215a3e9ef"}, + {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b49c764f88a79160fa64f9a7b425620e87c9f46095ef9c9920542ab2495c8bc"}, + {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8e3f1316c2293e5469f8f09dc2d76efb6c3982d3da91ba95061a7e69489a14ef"}, + {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:43e1dd9d12df9004246bacb79a0e5886b3b6071b32e41f83b0acbf293f820ee8"}, + {file = "regex-2023.6.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4959e8bcbfda5146477d21c3a8ad81b185cd252f3d0d6e4724a5ef11c012fb06"}, + {file = "regex-2023.6.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:af4dd387354dc83a3bff67127a124c21116feb0d2ef536805c454721c5d7993d"}, + {file = "regex-2023.6.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2239d95d8e243658b8dbb36b12bd10c33ad6e6933a54d36ff053713f129aa536"}, + {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:890e5a11c97cf0d0c550eb661b937a1e45431ffa79803b942a057c4fb12a2da2"}, + {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:a8105e9af3b029f243ab11ad47c19b566482c150c754e4c717900a798806b222"}, + {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:25be746a8ec7bc7b082783216de8e9473803706723b3f6bef34b3d0ed03d57e2"}, + {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:3676f1dd082be28b1266c93f618ee07741b704ab7b68501a173ce7d8d0d0ca18"}, + {file = "regex-2023.6.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:10cb847aeb1728412c666ab2e2000ba6f174f25b2bdc7292e7dd71b16db07568"}, + {file = "regex-2023.6.3-cp310-cp310-win32.whl", hash = "sha256:dbbbfce33cd98f97f6bffb17801b0576e653f4fdb1d399b2ea89638bc8d08ae1"}, + {file = "regex-2023.6.3-cp310-cp310-win_amd64.whl", hash = "sha256:c5f8037000eb21e4823aa485149f2299eb589f8d1fe4b448036d230c3f4e68e0"}, + {file = "regex-2023.6.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c123f662be8ec5ab4ea72ea300359023a5d1df095b7ead76fedcd8babbedf969"}, + {file = "regex-2023.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9edcbad1f8a407e450fbac88d89e04e0b99a08473f666a3f3de0fd292badb6aa"}, + {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcba6dae7de533c876255317c11f3abe4907ba7d9aa15d13e3d9710d4315ec0e"}, + {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29cdd471ebf9e0f2fb3cac165efedc3c58db841d83a518b082077e612d3ee5df"}, + {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:12b74fbbf6cbbf9dbce20eb9b5879469e97aeeaa874145517563cca4029db65c"}, + {file = "regex-2023.6.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c29ca1bd61b16b67be247be87390ef1d1ef702800f91fbd1991f5c4421ebae8"}, + {file = "regex-2023.6.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d77f09bc4b55d4bf7cc5eba785d87001d6757b7c9eec237fe2af57aba1a071d9"}, + {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ea353ecb6ab5f7e7d2f4372b1e779796ebd7b37352d290096978fea83c4dba0c"}, + {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:10590510780b7541969287512d1b43f19f965c2ece6c9b1c00fc367b29d8dce7"}, + {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:e2fbd6236aae3b7f9d514312cdb58e6494ee1c76a9948adde6eba33eb1c4264f"}, + {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:6b2675068c8b56f6bfd5a2bda55b8accbb96c02fd563704732fd1c95e2083461"}, + {file = "regex-2023.6.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:74419d2b50ecb98360cfaa2974da8689cb3b45b9deff0dcf489c0d333bcc1477"}, + {file = "regex-2023.6.3-cp311-cp311-win32.whl", hash = "sha256:fb5ec16523dc573a4b277663a2b5a364e2099902d3944c9419a40ebd56a118f9"}, + {file = "regex-2023.6.3-cp311-cp311-win_amd64.whl", hash = "sha256:09e4a1a6acc39294a36b7338819b10baceb227f7f7dbbea0506d419b5a1dd8af"}, + {file = "regex-2023.6.3-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0654bca0cdf28a5956c83839162692725159f4cda8d63e0911a2c0dc76166525"}, + {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:463b6a3ceb5ca952e66550a4532cef94c9a0c80dc156c4cc343041951aec1697"}, + {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:87b2a5bb5e78ee0ad1de71c664d6eb536dc3947a46a69182a90f4410f5e3f7dd"}, + {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6343c6928282c1f6a9db41f5fd551662310e8774c0e5ebccb767002fcf663ca9"}, + {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6192d5af2ccd2a38877bfef086d35e6659566a335b1492786ff254c168b1693"}, + {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74390d18c75054947e4194019077e243c06fbb62e541d8817a0fa822ea310c14"}, + {file = "regex-2023.6.3-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:742e19a90d9bb2f4a6cf2862b8b06dea5e09b96c9f2df1779e53432d7275331f"}, + {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:8abbc5d54ea0ee80e37fef009e3cec5dafd722ed3c829126253d3e22f3846f1e"}, + {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:c2b867c17a7a7ae44c43ebbeb1b5ff406b3e8d5b3e14662683e5e66e6cc868d3"}, + {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_ppc64le.whl", hash = "sha256:d831c2f8ff278179705ca59f7e8524069c1a989e716a1874d6d1aab6119d91d1"}, + {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_s390x.whl", hash = "sha256:ee2d1a9a253b1729bb2de27d41f696ae893507c7db224436abe83ee25356f5c1"}, + {file = "regex-2023.6.3-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:61474f0b41fe1a80e8dfa70f70ea1e047387b7cd01c85ec88fa44f5d7561d787"}, + {file = "regex-2023.6.3-cp36-cp36m-win32.whl", hash = "sha256:0b71e63226e393b534105fcbdd8740410dc6b0854c2bfa39bbda6b0d40e59a54"}, + {file = "regex-2023.6.3-cp36-cp36m-win_amd64.whl", hash = "sha256:bbb02fd4462f37060122e5acacec78e49c0fbb303c30dd49c7f493cf21fc5b27"}, + {file = "regex-2023.6.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b862c2b9d5ae38a68b92e215b93f98d4c5e9454fa36aae4450f61dd33ff48487"}, + {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", 
hash = "sha256:976d7a304b59ede34ca2921305b57356694f9e6879db323fd90a80f865d355a3"}, + {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:83320a09188e0e6c39088355d423aa9d056ad57a0b6c6381b300ec1a04ec3d16"}, + {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9427a399501818a7564f8c90eced1e9e20709ece36be701f394ada99890ea4b3"}, + {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7178bbc1b2ec40eaca599d13c092079bf529679bf0371c602edaa555e10b41c3"}, + {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:837328d14cde912af625d5f303ec29f7e28cdab588674897baafaf505341f2fc"}, + {file = "regex-2023.6.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d44dc13229905ae96dd2ae2dd7cebf824ee92bc52e8cf03dcead37d926da019"}, + {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:d54af539295392611e7efbe94e827311eb8b29668e2b3f4cadcfe6f46df9c777"}, + {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:7117d10690c38a622e54c432dfbbd3cbd92f09401d622902c32f6d377e2300ee"}, + {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bb60b503ec8a6e4e3e03a681072fa3a5adcbfa5479fa2d898ae2b4a8e24c4591"}, + {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:65ba8603753cec91c71de423a943ba506363b0e5c3fdb913ef8f9caa14b2c7e0"}, + {file = "regex-2023.6.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:271f0bdba3c70b58e6f500b205d10a36fb4b58bd06ac61381b68de66442efddb"}, + {file = "regex-2023.6.3-cp37-cp37m-win32.whl", hash = "sha256:9beb322958aaca059f34975b0df135181f2e5d7a13b84d3e0e45434749cb20f7"}, + {file = "regex-2023.6.3-cp37-cp37m-win_amd64.whl", hash = "sha256:fea75c3710d4f31389eed3c02f62d0b66a9da282521075061ce875eb5300cf23"}, + {file = "regex-2023.6.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8f56fcb7ff7bf7404becdfc60b1e81a6d0561807051fd2f1860b0d0348156a07"}, + {file = "regex-2023.6.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d2da3abc88711bce7557412310dfa50327d5769a31d1c894b58eb256459dc289"}, + {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a99b50300df5add73d307cf66abea093304a07eb017bce94f01e795090dea87c"}, + {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5708089ed5b40a7b2dc561e0c8baa9535b77771b64a8330b684823cfd5116036"}, + {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:687ea9d78a4b1cf82f8479cab23678aff723108df3edeac098e5b2498879f4a7"}, + {file = "regex-2023.6.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4d3850beab9f527f06ccc94b446c864059c57651b3f911fddb8d9d3ec1d1b25d"}, + {file = "regex-2023.6.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e8915cc96abeb8983cea1df3c939e3c6e1ac778340c17732eb63bb96247b91d2"}, + {file = "regex-2023.6.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:841d6e0e5663d4c7b4c8099c9997be748677d46cbf43f9f471150e560791f7ff"}, + {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:9edce5281f965cf135e19840f4d93d55b3835122aa76ccacfd389e880ba4cf82"}, + {file = 
"regex-2023.6.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b956231ebdc45f5b7a2e1f90f66a12be9610ce775fe1b1d50414aac1e9206c06"}, + {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:36efeba71c6539d23c4643be88295ce8c82c88bbd7c65e8a24081d2ca123da3f"}, + {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:cf67ca618b4fd34aee78740bea954d7c69fdda419eb208c2c0c7060bb822d747"}, + {file = "regex-2023.6.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b4598b1897837067a57b08147a68ac026c1e73b31ef6e36deeeb1fa60b2933c9"}, + {file = "regex-2023.6.3-cp38-cp38-win32.whl", hash = "sha256:f415f802fbcafed5dcc694c13b1292f07fe0befdb94aa8a52905bd115ff41e88"}, + {file = "regex-2023.6.3-cp38-cp38-win_amd64.whl", hash = "sha256:d4f03bb71d482f979bda92e1427f3ec9b220e62a7dd337af0aa6b47bf4498f72"}, + {file = "regex-2023.6.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ccf91346b7bd20c790310c4147eee6ed495a54ddb6737162a36ce9dbef3e4751"}, + {file = "regex-2023.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b28f5024a3a041009eb4c333863d7894d191215b39576535c6734cd88b0fcb68"}, + {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e0bb18053dfcfed432cc3ac632b5e5e5c5b7e55fb3f8090e867bfd9b054dbcbf"}, + {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a5bfb3004f2144a084a16ce19ca56b8ac46e6fd0651f54269fc9e230edb5e4a"}, + {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5c6b48d0fa50d8f4df3daf451be7f9689c2bde1a52b1225c5926e3f54b6a9ed1"}, + {file = "regex-2023.6.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:051da80e6eeb6e239e394ae60704d2b566aa6a7aed6f2890a7967307267a5dc6"}, + {file = "regex-2023.6.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4c3b7fa4cdaa69268748665a1a6ff70c014d39bb69c50fda64b396c9116cf77"}, + {file = "regex-2023.6.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:457b6cce21bee41ac292d6753d5e94dcbc5c9e3e3a834da285b0bde7aa4a11e9"}, + {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:aad51907d74fc183033ad796dd4c2e080d1adcc4fd3c0fd4fd499f30c03011cd"}, + {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:0385e73da22363778ef2324950e08b689abdf0b108a7d8decb403ad7f5191938"}, + {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:c6a57b742133830eec44d9b2290daf5cbe0a2f1d6acee1b3c7b1c7b2f3606df7"}, + {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:3e5219bf9e75993d73ab3d25985c857c77e614525fac9ae02b1bebd92f7cecac"}, + {file = "regex-2023.6.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:e5087a3c59eef624a4591ef9eaa6e9a8d8a94c779dade95d27c0bc24650261cd"}, + {file = "regex-2023.6.3-cp39-cp39-win32.whl", hash = "sha256:20326216cc2afe69b6e98528160b225d72f85ab080cbdf0b11528cbbaba2248f"}, + {file = "regex-2023.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:bdff5eab10e59cf26bc479f565e25ed71a7d041d1ded04ccf9aee1d9f208487a"}, + {file = "regex-2023.6.3.tar.gz", hash = "sha256:72d1a25bf36d2050ceb35b517afe13864865268dfb45910e2e17a84be6cbfeb0"}, +] + +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "requests-oauthlib" +version = "1.3.1" +description = "OAuthlib authentication support for Requests." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "requests-oauthlib-1.3.1.tar.gz", hash = "sha256:75beac4a47881eeb94d5ea5d6ad31ef88856affe2332b9aafb52c6452ccf0d7a"}, + {file = "requests_oauthlib-1.3.1-py2.py3-none-any.whl", hash = "sha256:2577c501a2fb8d05a304c09d090d6e47c306fef15809d102b327cf8364bddab5"}, +] + +[package.dependencies] +oauthlib = ">=3.0.0" +requests = ">=2.0.0" + +[package.extras] +rsa = ["oauthlib[signedtoken] (>=3.0.0)"] + +[[package]] +name = "resampy" +version = "0.4.2" +description = "Efficient signal resampling" +optional = false +python-versions = "*" +files = [ + {file = "resampy-0.4.2-py3-none-any.whl", hash = "sha256:4340b6c4e685a865621dfcf016e2a3dd49d865446b6025e30fe88567f22e052e"}, + {file = "resampy-0.4.2.tar.gz", hash = "sha256:0a469e6ddb89956f4fd6c88728300e4bbd186fae569dd4fd17dae51a91cbaa15"}, +] + +[package.dependencies] +numba = ">=0.53" +numpy = ">=1.17" + +[package.extras] +design = ["optuna (>=2.10.0)"] +docs = ["numpydoc", "sphinx (!=1.3.1)"] +tests = ["pytest (<8)", "pytest-cov", "scipy (>=1.0)"] + +[[package]] +name = "rpds-py" +version = "0.9.2" +description = "Python bindings to Rust's persistent data structures (rpds)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "rpds_py-0.9.2-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:ab6919a09c055c9b092798ce18c6c4adf49d24d4d9e43a92b257e3f2548231e7"}, + {file = "rpds_py-0.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d55777a80f78dd09410bd84ff8c95ee05519f41113b2df90a69622f5540c4f8b"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a216b26e5af0a8e265d4efd65d3bcec5fba6b26909014effe20cd302fd1138fa"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:29cd8bfb2d716366a035913ced99188a79b623a3512292963d84d3e06e63b496"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:44659b1f326214950a8204a248ca6199535e73a694be8d3e0e869f820767f12f"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:745f5a43fdd7d6d25a53ab1a99979e7f8ea419dfefebcab0a5a1e9095490ee5e"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a987578ac5214f18b99d1f2a3851cba5b09f4a689818a106c23dbad0dfeb760f"}, + {file = "rpds_py-0.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bf4151acb541b6e895354f6ff9ac06995ad9e4175cbc6d30aaed08856558201f"}, + {file = "rpds_py-0.9.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:03421628f0dc10a4119d714a17f646e2837126a25ac7a256bdf7c3943400f67f"}, + {file = "rpds_py-0.9.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:13b602dc3e8dff3063734f02dcf05111e887f301fdda74151a93dbbc249930fe"}, + {file = 
"rpds_py-0.9.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:fae5cb554b604b3f9e2c608241b5d8d303e410d7dfb6d397c335f983495ce7f6"}, + {file = "rpds_py-0.9.2-cp310-none-win32.whl", hash = "sha256:47c5f58a8e0c2c920cc7783113df2fc4ff12bf3a411d985012f145e9242a2764"}, + {file = "rpds_py-0.9.2-cp310-none-win_amd64.whl", hash = "sha256:4ea6b73c22d8182dff91155af018b11aac9ff7eca085750455c5990cb1cfae6e"}, + {file = "rpds_py-0.9.2-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:e564d2238512c5ef5e9d79338ab77f1cbbda6c2d541ad41b2af445fb200385e3"}, + {file = "rpds_py-0.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f411330a6376fb50e5b7a3e66894e4a39e60ca2e17dce258d53768fea06a37bd"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e7521f5af0233e89939ad626b15278c71b69dc1dfccaa7b97bd4cdf96536bb7"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8d3335c03100a073883857e91db9f2e0ef8a1cf42dc0369cbb9151c149dbbc1b"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d25b1c1096ef0447355f7293fbe9ad740f7c47ae032c2884113f8e87660d8f6e"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6a5d3fbd02efd9cf6a8ffc2f17b53a33542f6b154e88dd7b42ef4a4c0700fdad"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c5934e2833afeaf36bd1eadb57256239785f5af0220ed8d21c2896ec4d3a765f"}, + {file = "rpds_py-0.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:095b460e117685867d45548fbd8598a8d9999227e9061ee7f012d9d264e6048d"}, + {file = "rpds_py-0.9.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:91378d9f4151adc223d584489591dbb79f78814c0734a7c3bfa9c9e09978121c"}, + {file = "rpds_py-0.9.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:24a81c177379300220e907e9b864107614b144f6c2a15ed5c3450e19cf536fae"}, + {file = "rpds_py-0.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:de0b6eceb46141984671802d412568d22c6bacc9b230174f9e55fc72ef4f57de"}, + {file = "rpds_py-0.9.2-cp311-none-win32.whl", hash = "sha256:700375326ed641f3d9d32060a91513ad668bcb7e2cffb18415c399acb25de2ab"}, + {file = "rpds_py-0.9.2-cp311-none-win_amd64.whl", hash = "sha256:0766babfcf941db8607bdaf82569ec38107dbb03c7f0b72604a0b346b6eb3298"}, + {file = "rpds_py-0.9.2-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:b1440c291db3f98a914e1afd9d6541e8fc60b4c3aab1a9008d03da4651e67386"}, + {file = "rpds_py-0.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0f2996fbac8e0b77fd67102becb9229986396e051f33dbceada3debaacc7033f"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f30d205755566a25f2ae0382944fcae2f350500ae4df4e795efa9e850821d82"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:159fba751a1e6b1c69244e23ba6c28f879a8758a3e992ed056d86d74a194a0f3"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a1f044792e1adcea82468a72310c66a7f08728d72a244730d14880cd1dabe36b"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9251eb8aa82e6cf88510530b29eef4fac825a2b709baf5b94a6094894f252387"}, + {file = "rpds_py-0.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01899794b654e616c8625b194ddd1e5b51ef5b60ed61baa7a2d9c2ad7b2a4238"}, + {file = 
"rpds_py-0.9.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b0c43f8ae8f6be1d605b0465671124aa8d6a0e40f1fb81dcea28b7e3d87ca1e1"}, + {file = "rpds_py-0.9.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:207f57c402d1f8712618f737356e4b6f35253b6d20a324d9a47cb9f38ee43a6b"}, + {file = "rpds_py-0.9.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b52e7c5ae35b00566d244ffefba0f46bb6bec749a50412acf42b1c3f402e2c90"}, + {file = "rpds_py-0.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:978fa96dbb005d599ec4fd9ed301b1cc45f1a8f7982d4793faf20b404b56677d"}, + {file = "rpds_py-0.9.2-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:6aa8326a4a608e1c28da191edd7c924dff445251b94653988efb059b16577a4d"}, + {file = "rpds_py-0.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:aad51239bee6bff6823bbbdc8ad85136c6125542bbc609e035ab98ca1e32a192"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4bd4dc3602370679c2dfb818d9c97b1137d4dd412230cfecd3c66a1bf388a196"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dd9da77c6ec1f258387957b754f0df60766ac23ed698b61941ba9acccd3284d1"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:190ca6f55042ea4649ed19c9093a9be9d63cd8a97880106747d7147f88a49d18"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:876bf9ed62323bc7dcfc261dbc5572c996ef26fe6406b0ff985cbcf460fc8a4c"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa2818759aba55df50592ecbc95ebcdc99917fa7b55cc6796235b04193eb3c55"}, + {file = "rpds_py-0.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9ea4d00850ef1e917815e59b078ecb338f6a8efda23369677c54a5825dbebb55"}, + {file = "rpds_py-0.9.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:5855c85eb8b8a968a74dc7fb014c9166a05e7e7a8377fb91d78512900aadd13d"}, + {file = "rpds_py-0.9.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:14c408e9d1a80dcb45c05a5149e5961aadb912fff42ca1dd9b68c0044904eb32"}, + {file = "rpds_py-0.9.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:65a0583c43d9f22cb2130c7b110e695fff834fd5e832a776a107197e59a1898e"}, + {file = "rpds_py-0.9.2-cp38-none-win32.whl", hash = "sha256:71f2f7715935a61fa3e4ae91d91b67e571aeb5cb5d10331ab681256bda2ad920"}, + {file = "rpds_py-0.9.2-cp38-none-win_amd64.whl", hash = "sha256:674c704605092e3ebbbd13687b09c9f78c362a4bc710343efe37a91457123044"}, + {file = "rpds_py-0.9.2-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:07e2c54bef6838fa44c48dfbc8234e8e2466d851124b551fc4e07a1cfeb37260"}, + {file = "rpds_py-0.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f7fdf55283ad38c33e35e2855565361f4bf0abd02470b8ab28d499c663bc5d7c"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:890ba852c16ace6ed9f90e8670f2c1c178d96510a21b06d2fa12d8783a905193"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:50025635ba8b629a86d9d5474e650da304cb46bbb4d18690532dd79341467846"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:517cbf6e67ae3623c5127206489d69eb2bdb27239a3c3cc559350ef52a3bbf0b"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0836d71ca19071090d524739420a61580f3f894618d10b666cf3d9a1688355b1"}, + {file = 
"rpds_py-0.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c439fd54b2b9053717cca3de9583be6584b384d88d045f97d409f0ca867d80f"}, + {file = "rpds_py-0.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f68996a3b3dc9335037f82754f9cdbe3a95db42bde571d8c3be26cc6245f2324"}, + {file = "rpds_py-0.9.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7d68dc8acded354c972116f59b5eb2e5864432948e098c19fe6994926d8e15c3"}, + {file = "rpds_py-0.9.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:f963c6b1218b96db85fc37a9f0851eaf8b9040aa46dec112611697a7023da535"}, + {file = "rpds_py-0.9.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5a46859d7f947061b4010e554ccd1791467d1b1759f2dc2ec9055fa239f1bc26"}, + {file = "rpds_py-0.9.2-cp39-none-win32.whl", hash = "sha256:e07e5dbf8a83c66783a9fe2d4566968ea8c161199680e8ad38d53e075df5f0d0"}, + {file = "rpds_py-0.9.2-cp39-none-win_amd64.whl", hash = "sha256:682726178138ea45a0766907957b60f3a1bf3acdf212436be9733f28b6c5af3c"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:196cb208825a8b9c8fc360dc0f87993b8b260038615230242bf18ec84447c08d"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:c7671d45530fcb6d5e22fd40c97e1e1e01965fc298cbda523bb640f3d923b387"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83b32f0940adec65099f3b1c215ef7f1d025d13ff947975a055989cb7fd019a4"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7f67da97f5b9eac838b6980fc6da268622e91f8960e083a34533ca710bec8611"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:03975db5f103997904c37e804e5f340c8fdabbb5883f26ee50a255d664eed58c"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:987b06d1cdb28f88a42e4fb8a87f094e43f3c435ed8e486533aea0bf2e53d931"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c861a7e4aef15ff91233751619ce3a3d2b9e5877e0fcd76f9ea4f6847183aa16"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:02938432352359805b6da099c9c95c8a0547fe4b274ce8f1a91677401bb9a45f"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:ef1f08f2a924837e112cba2953e15aacfccbbfcd773b4b9b4723f8f2ddded08e"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:35da5cc5cb37c04c4ee03128ad59b8c3941a1e5cd398d78c37f716f32a9b7f67"}, + {file = "rpds_py-0.9.2-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:141acb9d4ccc04e704e5992d35472f78c35af047fa0cfae2923835d153f091be"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:79f594919d2c1a0cc17d1988a6adaf9a2f000d2e1048f71f298b056b1018e872"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:a06418fe1155e72e16dddc68bb3780ae44cebb2912fbd8bb6ff9161de56e1798"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b2eb034c94b0b96d5eddb290b7b5198460e2d5d0c421751713953a9c4e47d10"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8b08605d248b974eb02f40bdcd1a35d3924c83a2a5e8f5d0fa5af852c4d960af"}, + {file = 
"rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a0805911caedfe2736935250be5008b261f10a729a303f676d3d5fea6900c96a"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab2299e3f92aa5417d5e16bb45bb4586171c1327568f638e8453c9f8d9e0f020"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c8d7594e38cf98d8a7df25b440f684b510cf4627fe038c297a87496d10a174f"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8b9ec12ad5f0a4625db34db7e0005be2632c1013b253a4a60e8302ad4d462afd"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:1fcdee18fea97238ed17ab6478c66b2095e4ae7177e35fb71fbe561a27adf620"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:933a7d5cd4b84f959aedeb84f2030f0a01d63ae6cf256629af3081cf3e3426e8"}, + {file = "rpds_py-0.9.2-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:686ba516e02db6d6f8c279d1641f7067ebb5dc58b1d0536c4aaebb7bf01cdc5d"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:0173c0444bec0a3d7d848eaeca2d8bd32a1b43f3d3fde6617aac3731fa4be05f"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:d576c3ef8c7b2d560e301eb33891d1944d965a4d7a2eacb6332eee8a71827db6"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed89861ee8c8c47d6beb742a602f912b1bb64f598b1e2f3d758948721d44d468"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1054a08e818f8e18910f1bee731583fe8f899b0a0a5044c6e680ceea34f93876"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:99e7c4bb27ff1aab90dcc3e9d37ee5af0231ed98d99cb6f5250de28889a3d502"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c545d9d14d47be716495076b659db179206e3fd997769bc01e2d550eeb685596"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9039a11bca3c41be5a58282ed81ae422fa680409022b996032a43badef2a3752"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fb39aca7a64ad0c9490adfa719dbeeb87d13be137ca189d2564e596f8ba32c07"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:2d8b3b3a2ce0eaa00c5bbbb60b6713e94e7e0becab7b3db6c5c77f979e8ed1f1"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:99b1c16f732b3a9971406fbfe18468592c5a3529585a45a35adbc1389a529a03"}, + {file = "rpds_py-0.9.2-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:c27ee01a6c3223025f4badd533bea5e87c988cb0ba2811b690395dfe16088cfe"}, + {file = "rpds_py-0.9.2.tar.gz", hash = "sha256:8d70e8f14900f2657c249ea4def963bed86a29b81f81f5b76b5a9215680de945"}, +] + +[[package]] +name = "rsa" +version = "4.9" +description = "Pure-Python RSA implementation" +optional = false +python-versions = ">=3.6,<4" +files = [ + {file = "rsa-4.9-py3-none-any.whl", hash = "sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7"}, + {file = "rsa-4.9.tar.gz", hash = "sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21"}, +] + +[package.dependencies] +pyasn1 = ">=0.1.3" + +[[package]] +name = "sacrebleu" +version = "2.3.1" +description = 
"Hassle-free computation of shareable, comparable, and reproducible BLEU, chrF, and TER scores" +optional = false +python-versions = ">=3.6" +files = [ + {file = "sacrebleu-2.3.1-py3-none-any.whl", hash = "sha256:352227b8ca9e04ed509266d1fee6c8cff0ea1417c429f8c684645ad2db8b02e7"}, + {file = "sacrebleu-2.3.1.tar.gz", hash = "sha256:7969b294f15dae84d80fb2b76d30c83b245f49f4ecb1cac79acb553eb93cb537"}, +] + +[package.dependencies] +colorama = "*" +lxml = "*" +numpy = ">=1.17" +portalocker = "*" +regex = "*" +tabulate = ">=0.8.9" + +[package.extras] +ja = ["ipadic (>=1.0,<2.0)", "mecab-python3 (==1.0.5)"] +ko = ["mecab-ko (==1.0.0)", "mecab-ko-dic (>=1.0,<2.0)"] + +[[package]] +name = "scikit-learn" +version = "1.3.0" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scikit-learn-1.3.0.tar.gz", hash = "sha256:8be549886f5eda46436b6e555b0e4873b4f10aa21c07df45c4bc1735afbccd7a"}, + {file = "scikit_learn-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:981287869e576d42c682cf7ca96af0c6ac544ed9316328fd0d9292795c742cf5"}, + {file = "scikit_learn-1.3.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:436aaaae2c916ad16631142488e4c82f4296af2404f480e031d866863425d2a2"}, + {file = "scikit_learn-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7e28d8fa47a0b30ae1bd7a079519dd852764e31708a7804da6cb6f8b36e3630"}, + {file = "scikit_learn-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ae80c08834a473d08a204d966982a62e11c976228d306a2648c575e3ead12111"}, + {file = "scikit_learn-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:552fd1b6ee22900cf1780d7386a554bb96949e9a359999177cf30211e6b20df6"}, + {file = "scikit_learn-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:79970a6d759eb00a62266a31e2637d07d2d28446fca8079cf9afa7c07b0427f8"}, + {file = "scikit_learn-1.3.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:850a00b559e636b23901aabbe79b73dc604b4e4248ba9e2d6e72f95063765603"}, + {file = "scikit_learn-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee04835fb016e8062ee9fe9074aef9b82e430504e420bff51e3e5fffe72750ca"}, + {file = "scikit_learn-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d953531f5d9f00c90c34fa3b7d7cfb43ecff4c605dac9e4255a20b114a27369"}, + {file = "scikit_learn-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:151ac2bf65ccf363664a689b8beafc9e6aae36263db114b4ca06fbbbf827444a"}, + {file = "scikit_learn-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6a885a9edc9c0a341cab27ec4f8a6c58b35f3d449c9d2503a6fd23e06bbd4f6a"}, + {file = "scikit_learn-1.3.0-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:9877af9c6d1b15486e18a94101b742e9d0d2f343d35a634e337411ddb57783f3"}, + {file = "scikit_learn-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c470f53cea065ff3d588050955c492793bb50c19a92923490d18fcb637f6383a"}, + {file = "scikit_learn-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd6e2d7389542eae01077a1ee0318c4fec20c66c957f45c7aac0c6eb0fe3c612"}, + {file = "scikit_learn-1.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:3a11936adbc379a6061ea32fa03338d4ca7248d86dd507c81e13af428a5bc1db"}, + {file = "scikit_learn-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:998d38fcec96584deee1e79cd127469b3ad6fefd1ea6c2dfc54e8db367eb396b"}, + {file = "scikit_learn-1.3.0-cp39-cp39-macosx_12_0_arm64.whl", hash 
= "sha256:ded35e810438a527e17623ac6deae3b360134345b7c598175ab7741720d7ffa7"}, + {file = "scikit_learn-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e8102d5036e28d08ab47166b48c8d5e5810704daecf3a476a4282d562be9a28"}, + {file = "scikit_learn-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7617164951c422747e7c32be4afa15d75ad8044f42e7d70d3e2e0429a50e6718"}, + {file = "scikit_learn-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:1d54fb9e6038284548072df22fd34777e434153f7ffac72c8596f2d6987110dd"}, +] + +[package.dependencies] +joblib = ">=1.1.1" +numpy = ">=1.17.3" +scipy = ">=1.5.0" +threadpoolctl = ">=2.0.0" + +[package.extras] +benchmark = ["matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "pandas (>=1.0.5)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.1.3)", "memory-profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)", "sphinx (>=6.0.0)", "sphinx-copybutton (>=0.5.2)", "sphinx-gallery (>=0.10.1)", "sphinx-prompt (>=1.3.0)", "sphinxext-opengraph (>=0.4.2)"] +examples = ["matplotlib (>=3.1.3)", "pandas (>=1.0.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.16.2)", "seaborn (>=0.9.0)"] +tests = ["black (>=23.3.0)", "matplotlib (>=3.1.3)", "mypy (>=1.3)", "numpydoc (>=1.2.0)", "pandas (>=1.0.5)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.0.272)", "scikit-image (>=0.16.2)"] + +[[package]] +name = "scipy" +version = "1.9.3" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "scipy-1.9.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1884b66a54887e21addf9c16fb588720a8309a57b2e258ae1c7986d4444d3bc0"}, + {file = "scipy-1.9.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:83b89e9586c62e787f5012e8475fbb12185bafb996a03257e9675cd73d3736dd"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a72d885fa44247f92743fc20732ae55564ff2a519e8302fb7e18717c5355a8b"}, + {file = "scipy-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d01e1dd7b15bd2449c8bfc6b7cc67d630700ed655654f0dfcf121600bad205c9"}, + {file = "scipy-1.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:68239b6aa6f9c593da8be1509a05cb7f9efe98b80f43a5861cd24c7557e98523"}, + {file = "scipy-1.9.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b41bc822679ad1c9a5f023bc93f6d0543129ca0f37c1ce294dd9d386f0a21096"}, + {file = "scipy-1.9.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:90453d2b93ea82a9f434e4e1cba043e779ff67b92f7a0e85d05d286a3625df3c"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83c06e62a390a9167da60bedd4575a14c1f58ca9dfde59830fc42e5197283dab"}, + {file = "scipy-1.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abaf921531b5aeaafced90157db505e10345e45038c39e5d9b6c7922d68085cb"}, + {file = "scipy-1.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:06d2e1b4c491dc7d8eacea139a1b0b295f74e1a1a0f704c375028f8320d16e31"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:5a04cd7d0d3eff6ea4719371cbc44df31411862b9646db617c99718ff68d4840"}, + {file = "scipy-1.9.3-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:545c83ffb518094d8c9d83cce216c0c32f8c04aaf28b92cc8283eda0685162d5"}, + {file = 
"scipy-1.9.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d54222d7a3ba6022fdf5773931b5d7c56efe41ede7f7128c7b1637700409108"}, + {file = "scipy-1.9.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cff3a5295234037e39500d35316a4c5794739433528310e117b8a9a0c76d20fc"}, + {file = "scipy-1.9.3-cp38-cp38-win_amd64.whl", hash = "sha256:2318bef588acc7a574f5bfdff9c172d0b1bf2c8143d9582e05f878e580a3781e"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d644a64e174c16cb4b2e41dfea6af722053e83d066da7343f333a54dae9bc31c"}, + {file = "scipy-1.9.3-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:da8245491d73ed0a994ed9c2e380fd058ce2fa8a18da204681f2fe1f57f98f95"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4db5b30849606a95dcf519763dd3ab6fe9bd91df49eba517359e450a7d80ce2e"}, + {file = "scipy-1.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c68db6b290cbd4049012990d7fe71a2abd9ffbe82c0056ebe0f01df8be5436b0"}, + {file = "scipy-1.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:5b88e6d91ad9d59478fafe92a7c757d00c59e3bdc3331be8ada76a4f8d683f58"}, + {file = "scipy-1.9.3.tar.gz", hash = "sha256:fbc5c05c85c1a02be77b1ff591087c83bc44579c6d2bd9fb798bb64ea5e1a027"}, +] + +[package.dependencies] +numpy = ">=1.18.5,<1.26.0" + +[package.extras] +dev = ["flake8", "mypy", "pycodestyle", "typing_extensions"] +doc = ["matplotlib (>2)", "numpydoc", "pydata-sphinx-theme (==0.9.0)", "sphinx (!=4.1.0)", "sphinx-panels (>=0.5.2)", "sphinx-tabs"] +test = ["asv", "gmpy2", "mpmath", "pytest", "pytest-cov", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + +[[package]] +name = "semantic-version" +version = "2.10.0" +description = "A library implementing the 'SemVer' scheme." 
+optional = false +python-versions = ">=2.7" +files = [ + {file = "semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177"}, + {file = "semantic_version-2.10.0.tar.gz", hash = "sha256:bdabb6d336998cbb378d4b9db3a4b56a1e3235701dc05ea2690d9a997ed5041c"}, +] + +[package.extras] +dev = ["Django (>=1.11)", "check-manifest", "colorama (<=0.4.1)", "coverage", "flake8", "nose2", "readme-renderer (<25.0)", "tox", "wheel", "zest.releaser[recommended]"] +doc = ["Sphinx", "sphinx-rtd-theme"] + +[[package]] +name = "setuptools" +version = "68.0.0" +description = "Easily download, build, install, upgrade, and uninstall Python packages" +optional = false +python-versions = ">=3.7" +files = [ + {file = "setuptools-68.0.0-py3-none-any.whl", hash = "sha256:11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f"}, + {file = "setuptools-68.0.0.tar.gz", hash = "sha256:baf1fdb41c6da4cd2eae722e135500da913332ab3f2f5c7d33af9b492acb5235"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pip-run (>=8.8)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] + +[[package]] +name = "six" +version = "1.16.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, + {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, +] + +[[package]] +name = "soundfile" +version = "0.12.1" +description = "An audio library based on libsndfile, CFFI and NumPy" +optional = false +python-versions = "*" +files = [ + {file = "soundfile-0.12.1-py2.py3-none-any.whl", hash = "sha256:828a79c2e75abab5359f780c81dccd4953c45a2c4cd4f05ba3e233ddf984b882"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_10_9_x86_64.whl", hash = "sha256:d922be1563ce17a69582a352a86f28ed8c9f6a8bc951df63476ffc310c064bfa"}, + {file = "soundfile-0.12.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:bceaab5c4febb11ea0554566784bcf4bc2e3977b53946dda2b12804b4fe524a8"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_17_x86_64.whl", hash = 
"sha256:2dc3685bed7187c072a46ab4ffddd38cef7de9ae5eb05c03df2ad569cf4dacbc"}, + {file = "soundfile-0.12.1-py2.py3-none-manylinux_2_31_x86_64.whl", hash = "sha256:074247b771a181859d2bc1f98b5ebf6d5153d2c397b86ee9e29ba602a8dfe2a6"}, + {file = "soundfile-0.12.1-py2.py3-none-win32.whl", hash = "sha256:59dfd88c79b48f441bbf6994142a19ab1de3b9bb7c12863402c2bc621e49091a"}, + {file = "soundfile-0.12.1-py2.py3-none-win_amd64.whl", hash = "sha256:0d86924c00b62552b650ddd28af426e3ff2d4dc2e9047dae5b3d8452e0a49a77"}, + {file = "soundfile-0.12.1.tar.gz", hash = "sha256:e8e1017b2cf1dda767aef19d2fd9ee5ebe07e050d430f77a0a7c66ba08b8cdae"}, +] + +[package.dependencies] +cffi = ">=1.0" + +[package.extras] +numpy = ["numpy"] + +[[package]] +name = "starlette" +version = "0.27.0" +description = "The little ASGI library that shines." +optional = false +python-versions = ">=3.7" +files = [ + {file = "starlette-0.27.0-py3-none-any.whl", hash = "sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91"}, + {file = "starlette-0.27.0.tar.gz", hash = "sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75"}, +] + +[package.dependencies] +anyio = ">=3.4.0,<5" +typing-extensions = {version = ">=3.10.0", markers = "python_version < \"3.10\""} + +[package.extras] +full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] + +[[package]] +name = "sympy" +version = "1.12" +description = "Computer algebra system (CAS) in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "sympy-1.12-py3-none-any.whl", hash = "sha256:c3588cd4295d0c0f603d0f2ae780587e64e2efeedb3521e46b9bb1d08d184fa5"}, + {file = "sympy-1.12.tar.gz", hash = "sha256:ebf595c8dac3e0fdc4152c51878b498396ec7f30e7a914d6071e674d49420fb8"}, +] + +[package.dependencies] +mpmath = ">=0.19" + +[[package]] +name = "tabulate" +version = "0.9.0" +description = "Pretty-print tabular data" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f"}, + {file = "tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c"}, +] + +[package.extras] +widechars = ["wcwidth"] + +[[package]] +name = "tensorboard" +version = "2.13.0" +description = "TensorBoard lets you watch Tensors Flow" +optional = false +python-versions = ">=3.8" +files = [ + {file = "tensorboard-2.13.0-py3-none-any.whl", hash = "sha256:ab69961ebddbddc83f5fa2ff9233572bdad5b883778c35e4fe94bf1798bd8481"}, +] + +[package.dependencies] +absl-py = ">=0.4" +google-auth = ">=1.6.3,<3" +google-auth-oauthlib = ">=0.5,<1.1" +grpcio = ">=1.48.2" +markdown = ">=2.6.8" +numpy = ">=1.12.0" +protobuf = ">=3.19.6" +requests = ">=2.21.0,<3" +setuptools = ">=41.0.0" +tensorboard-data-server = ">=0.7.0,<0.8.0" +werkzeug = ">=1.0.1" +wheel = ">=0.26" + +[[package]] +name = "tensorboard-data-server" +version = "0.7.1" +description = "Fast data loading for TensorBoard" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tensorboard_data_server-0.7.1-py3-none-any.whl", hash = "sha256:9938bd39f5041797b33921066fba0eab03a0dd10d1887a05e62ae58841ad4c3f"}, + {file = "tensorboard_data_server-0.7.1-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:be8d016a1aa394e6198280d4a3dc37898f56467310c5f5e617cac10a783e055a"}, + {file = "tensorboard_data_server-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:255c02b7f5b03dd5c0a88c928e563441ff39e1d4b4a234cdbe09f016e53d9594"}, +] + +[[package]] +name = 
"tensorboard-plugin-wit" +version = "1.8.1" +description = "What-If Tool TensorBoard plugin." +optional = false +python-versions = "*" +files = [ + {file = "tensorboard_plugin_wit-1.8.1-py3-none-any.whl", hash = "sha256:ff26bdd583d155aa951ee3b152b3d0cffae8005dc697f72b44a8e8c2a77a8cbe"}, +] + +[[package]] +name = "tensorboardx" +version = "2.6.1" +description = "TensorBoardX lets you watch Tensors Flow without Tensorflow" +optional = false +python-versions = "*" +files = [ + {file = "tensorboardX-2.6.1-py2.py3-none-any.whl", hash = "sha256:4960feb79b1b84fd2b020885b09fd70962caec277d4bc194f338a6c203cd78ca"}, + {file = "tensorboardX-2.6.1.tar.gz", hash = "sha256:02e2b84d7dc102edb7a052c77041db30fd6ba9b990635178919b8e9cfa157e96"}, +] + +[package.dependencies] +numpy = "*" +packaging = "*" +protobuf = ">=4.22.3" + +[[package]] +name = "threadpoolctl" +version = "3.2.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.2.0-py3-none-any.whl", hash = "sha256:2b7818516e423bdaebb97c723f86a7c6b0a83d3f3b0970328d66f4d9104dc032"}, + {file = "threadpoolctl-3.2.0.tar.gz", hash = "sha256:c96a0ba3bdddeaca37dc4cc7344aafad41cdb8c313f74fdfe387a867bba93355"}, +] + +[[package]] +name = "toolz" +version = "0.12.0" +description = "List processing tools and functional utilities" +optional = false +python-versions = ">=3.5" +files = [ + {file = "toolz-0.12.0-py3-none-any.whl", hash = "sha256:2059bd4148deb1884bb0eb770a3cde70e7f954cfbbdc2285f1f2de01fd21eb6f"}, + {file = "toolz-0.12.0.tar.gz", hash = "sha256:88c570861c440ee3f2f6037c4654613228ff40c93a6c25e0eba70d17282c6194"}, +] + +[[package]] +name = "torch" +version = "2.0.1" +description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" +optional = false +python-versions = ">=3.8.0" +files = [ + {file = "torch-2.0.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:8ced00b3ba471856b993822508f77c98f48a458623596a4c43136158781e306a"}, + {file = "torch-2.0.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:359bfaad94d1cda02ab775dc1cc386d585712329bb47b8741607ef6ef4950747"}, + {file = "torch-2.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:7c84e44d9002182edd859f3400deaa7410f5ec948a519cc7ef512c2f9b34d2c4"}, + {file = "torch-2.0.1-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:567f84d657edc5582d716900543e6e62353dbe275e61cdc36eda4929e46df9e7"}, + {file = "torch-2.0.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:787b5a78aa7917465e9b96399b883920c88a08f4eb63b5a5d2d1a16e27d2f89b"}, + {file = "torch-2.0.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:e617b1d0abaf6ced02dbb9486803abfef0d581609b09641b34fa315c9c40766d"}, + {file = "torch-2.0.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b6019b1de4978e96daa21d6a3ebb41e88a0b474898fe251fd96189587408873e"}, + {file = "torch-2.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:dbd68cbd1cd9da32fe5d294dd3411509b3d841baecb780b38b3b7b06c7754434"}, + {file = "torch-2.0.1-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:ef654427d91600129864644e35deea761fb1fe131710180b952a6f2e2207075e"}, + {file = "torch-2.0.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:25aa43ca80dcdf32f13da04c503ec7afdf8e77e3a0183dd85cd3e53b2842e527"}, + {file = "torch-2.0.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:5ef3ea3d25441d3957348f7e99c7824d33798258a2bf5f0f0277cbcadad2e20d"}, + {file = "torch-2.0.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:0882243755ff28895e8e6dc6bc26ebcf5aa0911ed81b2a12f241fc4b09075b13"}, + {file = 
"torch-2.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:f66aa6b9580a22b04d0af54fcd042f52406a8479e2b6a550e3d9f95963e168c8"}, + {file = "torch-2.0.1-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:1adb60d369f2650cac8e9a95b1d5758e25d526a34808f7448d0bd599e4ae9072"}, + {file = "torch-2.0.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:1bcffc16b89e296826b33b98db5166f990e3b72654a2b90673e817b16c50e32b"}, + {file = "torch-2.0.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:e10e1597f2175365285db1b24019eb6f04d53dcd626c735fc502f1e8b6be9875"}, + {file = "torch-2.0.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:423e0ae257b756bb45a4b49072046772d1ad0c592265c5080070e0767da4e490"}, + {file = "torch-2.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:8742bdc62946c93f75ff92da00e3803216c6cce9b132fbca69664ca38cfb3e18"}, + {file = "torch-2.0.1-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:c62df99352bd6ee5a5a8d1832452110435d178b5164de450831a3a8cc14dc680"}, + {file = "torch-2.0.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:671a2565e3f63b8fe8e42ae3e36ad249fe5e567435ea27b94edaa672a7d0c416"}, +] + +[package.dependencies] +filelock = "*" +jinja2 = "*" +networkx = "*" +sympy = "*" +typing-extensions = "*" + +[package.extras] +opt-einsum = ["opt-einsum (>=3.3)"] + +[[package]] +name = "torchaudio" +version = "2.0.2" +description = "An audio package for PyTorch" +optional = false +python-versions = "*" +files = [ + {file = "torchaudio-2.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:80c64dabb9d8c33bc6f2a8e0c7ebe17ea87f5028931c0d6a2f73b9e16b5272d0"}, + {file = "torchaudio-2.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1619673500fe08ae96b71952f03ecfc74e7d0843cd9882193d0642a82724f537"}, + {file = "torchaudio-2.0.2-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4dbc7dd84be522f6d1159d33f5cf2fe08656ac5e8402ac2aa07cb626fed4b700"}, + {file = "torchaudio-2.0.2-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:dadf237b4fd155a3d213bdfeffeded47f5a553d383817500438b44f24fa53851"}, + {file = "torchaudio-2.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:2f975083ba66f0837ea4b55fb8d81f31a63a4a27f8628e54c1e16ebda1842931"}, + {file = "torchaudio-2.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a6cba80d9ab3a2ec1317cdc5cbc0654a189a26e3d8b28ef9f83336159fd5e5e9"}, + {file = "torchaudio-2.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:15327132b28f34963baa6fe1813030a634d2581aa9ca120f730c1e8fabdc1102"}, + {file = "torchaudio-2.0.2-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:b44ec89d4274856f58d55bce4f90e4294ee26ec3020dc39b3081d541d7fd6184"}, + {file = "torchaudio-2.0.2-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:b22aceaa1ec5a3310cc15642d19dd00d53a7ce399b9096ad1dea0b24e5097af3"}, + {file = "torchaudio-2.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:2b37ed5ea4846ce334e7a4a93ca798683088b49e9a4943ed04f4fc4ba1ddc3c4"}, + {file = "torchaudio-2.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:a8283dd61c579ea5b14d6773bbc0bf84573b12b37f05f02bb4b2425d77767284"}, + {file = "torchaudio-2.0.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:28c9be830608f93c906770eb7b4880962f8fef9bd5275ac5b48c850f3cc4bc32"}, + {file = "torchaudio-2.0.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:286061f9999905a6b96107c7ac751a4016985a8e2087250ab8328845e4128952"}, + {file = "torchaudio-2.0.2-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:ee08ec303050405998e74a0a3649aee4d16408c2eb4bb1f8c7a726318b1ce1b7"}, + {file = "torchaudio-2.0.2-cp38-cp38-win_amd64.whl", hash = 
"sha256:ad43c0ffaa6771bfa05669bb1d8c3c179c859e92efd985683a78d6d313ecefb6"}, + {file = "torchaudio-2.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1138a1c39da2445a1caca20ddce1e77c9657e92263eb34376024f517f5284d4b"}, + {file = "torchaudio-2.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:de5d94cb8305c00268dfbc576ca7e445f40891e024a9e5e28c63ad9f851e541a"}, + {file = "torchaudio-2.0.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:6c617d5978a6b8a29d6675dcd18196e6dc541daac44b7b2b6eeb31aed82f3203"}, + {file = "torchaudio-2.0.2-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:a74f33da0b3c53b770f583a02cabd59196f089fb77a65eb39cd5d811b5a21d63"}, + {file = "torchaudio-2.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:66df470da29964d7c1e8106a42f5f5c9bc09e824083675ce3aba054d68b4af54"}, +] + +[package.dependencies] +torch = "2.0.1" + +[[package]] +name = "torchcrepe" +version = "0.0.20" +description = "Pytorch implementation of CREPE pitch tracker" +optional = false +python-versions = "*" +files = [ + {file = "torchcrepe-0.0.20-py3-none-any.whl", hash = "sha256:a8803df94cd7675c0ee1f8f6ca6ea78075bca0825497632914b76e0dd5b61d31"}, + {file = "torchcrepe-0.0.20.tar.gz", hash = "sha256:b4eac23d0a85582739fe69eeb3136e2fb6ebb7e5363a6556f7c9fe83b11f8e2c"}, +] + +[package.dependencies] +librosa = "0.9.1" +resampy = "*" +scipy = "*" +torch = "*" +tqdm = "*" + +[[package]] +name = "torchgen" +version = "0.0.1" +description = "Ready to use implementations of state-of-the-art generative models in PyTorch" +optional = false +python-versions = ">=3.7, <4" +files = [ + {file = "torchgen-0.0.1-py3-none-any.whl", hash = "sha256:78d02b5e4ea0231ce46b4262564a05a9cb2047fcfcdcf4a4ab56230a0f21be66"}, +] + +[[package]] +name = "tornado" +version = "6.3.2" +description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." 
+optional = false +python-versions = ">= 3.8" +files = [ + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:c367ab6c0393d71171123ca5515c61ff62fe09024fa6bf299cd1339dc9456829"}, + {file = "tornado-6.3.2-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:b46a6ab20f5c7c1cb949c72c1994a4585d2eaa0be4853f50a03b5031e964fc7c"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2de14066c4a38b4ecbbcd55c5cc4b5340eb04f1c5e81da7451ef555859c833f"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:05615096845cf50a895026f749195bf0b10b8909f9be672f50b0fe69cba368e4"}, + {file = "tornado-6.3.2-cp38-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5b17b1cf5f8354efa3d37c6e28fdfd9c1c1e5122f2cb56dac121ac61baa47cbe"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:29e71c847a35f6e10ca3b5c2990a52ce38b233019d8e858b755ea6ce4dcdd19d"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_i686.whl", hash = "sha256:834ae7540ad3a83199a8da8f9f2d383e3c3d5130a328889e4cc991acc81e87a0"}, + {file = "tornado-6.3.2-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6a0848f1aea0d196a7c4f6772197cbe2abc4266f836b0aac76947872cd29b411"}, + {file = "tornado-6.3.2-cp38-abi3-win32.whl", hash = "sha256:7efcbcc30b7c654eb6a8c9c9da787a851c18f8ccd4a5a3a95b05c7accfa068d2"}, + {file = "tornado-6.3.2-cp38-abi3-win_amd64.whl", hash = "sha256:0c325e66c8123c606eea33084976c832aa4e766b7dff8aedd7587ea44a604cdf"}, + {file = "tornado-6.3.2.tar.gz", hash = "sha256:4b927c4f19b71e627b13f3db2324e4ae660527143f9e1f2e2fb404f3a187e2ba"}, +] + +[[package]] +name = "tqdm" +version = "4.65.0" +description = "Fast, Extensible Progress Meter" +optional = false +python-versions = ">=3.7" +files = [ + {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"}, + {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + +[[package]] +name = "traitlets" +version = "5.9.0" +description = "Traitlets Python configuration system" +optional = false +python-versions = ">=3.7" +files = [ + {file = "traitlets-5.9.0-py3-none-any.whl", hash = "sha256:9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8"}, + {file = "traitlets-5.9.0.tar.gz", hash = "sha256:f6cde21a9c68cf756af02035f72d5a723bf607e862e7be33ece505abf4a3bad9"}, +] + +[package.extras] +docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] +test = ["argcomplete (>=2.0)", "pre-commit", "pytest", "pytest-mock"] + +[[package]] +name = "typing-extensions" +version = "4.7.1" +description = "Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.7.1-py3-none-any.whl", hash = "sha256:440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36"}, + {file = "typing_extensions-4.7.1.tar.gz", hash = "sha256:b75ddc264f0ba5615db7ba217daeb99701ad295353c45f9e95963337ceeeffb2"}, +] + +[[package]] +name = "tzdata" +version = "2023.3" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files 
= [ + {file = "tzdata-2023.3-py2.py3-none-any.whl", hash = "sha256:7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda"}, + {file = "tzdata-2023.3.tar.gz", hash = "sha256:11ef1e08e54acb0d4f95bdb1be05da659673de4acbd21bf9c69e94cc5e907a3a"}, +] + +[[package]] +name = "uc-micro-py" +version = "1.0.2" +description = "Micro subset of unicode data files for linkify-it-py projects." +optional = false +python-versions = ">=3.7" +files = [ + {file = "uc-micro-py-1.0.2.tar.gz", hash = "sha256:30ae2ac9c49f39ac6dce743bd187fcd2b574b16ca095fa74cd9396795c954c54"}, + {file = "uc_micro_py-1.0.2-py3-none-any.whl", hash = "sha256:8c9110c309db9d9e87302e2f4ad2c3152770930d88ab385cd544e7a7e75f3de0"}, +] + +[package.extras] +test = ["coverage", "pytest", "pytest-cov"] + +[[package]] +name = "urllib3" +version = "1.26.16" +description = "HTTP library with thread-safe connection pooling, file post, and more." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +files = [ + {file = "urllib3-1.26.16-py2.py3-none-any.whl", hash = "sha256:8d36afa7616d8ab714608411b4a3b13e58f463aee519024578e062e141dce20f"}, + {file = "urllib3-1.26.16.tar.gz", hash = "sha256:8f135f6502756bde6b2a9b28989df5fbe87c9970cecaa69041edcce7f0589b14"}, +] + +[package.extras] +brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] + +[[package]] +name = "uvicorn" +version = "0.21.1" +description = "The lightning-fast ASGI server." +optional = false +python-versions = ">=3.7" +files = [ + {file = "uvicorn-0.21.1-py3-none-any.whl", hash = "sha256:e47cac98a6da10cd41e6fd036d472c6f58ede6c5dbee3dbee3ef7a100ed97742"}, + {file = "uvicorn-0.21.1.tar.gz", hash = "sha256:0fac9cb342ba099e0d582966005f3fdba5b0290579fed4a6266dc702ca7bb032"}, +] + +[package.dependencies] +click = ">=7.0" +h11 = ">=0.8" + +[package.extras] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] + +[[package]] +name = "websockets" +version = "11.0.3" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3ccc8a0c387629aec40f2fc9fdcb4b9d5431954f934da3eaf16cdc94f67dbfac"}, + {file = "websockets-11.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d67ac60a307f760c6e65dad586f556dde58e683fab03323221a4e530ead6f74d"}, + {file = "websockets-11.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d27a4832cc1a0ee07cdcf2b0629a8a72db73f4cf6de6f0904f6661227f256f"}, + {file = "websockets-11.0.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ffd7dcaf744f25f82190856bc26ed81721508fc5cbf2a330751e135ff1283564"}, + {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7622a89d696fc87af8e8d280d9b421db5133ef5b29d3f7a1ce9f1a7bf7fcfa11"}, + {file = "websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bceab846bac555aff6427d060f2fcfff71042dba6f5fca7dc4f75cac815e57ca"}, + {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = 
"sha256:54c6e5b3d3a8936a4ab6870d46bdd6ec500ad62bde9e44462c32d18f1e9a8e54"}, + {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:41f696ba95cd92dc047e46b41b26dd24518384749ed0d99bea0a941ca87404c4"}, + {file = "websockets-11.0.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:86d2a77fd490ae3ff6fae1c6ceaecad063d3cc2320b44377efdde79880e11526"}, + {file = "websockets-11.0.3-cp310-cp310-win32.whl", hash = "sha256:2d903ad4419f5b472de90cd2d40384573b25da71e33519a67797de17ef849b69"}, + {file = "websockets-11.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:1d2256283fa4b7f4c7d7d3e84dc2ece74d341bce57d5b9bf385df109c2a1a82f"}, + {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e848f46a58b9fcf3d06061d17be388caf70ea5b8cc3466251963c8345e13f7eb"}, + {file = "websockets-11.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:aa5003845cdd21ac0dc6c9bf661c5beddd01116f6eb9eb3c8e272353d45b3288"}, + {file = "websockets-11.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b58cbf0697721120866820b89f93659abc31c1e876bf20d0b3d03cef14faf84d"}, + {file = "websockets-11.0.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:660e2d9068d2bedc0912af508f30bbeb505bbbf9774d98def45f68278cea20d3"}, + {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c1f0524f203e3bd35149f12157438f406eff2e4fb30f71221c8a5eceb3617b6b"}, + {file = "websockets-11.0.3-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:def07915168ac8f7853812cc593c71185a16216e9e4fa886358a17ed0fd9fcf6"}, + {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:b30c6590146e53149f04e85a6e4fcae068df4289e31e4aee1fdf56a0dead8f97"}, + {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:619d9f06372b3a42bc29d0cd0354c9bb9fb39c2cbc1a9c5025b4538738dbffaf"}, + {file = "websockets-11.0.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:01f5567d9cf6f502d655151645d4e8b72b453413d3819d2b6f1185abc23e82dd"}, + {file = "websockets-11.0.3-cp311-cp311-win32.whl", hash = "sha256:e1459677e5d12be8bbc7584c35b992eea142911a6236a3278b9b5ce3326f282c"}, + {file = "websockets-11.0.3-cp311-cp311-win_amd64.whl", hash = "sha256:e7837cb169eca3b3ae94cc5787c4fed99eef74c0ab9506756eea335e0d6f3ed8"}, + {file = "websockets-11.0.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:9f59a3c656fef341a99e3d63189852be7084c0e54b75734cde571182c087b152"}, + {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2529338a6ff0eb0b50c7be33dc3d0e456381157a31eefc561771ee431134a97f"}, + {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:34fd59a4ac42dff6d4681d8843217137f6bc85ed29722f2f7222bd619d15e95b"}, + {file = "websockets-11.0.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:332d126167ddddec94597c2365537baf9ff62dfcc9db4266f263d455f2f031cb"}, + {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:6505c1b31274723ccaf5f515c1824a4ad2f0d191cec942666b3d0f3aa4cb4007"}, + {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:f467ba0050b7de85016b43f5a22b46383ef004c4f672148a8abf32bc999a87f0"}, + {file = "websockets-11.0.3-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:9d9acd80072abcc98bd2c86c3c9cd4ac2347b5a5a0cae7ed5c0ee5675f86d9af"}, + {file = "websockets-11.0.3-cp37-cp37m-win32.whl", hash = "sha256:e590228200fcfc7e9109509e4d9125eace2042fd52b595dd22bbc34bb282307f"}, + {file = "websockets-11.0.3-cp37-cp37m-win_amd64.whl", hash = "sha256:b16fff62b45eccb9c7abb18e60e7e446998093cdcb50fed33134b9b6878836de"}, + {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:fb06eea71a00a7af0ae6aefbb932fb8a7df3cb390cc217d51a9ad7343de1b8d0"}, + {file = "websockets-11.0.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:8a34e13a62a59c871064dfd8ffb150867e54291e46d4a7cf11d02c94a5275bae"}, + {file = "websockets-11.0.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4841ed00f1026dfbced6fca7d963c4e7043aa832648671b5138008dc5a8f6d99"}, + {file = "websockets-11.0.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a073fc9ab1c8aff37c99f11f1641e16da517770e31a37265d2755282a5d28aa"}, + {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:68b977f21ce443d6d378dbd5ca38621755f2063d6fdb3335bda981d552cfff86"}, + {file = "websockets-11.0.3-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1a99a7a71631f0efe727c10edfba09ea6bee4166a6f9c19aafb6c0b5917d09c"}, + {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:bee9fcb41db2a23bed96c6b6ead6489702c12334ea20a297aa095ce6d31370d0"}, + {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:4b253869ea05a5a073ebfdcb5cb3b0266a57c3764cf6fe114e4cd90f4bfa5f5e"}, + {file = "websockets-11.0.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:1553cb82942b2a74dd9b15a018dce645d4e68674de2ca31ff13ebc2d9f283788"}, + {file = "websockets-11.0.3-cp38-cp38-win32.whl", hash = "sha256:f61bdb1df43dc9c131791fbc2355535f9024b9a04398d3bd0684fc16ab07df74"}, + {file = "websockets-11.0.3-cp38-cp38-win_amd64.whl", hash = "sha256:03aae4edc0b1c68498f41a6772d80ac7c1e33c06c6ffa2ac1c27a07653e79d6f"}, + {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:777354ee16f02f643a4c7f2b3eff8027a33c9861edc691a2003531f5da4f6bc8"}, + {file = "websockets-11.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:8c82f11964f010053e13daafdc7154ce7385ecc538989a354ccc7067fd7028fd"}, + {file = "websockets-11.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3580dd9c1ad0701169e4d6fc41e878ffe05e6bdcaf3c412f9d559389d0c9e016"}, + {file = "websockets-11.0.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f1a3f10f836fab6ca6efa97bb952300b20ae56b409414ca85bff2ad241d2a61"}, + {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:df41b9bc27c2c25b486bae7cf42fccdc52ff181c8c387bfd026624a491c2671b"}, + {file = "websockets-11.0.3-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:279e5de4671e79a9ac877427f4ac4ce93751b8823f276b681d04b2156713b9dd"}, + {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1fdf26fa8a6a592f8f9235285b8affa72748dc12e964a5518c6c5e8f916716f7"}, + {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:69269f3a0b472e91125b503d3c0b3566bda26da0a3261c49f0027eb6075086d1"}, + {file = "websockets-11.0.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:97b52894d948d2f6ea480171a27122d77af14ced35f62e5c892ca2fae9344311"}, + {file = "websockets-11.0.3-cp39-cp39-win32.whl", hash = "sha256:c7f3cb904cce8e1be667c7e6fef4516b98d1a6a0635a58a57528d577ac18a128"}, + {file = "websockets-11.0.3-cp39-cp39-win_amd64.whl", hash = "sha256:c792ea4eabc0159535608fc5658a74d1a81020eb35195dd63214dcf07556f67e"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f2e58f2c36cc52d41f2659e4c0cbf7353e28c8c9e63e30d8c6d3494dc9fdedcf"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de36fe9c02995c7e6ae6efe2e205816f5f00c22fd1fbf343d4d18c3d5ceac2f5"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0ac56b661e60edd453585f4bd68eb6a29ae25b5184fd5ba51e97652580458998"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e052b8467dd07d4943936009f46ae5ce7b908ddcac3fda581656b1b19c083d9b"}, + {file = "websockets-11.0.3-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:42cc5452a54a8e46a032521d7365da775823e21bfba2895fb7b77633cce031bb"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:e6316827e3e79b7b8e7d8e3b08f4e331af91a48e794d5d8b099928b6f0b85f20"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8531fdcad636d82c517b26a448dcfe62f720e1922b33c81ce695d0edb91eb931"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c114e8da9b475739dde229fd3bc6b05a6537a88a578358bc8eb29b4030fac9c9"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e063b1865974611313a3849d43f2c3f5368093691349cf3c7c8f8f75ad7cb280"}, + {file = "websockets-11.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:92b2065d642bf8c0a82d59e59053dd2fdde64d4ed44efe4870fa816c1232647b"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0ee68fe502f9031f19d495dae2c268830df2760c0524cbac5d759921ba8c8e82"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcacf2c7a6c3a84e720d1bb2b543c675bf6c40e460300b628bab1b1efc7c034c"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b67c6f5e5a401fc56394f191f00f9b3811fe843ee93f4a70df3c389d1adf857d"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d5023a4b6a5b183dc838808087033ec5df77580485fc533e7dab2567851b0a4"}, + {file = "websockets-11.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:ed058398f55163a79bb9f06a90ef9ccc063b204bb346c4de78efc5d15abfe602"}, + {file = "websockets-11.0.3-py3-none-any.whl", hash = "sha256:6681ba9e7f8f3b19440921e99efbb40fc89f26cd71bf539e45d8c8a25c976dc6"}, + {file = "websockets-11.0.3.tar.gz", hash = "sha256:88fc51d9a26b10fc331be344f1781224a375b78488fc343620184e95a4b27016"}, +] + +[[package]] +name = "werkzeug" +version = "2.3.6" +description = "The comprehensive WSGI web application library." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "Werkzeug-2.3.6-py3-none-any.whl", hash = "sha256:935539fa1413afbb9195b24880778422ed620c0fc09670945185cce4d91a8890"}, + {file = "Werkzeug-2.3.6.tar.gz", hash = "sha256:98c774df2f91b05550078891dee5f0eb0cb797a522c757a2452b9cee5b202330"}, +] + +[package.dependencies] +MarkupSafe = ">=2.1.1" + +[package.extras] +watchdog = ["watchdog (>=2.3)"] + +[[package]] +name = "wheel" +version = "0.41.0" +description = "A built-package format for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "wheel-0.41.0-py3-none-any.whl", hash = "sha256:7e9be3bbd0078f6147d82ed9ed957e323e7708f57e134743d2edef3a7b7972a9"}, + {file = "wheel-0.41.0.tar.gz", hash = "sha256:55a0f0a5a84869bce5ba775abfd9c462e3a6b1b7b7ec69d72c0b83d673a5114d"}, +] + +[package.extras] +test = ["pytest (>=6.0.0)", "setuptools (>=65)"] + +[[package]] +name = "yarl" +version = "1.9.2" +description = "Yet another URL library" +optional = false +python-versions = ">=3.7" +files = [ + {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:8c2ad583743d16ddbdf6bb14b5cd76bf43b0d0006e918809d5d4ddf7bde8dd82"}, + {file = "yarl-1.9.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:82aa6264b36c50acfb2424ad5ca537a2060ab6de158a5bd2a72a032cc75b9eb8"}, + {file = "yarl-1.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c0c77533b5ed4bcc38e943178ccae29b9bcf48ffd1063f5821192f23a1bd27b9"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee4afac41415d52d53a9833ebae7e32b344be72835bbb589018c9e938045a560"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9bf345c3a4f5ba7f766430f97f9cc1320786f19584acc7086491f45524a551ac"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2a96c19c52ff442a808c105901d0bdfd2e28575b3d5f82e2f5fd67e20dc5f4ea"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:891c0e3ec5ec881541f6c5113d8df0315ce5440e244a716b95f2525b7b9f3608"}, + {file = "yarl-1.9.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c3a53ba34a636a256d767c086ceb111358876e1fb6b50dfc4d3f4951d40133d5"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:566185e8ebc0898b11f8026447eacd02e46226716229cea8db37496c8cdd26e0"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:2b0738fb871812722a0ac2154be1f049c6223b9f6f22eec352996b69775b36d4"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:32f1d071b3f362c80f1a7d322bfd7b2d11e33d2adf395cc1dd4df36c9c243095"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:e9fdc7ac0d42bc3ea78818557fab03af6181e076a2944f43c38684b4b6bed8e3"}, + {file = "yarl-1.9.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:56ff08ab5df8429901ebdc5d15941b59f6253393cb5da07b4170beefcf1b2528"}, + {file = "yarl-1.9.2-cp310-cp310-win32.whl", hash = "sha256:8ea48e0a2f931064469bdabca50c2f578b565fc446f302a79ba6cc0ee7f384d3"}, + {file = "yarl-1.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:50f33040f3836e912ed16d212f6cc1efb3231a8a60526a407aeb66c1c1956dde"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:646d663eb2232d7909e6601f1a9107e66f9791f290a1b3dc7057818fe44fc2b6"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:aff634b15beff8902d1f918012fc2a42e0dbae6f469fce134c8a0dc51ca423bb"}, + {file = "yarl-1.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a83503934c6273806aed765035716216cc9ab4e0364f7f066227e1aaea90b8d0"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b25322201585c69abc7b0e89e72790469f7dad90d26754717f3310bfe30331c2"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:22a94666751778629f1ec4280b08eb11815783c63f52092a5953faf73be24191"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ec53a0ea2a80c5cd1ab397925f94bff59222aa3cf9c6da938ce05c9ec20428d"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:159d81f22d7a43e6eabc36d7194cb53f2f15f498dbbfa8edc8a3239350f59fe7"}, + {file = "yarl-1.9.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:832b7e711027c114d79dffb92576acd1bd2decc467dec60e1cac96912602d0e6"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:95d2ecefbcf4e744ea952d073c6922e72ee650ffc79028eb1e320e732898d7e8"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:d4e2c6d555e77b37288eaf45b8f60f0737c9efa3452c6c44626a5455aeb250b9"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:783185c75c12a017cc345015ea359cc801c3b29a2966c2655cd12b233bf5a2be"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:b8cc1863402472f16c600e3e93d542b7e7542a540f95c30afd472e8e549fc3f7"}, + {file = "yarl-1.9.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:822b30a0f22e588b32d3120f6d41e4ed021806418b4c9f0bc3048b8c8cb3f92a"}, + {file = "yarl-1.9.2-cp311-cp311-win32.whl", hash = "sha256:a60347f234c2212a9f0361955007fcf4033a75bf600a33c88a0a8e91af77c0e8"}, + {file = "yarl-1.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:be6b3fdec5c62f2a67cb3f8c6dbf56bbf3f61c0f046f84645cd1ca73532ea051"}, + {file = "yarl-1.9.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38a3928ae37558bc1b559f67410df446d1fbfa87318b124bf5032c31e3447b74"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac9bb4c5ce3975aeac288cfcb5061ce60e0d14d92209e780c93954076c7c4367"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3da8a678ca8b96c8606bbb8bfacd99a12ad5dd288bc6f7979baddd62f71c63ef"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13414591ff516e04fcdee8dc051c13fd3db13b673c7a4cb1350e6b2ad9639ad3"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf74d08542c3a9ea97bb8f343d4fcbd4d8f91bba5ec9d5d7f792dbe727f88938"}, + {file = "yarl-1.9.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6e7221580dc1db478464cfeef9b03b95c5852cc22894e418562997df0d074ccc"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:494053246b119b041960ddcd20fd76224149cfea8ed8777b687358727911dd33"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:52a25809fcbecfc63ac9ba0c0fb586f90837f5425edfd1ec9f3372b119585e45"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:e65610c5792870d45d7b68c677681376fcf9cc1c289f23e8e8b39c1485384185"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = 
"sha256:1b1bba902cba32cdec51fca038fd53f8beee88b77efc373968d1ed021024cc04"}, + {file = "yarl-1.9.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:662e6016409828ee910f5d9602a2729a8a57d74b163c89a837de3fea050c7582"}, + {file = "yarl-1.9.2-cp37-cp37m-win32.whl", hash = "sha256:f364d3480bffd3aa566e886587eaca7c8c04d74f6e8933f3f2c996b7f09bee1b"}, + {file = "yarl-1.9.2-cp37-cp37m-win_amd64.whl", hash = "sha256:6a5883464143ab3ae9ba68daae8e7c5c95b969462bbe42e2464d60e7e2698368"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5610f80cf43b6202e2c33ba3ec2ee0a2884f8f423c8f4f62906731d876ef4fac"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b9a4e67ad7b646cd6f0938c7ebfd60e481b7410f574c560e455e938d2da8e0f4"}, + {file = "yarl-1.9.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:83fcc480d7549ccebe9415d96d9263e2d4226798c37ebd18c930fce43dfb9574"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5fcd436ea16fee7d4207c045b1e340020e58a2597301cfbcfdbe5abd2356c2fb"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84e0b1599334b1e1478db01b756e55937d4614f8654311eb26012091be109d59"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3458a24e4ea3fd8930e934c129b676c27452e4ebda80fbe47b56d8c6c7a63a9e"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:838162460b3a08987546e881a2bfa573960bb559dfa739e7800ceeec92e64417"}, + {file = "yarl-1.9.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4e2d08f07a3d7d3e12549052eb5ad3eab1c349c53ac51c209a0e5991bbada78"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:de119f56f3c5f0e2fb4dee508531a32b069a5f2c6e827b272d1e0ff5ac040333"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:149ddea5abf329752ea5051b61bd6c1d979e13fbf122d3a1f9f0c8be6cb6f63c"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:674ca19cbee4a82c9f54e0d1eee28116e63bc6fd1e96c43031d11cbab8b2afd5"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:9b3152f2f5677b997ae6c804b73da05a39daa6a9e85a512e0e6823d81cdad7cc"}, + {file = "yarl-1.9.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5415d5a4b080dc9612b1b63cba008db84e908b95848369aa1da3686ae27b6d2b"}, + {file = "yarl-1.9.2-cp38-cp38-win32.whl", hash = "sha256:f7a3d8146575e08c29ed1cd287068e6d02f1c7bdff8970db96683b9591b86ee7"}, + {file = "yarl-1.9.2-cp38-cp38-win_amd64.whl", hash = "sha256:63c48f6cef34e6319a74c727376e95626f84ea091f92c0250a98e53e62c77c72"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:75df5ef94c3fdc393c6b19d80e6ef1ecc9ae2f4263c09cacb178d871c02a5ba9"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c027a6e96ef77d401d8d5a5c8d6bc478e8042f1e448272e8d9752cb0aff8b5c8"}, + {file = "yarl-1.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3b078dbe227f79be488ffcfc7a9edb3409d018e0952cf13f15fd6512847f3f7"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59723a029760079b7d991a401386390c4be5bfec1e7dd83e25a6a0881859e716"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b03917871bf859a81ccb180c9a2e6c1e04d2f6a51d953e6a5cdd70c93d4e5a2a"}, + {file = 
"yarl-1.9.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c1012fa63eb6c032f3ce5d2171c267992ae0c00b9e164efe4d73db818465fac3"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a74dcbfe780e62f4b5a062714576f16c2f3493a0394e555ab141bf0d746bb955"}, + {file = "yarl-1.9.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8c56986609b057b4839968ba901944af91b8e92f1725d1a2d77cbac6972b9ed1"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:2c315df3293cd521033533d242d15eab26583360b58f7ee5d9565f15fee1bef4"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b7232f8dfbd225d57340e441d8caf8652a6acd06b389ea2d3222b8bc89cbfca6"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:53338749febd28935d55b41bf0bcc79d634881195a39f6b2f767870b72514caf"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:066c163aec9d3d073dc9ffe5dd3ad05069bcb03fcaab8d221290ba99f9f69ee3"}, + {file = "yarl-1.9.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:8288d7cd28f8119b07dd49b7230d6b4562f9b61ee9a4ab02221060d21136be80"}, + {file = "yarl-1.9.2-cp39-cp39-win32.whl", hash = "sha256:b124e2a6d223b65ba8768d5706d103280914d61f5cae3afbc50fc3dfcc016623"}, + {file = "yarl-1.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:61016e7d582bc46a5378ffdd02cd0314fb8ba52f40f9cf4d9a5e7dbef88dee18"}, + {file = "yarl-1.9.2.tar.gz", hash = "sha256:04ab9d4b9f587c06d801c2abfe9317b77cdf996c65a90d5e84ecc45010823571"}, +] + +[package.dependencies] +idna = ">=2.0" +multidict = ">=4.0" + +[[package]] +name = "zipp" +version = "3.16.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +optional = false +python-versions = ">=3.8" +files = [ + {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, + {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + +[metadata] +lock-version = "2.0" +python-versions = "^3.8" +content-hash = "7a38547adbfe6581d1a950f9ece483e2d4e9c3c00802ca5da288d15d4c6e8c44" diff --git a/AIMeiSheng/pretrained/.gitignore b/AIMeiSheng/pretrained/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/AIMeiSheng/pretrained/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/AIMeiSheng/pretrained_v2/.gitignore b/AIMeiSheng/pretrained_v2/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/AIMeiSheng/pretrained_v2/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/AIMeiSheng/pyproject.toml b/AIMeiSheng/pyproject.toml new file mode 100644 index 0000000..e5fd998 --- /dev/null +++ b/AIMeiSheng/pyproject.toml @@ -0,0 +1,62 @@ +[tool.poetry] +name = "rvc-beta" +version = "0.1.0" +description = "" +authors = ["lj1995"] +license = "MIT" + +[tool.poetry.dependencies] +python = "^3.8" +torch = "^2.0.0" +torchaudio = "^2.0.1" +Cython = "^0.29.34" +gradio = "^3.34.0" +future = "^0.18.3" +pydub = "^0.25.1" +soundfile = "^0.12.1" +ffmpeg-python = 
"^0.2.0" +tensorboardX = "^2.6" +functorch = "^2.0.0" +fairseq = "^0.12.2" +faiss-cpu = "^1.7.2" +Jinja2 = "^3.1.2" +json5 = "^0.9.11" +librosa = "0.9.1" +llvmlite = "0.39.0" +Markdown = "^3.4.3" +matplotlib = "^3.7.1" +matplotlib-inline = "^0.1.6" +numba = "0.56.4" +numpy = "1.23.5" +scipy = "1.9.3" +praat-parselmouth = "^0.4.3" +Pillow = "9.3.0" +pyworld = "^0.3.2" +resampy = "^0.4.2" +scikit-learn = "^1.2.2" +starlette = "^0.27.0" +tensorboard = "^2.12.1" +tensorboard-data-server = "^0.7.0" +tensorboard-plugin-wit = "^1.8.1" +torchgen = "^0.0.1" +tqdm = "^4.65.0" +tornado = "^6.3" +Werkzeug = "^2.2.3" +uc-micro-py = "^1.0.1" +sympy = "^1.11.1" +tabulate = "^0.9.0" +PyYAML = "^6.0" +pyasn1 = "^0.4.8" +pyasn1-modules = "^0.2.8" +fsspec = "^2023.3.0" +absl-py = "^1.4.0" +audioread = "^3.0.0" +uvicorn = "^0.21.1" +colorama = "^0.4.6" +torchcrepe = "0.0.20" + +[tool.poetry.dev-dependencies] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/AIMeiSheng/readme_meisheng.md b/AIMeiSheng/readme_meisheng.md new file mode 100644 index 0000000..e69de29 diff --git a/AIMeiSheng/requirements-win-for-realtime_vc_gui.txt b/AIMeiSheng/requirements-win-for-realtime_vc_gui.txt new file mode 100644 index 0000000..37ca238 --- /dev/null +++ b/AIMeiSheng/requirements-win-for-realtime_vc_gui.txt @@ -0,0 +1,28 @@ +#1.Install torch from pytorch.org: +#torch 2.0 with cuda 11.8 +#pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 +#torch 1.11.0 with cuda 11.3 +#pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 torchaudio==0.11.0 --extra-index-url https://download.pytorch.org/whl/cu113 +einops +fairseq +flask +flask_cors +gin +gin_config +librosa +local_attention +matplotlib +praat-parselmouth +pyworld +PyYAML +resampy +scikit_learn +scipy +SoundFile +tensorboard +tqdm +wave +PySimpleGUI +sounddevice +gradio +noisereduce diff --git a/AIMeiSheng/requirements.txt b/AIMeiSheng/requirements.txt new file mode 100644 index 0000000..5175650 --- /dev/null +++ b/AIMeiSheng/requirements.txt @@ -0,0 +1,171 @@ +absl-py==1.4.0 +aiofiles==23.2.1 +altair==5.1.1 +annotated-types==0.5.0 +antlr4-python3-runtime==4.8 +anyio==3.7.1 +asteroid-filterbanks==0.4.0 +astunparse==1.6.3 +attrs==23.1.0 +audioread==3.0.0 +bitarray==2.8.1 +cachetools==5.3.1 +certifi==2023.7.22 +cffi==1.15.1 +charset-normalizer==3.2.0 +click==8.1.7 +cmake==3.27.4.1 +colorama==0.4.6 +contourpy==1.1.0 +cycler==0.11.0 +Cython==3.0.2 +decorator==5.1.1 +exceptiongroup==1.1.3 +fairseq==0.12.2 +faiss-cpu==1.7.2 +fastapi==0.103.1 +ffmpeg-python==0.2.0 +ffmpy==0.3.1 +filelock==3.12.3 +flatbuffers==24.3.7 +fonttools==4.42.1 +fsspec==2023.9.0 +functorch==2.0.0 +future==0.18.3 +gast==0.4.0 +google-auth==2.22.0 +google-auth-oauthlib==1.0.0 +google-pasta==0.2.0 +gradio==3.42.0 +gradio_client==0.5.0 +grpcio==1.57.0 +h11==0.14.0 +h5py==3.10.0 +httpcore==0.17.3 +httpx==0.24.1 +huggingface-hub==0.16.4 +hydra-core==1.0.7 +idna==3.4 +importlib-metadata==6.8.0 +importlib-resources==6.0.1 +Jinja2==3.1.2 +jiwer==3.0.3 +joblib==1.3.2 +json5==0.9.14 +jsonschema==4.19.0 +jsonschema-specifications==2023.7.1 +keras==2.13.1 +kiwisolver==1.4.5 +lazy_loader==0.3 +libclang==18.1.1 +librosa==0.9.1 +lit==16.0.6 +llvmlite==0.39.0 +lxml==4.9.3 +Markdown==3.4.4 +MarkupSafe==2.1.3 +matplotlib==3.7.2 +matplotlib-inline==0.1.6 +mpmath==1.3.0 +msgpack==1.0.8 +networkx==3.1 +numba==0.56.4 +numpy==1.23.5 +nvidia-cublas-cu11==11.10.3.66 +nvidia-cublas-cu12==12.1.3.1 +nvidia-cuda-cupti-cu11==11.7.101 
+nvidia-cuda-cupti-cu12==12.1.105 +nvidia-cuda-nvrtc-cu11==11.7.99 +nvidia-cuda-nvrtc-cu12==12.1.105 +nvidia-cuda-runtime-cu11==11.7.99 +nvidia-cuda-runtime-cu12==12.1.105 +nvidia-cudnn-cu11==8.5.0.96 +nvidia-cudnn-cu12==8.9.2.26 +nvidia-cufft-cu11==10.9.0.58 +nvidia-cufft-cu12==11.0.2.54 +nvidia-curand-cu11==10.2.10.91 +nvidia-curand-cu12==10.3.2.106 +nvidia-cusolver-cu11==11.4.0.1 +nvidia-cusolver-cu12==11.4.5.107 +nvidia-cusparse-cu11==11.7.4.91 +nvidia-cusparse-cu12==12.1.0.106 +nvidia-nccl-cu11==2.14.3 +nvidia-nccl-cu12==2.19.3 +nvidia-nvjitlink-cu12==12.4.99 +nvidia-nvtx-cu11==11.7.91 +nvidia-nvtx-cu12==12.1.105 +oauthlib==3.2.2 +omegaconf==2.0.6 +opt-einsum==3.3.0 +orjson==3.9.5 +packaging==23.1 +pandas==2.0.3 +Pillow==10.0.0 +pkgutil_resolve_name==1.3.10 +platformdirs==3.10.0 +pooch==1.7.0 +portalocker==2.7.0 +praat-parselmouth==0.4.3 +protobuf==4.24.2 +pyasn1==0.5.0 +pyasn1-modules==0.3.0 +pycparser==2.21 +pydantic==2.3.0 +pydantic_core==2.6.3 +pydub==0.25.1 +pyparsing==3.0.9 +python-dateutil==2.8.2 +python-multipart==0.0.6 +pytz==2023.3.post1 +pyworld==0.3.4 +PyYAML==6.0.1 +rapidfuzz==3.7.0 +referencing==0.30.2 +regex==2023.8.8 +requests==2.31.0 +requests-oauthlib==1.3.1 +resampy==0.4.2 +rpds-py==0.10.2 +rsa==4.9 +sacrebleu==2.3.1 +scikit-learn==1.3.0 +scipy==1.9.3 +semantic-version==2.10.0 +six==1.16.0 +sniffio==1.3.0 +soundfile==0.12.1 +soxr==0.3.7 +starlette==0.27.0 +sympy==1.12 +tabulate==0.9.0 +tensorboard==2.13.0 +tensorboard-data-server==0.7.1 +tensorboard-plugin-wit==1.8.1 +tensorboardX==2.6.2.2 +tensorflow==2.13.1 +tensorflow-estimator==2.13.0 +tensorflow-io-gcs-filesystem==0.34.0 +termcolor==2.4.0 +thop==0.1.1.post2209072238 +threadpoolctl==3.2.0 +toolz==0.12.0 +torch==2.0.1 +torchaudio==2.2.0+cu121 +torchcrepe==0.0.21 +torchgen==0.0.1 +torchsummary==1.5.1 +torchvision==0.17.0+cu121 +tornado==6.3.3 +tqdm==4.66.1 +traitlets==5.9.0 +triton==2.0.0 +typing_extensions==4.10.0 +tzdata==2023.3 +uc-micro-py==1.0.2 +urllib3==1.26.16 +uvicorn==0.23.2 +websockets==11.0.3 +Werkzeug==2.3.7 +whisper==1.1.10 +wrapt==1.16.0 +zipp==3.16.2 diff --git a/AIMeiSheng/run.sh b/AIMeiSheng/run.sh new file mode 100644 index 0000000..9f7186e --- /dev/null +++ b/AIMeiSheng/run.sh @@ -0,0 +1,45 @@ +#!/bin/bash + +if [[ "$(uname)" == "Darwin" ]]; then + # macOS specific env: + export PYTORCH_ENABLE_MPS_FALLBACK=1 + export PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 +elif [[ "$(uname)" != "Linux" ]]; then + echo "Unsupported operating system." + exit 1 +fi + +requirements_file="requirements.txt" + +# Check if Python 3.8 is installed +if ! command -v python3.8 &> /dev/null; then + echo "Python 3.8 not found. Attempting to install..." + if [[ "$(uname)" == "Darwin" ]] && command -v brew &> /dev/null; then + brew install python@3.8 + elif [[ "$(uname)" == "Linux" ]] && command -v apt-get &> /dev/null; then + sudo apt-get update + sudo apt-get install python3.8 + else + echo "Please install Python 3.8 manually." + exit 1 + fi +fi + +# Check if required packages are installed and install them if not +if [ -f "${requirements_file}" ]; then + installed_packages=$(python3.8 -m pip freeze) + while IFS= read -r package; do + [[ "${package}" =~ ^#.* ]] && continue + package_name=$(echo "${package}" | sed 's/[<>=!].*//') + if ! echo "${installed_packages}" | grep -q "${package_name}"; then + echo "${package_name} not found. Attempting to install..." + python3.8 -m pip install --upgrade "${package}" + fi + done < "${requirements_file}" +else + echo "${requirements_file} not found. 
Please ensure the requirements file with required packages exists."
+    exit 1
+fi
+
+# Run the main script
+python3.8 infer-web.py --pycmd python3.8
\ No newline at end of file
diff --git a/AIMeiSheng/rvc_for_realtime.py b/AIMeiSheng/rvc_for_realtime.py
new file mode 100644
index 0000000..132e80c
--- /dev/null
+++ b/AIMeiSheng/rvc_for_realtime.py
@@ -0,0 +1,297 @@
+import faiss, torch, traceback, parselmouth, numpy as np, torchcrepe, torch.nn as nn, pyworld
+from fairseq import checkpoint_utils
+from lib.infer_pack.models import (
+    SynthesizerTrnMs256NSFsid,
+    SynthesizerTrnMs256NSFsid_nono,
+    SynthesizerTrnMs768NSFsid,
+    SynthesizerTrnMs768NSFsid_nono,
+)
+import os, sys
+from time import time as ttime
+import torch.nn.functional as F
+import scipy.signal as signal
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+from config import Config
+from multiprocessing import Manager as M
+
+mm = M()
+config = Config()
+
+
+class RVC:
+    def __init__(
+        self, key, pth_path, index_path, index_rate, n_cpu, inp_q, opt_q, device
+    ) -> None:
+        """
+        Initialize
+        """
+        try:
+            global config
+            self.inp_q = inp_q
+            self.opt_q = opt_q
+            self.device = device
+            self.f0_up_key = key
+            self.time_step = 160 / 16000 * 1000
+            self.f0_min = 50
+            self.f0_max = 1100
+            self.f0_mel_min = 1127 * np.log(1 + self.f0_min / 700)
+            self.f0_mel_max = 1127 * np.log(1 + self.f0_max / 700)
+            self.sr = 16000
+            self.window = 160
+            self.n_cpu = n_cpu
+            if index_rate != 0:
+                self.index = faiss.read_index(index_path)
+                self.big_npy = self.index.reconstruct_n(0, self.index.ntotal)
+                print("index search enabled")
+            self.index_rate = index_rate
+            models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
+                ["hubert_base.pt"],
+                suffix="",
+            )
+            hubert_model = models[0]
+            hubert_model = hubert_model.to(config.device)
+            if config.is_half:
+                hubert_model = hubert_model.half()
+            else:
+                hubert_model = hubert_model.float()
+            hubert_model.eval()
+            self.model = hubert_model
+            cpt = torch.load(pth_path, map_location="cpu")
+            self.tgt_sr = cpt["config"][-1]
+            cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
+            self.if_f0 = cpt.get("f0", 1)
+            self.version = cpt.get("version", "v1")
+            if self.version == "v1":
+                if self.if_f0 == 1:
+                    self.net_g = SynthesizerTrnMs256NSFsid(
+                        *cpt["config"], is_half=config.is_half
+                    )
+                else:
+                    self.net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"])
+            elif self.version == "v2":
+                if self.if_f0 == 1:
+                    self.net_g = SynthesizerTrnMs768NSFsid(
+                        *cpt["config"], is_half=config.is_half
+                    )
+                else:
+                    self.net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"])
+            del self.net_g.enc_q
+            print(self.net_g.load_state_dict(cpt["weight"], strict=False))
+            self.net_g.eval().to(device)
+            if config.is_half:
+                self.net_g = self.net_g.half()
+            else:
+                self.net_g = self.net_g.float()
+            self.is_half = config.is_half
+        except Exception:
+            print(traceback.format_exc())
+
+    def get_f0_post(self, f0):
+        f0_min = self.f0_min
+        f0_max = self.f0_max
+        f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+        f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+        f0bak = f0.copy()
+        f0_mel = 1127 * np.log(1 + f0 / 700)
+        f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
+            f0_mel_max - f0_mel_min
+        ) + 1
+        f0_mel[f0_mel <= 1] = 1
+        f0_mel[f0_mel > 255] = 255
+        # np.int was removed in NumPy 1.24; the builtin int is equivalent here
+        f0_coarse = np.rint(f0_mel).astype(int)
+        return f0_coarse, f0bak
+
+    def get_f0(self, x, f0_up_key, n_cpu, method="harvest"):
+        n_cpu = int(n_cpu)
+        if method == "crepe":
+            return self.get_f0_crepe(x, f0_up_key)
+        if method == "rmvpe":
+            return self.get_f0_rmvpe(x, f0_up_key)
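+        # "pm" below uses parselmouth's Praat autocorrelation tracker; the
+        # default "harvest" (pyworld) path at the end runs in-process when
+        # n_cpu == 1, and otherwise splits the audio across n_cpu worker
+        # processes via inp_q/opt_q and stitches the per-chunk results.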
== "pm": + p_len = x.shape[0] // 160 + f0 = ( + parselmouth.Sound(x, 16000) + .to_pitch_ac( + time_step=0.01, + voicing_threshold=0.6, + pitch_floor=50, + pitch_ceiling=1100, + ) + .selected_array["frequency"] + ) + + pad_size = (p_len - len(f0) + 1) // 2 + if pad_size > 0 or p_len - len(f0) - pad_size > 0: + print(pad_size, p_len - len(f0) - pad_size) + f0 = np.pad( + f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" + ) + + f0 *= pow(2, f0_up_key / 12) + return self.get_f0_post(f0) + if n_cpu == 1: + f0, t = pyworld.harvest( + x.astype(np.double), + fs=16000, + f0_ceil=1100, + f0_floor=50, + frame_period=10, + ) + f0 = signal.medfilt(f0, 3) + f0 *= pow(2, f0_up_key / 12) + return self.get_f0_post(f0) + f0bak = np.zeros(x.shape[0] // 160, dtype=np.float64) + length = len(x) + part_length = int(length / n_cpu / 160) * 160 + ts = ttime() + res_f0 = mm.dict() + for idx in range(n_cpu): + tail = part_length * (idx + 1) + 320 + if idx == 0: + self.inp_q.put((idx, x[:tail], res_f0, n_cpu, ts)) + else: + self.inp_q.put( + (idx, x[part_length * idx - 320 : tail], res_f0, n_cpu, ts) + ) + while 1: + res_ts = self.opt_q.get() + if res_ts == ts: + break + f0s = [i[1] for i in sorted(res_f0.items(), key=lambda x: x[0])] + for idx, f0 in enumerate(f0s): + if idx == 0: + f0 = f0[:-3] + elif idx != n_cpu - 1: + f0 = f0[2:-3] + else: + f0 = f0[2:-1] + f0bak[ + part_length * idx // 160 : part_length * idx // 160 + f0.shape[0] + ] = f0 + f0bak = signal.medfilt(f0bak, 3) + f0bak *= pow(2, f0_up_key / 12) + return self.get_f0_post(f0bak) + + def get_f0_crepe(self, x, f0_up_key): + audio = torch.tensor(np.copy(x))[None].float() + f0, pd = torchcrepe.predict( + audio, + self.sr, + 160, + self.f0_min, + self.f0_max, + "full", + batch_size=512, + device=self.device, + return_periodicity=True, + ) + pd = torchcrepe.filter.median(pd, 3) + f0 = torchcrepe.filter.mean(f0, 3) + f0[pd < 0.1] = 0 + f0 = f0[0].cpu().numpy() + f0 *= pow(2, f0_up_key / 12) + return self.get_f0_post(f0) + + def get_f0_rmvpe(self, x, f0_up_key): + if hasattr(self, "model_rmvpe") == False: + from lib.rmvpe import RMVPE + + print("loading rmvpe model") + self.model_rmvpe = RMVPE( + "rmvpe.pt", is_half=self.is_half, device=self.device + ) + # self.model_rmvpe = RMVPE("aug2_58000_half.pt", is_half=self.is_half, device=self.device) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + f0 *= pow(2, f0_up_key / 12) + return self.get_f0_post(f0) + + def infer( + self, + feats: torch.Tensor, + indata: np.ndarray, + rate1, + rate2, + cache_pitch, + cache_pitchf, + f0method, + ) -> np.ndarray: + feats = feats.view(1, -1) + if config.is_half: + feats = feats.half() + else: + feats = feats.float() + feats = feats.to(self.device) + t1 = ttime() + with torch.no_grad(): + padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False) + inputs = { + "source": feats, + "padding_mask": padding_mask, + "output_layer": 9 if self.version == "v1" else 12, + } + logits = self.model.extract_features(**inputs) + feats = ( + self.model.final_proj(logits[0]) if self.version == "v1" else logits[0] + ) + t2 = ttime() + try: + if hasattr(self, "index") and self.index_rate != 0: + leng_replace_head = int(rate1 * feats[0].shape[0]) + npy = feats[0][-leng_replace_head:].cpu().numpy().astype("float32") + score, ix = self.index.search(npy, k=8) + weight = np.square(1 / score) + weight /= weight.sum(axis=1, keepdims=True) + npy = np.sum(self.big_npy[ix] * np.expand_dims(weight, axis=2), axis=1) + if config.is_half: + npy = npy.astype("float16") + 
+                feats[0][-leng_replace_head:] = (
+                    torch.from_numpy(npy).unsqueeze(0).to(self.device) * self.index_rate
+                    + (1 - self.index_rate) * feats[0][-leng_replace_head:]
+                )
+            else:
+                print("index search FAIL or disabled")
+        except Exception:
+            traceback.print_exc()
+            print("index search FAIL")
+        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
+        t3 = ttime()
+        if self.if_f0 == 1:
+            pitch, pitchf = self.get_f0(indata, self.f0_up_key, self.n_cpu, f0method)
+            cache_pitch[:] = np.append(cache_pitch[pitch[:-1].shape[0] :], pitch[:-1])
+            cache_pitchf[:] = np.append(
+                cache_pitchf[pitchf[:-1].shape[0] :], pitchf[:-1]
+            )
+            p_len = min(feats.shape[1], 13000, cache_pitch.shape[0])
+        else:
+            cache_pitch, cache_pitchf = None, None
+            p_len = min(feats.shape[1], 13000)
+        t4 = ttime()
+        feats = feats[:, :p_len, :]
+        if self.if_f0 == 1:
+            cache_pitch = cache_pitch[:p_len]
+            cache_pitchf = cache_pitchf[:p_len]
+            cache_pitch = torch.LongTensor(cache_pitch).unsqueeze(0).to(self.device)
+            cache_pitchf = torch.FloatTensor(cache_pitchf).unsqueeze(0).to(self.device)
+        p_len = torch.LongTensor([p_len]).to(self.device)
+        ii = 0  # sid
+        sid = torch.LongTensor([ii]).to(self.device)
+        with torch.no_grad():
+            if self.if_f0 == 1:
+                infered_audio = (
+                    self.net_g.infer(
+                        feats, p_len, cache_pitch, cache_pitchf, sid, rate2
+                    )[0][0, 0]
+                    .data.cpu()
+                    .float()
+                )
+            else:
+                infered_audio = (
+                    self.net_g.infer(feats, p_len, sid, rate2)[0][0, 0]
+                    .data.cpu()
+                    .float()
+                )
+        t5 = ttime()
+        print("time->fea-index-f0-model:", t2 - t1, t3 - t2, t4 - t3, t5 - t4)
+        return infered_audio
diff --git a/AIMeiSheng/similar_sumary.csv b/AIMeiSheng/similar_sumary.csv
new file mode 100644
index 0000000..63fd6fb
--- /dev/null
+++ b/AIMeiSheng/similar_sumary.csv
@@ -0,0 +1,22 @@
+,NewColumn,A_Thousand_Miles-Vanessa_Carlton,drivers_license,lost_stars,someone_you_loved,Bad_Guy,good_4_u,shallow,Diamonds-Rihanna,Levitating,shape_of_you,all_of_me,creep,just_the_way_you_are,love_your_self,photograph,see_you_again,wky,as_it_was,gdg,nwdsl,prefect
+baiyunwen321.wav,,0.6287181973457336,0.7062621712684631,0.6039978265762329,0.6344354152679443,0.6632968783378601,0.7249758243560791,0.6278610825538635,0.7113260626792908,0.5800484418869019,0.6956256031990051,,,,,,,,,,,
+lidan.wav,,0.750545859336853,0.7507007718086243,0.7392199635505676,0.7216790914535522,0.8131476640701294,0.7889232635498047,0.7860447764396667,0.7414127588272095,0.7279138565063477,0.8469708561897278,,,,,,,,,,,
+taylor100.wav,,0.725504457950592,0.6654554009437561,0.6618318557739258,0.6416096687316895,0.7136266827583313,0.70440274477005,0.6139499545097351,0.585353672504425,0.6161661744117737,0.6240761876106262,,,,,,,,,,,
+changying.wav,,0.7035785913467407,0.7885100841522217,0.6470870971679688,0.6619430780410767,0.7234369516372681,0.7221003174781799,0.6835318207740784,0.6468107104301453,0.672366201877594,0.6719210147857666,,,,,,,,,,,
+rainy_day321_01_40k.wav,,0.6032866835594177,0.6037033200263977,0.6311532855033875,0.6735936999320984,0.6855268478393555,0.6475541591644287,0.6208397746086121,0.5552607178688049,0.5320074558258057,0.6615167260169983,,,,,,,,,,,
+tonejun_female321.wav,,0.7127857804298401,0.7921009659767151,0.8134440183639526,0.7411824464797974,0.7995394468307495,0.7898921966552734,0.7511440515518188,0.7603922486305237,0.731090784072876,0.7955360412597656,,,,,,,,,,,
+coloris.wav,,0.7003938555717468,0.7485960125923157,0.6121039390563965,0.6894606947898865,0.7518377304077148,0.7238749861717224,0.6157845854759216,0.6795948147773743,0.6303950548171997,0.7212966680526733,,,,,,,,,,, +syz_yujian_voce_vocals_9.wav,,0.6574335694313049,0.663131594657898,0.7389997839927673,0.783748209476471,0.7509288191795349,0.7728791236877441,0.7317531108856201,0.7385985255241394,0.7161365151405334,0.8051944971084595,,,,,,,,,,, +xuwei321.wav,,0.6294721961021423,0.5817475914955139,0.6894537210464478,0.7202823758125305,0.7408128976821899,0.7338061928749084,0.6748303174972534,0.6463344693183899,0.6395360231399536,0.7515098452568054,,,,,,,,,,, +fang.wav,,,,0.6857283711433411,,,,,,,0.7221803069114685,0.6531956195831299,0.5554507374763489,0.5103073716163635,0.5862836241722107,0.497910737991333,0.6531967520713806,0.6583329439163208,0.6463508605957031,0.5251644849777222,0.6764618754386902,0.7019667029380798 +jianjun321.wav,,,,0.7266292572021484,,,,,,,0.7348967790603638,0.7025390863418579,0.6688407063484192,0.5668326616287231,0.6789692044258118,0.6789674162864685,0.7046930193901062,0.7464907765388489,0.6656272411346436,0.5927402973175049,0.6991226673126221,0.7376254200935364 +max.wav,,,,0.552604615688324,,,,,,,0.610629141330719,0.5048797130584717,0.5520310997962952,0.6053433418273926,0.6022115349769592,0.5752971768379211,0.5517048239707947,0.4789518117904663,0.6534878015518188,0.4665497839450836,0.41164007782936096,0.48655563592910767 +xiafan_321.wav,,,,0.7406137585639954,,,,,,,0.7579551935195923,0.7288272976875305,0.7165343761444092,0.6354859471321106,0.7016772627830505,0.6827289462089539,0.7336599826812744,0.7601665258407593,0.7184494733810425,0.6444903612136841,0.7247397899627686,0.7377326488494873 +zihao.wav,,,,0.7743924260139465,,,,,,,0.7776678204536438,0.7114849090576172,0.653855562210083,0.6443037986755371,0.7154525518417358,0.5600646138191223,0.7940617203712463,0.7718338370323181,0.6842178106307983,0.6950864791870117,0.815219521522522,0.7677204012870789 +guanshirui.wav,,,,0.7601445317268372,,,,,,,0.6829525232315063,0.7723525762557983,0.7119938135147095,0.6860454082489014,0.6586390137672424,0.7515836358070374,0.7302749156951904,0.7752156853675842,0.6369043588638306,0.522952139377594,0.7576860785484314,0.7319825291633606 +jl.wav,,,,0.7179255485534668,,,,,,,0.7980654835700989,0.701452910900116,0.6444212794303894,0.6640974283218384,0.6692264676094055,0.7257829904556274,0.7835369110107422,0.7797330021858215,0.6613600850105286,0.5675634145736694,0.7353779673576355,0.7784366607666016 +qiankun.wav,,,,0.6595435738563538,,,,,,,0.6842532157897949,0.6477659344673157,0.5565845370292664,0.6242323517799377,0.63969486951828,0.6876257658004761,0.7225720286369324,0.6682022213935852,0.7047301530838013,0.5753499865531921,0.6755936741828918,0.6782413721084595 +guodegang.wav,,,,0.5176332592964172,,,,,,,0.5353715419769287,0.4679104685783386,0.520545482635498,0.3481065332889557,0.37042441964149475,0.57423335313797,0.47399044036865234,0.4410187304019928,0.4851469397544861,0.7387096285820007,0.54266357421875,0.5491830110549927 +lixianfeng.wav,,,,0.7615509033203125,,,,,,,0.7659692764282227,0.7679950594902039,0.6310744285583496,0.6753622889518738,0.5955511927604675,0.7234187126159668,0.817263126373291,0.7882950901985168,0.635635495185852,0.6790913939476013,0.7986761331558228,0.7745717167854309 
+wenchen.wav,,,,0.7641370892524719,,,,,,,0.8261916041374207,0.7273371815681458,0.7110679149627686,0.7485860586166382,0.7353999614715576,0.617393434047699,0.8123143315315247,0.7880005836486816,0.7438898682594299,0.6490622162818909,0.7943901419639587,0.7849905490875244 +yibo.wav,,,,0.3306368291378021,,,,,,,0.37031981348991394,0.2708905339241028,0.6391834020614624,0.42891111969947815,0.38284555077552795,0.5897773504257202,0.42005646228790283,0.4957413375377655,0.3493381142616272,0.2771102488040924,0.33814117312431335,0.3904726803302765 diff --git a/AIMeiSheng/tools/calc_rvc_model_similarity.py b/AIMeiSheng/tools/calc_rvc_model_similarity.py new file mode 100644 index 0000000..edc1cf8 --- /dev/null +++ b/AIMeiSheng/tools/calc_rvc_model_similarity.py @@ -0,0 +1,92 @@ +# This code references https://huggingface.co/JosephusCheung/ASimilarityCalculatior/blob/main/qwerty.py +# Fill in the path of the model to be queried and the root directory of the reference models, and this script will return the similarity between the model to be queried and all reference models. +import sys, os +import torch +import torch.nn as nn +import torch.nn.functional as F + + +def cal_cross_attn(to_q, to_k, to_v, rand_input): + hidden_dim, embed_dim = to_q.shape + attn_to_q = nn.Linear(hidden_dim, embed_dim, bias=False) + attn_to_k = nn.Linear(hidden_dim, embed_dim, bias=False) + attn_to_v = nn.Linear(hidden_dim, embed_dim, bias=False) + attn_to_q.load_state_dict({"weight": to_q}) + attn_to_k.load_state_dict({"weight": to_k}) + attn_to_v.load_state_dict({"weight": to_v}) + + return torch.einsum( + "ik, jk -> ik", + F.softmax( + torch.einsum("ij, kj -> ik", attn_to_q(rand_input), attn_to_k(rand_input)), + dim=-1, + ), + attn_to_v(rand_input), + ) + + +def model_hash(filename): + try: + with open(filename, "rb") as file: + import hashlib + + m = hashlib.sha256() + + file.seek(0x100000) + m.update(file.read(0x10000)) + return m.hexdigest()[0:8] + except FileNotFoundError: + return "NOFILE" + + +def eval(model, n, input): + qk = f"enc_p.encoder.attn_layers.{n}.conv_q.weight" + uk = f"enc_p.encoder.attn_layers.{n}.conv_k.weight" + vk = f"enc_p.encoder.attn_layers.{n}.conv_v.weight" + atoq, atok, atov = model[qk][:, :, 0], model[uk][:, :, 0], model[vk][:, :, 0] + + attn = cal_cross_attn(atoq, atok, atov, input) + return attn + + +def main(path, root): + torch.manual_seed(114514) + model_a = torch.load(path, map_location="cpu")["weight"] + + print("query:\t\t%s\t%s" % (path, model_hash(path))) + + map_attn_a = {} + map_rand_input = {} + for n in range(6): + hidden_dim, embed_dim, _ = model_a[ + f"enc_p.encoder.attn_layers.{n}.conv_v.weight" + ].shape + rand_input = torch.randn([embed_dim, hidden_dim]) + + map_attn_a[n] = eval(model_a, n, rand_input) + map_rand_input[n] = rand_input + + del model_a + + for name in sorted(list(os.listdir(root))): + path = "%s/%s" % (root, name) + model_b = torch.load(path, map_location="cpu")["weight"] + + sims = [] + for n in range(6): + attn_a = map_attn_a[n] + attn_b = eval(model_b, n, map_rand_input[n]) + + sim = torch.mean(torch.cosine_similarity(attn_a, attn_b)) + sims.append(sim) + + print( + "reference:\t%s\t%s\t%s" + % (path, model_hash(path), f"{torch.mean(torch.stack(sims)) * 1e2:.2f}%") + ) + + +if __name__ == "__main__": + query_path = r"weights\mi v3.pth" + reference_root = r"weights" + main(query_path, reference_root) diff --git a/AIMeiSheng/tools/dlmodels.bat b/AIMeiSheng/tools/dlmodels.bat new file mode 100644 index 0000000..547f2ae --- /dev/null +++ 
b/AIMeiSheng/tools/dlmodels.bat @@ -0,0 +1,348 @@ +@echo off && chcp 65001 + +echo working dir is %cd% +echo checking for the required download tool aria2. +echo= +dir /a:d/b | findstr "aria2" > flag.txt +findstr "aria2" flag.txt >nul +if %errorlevel% ==0 ( + echo aria2 checked. + echo= +) else ( + echo failed. please download aria2 from the webpage! + echo unzip it and put it in this directory! + timeout /T 5 + start https://github.com/aria2/aria2/releases/tag/release-1.36.0 + echo= + goto end +) + +echo envfiles check start. +echo= + +for /f %%x in ('findstr /i /c:"aria2" "flag.txt"') do (set aria2=%%x)&goto endSch +:endSch + +set d32=f0D32k.pth +set d40=f0D40k.pth +set d48=f0D48k.pth +set g32=f0G32k.pth +set g40=f0G40k.pth +set g48=f0G48k.pth + +set d40v2=f0D40k.pth +set g40v2=f0G40k.pth + +set dld32=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth +set dld40=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth +set dld48=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth +set dlg32=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth +set dlg40=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth +set dlg48=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth + +set dld40v2=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth +set dlg40v2=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth + +set hp2_all=HP2_all_vocals.pth +set hp3_all=HP3_all_vocals.pth +set hp5_only=HP5_only_main_vocal.pth +set VR_DeEchoAggressive=VR-DeEchoAggressive.pth +set VR_DeEchoDeReverb=VR-DeEchoDeReverb.pth +set VR_DeEchoNormal=VR-DeEchoNormal.pth +set onnx_dereverb=vocals.onnx + +set dlhp2_all=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth +set dlhp3_all=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth +set dlhp5_only=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth +set dlVR_DeEchoAggressive=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth +set dlVR_DeEchoDeReverb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth +set dlVR_DeEchoNormal=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth +set dlonnx_dereverb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx + +set hb=hubert_base.pt + +set dlhb=https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt + +echo dir check start. +echo= + +if exist "%~dp0pretrained" ( + echo dir .\pretrained checked. + ) else ( + echo failed. generating dir .\pretrained. + mkdir pretrained + ) +if exist "%~dp0pretrained_v2" ( + echo dir .\pretrained_v2 checked. + ) else ( + echo failed. generating dir .\pretrained_v2. + mkdir pretrained_v2 + ) +if exist "%~dp0uvr5_weights" ( + echo dir .\uvr5_weights checked. + ) else ( + echo failed. generating dir .\uvr5_weights. + mkdir uvr5_weights + ) +if exist "%~dp0uvr5_weights\onnx_dereverb_By_FoxJoy" ( + echo dir .\uvr5_weights\onnx_dereverb_By_FoxJoy checked. + ) else ( + echo failed. generating dir .\uvr5_weights\onnx_dereverb_By_FoxJoy.
+ mkdir uvr5_weights\onnx_dereverb_By_FoxJoy + ) + +echo= +echo dir check finished. + +echo= +echo required files check start. + +echo checking D32k.pth +if exist "%~dp0pretrained\D32k.pth" ( + echo D32k.pth in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d %~dp0pretrained -o D32k.pth + if exist "%~dp0pretrained\D32k.pth" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking D40k.pth +if exist "%~dp0pretrained\D40k.pth" ( + echo D40k.pth in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d %~dp0pretrained -o D40k.pth + if exist "%~dp0pretrained\D40k.pth" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking D40k.pth +if exist "%~dp0pretrained_v2\D40k.pth" ( + echo D40k.pth in .\pretrained_v2 checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d %~dp0pretrained_v2 -o D40k.pth + if exist "%~dp0pretrained_v2\D40k.pth" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking D48k.pth +if exist "%~dp0pretrained\D48k.pth" ( + echo D48k.pth in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d %~dp0pretrained -o D48k.pth + if exist "%~dp0pretrained\D48k.pth" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking G32k.pth +if exist "%~dp0pretrained\G32k.pth" ( + echo G32k.pth in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d %~dp0pretrained -o G32k.pth + if exist "%~dp0pretrained\G32k.pth" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking G40k.pth +if exist "%~dp0pretrained\G40k.pth" ( + echo G40k.pth in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d %~dp0pretrained -o G40k.pth + if exist "%~dp0pretrained\G40k.pth" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking G40k.pth +if exist "%~dp0pretrained_v2\G40k.pth" ( + echo G40k.pth in .\pretrained_v2 checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d %~dp0pretrained_v2 -o G40k.pth + if exist "%~dp0pretrained_v2\G40k.pth" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking G48k.pth +if exist "%~dp0pretrained\G48k.pth" ( + echo G48k.pth in .\pretrained checked. 
+ echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d %~dp0pretrained -o G48k.pth + if exist "%~dp0pretrained\G48k.pth" (echo download successful.) else (echo please try again! + echo=) + ) + +echo checking %d32% +if exist "%~dp0pretrained\%d32%" ( + echo %d32% in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld32% -d %~dp0pretrained -o %d32% + if exist "%~dp0pretrained\%d32%" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking %d40% +if exist "%~dp0pretrained\%d40%" ( + echo %d40% in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld40% -d %~dp0pretrained -o %d40% + if exist "%~dp0pretrained\%d40%" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking %d40v2% +if exist "%~dp0pretrained_v2\%d40v2%" ( + echo %d40v2% in .\pretrained_v2 checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld40v2% -d %~dp0pretrained_v2 -o %d40v2% + if exist "%~dp0pretrained_v2\%d40v2%" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking %d48% +if exist "%~dp0pretrained\%d48%" ( + echo %d48% in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dld48% -d %~dp0pretrained -o %d48% + if exist "%~dp0pretrained\%d48%" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking %g32% +if exist "%~dp0pretrained\%g32%" ( + echo %g32% in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg32% -d %~dp0pretrained -o %g32% + if exist "%~dp0pretrained\%g32%" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking %g40% +if exist "%~dp0pretrained\%g40%" ( + echo %g40% in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg40% -d %~dp0pretrained -o %g40% + if exist "%~dp0pretrained\%g40%" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking %g40v2% +if exist "%~dp0pretrained_v2\%g40v2%" ( + echo %g40v2% in .\pretrained_v2 checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg40v2% -d %~dp0pretrained_v2 -o %g40v2% + if exist "%~dp0pretrained_v2\%g40v2%" (echo download successful.) else (echo please try again! + echo=) + ) +echo checking %g48% +if exist "%~dp0pretrained\%g48%" ( + echo %g48% in .\pretrained checked. + echo= + ) else ( + echo failed. starting download from huggingface. + %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlg48% -d %~dp0\pretrained -o %g48% + if exist "%~dp0pretrained\%g48%" (echo download successful.) else (echo please try again! + echo=) + ) + +echo checking %hp2_all% +if exist "%~dp0uvr5_weights\%hp2_all%" ( + echo %hp2_all% in .\uvr5_weights checked. 
+    echo=
+) else (
+    echo failed. starting download from huggingface.
+    %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp2_all% -d %~dp0uvr5_weights -o %hp2_all%
+    if exist "%~dp0uvr5_weights\%hp2_all%" (echo download successful.) else (echo please try again!
+    echo=)
+)
+echo checking %hp3_all%
+if exist "%~dp0uvr5_weights\%hp3_all%" (
+    echo %hp3_all% in .\uvr5_weights checked.
+    echo=
+) else (
+    echo failed. starting download from huggingface.
+    %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp3_all% -d %~dp0uvr5_weights -o %hp3_all%
+    if exist "%~dp0uvr5_weights\%hp3_all%" (echo download successful.) else (echo please try again!
+    echo=)
+)
+echo checking %hp5_only%
+if exist "%~dp0uvr5_weights\%hp5_only%" (
+    echo %hp5_only% in .\uvr5_weights checked.
+    echo=
+) else (
+    echo failed. starting download from huggingface.
+    %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhp5_only% -d %~dp0uvr5_weights -o %hp5_only%
+    if exist "%~dp0uvr5_weights\%hp5_only%" (echo download successful.) else (echo please try again!
+    echo=)
+)
+echo checking %VR_DeEchoAggressive%
+if exist "%~dp0uvr5_weights\%VR_DeEchoAggressive%" (
+    echo %VR_DeEchoAggressive% in .\uvr5_weights checked.
+    echo=
+) else (
+    echo failed. starting download from huggingface.
+    %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoAggressive% -d %~dp0uvr5_weights -o %VR_DeEchoAggressive%
+    if exist "%~dp0uvr5_weights\%VR_DeEchoAggressive%" (echo download successful.) else (echo please try again!
+    echo=)
+)
+echo checking %VR_DeEchoDeReverb%
+if exist "%~dp0uvr5_weights\%VR_DeEchoDeReverb%" (
+    echo %VR_DeEchoDeReverb% in .\uvr5_weights checked.
+    echo=
+) else (
+    echo failed. starting download from huggingface.
+    %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoDeReverb% -d %~dp0uvr5_weights -o %VR_DeEchoDeReverb%
+    if exist "%~dp0uvr5_weights\%VR_DeEchoDeReverb%" (echo download successful.) else (echo please try again!
+    echo=)
+)
+echo checking %VR_DeEchoNormal%
+if exist "%~dp0uvr5_weights\%VR_DeEchoNormal%" (
+    echo %VR_DeEchoNormal% in .\uvr5_weights checked.
+    echo=
+) else (
+    echo failed. starting download from huggingface.
+    %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlVR_DeEchoNormal% -d %~dp0uvr5_weights -o %VR_DeEchoNormal%
+    if exist "%~dp0uvr5_weights\%VR_DeEchoNormal%" (echo download successful.) else (echo please try again!
+    echo=)
+)
+echo checking %onnx_dereverb%
+if exist "%~dp0uvr5_weights\onnx_dereverb_By_FoxJoy\%onnx_dereverb%" (
+    echo %onnx_dereverb% in .\uvr5_weights\onnx_dereverb_By_FoxJoy checked.
+    echo=
+) else (
+    echo failed. starting download from huggingface.
+    %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlonnx_dereverb% -d %~dp0uvr5_weights\onnx_dereverb_By_FoxJoy -o %onnx_dereverb%
+    if exist "%~dp0uvr5_weights\onnx_dereverb_By_FoxJoy\%onnx_dereverb%" (echo download successful.) else (echo please try again!
+    echo=)
+)
+
+echo checking %hb%
+if exist "%~dp0%hb%" (
+    echo %hb% in the script directory checked.
+    echo=
+) else (
+    echo failed. starting download from huggingface.
+    %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %dlhb% -d %~dp0 -o %hb%
+    if exist "%~dp0%hb%" (echo download successful.) else (echo please try again!
+    echo=)
+)
+
+echo required files check finished.
+echo envfiles check complete.
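+REM (Editor's note) Every model above repeats the same check-then-download
+REM pattern. A hedged sketch of how it could be collapsed into one subroutine;
+REM the :fetch label and its argument order are illustrative, not part of this
+REM script:
+REM     call :fetch %dld32% pretrained %d32%
+REM     goto :eof
+REM :fetch
+REM     if not exist "%~dp0%2\%3" (
+REM         %~dp0%aria2%\aria2c --console-log-level=error -c -x 16 -s 16 -k 1M %1 -d %~dp0%2 -o %3
+REM     )
+REM     exit /b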
+pause +:end +del flag.txt diff --git a/AIMeiSheng/tools/dlmodels.sh b/AIMeiSheng/tools/dlmodels.sh new file mode 100644 index 0000000..0ae7f7e --- /dev/null +++ b/AIMeiSheng/tools/dlmodels.sh @@ -0,0 +1,546 @@ +#!/bin/bash + +echo working dir is $(pwd) +echo downloading requirement aria2 check. + +if command -v aria2c &> /dev/null +then + echo "aria2c command found" +else + echo failed. please install aria2 + sleep 5 + exit 1 +fi + +d32="f0D32k.pth" +d40="f0D40k.pth" +d48="f0D48k.pth" +g32="f0G32k.pth" +g40="f0G40k.pth" +g48="f0G48k.pth" + +d40v2="f0D40k.pth" +g40v2="f0G40k.pth" + +dld32="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D32k.pth" +dld40="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth" +dld48="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D48k.pth" +dlg32="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G32k.pth" +dlg40="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth" +dlg48="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G48k.pth" + +dld40v2="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0D40k.pth" +dlg40v2="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/f0G40k.pth" + +hp2_all="HP2_all_vocals.pth" +hp3_all="HP3_all_vocals.pth" +hp5_only="HP5_only_main_vocal.pth" +VR_DeEchoAggressive="VR-DeEchoAggressive.pth" +VR_DeEchoDeReverb="VR-DeEchoDeReverb.pth" +VR_DeEchoNormal="VR-DeEchoNormal.pth" +onnx_dereverb="vocals.onnx" + +dlhp2_all="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP2_all_vocals.pth" +dlhp3_all="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP3_all_vocals.pth" +dlhp5_only="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/HP5_only_main_vocal.pth" +dlVR_DeEchoAggressive="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoAggressive.pth" +dlVR_DeEchoDeReverb="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoDeReverb.pth" +dlVR_DeEchoNormal="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/VR-DeEchoNormal.pth" +dlonnx_dereverb="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/uvr5_weights/onnx_dereverb_By_FoxJoy/vocals.onnx" + +hb="hubert_base.pt" + +dlhb="https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt" + +echo dir check start. + +if [ -d "./pretrained" ]; then + echo dir ./pretrained checked. +else + echo failed. generating dir ./pretrained. + mkdir pretrained +fi + +if [ -d "./pretrained_v2" ]; then + echo dir ./pretrained_v2 checked. +else + echo failed. generating dir ./pretrained_v2. + mkdir pretrained_v2 +fi + +if [ -d "./uvr5_weights" ]; then + echo dir ./uvr5_weights checked. +else + echo failed. generating dir ./uvr5_weights. + mkdir uvr5_weights +fi + +if [ -d "./uvr5_weights/onnx_dereverb_By_FoxJoy" ]; then + echo dir ./uvr5_weights/onnx_dereverb_By_FoxJoy checked. +else + echo failed. generating dir ./uvr5_weights/onnx_dereverb_By_FoxJoy. + mkdir uvr5_weights/onnx_dereverb_By_FoxJoy +fi + +echo dir check finished. + +echo required files check start. + +echo checking D32k.pth +if [ -f "./pretrained/D32k.pth" ]; then + echo D32k.pth in ./pretrained checked. +else + echo failed. starting download from huggingface. 
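+    # Note: the aria2c call below resumes partial downloads (-c), opens up to
+    # 16 connections and splits per file (-x 16 -s 16), and uses a 1 MiB
+    # minimum split size (-k 1M); every download in this script reuses the
+    # same flags.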
+ if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D32k.pth -d ./pretrained -o D32k.pth + if [ -f "./pretrained/D32k.pth" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking D40k.pth +if [ -f "./pretrained/D40k.pth" ]; then + echo D40k.pth in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D40k.pth -d ./pretrained -o D40k.pth + if [ -f "./pretrained/D40k.pth" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking D40k.pth +if [ -f "./pretrained_v2/D40k.pth" ]; then + echo D40k.pth in ./pretrained_v2 checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/D40k.pth -d ./pretrained_v2 -o D40k.pth + if [ -f "./pretrained_v2/D40k.pth" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking D48k.pth +if [ -f "./pretrained/D48k.pth" ]; then + echo D48k.pth in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/D48k.pth -d ./pretrained -o D48k.pth + if [ -f "./pretrained/D48k.pth" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking G32k.pth +if [ -f "./pretrained/G32k.pth" ]; then + echo G32k.pth in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G32k.pth -d ./pretrained -o G32k.pth + if [ -f "./pretrained/G32k.pth" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking G40k.pth +if [ -f "./pretrained/G40k.pth" ]; then + echo G40k.pth in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G40k.pth -d ./pretrained -o G40k.pth + if [ -f "./pretrained/G40k.pth" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking G40k.pth +if [ -f "./pretrained_v2/G40k.pth" ]; then + echo G40k.pth in ./pretrained_v2 checked. +else + echo failed. starting download from huggingface. 
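+    # Note: the pretrained_v2 checks mirror the v1 ones above; RVC v2 models
+    # are trained on 768-dimensional HuBERT features (v1 uses 256), so their
+    # weights ship separately under ./pretrained_v2.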
+ if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained_v2/G40k.pth -d ./pretrained_v2 -o G40k.pth + if [ -f "./pretrained_v2/G40k.pth" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking G48k.pth +if [ -f "./pretrained/G48k.pth" ]; then + echo G48k.pth in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/G48k.pth -d ./pretrained -o G48k.pth + if [ -f "./pretrained/G48k.pth" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $d32 +if [ -f "./pretrained/$d32" ]; then + echo $d32 in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld32 -d ./pretrained -o $d32 + if [ -f "./pretrained/$d32" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $d40 +if [ -f "./pretrained/$d40" ]; then + echo $d40 in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld40 -d ./pretrained -o $d40 + if [ -f "./pretrained/$d40" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $d40v2 +if [ -f "./pretrained_v2/$d40v2" ]; then + echo $d40v2 in ./pretrained_v2 checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld40v2 -d ./pretrained_v2 -o $d40v2 + if [ -f "./pretrained_v2/$d40v2" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $d48 +if [ -f "./pretrained/$d48" ]; then + echo $d48 in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dld48 -d ./pretrained -o $d48 + if [ -f "./pretrained/$d48" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $g32 +if [ -f "./pretrained/$g32" ]; then + echo $g32 in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg32 -d ./pretrained -o $g32 + if [ -f "./pretrained/$g32" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. 
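+        # Note: in these filenames, f0G*/f0D* are the pitch(f0)-conditioned
+        # generator/discriminator pretrained weights, and 32k/40k/48k is the
+        # target sampling rate of the corresponding model.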
+ exit 1 + fi +fi + +echo checking $g40 +if [ -f "./pretrained/$g40" ]; then + echo $g40 in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg40 -d ./pretrained -o $g40 + if [ -f "./pretrained/$g40" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $g40v2 +if [ -f "./pretrained_v2/$g40v2" ]; then + echo $g40v2 in ./pretrained_v2 checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg40v2 -d ./pretrained_v2 -o $g40v2 + if [ -f "./pretrained_v2/$g40v2" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $g48 +if [ -f "./pretrained/$g48" ]; then + echo $g48 in ./pretrained checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlg48 -d ./pretrained -o $g48 + if [ -f "./pretrained/$g48" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $hp2_all +if [ -f "./uvr5_weights/$hp2_all" ]; then + echo $hp2_all in ./uvr5_weights checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhp2_all -d ./uvr5_weights -o $hp2_all + if [ -f "./uvr5_weights/$hp2_all" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $hp3_all +if [ -f "./uvr5_weights/$hp3_all" ]; then + echo $hp3_all in ./uvr5_weights checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhp3_all -d ./uvr5_weights -o $hp3_all + if [ -f "./uvr5_weights/$hp3_all" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $hp5_only +if [ -f "./uvr5_weights/$hp5_only" ]; then + echo $hp5_only in ./uvr5_weights checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhp5_only -d ./uvr5_weights -o $hp5_only + if [ -f "./uvr5_weights/$hp5_only" ]; then + echo download successful. + else + echo please try again! + exit 1 + fi + else + echo aria2c command not found. Please install aria2c and try again. + exit 1 + fi +fi + +echo checking $VR_DeEchoAggressive +if [ -f "./uvr5_weights/$VR_DeEchoAggressive" ]; then + echo $VR_DeEchoAggressive in ./uvr5_weights checked. +else + echo failed. starting download from huggingface. + if command -v aria2c &> /dev/null; then + aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlVR_DeEchoAggressive -d ./uvr5_weights -o $VR_DeEchoAggressive + if [ -f "./uvr5_weights/$VR_DeEchoAggressive" ]; then + echo download successful. 
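+            # Note: the HP*/VR-DeEcho* files are UVR5 vocal-separation
+            # weights: HP2/HP3 keep all vocals, HP5 keeps only the lead vocal,
+            # and the DeEcho variants remove echo (Normal/Aggressive) or
+            # echo plus reverb (DeReverb).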
+    else
+      echo please try again!
+      exit 1
+    fi
+  else
+    echo aria2c command not found. Please install aria2c and try again.
+    exit 1
+  fi
+fi
+
+echo checking $VR_DeEchoDeReverb
+if [ -f "./uvr5_weights/$VR_DeEchoDeReverb" ]; then
+  echo $VR_DeEchoDeReverb in ./uvr5_weights checked.
+else
+  echo failed. starting download from huggingface.
+  if command -v aria2c &> /dev/null; then
+    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlVR_DeEchoDeReverb -d ./uvr5_weights -o $VR_DeEchoDeReverb
+    if [ -f "./uvr5_weights/$VR_DeEchoDeReverb" ]; then
+      echo download successful.
+    else
+      echo please try again!
+      exit 1
+    fi
+  else
+    echo aria2c command not found. Please install aria2c and try again.
+    exit 1
+  fi
+fi
+
+echo checking $VR_DeEchoNormal
+if [ -f "./uvr5_weights/$VR_DeEchoNormal" ]; then
+  echo $VR_DeEchoNormal in ./uvr5_weights checked.
+else
+  echo failed. starting download from huggingface.
+  if command -v aria2c &> /dev/null; then
+    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlVR_DeEchoNormal -d ./uvr5_weights -o $VR_DeEchoNormal
+    if [ -f "./uvr5_weights/$VR_DeEchoNormal" ]; then
+      echo download successful.
+    else
+      echo please try again!
+      exit 1
+    fi
+  else
+    echo aria2c command not found. Please install aria2c and try again.
+    exit 1
+  fi
+fi
+
+echo checking $onnx_dereverb
+if [ -f "./uvr5_weights/onnx_dereverb_By_FoxJoy/$onnx_dereverb" ]; then
+  echo $onnx_dereverb in ./uvr5_weights/onnx_dereverb_By_FoxJoy checked.
+else
+  echo failed. starting download from huggingface.
+  if command -v aria2c &> /dev/null; then
+    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlonnx_dereverb -d ./uvr5_weights/onnx_dereverb_By_FoxJoy -o $onnx_dereverb
+    if [ -f "./uvr5_weights/onnx_dereverb_By_FoxJoy/$onnx_dereverb" ]; then
+      echo download successful.
+    else
+      echo please try again!
+      exit 1
+    fi
+  else
+    echo aria2c command not found. Please install aria2c and try again.
+    exit 1
+  fi
+fi
+
+echo checking $hb
+if [ -f "./$hb" ]; then
+  echo $hb in ./ checked.
+else
+  echo failed. starting download from huggingface.
+  if command -v aria2c &> /dev/null; then
+    aria2c --console-log-level=error -c -x 16 -s 16 -k 1M $dlhb -d ./ -o $hb
+    if [ -f "./$hb" ]; then
+      echo download successful.
+    else
+      echo please try again!
+      exit 1
+    fi
+  else
+    echo aria2c command not found. Please install aria2c and try again.
+    exit 1
+  fi
+fi
+
+echo required files check finished.
+read -p "Press any key to continue..." -n1 -s
diff --git a/AIMeiSheng/tools/export_onnx.py b/AIMeiSheng/tools/export_onnx.py
new file mode 100644
index 0000000..2d334a6
--- /dev/null
+++ b/AIMeiSheng/tools/export_onnx.py
@@ -0,0 +1,54 @@
+from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM
+import torch
+
+if __name__ == "__main__":
+    MoeVS = True  # whether the model is for MoeVoiceStudio (formerly MoeSS)
+
+    ModelPath = "Shiroha/shiroha.pth"  # model path
+    ExportedPath = "model.onnx"  # output path
+    hidden_channels = 256  # hidden_channels, in preparation for 768-dim Vec
+    cpt = torch.load(ModelPath, map_location="cpu")
+    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
+    print(*cpt["config"])
+
+    test_phone = torch.rand(1, 200, hidden_channels)  # hidden units
+    test_phone_lengths = torch.tensor([200]).long()  # hidden unit length (does not seem to be used)
+    test_pitch = torch.randint(size=(1, 200), low=5, high=255)  # fundamental frequency (Hz)
+    test_pitchf = torch.rand(1, 200)  # NSF fundamental frequency
+    test_ds = torch.LongTensor([0])  # speaker ID
+    test_rnd = torch.rand(1, 192, 200)  # noise (adds a random factor)
+
+    device = "cpu"  # device used for export (does not affect how the model is used)
+
+    net_g = SynthesizerTrnMsNSFsidM(
+        *cpt["config"], is_half=False
+    )  # fp32 export (supporting fp16 in C++ would require manually re-laying-out memory, so fp16 is skipped for now)
+    net_g.load_state_dict(cpt["weight"], strict=False)
+    input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"]
+    output_names = [
+        "audio",
+    ]
+    # net_g.construct_spkmixmap(n_speaker)  # export with a multi-speaker mix track
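+    # (Note) dynamic_axes below marks the time axis of phone/pitch/pitchf
+    # (dim 1) and of rnd (dim 2) as variable-length, so the exported ONNX
+    # graph accepts inputs of arbitrary duration instead of a fixed 200 frames.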
+    torch.onnx.export(
+        net_g,
+        (
+            test_phone.to(device),
+            test_phone_lengths.to(device),
+            test_pitch.to(device),
+            test_pitchf.to(device),
+            test_ds.to(device),
+            test_rnd.to(device),
+        ),
+        ExportedPath,
+        dynamic_axes={
+            "phone": [1],
+            "pitch": [1],
+            "pitchf": [1],
+            "rnd": [2],
+        },
+        do_constant_folding=False,
+        opset_version=16,
+        verbose=False,
+        input_names=input_names,
+        output_names=output_names,
+    )
diff --git a/AIMeiSheng/tools/infer/infer-pm-index256.py b/AIMeiSheng/tools/infer/infer-pm-index256.py
new file mode 100644
index 0000000..d182e20
--- /dev/null
+++ b/AIMeiSheng/tools/infer/infer-pm-index256.py
@@ -0,0 +1,199 @@
+"""
+Retrieval over source features.
+"""
+import torch, pdb, os, parselmouth
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+import numpy as np
+import soundfile as sf
+
+# from models import SynthesizerTrn256#hifigan_nonsf
+# from lib.infer_pack.models import SynthesizerTrn256NSF as SynthesizerTrn256#hifigan_nsf
+from lib.infer_pack.models import (
+    SynthesizerTrnMs256NSFsid as SynthesizerTrn256,
+)  # hifigan_nsf
+
+# from lib.infer_pack.models import SynthesizerTrnMs256NSFsid_sim as SynthesizerTrn256#hifigan_nsf
+# from models import SynthesizerTrn256NSFsim as SynthesizerTrn256#hifigan_nsf
+# from models import SynthesizerTrn256NSFsimFlow as SynthesizerTrn256#hifigan_nsf
+
+
+from scipy.io import wavfile
+from fairseq import checkpoint_utils
+
+# import pyworld
+import librosa
+import torch.nn.functional as F
+import scipy.signal as signal
+
+# import torchcrepe
+from time import time as ttime
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model_path = r"E:\codes\py39\vits_vc_gpu_train\hubert_base.pt"  #
+print("load model(s) from {}".format(model_path))
+models, saved_cfg, task = checkpoint_utils.load_model_ensemble_and_task(
+    [model_path],
+    suffix="",
+)
+model = models[0]
+model = model.to(device)
+model = model.half()
+model.eval()
+
+# net_g = SynthesizerTrn256(1025,32,192,192,768,2,6,3,0.1,"1", [3,7,11],[[1,3,5], [1,3,5], [1,3,5]],[10,10,2,2],512,[16,16,4,4],183,256,is_half=True)#hifigan#512#256
+# net_g = SynthesizerTrn256(1025,32,192,192,768,2,6,3,0.1,"1", [3,7,11],[[1,3,5], [1,3,5], [1,3,5]],[10,10,2,2],512,[16,16,4,4],109,256,is_half=True)#hifigan#512#256
+net_g = SynthesizerTrn256(
+    1025,
+    32,
+    192,
+    192,
+    768,
+    2,
+    6,
+    3,
+    0,
+    "1",
+    [3, 7, 11],
+    [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+    [10, 10, 2, 2],
+    512,
+    [16, 16, 4, 4],
+    183,
+    256,
+    is_half=True,
+)  # hifigan#512#256#no_dropout
+# net_g = SynthesizerTrn256(1025,32,192,192,768,2,3,3,0.1,"1", [3,7,11],[[1,3,5], [1,3,5], [1,3,5]],[10,10,2,2],512,[16,16,4,4],0)#ts3
+# net_g = SynthesizerTrn256(1025,32,192,192,768,2,6,3,0.1,"1", [3,7,11],[[1,3,5], [1,3,5], [1,3,5]],[10,10,2],512,[16,16,4],0)#hifigan-ps-sr
+#
+# net_g = SynthesizerTrn(1025, 32, 192, 192, 768, 2, 6, 3, 0.1, "1", [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [5,5], 512, [15,15], 0)#ms
+# net_g = SynthesizerTrn(1025, 32, 192, 192, 768, 2, 6, 3, 0.1, "1", [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10,10], 512, [16,16], 0)#idwt2
+
+# weights=torch.load("infer/ft-mi_1k-noD.pt")
+# weights=torch.load("infer/ft-mi-freeze-vocoder-flow-enc_q_1k.pt")
+# weights=torch.load("infer/ft-mi-freeze-vocoder_true_1k.pt")
+# weights=torch.load("infer/ft-mi-sim1k.pt")
+weights = torch.load("infer/ft-mi-no_opt-no_dropout.pt")
+print(net_g.load_state_dict(weights, strict=True))
+
+net_g.eval().to(device)
+net_g.half()
+
+
+def get_f0(x, p_len, f0_up_key=0):
+    time_step = 160 / 16000 * 1000
+    f0_min = 50
+    f0_max = 1100
+    f0_mel_min = 1127 * np.log(1 + f0_min / 700)
+    f0_mel_max = 1127 * np.log(1 + f0_max / 700)
+
+    f0 = (
+        parselmouth.Sound(x, 16000)
+        .to_pitch_ac(
+            time_step=time_step / 1000,
+            voicing_threshold=0.6,
+            pitch_floor=f0_min,
+            pitch_ceiling=f0_max,
+        )
+        .selected_array["frequency"]
+    )
+
+    pad_size = (p_len - len(f0) + 1) // 2
+    if pad_size > 0 or p_len - len(f0) - pad_size > 0:
+        f0 = np.pad(f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant")
+    f0 *= pow(2, f0_up_key / 12)
+    f0bak = f0.copy()
+
+    f0_mel = 1127 * np.log(1 + f0 / 700)
+    f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
+        f0_mel_max - f0_mel_min
+    ) + 1
+    f0_mel[f0_mel <= 1] = 1
+    f0_mel[f0_mel > 255] = 255
+    # f0_mel[f0_mel > 188] = 188
+    f0_coarse = np.rint(f0_mel).astype(int)
+    return f0_coarse, f0bak
+
+
+import faiss
+
+index = faiss.read_index("infer/added_IVF512_Flat_mi_baseline_src_feat.index")
+big_npy = np.load("infer/big_src_feature_mi.npy")
+ta0 = ta1 = ta2 = 0
+for idx, name in enumerate(
+    [
+        "冬之花clip1.wav",
+    ]
+):  ##
+    wav_path = "todo-songs/%s" % name  #
+    f0_up_key = -2  #
+    audio, sampling_rate = sf.read(wav_path)
+    if len(audio.shape) > 1:
+        audio = librosa.to_mono(audio.transpose(1, 0))
+    if sampling_rate != 16000:
+        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
+
+    feats = torch.from_numpy(audio).float()
+    if feats.dim() == 2:  # double channels
+        feats = feats.mean(-1)
+    assert feats.dim() == 1, feats.dim()
+    feats = feats.view(1, -1)
+    padding_mask = torch.BoolTensor(feats.shape).fill_(False)
+    inputs = {
+        "source": feats.half().to(device),
+        "padding_mask": padding_mask.to(device),
+        "output_layer": 9,  # layer 9
+    }
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+    t0 = ttime()
+    with torch.no_grad():
+        logits = model.extract_features(**inputs)
+        feats = model.final_proj(logits[0])
+
+    #### index-based retrieval
+    npy = feats[0].cpu().numpy().astype("float32")
+    D, I = index.search(npy, 1)
+    feats = (
+        torch.from_numpy(big_npy[I.squeeze()].astype("float16")).unsqueeze(0).to(device)
+    )
+
+    feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
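+    # (Note) The block above is the retrieval step: for each HuBERT frame,
+    # index.search() finds its nearest neighbour among the training-set
+    # features and substitutes it, pulling the timbre toward the target voice;
+    # F.interpolate then doubles the feature rate to match the synthesizer's
+    # frame rate.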
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+    t1 = ttime()
+    # p_len = min(feats.shape[1],10000,pitch.shape[0])  # too long -> GPU OOM
+    p_len = min(feats.shape[1], 10000)  #
+    pitch, pitchf = get_f0(audio, p_len, f0_up_key)
+    p_len = min(feats.shape[1], 10000, pitch.shape[0])  # too long -> GPU OOM
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+    t2 = ttime()
+    feats = feats[:, :p_len, :]
+    pitch = pitch[:p_len]
+    pitchf = pitchf[:p_len]
+    p_len = torch.LongTensor([p_len]).to(device)
+    pitch = torch.LongTensor(pitch).unsqueeze(0).to(device)
+    sid = torch.LongTensor([0]).to(device)
+    pitchf = torch.FloatTensor(pitchf).unsqueeze(0).to(device)
+    with torch.no_grad():
+        audio = (
+            net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]
+            .data.cpu()
+            .float()
+            .numpy()
+        )  # nsf
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+    t3 = ttime()
+    ta0 += t1 - t0
+    ta1 += t2 - t1
+    ta2 += t3 - t2
+    # wavfile.write("ft-mi_1k-index256-noD-%s.wav"%name, 40000, audio)##
+    # wavfile.write("ft-mi-freeze-vocoder-flow-enc_q_1k-%s.wav"%name, 40000, audio)##
+    # wavfile.write("ft-mi-sim1k-%s.wav"%name, 40000, audio)##
+    wavfile.write("ft-mi-no_opt-no_dropout-%s.wav" % name, 40000, audio)  ##
+
+
+print(ta0, ta1, ta2)  #
diff --git a/AIMeiSheng/tools/infer/train-index-v2.py b/AIMeiSheng/tools/infer/train-index-v2.py
new file mode 100644
index 0000000..77dfa0b
--- /dev/null
+++ b/AIMeiSheng/tools/infer/train-index-v2.py
@@ -0,0 +1,72 @@
+"""
+Layout: cid is stored directly as the index position; the aid values do not
+fit, so they are looked up through a dict -- there are only ~50k of them anyway.
+"""
+import faiss, numpy as np, os
+from sklearn.cluster import MiniBatchKMeans
+import traceback
+from multiprocessing import cpu_count
+
+# ########### if starting from raw features, save them first
+n_cpu = 0
+if n_cpu == 0:
+    n_cpu = cpu_count()
+inp_root = r"./logs/anz/3_feature768"
+npys = []
+listdir_res = list(os.listdir(inp_root))
+for name in sorted(listdir_res):
+    phone = np.load("%s/%s" % (inp_root, name))
+    npys.append(phone)
+big_npy = np.concatenate(npys, 0)
+big_npy_idx = np.arange(big_npy.shape[0])
+np.random.shuffle(big_npy_idx)
+big_npy = big_npy[big_npy_idx]
+print(big_npy.shape)  # (6196072, 192)#fp32#4.43G
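+# (Note) Above ~200k vectors, the raw features are first compressed to 10k
+# k-means centroids so the IVF index stays small; MiniBatchKMeans' batch size
+# is scaled with the CPU count to keep the clustering tractable.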
+if big_npy.shape[0] > 2e5:
+    # if(1):
+    info = "Trying kmeans on shape %s to 10k centers." % big_npy.shape[0]
+    print(info)
+    try:
+        big_npy = (
+            MiniBatchKMeans(
+                n_clusters=10000,
+                verbose=True,
+                batch_size=256 * n_cpu,
+                compute_labels=False,
+                init="random",
+            )
+            .fit(big_npy)
+            .cluster_centers_
+        )
+    except:
+        info = traceback.format_exc()
+        print(info)
+
+np.save("tools/infer/big_src_feature_mi.npy", big_npy)
+
+##################train+add
+# big_npy=np.load("/bili-coeus/jupyter/jupyterhub-liujing04/vits_ch/inference_f0/big_src_feature_mi.npy")
+n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+index = faiss.index_factory(768, "IVF%s,Flat" % n_ivf)  # mi
+print("training")
+index_ivf = faiss.extract_index_ivf(index)  #
+index_ivf.nprobe = 1
+index.train(big_npy)
+faiss.write_index(
+    index, "tools/infer/trained_IVF%s_Flat_baseline_src_feat_v2.index" % (n_ivf)
+)
+print("adding")
+batch_size_add = 8192
+for i in range(0, big_npy.shape[0], batch_size_add):
+    index.add(big_npy[i : i + batch_size_add])
+faiss.write_index(
+    index, "tools/infer/added_IVF%s_Flat_mi_baseline_src_feat.index" % (n_ivf)
+)
+"""
+Sizes (all FP32):
+big_src_feature 2.95G
+    (3098036, 256)
+big_emb 4.43G
+    (6196072, 192)
+big_emb is twice as large because computing the features requires repeating
+them and then adding pitch.
+"""
diff --git a/AIMeiSheng/tools/infer/train-index.py b/AIMeiSheng/tools/infer/train-index.py
new file mode 100644
index 0000000..c49f24b
--- /dev/null
+++ b/AIMeiSheng/tools/infer/train-index.py
@@ -0,0 +1,36 @@
+"""
+Layout: cid is stored directly as the index position; the aid values do not
+fit, so they are looked up through a dict -- there are only ~50k of them anyway.
+"""
+import faiss, numpy as np, os
+
+# ########### if starting from raw features, save them first
+inp_root = r"E:\codes\py39\dataset\mi\2-co256"
+npys = []
+for name in sorted(list(os.listdir(inp_root))):
+    phone = np.load("%s/%s" % (inp_root, name))
+    npys.append(phone)
+big_npy = np.concatenate(npys, 0)
+print(big_npy.shape)  # (6196072, 192)#fp32#4.43G
+np.save("infer/big_src_feature_mi.npy", big_npy)
+
+##################train+add
+# big_npy=np.load("/bili-coeus/jupyter/jupyterhub-liujing04/vits_ch/inference_f0/big_src_feature_mi.npy")
+print(big_npy.shape)
+index = faiss.index_factory(256, "IVF512,Flat")  # mi
+print("training")
+index_ivf = faiss.extract_index_ivf(index)  #
+index_ivf.nprobe = 9
+index.train(big_npy)
+faiss.write_index(index, "infer/trained_IVF512_Flat_mi_baseline_src_feat.index")
+print("adding")
+index.add(big_npy)
+faiss.write_index(index, "infer/added_IVF512_Flat_mi_baseline_src_feat.index")
+"""
+Sizes (all FP32):
+big_src_feature 2.95G
+    (3098036, 256)
+big_emb 4.43G
+    (6196072, 192)
+big_emb is twice as large because computing the features requires repeating
+them and then adding pitch.
+"""
diff --git a/AIMeiSheng/tools/infer/trans_weights.py b/AIMeiSheng/tools/infer/trans_weights.py
new file mode 100644
index 0000000..e0f7f0c
--- /dev/null
+++ b/AIMeiSheng/tools/infer/trans_weights.py
@@ -0,0 +1,16 @@
+import torch, pdb
+
+# a=torch.load(r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-suc\G_1000.pth")["model"]#sim_nsf#
+# a=torch.load(r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-freeze-vocoder-flow-enc_q\G_1000.pth")["model"]#sim_nsf#
+# a=torch.load(r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-freeze-vocoder\G_1000.pth")["model"]#sim_nsf#
+# a=torch.load(r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-test\G_1000.pth")["model"]#sim_nsf#
+a = torch.load(
+    r"E:\codes\py39\vits_vc_gpu_train\logs\ft-mi-no_opt-no_dropout\G_1000.pth"
+)[
+    "model"
+]  # sim_nsf#
+for key in a.keys():
+    a[key] = a[key].half()
+# torch.save(a,"ft-mi-freeze-vocoder_true_1k.pt")#
+# torch.save(a,"ft-mi-sim1k.pt")#
+torch.save(a, "ft-mi-no_opt-no_dropout.pt")  #
diff --git a/AIMeiSheng/tools/onnx_inference_demo.py b/AIMeiSheng/tools/onnx_inference_demo.py
new file mode 100644
index 0000000..a4a9490
--- /dev/null
+++ b/AIMeiSheng/tools/onnx_inference_demo.py
@@ -0,0 +1,20 @@
+import soundfile
+from ..lib.infer_pack.onnx_inference import OnnxRVC
+
+hop_size = 512
+sampling_rate = 40000  # sampling rate
+f0_up_key = 0  # pitch shift (semitones up/down)
+sid = 0  # speaker ID
+f0_method = "dio"  # F0 extraction algorithm
+model_path = "ShirohaRVC.onnx"  # full path to the model
+vec_name = "vec-256-layer-9"  # expanded internally to f"pretrained/{vec_name}.onnx"; an ONNX vec model is required
+wav_path = "123.wav"  # input path or ByteIO instance
+out_path = "out.wav"  # output path or ByteIO instance
+
+model = OnnxRVC(
+    model_path, vec_path=vec_name, sr=sampling_rate, hop_size=hop_size, device="cuda"
+)
+
+audio = model.inference(wav_path, sid, f0_method=f0_method, f0_up_key=f0_up_key)
+
+soundfile.write(out_path, audio, sampling_rate)
diff --git a/AIMeiSheng/train_index_print.py b/AIMeiSheng/train_index_print.py
new file mode 100644
index 0000000..7c05604
--- /dev/null
+++ b/AIMeiSheng/train_index_print.py
@@ -0,0 +1,89 @@
+import os, sys, traceback
+import faiss
+import numpy as np
+from sklearn.cluster import MiniBatchKMeans
+from multiprocessing import cpu_count
+
+# Example:
+# python3 train_index_print.py mi-test v2
+
+exp_dir_arg = sys.argv[1] if len(sys.argv) > 1 else "mi-test"
+version_arg = sys.argv[2] if len(sys.argv) > 2 else "v2"
+
+now_dir = os.getcwd()
+n_cpu = cpu_count()
+
+# copied from infer-web.py#L585, with slightly reworked logging
+def train_index(exp_dir1, version19):
+    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
+    os.makedirs(exp_dir, exist_ok=True)
+    feature_dir = (
+        "%s/3_feature256" % (exp_dir)
+        if version19 == "v1"
+        else "%s/3_feature768" % (exp_dir)
+    )
+    if not os.path.exists(feature_dir):
+        return "Please run feature extraction first!"
+    listdir_res = list(os.listdir(feature_dir))
+    if len(listdir_res) == 0:
+        return "Please run feature extraction first!"
+    npys = []
+    for name in sorted(listdir_res):
+        phone = np.load("%s/%s" % (feature_dir, name))
+        npys.append(phone)
+    big_npy = np.concatenate(npys, 0)
+    big_npy_idx = np.arange(big_npy.shape[0])
+    np.random.shuffle(big_npy_idx)
+    big_npy = big_npy[big_npy_idx]
+    if big_npy.shape[0] > 2e5:
+        # if(1):
+        print("Trying kmeans on shape %s to 10k centers." % big_npy.shape[0])
+        try:
+            big_npy = (
+                MiniBatchKMeans(
+                    n_clusters=10000,
+                    verbose=True,
+                    batch_size=256 * n_cpu,
+                    compute_labels=False,
+                    init="random",
+                )
+                .fit(big_npy)
+                .cluster_centers_
+            )
+        except:
+            info = traceback.format_exc()
+            print(info)
+
+    np.save("%s/total_fea.npy" % exp_dir, big_npy)
+    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
+    print("%s,%s" % (big_npy.shape, n_ivf))
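+    # (Note) IVF list-count heuristic: about 16*sqrt(N) inverted lists, capped
+    # at N//39 because faiss recommends at least ~39 training points per
+    # centroid; the cap only matters when the feature set is small.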
+    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
+    # index = faiss.index_factory(256 if version19=="v1" else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf)
+    print("training")
+    index_ivf = faiss.extract_index_ivf(index)  #
+    index_ivf.nprobe = 1
+    index.train(big_npy)
+    faiss.write_index(
+        index,
+        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
+        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+    )
+    # faiss.write_index(index, '%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
+    print("adding")
+    batch_size_add = 8192
+    for i in range(0, big_npy.shape[0], batch_size_add):
+        index.add(big_npy[i : i + batch_size_add])
+    faiss.write_index(
+        index,
+        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
+    )
+    print(
+        "Index built successfully: added_IVF%s_Flat_nprobe_%s_%s_%s.index"
+        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
+    )
+    # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19))
+    # print("Index built successfully: added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19))
+    return "OK"
+
+print(train_index(exp_dir_arg, version_arg))
\ No newline at end of file
diff --git a/AIMeiSheng/train_nsf_sim_cache_sid_load_pretrain_embed_IN_dec_rand_energy_zx_diff_fi.py b/AIMeiSheng/train_nsf_sim_cache_sid_load_pretrain_embed_IN_dec_rand_energy_zx_diff_fi.py
new file mode 100644
index 0000000..3483237
--- /dev/null
+++ b/AIMeiSheng/train_nsf_sim_cache_sid_load_pretrain_embed_IN_dec_rand_energy_zx_diff_fi.py
@@ -0,0 +1,694 @@
+import os, sys
+ddpm_dp = ddpm_para()
+
+now_dir = os.getcwd()
+sys.path.append(os.path.join(now_dir))
+
+from lib.train import utils
+import datetime
+
+hps = utils.get_hparams()
+print("hps.gpus: ", hps.gpus.replace("-", ","))
+os.environ["CUDA_VISIBLE_DEVICES"] = hps.gpus.replace("-", ",")
+os.environ["CUDA_VISIBLE_DEVICES"] = '1'
+
+n_gpus = len(hps.gpus.split("-"))
+from random import shuffle, randint
+
+import torch
+
+torch.backends.cudnn.deterministic = False
+torch.backends.cudnn.benchmark = False
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+from torch.utils.tensorboard import SummaryWriter
+import torch.multiprocessing as mp
+import torch.distributed as dist
+from torch.nn.parallel import DistributedDataParallel as DDP
+from torch.cuda.amp import autocast, GradScaler
+from lib.infer_pack import commons
+from time import sleep
+from time import time as ttime
+#from lib.train.data_utils import (
+#from lib.train.data_utils_embed import (
+from lib.train.data_utils_embed_random075 import (
+    TextAudioLoaderMultiNSFsid,
+    TextAudioLoader,
+    TextAudioCollateMultiNSFsid,
+    TextAudioCollate,
+    DistributedBucketSampler,
+)
+
+if hps.version == "v1":
+    from lib.infer_pack.models_embed_in_dec_diff_fi import (
+        SynthesizerTrnMs256NSFsid as RVC_Model_f0,
+        SynthesizerTrnMs256NSFsid_nono as RVC_Model_nof0,
+        MultiPeriodDiscriminator,
+    )
+else:
+    from lib.infer_pack.models_embed_in_dec_diff_fi import (
+        SynthesizerTrnMs768NSFsid as RVC_Model_f0,
+        SynthesizerTrnMs768NSFsid_nono as RVC_Model_nof0,
MultiPeriodDiscriminatorV2 as MultiPeriodDiscriminator, + ) +from lib.train.losses import generator_loss, discriminator_loss, feature_loss, kl_loss +#from lib.train.mel_processing import mel_spectrogram_torch, spec_to_mel_torch +from lib.train.mel_processing_energy1_zx import mel_spectrogram_torch, spec_to_mel_torch +from lib.train.mel_processing_energy1_zx import ( spec_to_mel_torch_energy_zx as spec_to_mel_torch_energy, mel_spectrogram_torch_energy_zx as mel_spectrogram_torch_energy) +from lib.train.process_ckpt import savee + +global_step = 0 + + +class EpochRecorder: + def __init__(self): + self.last_time = ttime() + + def record(self): + now_time = ttime() + elapsed_time = now_time - self.last_time + self.last_time = now_time + elapsed_time_str = str(datetime.timedelta(seconds=elapsed_time)) + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + return f"[{current_time}] | ({elapsed_time_str})" + + +def main(): + n_gpus = torch.cuda.device_count() + print('n_gpus num:',n_gpus) + if torch.cuda.is_available() == False and torch.backends.mps.is_available() == True: + n_gpus = 1 + if n_gpus < 1: + # patch to unblock people without gpus. there is probably a better way. + print("NO GPU DETECTED: falling back to CPU - this may take a while") + n_gpus = 1 + os.environ["MASTER_ADDR"] = "localhost" + os.environ["MASTER_PORT"] = str(randint(20000, 55555)) + children = [] + for i in range(n_gpus): + subproc = mp.Process( + target=run, + args=( + i, + n_gpus, + hps, + ), + ) + children.append(subproc) + subproc.start() + + for i in range(n_gpus): + children[i].join() + + +def run(rank, n_gpus, hps): + global global_step + if rank == 0: + logger = utils.get_logger(hps.model_dir) + logger.info(hps) + # utils.check_git_hash(hps.model_dir) + writer = SummaryWriter(log_dir=hps.model_dir) + writer_eval = SummaryWriter(log_dir=os.path.join(hps.model_dir, "eval")) + + dist.init_process_group( + backend="gloo", init_method="env://", world_size=n_gpus, rank=rank + ) + torch.manual_seed(hps.train.seed) + if torch.cuda.is_available(): + torch.cuda.set_device(rank) + + if hps.if_f0 == 1: + train_dataset = TextAudioLoaderMultiNSFsid(hps.data.training_files, hps.data) + print("xxxxxxxxxx using embeding xxxxxxxxx: ") + else: + train_dataset = TextAudioLoader(hps.data.training_files, hps.data) + train_sampler = DistributedBucketSampler( + train_dataset, + hps.train.batch_size * n_gpus, + # [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1200,1400], # 16s + [100, 200, 300, 400, 500, 600, 700, 800, 900], # 16s + num_replicas=n_gpus, + rank=rank, + shuffle=True, + ) + # It is possible that dataloader's workers are out of shared memory. Please try to raise your shared memory limit. 
+    # num_workers=8 -> num_workers=4
+    if hps.if_f0 == 1:
+        collate_fn = TextAudioCollateMultiNSFsid()
+    else:
+        collate_fn = TextAudioCollate()
+    train_loader = DataLoader(
+        train_dataset,
+        num_workers=4,
+        shuffle=False,
+        pin_memory=True,
+        collate_fn=collate_fn,
+        batch_sampler=train_sampler,
+        persistent_workers=True,
+        prefetch_factor=8,
+    )
+    if hps.if_f0 == 1:
+        net_g = RVC_Model_f0(
+            hps.data.filter_length // 2 + 1,
+            hps.train.segment_size // hps.data.hop_length,
+            **hps.model,
+            is_half=hps.train.fp16_run,
+            sr=hps.sample_rate,
+        )
+    else:
+        net_g = RVC_Model_nof0(
+            hps.data.filter_length // 2 + 1,
+            hps.train.segment_size // hps.data.hop_length,
+            **hps.model,
+            is_half=hps.train.fp16_run,
+        )
+    if torch.cuda.is_available():
+        net_g = net_g.cuda(rank)
+    net_d = MultiPeriodDiscriminator(hps.model.use_spectral_norm)
+    if torch.cuda.is_available():
+        net_d = net_d.cuda(rank)
+    optim_g = torch.optim.AdamW(
+        net_g.parameters(),
+        hps.train.learning_rate,
+        betas=hps.train.betas,
+        eps=hps.train.eps,
+    )
+    optim_d = torch.optim.AdamW(
+        net_d.parameters(),
+        hps.train.learning_rate,
+        betas=hps.train.betas,
+        eps=hps.train.eps,
+    )
+    # net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
+    # net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
+    if torch.cuda.is_available():
+        net_g = DDP(net_g, device_ids=[rank], find_unused_parameters=True)
+        net_d = DDP(net_d, device_ids=[rank], find_unused_parameters=True)
+    else:
+        net_g = DDP(net_g)
+        net_d = DDP(net_d)
+    #'''
+    try:  # resume automatically if a checkpoint can be loaded
+        _, _, _, epoch_str = utils.load_checkpoint(
+            utils.latest_checkpoint_path(hps.model_dir, "D_*.pth"), net_d, optim_d)
+        # loading D usually works fine
+        if rank == 0:
+            logger.info("loaded D")
+        # _, _, _, epoch_str = utils.load_checkpoint(utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g, optim_g,load_opt=0)
+
+        _, _, _, epoch_str = utils.load_checkpoint(
+            utils.latest_checkpoint_path(hps.model_dir, "G_*.pth"), net_g,
+            optim_g
+        )
+        global_step = (epoch_str - 1) * len(train_loader)
+        # epoch_str = 1
+        # global_step = 0
+    except:  # if nothing can be loaded on the first run, load the pretrained weights
+        # traceback.print_exc()
+        print("@@@@@@@ load pretrained @@@@@@@")
+        epoch_str = 1
+        global_step = 0
+        if hps.pretrainG != "":
+            if rank == 0:
+                logger.info("loaded pretrained %s" % (hps.pretrainG))
+            print(
+                net_g.module.load_state_dict(
+                    torch.load(hps.pretrainG, map_location="cpu")["model"], strict=False
+                )
+            )  ## test: do not load the optimizer
+
+            print("@@@@@@@ load pretrained 1 @@@@@@@")
+        if hps.pretrainD != "":
+            if rank == 0:
+                logger.info("loaded pretrained %s" % (hps.pretrainD))
+            print(
+                net_d.module.load_state_dict(
+                    torch.load(hps.pretrainD, map_location="cpu")["model"], strict=False
+                )
+            )
+            print("@@@@@@@ load pretrained 2 @@@@@@@")
+    #'''
+    scheduler_g = torch.optim.lr_scheduler.ExponentialLR(
+        optim_g, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2
+    )
+    scheduler_d = torch.optim.lr_scheduler.ExponentialLR(
+        optim_d, gamma=hps.train.lr_decay, last_epoch=epoch_str - 2
+    )
+
+    scaler = GradScaler(enabled=hps.train.fp16_run)
+
+    cache = []
+    for epoch in range(epoch_str, hps.train.epochs + 1):
+        if rank == 0:
+            train_and_evaluate(
+                rank,
+                epoch,
+                hps,
+                [net_g, net_d],
+                [optim_g, optim_d],
+                [scheduler_g, scheduler_d],
+                scaler,
+                [train_loader, None],
+                logger,
+                [writer, writer_eval],
+                cache,
+            )
+        else:
+            train_and_evaluate(
+                rank,
+                epoch,
+                hps,
+                [net_g, net_d],
+                [optim_g, optim_d],
+                [scheduler_g, scheduler_d],
+                scaler,
+                [train_loader, None],
+                None,
+                None,
+                cache,
+            )
+        scheduler_g.step()
+        scheduler_d.step()
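+# (Note) last_epoch=epoch_str-2 above re-synchronises the ExponentialLR
+# schedulers after a resume: epochs here are 1-based, so a fresh run
+# (epoch_str=1) starts the schedulers at last_epoch=-1, PyTorch's documented
+# initial value.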
+ + +def train_and_evaluate( + rank, epoch, hps, nets, optims, schedulers, scaler, loaders, logger, writers, cache +): + net_g, net_d = nets + optim_g, optim_d = optims + train_loader, eval_loader = loaders + if writers is not None: + writer, writer_eval = writers + + train_loader.batch_sampler.set_epoch(epoch) + global global_step + + net_g.train() + net_d.train() + + # Prepare data iterator + if hps.if_cache_data_in_gpu == True: + # Use Cache + data_iterator = cache + if cache == []: + # Make new cache + for batch_idx, info in enumerate(train_loader): + # Unpack + if hps.if_f0 == 1: + ( + phone, + phone_lengths, + pitch, + pitchf, + spec, + spec_lengths, + wave, + wave_lengths, + sid, + ) = info + else: + ( + phone, + phone_lengths, + spec, + spec_lengths, + wave, + wave_lengths, + sid, + ) = info + # Load on CUDA + if torch.cuda.is_available(): + phone = phone.cuda(rank, non_blocking=True) + phone_lengths = phone_lengths.cuda(rank, non_blocking=True) + if hps.if_f0 == 1: + pitch = pitch.cuda(rank, non_blocking=True) + pitchf = pitchf.cuda(rank, non_blocking=True) + sid = sid.cuda(rank, non_blocking=True) + spec = spec.cuda(rank, non_blocking=True) + spec_lengths = spec_lengths.cuda(rank, non_blocking=True) + wave = wave.cuda(rank, non_blocking=True) + wave_lengths = wave_lengths.cuda(rank, non_blocking=True) + # Cache on list + if hps.if_f0 == 1: + cache.append( + ( + batch_idx, + ( + phone, + phone_lengths, + pitch, + pitchf, + spec, + spec_lengths, + wave, + wave_lengths, + sid, + ), + ) + ) + else: + cache.append( + ( + batch_idx, + ( + phone, + phone_lengths, + spec, + spec_lengths, + wave, + wave_lengths, + sid, + ), + ) + ) + else: + # Load shuffled cache + shuffle(cache) + else: + # Loader + data_iterator = enumerate(train_loader) + + # Run steps + epoch_recorder = EpochRecorder() + for batch_idx, info in data_iterator: + # Data + ## Unpack + if hps.if_f0 == 1: + ( + phone, + phone_lengths, + pitch, + pitchf, + spec, + spec_lengths, + wave, + wave_lengths, + sid, + ) = info + else: + phone, phone_lengths, spec, spec_lengths, wave, wave_lengths, sid = info + ## Load on CUDA + if (hps.if_cache_data_in_gpu == False) and torch.cuda.is_available(): + phone = phone.cuda(rank, non_blocking=True) + phone_lengths = phone_lengths.cuda(rank, non_blocking=True) + if hps.if_f0 == 1: + pitch = pitch.cuda(rank, non_blocking=True) + pitchf = pitchf.cuda(rank, non_blocking=True) + sid = sid.cuda(rank, non_blocking=True) + spec = spec.cuda(rank, non_blocking=True) + spec_lengths = spec_lengths.cuda(rank, non_blocking=True) + wave = wave.cuda(rank, non_blocking=True) + # wave_lengths = wave_lengths.cuda(rank, non_blocking=True) + + # Calculate + with autocast(enabled=hps.train.fp16_run): + if hps.if_f0 == 1: + ( + y_hat, + ids_slice, + x_mask, + z_mask, + (z, z_p, m_p, logs_p, m_q, logs_q),diff_loss + ) = net_g(phone, phone_lengths, pitch, pitchf, spec, spec_lengths, sid) + else: + ( + y_hat, + ids_slice, + x_mask, + z_mask, + (z, z_p, m_p, logs_p, m_q, logs_q) + ) = net_g(phone, phone_lengths, spec, spec_lengths, sid) + ''' + mel = spec_to_mel_torch( + spec, + hps.data.filter_length, + hps.data.n_mel_channels, + hps.data.sampling_rate, + hps.data.mel_fmin, + hps.data.mel_fmax, + ) + #''' + mel, energy, zx = spec_to_mel_torch_energy( + spec, + hps.data.filter_length, + hps.data.n_mel_channels, + hps.data.sampling_rate, + hps.data.mel_fmin, + hps.data.mel_fmax, + ) + #y_mel = commons.slice_segments( + # mel, ids_slice, hps.train.segment_size // hps.data.hop_length + #) + y_mel, y_energy = 
commons.slice_segments_energy( + mel, energy,ids_slice, hps.train.segment_size // hps.data.hop_length + ) + + with autocast(enabled=False): + #y_hat_mel = mel_spectrogram_torch( + y_hat_mel , energy_hat , zx_hat = mel_spectrogram_torch_energy( + y_hat.float().squeeze(1), + hps.data.filter_length, + hps.data.n_mel_channels, + hps.data.sampling_rate, + hps.data.hop_length, + hps.data.win_length, + hps.data.mel_fmin, + hps.data.mel_fmax, + ) + if hps.train.fp16_run == True: + y_hat_mel = y_hat_mel.half() + wave = commons.slice_segments( + wave, ids_slice * hps.data.hop_length, hps.train.segment_size + ) # slice + + # Discriminator + y_d_hat_r, y_d_hat_g, _, _ = net_d(wave, y_hat.detach()) + with autocast(enabled=False): + loss_disc, losses_disc_r, losses_disc_g = discriminator_loss( + y_d_hat_r, y_d_hat_g + ) + optim_d.zero_grad() + scaler.scale(loss_disc).backward() + scaler.unscale_(optim_d) + grad_norm_d = commons.clip_grad_value_(net_d.parameters(), None) + scaler.step(optim_d) + energy_loss = torch.nn.MSELoss() + zx_loss = torch.nn.MSELoss() + + with autocast(enabled=hps.train.fp16_run): + # Generator + y_d_hat_r, y_d_hat_g, fmap_r, fmap_g = net_d(wave, y_hat) + with autocast(enabled=False): + loss_mel = F.l1_loss(y_mel, y_hat_mel) * hps.train.c_mel + loss_kl = kl_loss(z_p, logs_q, m_p, logs_p, z_mask) * hps.train.c_kl + + #print("@@z_p:",z_p.shape,"m_p:",m_p.shape) + loss_fm = feature_loss(fmap_r, fmap_g) + loss_gen, losses_gen = generator_loss(y_d_hat_g) + loss_energy = energy_loss(y_energy,energy_hat)* hps.train.c_mel + + loss_zx = F.l1_loss(zx, zx_hat)#* hps.train.c_mel*0.1 + loss_gen_all = loss_gen + loss_fm + loss_mel + loss_energy + loss_zx + loss_kl + #+ diff_loss + loss_kl + optim_g.zero_grad() + scaler.scale(loss_gen_all).backward() + scaler.unscale_(optim_g) + grad_norm_g = commons.clip_grad_value_(net_g.parameters(), None) + scaler.step(optim_g) + scaler.update() + + if rank == 0: + if global_step % hps.train.log_interval == 0: + lr = optim_g.param_groups[0]["lr"] + logger.info( + "Train Epoch: {} [{:.0f}%]".format( + epoch, 100.0 * batch_idx / len(train_loader) + ) + ) + # Amor For Tensorboard display + if loss_mel > 75: + loss_mel = 75 + if loss_kl > 9: + loss_kl = 9 + + logger.info([global_step, lr]) + logger.info( + f"loss_disc={loss_disc:.3f}, loss_gen={loss_gen:.3f}, loss_fm={loss_fm:.3f},loss_mel={loss_mel:.3f},loss_energy={loss_energy:.3f}, loss_zx={loss_zx:.3f},loss_kl={loss_kl:.3f},"# diff_loss={diff_loss:.3f}" + ) + scalar_dict = { + "loss/g/total": loss_gen_all, + "loss/d/total": loss_disc, + "learning_rate": lr, + "grad_norm_d": grad_norm_d, + "grad_norm_g": grad_norm_g, + } + scalar_dict.update( + { + "loss/g/fm": loss_fm, + "loss/g/mel": loss_mel, + "loss/g/kl": loss_kl, + } + ) + + scalar_dict.update( + {"loss/g/{}".format(i): v for i, v in enumerate(losses_gen)} + ) + scalar_dict.update( + {"loss/d_r/{}".format(i): v for i, v in enumerate(losses_disc_r)} + ) + scalar_dict.update( + {"loss/d_g/{}".format(i): v for i, v in enumerate(losses_disc_g)} + ) + image_dict = { + "slice/mel_org": utils.plot_spectrogram_to_numpy( + y_mel[0].data.cpu().numpy() + ), + "slice/mel_gen": utils.plot_spectrogram_to_numpy( + y_hat_mel[0].data.cpu().numpy() + ), + "all/mel": utils.plot_spectrogram_to_numpy( + mel[0].data.cpu().numpy() + ), + } + utils.summarize( + writer=writer, + global_step=global_step, + images=image_dict, + scalars=scalar_dict, + ) + global_step += 1 + + if global_step % 5000 == 0: + if hps.if_latest == 0: + utils.save_checkpoint( + net_g, + optim_g, + 
hps.train.learning_rate, + epoch, + os.path.join(hps.model_dir, "G_{}.pth".format(global_step)), + ) + utils.save_checkpoint( + net_d, + optim_d, + hps.train.learning_rate, + epoch, + os.path.join(hps.model_dir, "D_{}.pth".format(global_step)), + ) + else: + utils.save_checkpoint( + net_g, + optim_g, + hps.train.learning_rate, + epoch, + os.path.join(hps.model_dir, "G_{}.pth".format(2333333)), + ) + utils.save_checkpoint( + net_d, + optim_d, + hps.train.learning_rate, + epoch, + os.path.join(hps.model_dir, "D_{}.pth".format(2333333)), + ) + if rank == 0 and hps.save_every_weights == "1": + if hasattr(net_g, "module"): + ckpt = net_g.module.state_dict() + else: + ckpt = net_g.state_dict() + logger.info( + "saving ckpt %s_e%s:%s" + % ( + hps.name, + epoch, + savee( + ckpt, + hps.sample_rate, + hps.if_f0, + hps.name + "_e%s_s%s" % (epoch, global_step), + epoch, + hps.version, + hps, + ), + ) + ) + + # /Run steps + + if epoch % hps.save_every_epoch == 0 and rank == 0: + if hps.if_latest == 0: + utils.save_checkpoint( + net_g, + optim_g, + hps.train.learning_rate, + epoch, + os.path.join(hps.model_dir, "G_{}.pth".format(global_step)), + ) + utils.save_checkpoint( + net_d, + optim_d, + hps.train.learning_rate, + epoch, + os.path.join(hps.model_dir, "D_{}.pth".format(global_step)), + ) + else: + utils.save_checkpoint( + net_g, + optim_g, + hps.train.learning_rate, + epoch, + os.path.join(hps.model_dir, "G_{}.pth".format(2333333)), + ) + utils.save_checkpoint( + net_d, + optim_d, + hps.train.learning_rate, + epoch, + os.path.join(hps.model_dir, "D_{}.pth".format(2333333)), + ) + if rank == 0 and hps.save_every_weights == "1": + if hasattr(net_g, "module"): + ckpt = net_g.module.state_dict() + else: + ckpt = net_g.state_dict() + logger.info( + "saving ckpt %s_e%s:%s" + % ( + hps.name, + epoch, + savee( + ckpt, + hps.sample_rate, + hps.if_f0, + hps.name + "_e%s_s%s" % (epoch, global_step), + epoch, + hps.version, + hps, + ), + ) + ) + + if rank == 0: + logger.info("====> Epoch: {} {}".format(epoch, epoch_recorder.record())) + if epoch >= hps.total_epoch and rank == 0: + logger.info("Training is done. 
The program is closed.")
+
+        if hasattr(net_g, "module"):
+            ckpt = net_g.module.state_dict()
+        else:
+            ckpt = net_g.state_dict()
+        logger.info(
+            "saving final ckpt:%s"
+            % (
+                savee(
+                    ckpt, hps.sample_rate, hps.if_f0, hps.name, epoch, hps.version, hps
+                )
+            )
+        )
+        sleep(1)
+        os._exit(2333333)
+
+
+if __name__ == "__main__":
+    n_gpus = torch.cuda.device_count()
+    print("n_gpus num:", n_gpus)
+    # "spawn": children are created by the parent process, which keeps running
+    # asynchronously after spawning them
+    torch.multiprocessing.set_start_method("spawn")
+    main()
diff --git a/AIMeiSheng/trainset_preprocess_pipeline_print.py b/AIMeiSheng/trainset_preprocess_pipeline_print.py
new file mode 100644
index 0000000..62671ba
--- /dev/null
+++ b/AIMeiSheng/trainset_preprocess_pipeline_print.py
@@ -0,0 +1,139 @@
+import sys, os, multiprocessing
+import traceback
+from scipy import signal
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+print(sys.argv)
+inp_root = sys.argv[1]
+sr = int(sys.argv[2])
+n_p = int(sys.argv[3])
+exp_dir = sys.argv[4]
+noparallel = sys.argv[5] == "True"
+import numpy as np
+import librosa
+from scipy.io import wavfile
+from lib.slicer2 import Slicer
+from lib.audio import load_audio
+
+mutex = multiprocessing.Lock()
+f = open("%s/preprocess.log" % exp_dir, "a+")
+
+
+def println(strr):
+    # serialize log writes across worker processes
+    mutex.acquire()
+    print(strr)
+    f.write("%s\n" % strr)
+    f.flush()
+    mutex.release()
+
+
+class PreProcess:
+    def __init__(self, sr, exp_dir):
+        self.slicer = Slicer(
+            sr=sr,
+            threshold=-42,
+            min_length=1500,
+            min_interval=400,
+            hop_size=15,
+            max_sil_kept=500,
+        )
+        self.sr = sr
+        self.bh, self.ah = signal.butter(N=5, Wn=48, btype="high", fs=self.sr)
+        self.per = 3.0
+        self.overlap = 0.3
+        self.tail = self.per + self.overlap
+        self.max = 0.9
+        self.alpha = 0.75
+        self.exp_dir = exp_dir
+        self.gt_wavs_dir = "%s/0_gt_wavs" % exp_dir
+        self.wavs16k_dir = "%s/1_16k_wavs" % exp_dir
+        os.makedirs(self.exp_dir, exist_ok=True)
+        os.makedirs(self.gt_wavs_dir, exist_ok=True)
+        os.makedirs(self.wavs16k_dir, exist_ok=True)
+
+    def norm_write(self, tmp_audio, idx0, idx1):
+        tmp_max = np.abs(tmp_audio).max()
+        if tmp_max > 2.5:
+            print("%s-%s-%s-filtered" % (idx0, idx1, tmp_max))
+            return
+        # peak-normalize, then blend with the unnormalized signal at ratio alpha
+        tmp_audio = (tmp_audio / tmp_max * (self.max * self.alpha)) + (
+            1 - self.alpha
+        ) * tmp_audio
+        wavfile.write(
+            "%s/%s_%s.wav" % (self.gt_wavs_dir, idx0, idx1),
+            self.sr,
+            tmp_audio.astype(np.float32),
+        )
+        tmp_audio = librosa.resample(
+            tmp_audio, orig_sr=self.sr, target_sr=16000
+        )  # , res_type="soxr_vhq"
+        wavfile.write(
+            "%s/%s_%s.wav" % (self.wavs16k_dir, idx0, idx1),
+            16000,
+            tmp_audio.astype(np.float32),
+        )
+
+    def pipeline(self, path, idx0):
+        try:
+            audio = load_audio(path, self.sr)
+            # a zero-phase digital filter (filtfilt) causes pre-ringing noise,
+            # so a causal filter is used instead
+            # audio = signal.filtfilt(self.bh, self.ah, audio)
+            audio = signal.lfilter(self.bh, self.ah, audio)
+
+            idx1 = 0
+            for audio in self.slicer.slice(audio):
+                i = 0
+                while True:
+                    start = int(self.sr * (self.per - self.overlap) * i)
+                    i += 1
+                    if len(audio[start:]) > self.tail * self.sr:
+                        tmp_audio = audio[start : start + int(self.per * self.sr)]
+                        self.norm_write(tmp_audio, idx0, idx1)
+                        idx1 += 1
+                    else:
+                        tmp_audio = audio[start:]
+                        idx1 += 1
+                        break
+                self.norm_write(tmp_audio, idx0, idx1)
+            println("%s->Suc." % path)
+        except Exception:
+            println("%s->%s" % (path, traceback.format_exc()))
+
+    def pipeline_mp(self, infos):
+        for path, idx0 in infos:
+            self.pipeline(path, idx0)
+
+    def pipeline_mp_inp_dir(self, inp_root, n_p):
+        try:
+            infos = [
+                ("%s/%s" % (inp_root, name), idx)
+                for idx, name in enumerate(sorted(list(os.listdir(inp_root))))
+            ]
+            if noparallel:
+                for i in range(n_p):
+                    self.pipeline_mp(infos[i::n_p])
+            else:
+                ps = []
+                for i in range(n_p):
+                    p = multiprocessing.Process(
+                        target=self.pipeline_mp, args=(infos[i::n_p],)
+                    )
+                    ps.append(p)
+                    p.start()
+                for i in range(n_p):
+                    ps[i].join()
+        except Exception:
+            println("Fail. %s" % traceback.format_exc())
+
+
+def preprocess_trainset(inp_root, sr, n_p, exp_dir):
+    pp = PreProcess(sr, exp_dir)
+    println("start preprocess")
+    println(sys.argv)
+    pp.pipeline_mp_inp_dir(inp_root, n_p)
+    println("end preprocess")
+
+
+if __name__ == "__main__":
+    preprocess_trainset(inp_root, sr, n_p, exp_dir)
diff --git a/AIMeiSheng/uvr5_weights/.gitignore b/AIMeiSheng/uvr5_weights/.gitignore
new file mode 100644
index 0000000..d6b7ef3
--- /dev/null
+++ b/AIMeiSheng/uvr5_weights/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/AIMeiSheng/vc_infer_pipeline.py b/AIMeiSheng/vc_infer_pipeline.py
new file mode 100644
index 0000000..672033b
--- /dev/null
+++ b/AIMeiSheng/vc_infer_pipeline.py
@@ -0,0 +1,443 @@
+import os
+import sys
+import traceback
+from functools import lru_cache
+from time import time as ttime
+
+import faiss
+import librosa
+import numpy as np
+import parselmouth
+import pyworld
+import torch
+import torch.nn.functional as F
+import torchcrepe
+from scipy import signal
+
+now_dir = os.getcwd()
+sys.path.append(now_dir)
+
+# 5th-order Butterworth high-pass at 48 Hz for 16 kHz audio
+bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000)
+
+# audio is handed over through this module-level dict because ndarrays are not
+# hashable and so cannot be lru_cache keys
+input_audio_path2wav = {}
+
+
+@lru_cache
+def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
+    audio = input_audio_path2wav[input_audio_path]
+    f0, t = pyworld.harvest(
+        audio,
+        fs=fs,
+        f0_ceil=f0max,
+        f0_floor=f0min,
+        frame_period=frame_period,
+    )
+    f0 = pyworld.stonemask(audio, f0, t, fs)
+    return f0
+
+
+def change_rms(data1, sr1, data2, sr2, rate):
+    # data1: input audio, data2: output audio, rate: weight given to data2
+    rms1 = librosa.feature.rms(
+        y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2
+    )  # one RMS point every half second
+    rms2 = librosa.feature.rms(y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2)
+    rms1 = torch.from_numpy(rms1)
+    rms1 = F.interpolate(
+        rms1.unsqueeze(0), size=data2.shape[0], mode="linear"
+    ).squeeze()
+    rms2 = torch.from_numpy(rms2)
+    rms2 = F.interpolate(
+        rms2.unsqueeze(0), size=data2.shape[0], mode="linear"
+    ).squeeze()
+    rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-6)
+    data2 *= (
+        torch.pow(rms1, torch.tensor(1 - rate))
+        * torch.pow(rms2, torch.tensor(rate - 1))
+    ).numpy()
+    return data2
+
+
+class VC(object):
+    def __init__(self, tgt_sr, config):
+        self.x_pad, self.x_query, self.x_center, self.x_max, self.is_half = (
+            config.x_pad,
+            config.x_query,
+            config.x_center,
+            config.x_max,
+            config.is_half,
+        )
+        self.sr = 16000  # hubert input sample rate
+        self.window = 160  # samples per frame (10 ms at 16 kHz)
+        self.t_pad = self.sr * self.x_pad  # padding added before and after each chunk
+        self.t_pad_tgt = tgt_sr * self.x_pad
+        self.t_pad2 = self.t_pad * 2
+        self.t_query = self.sr * self.x_query  # search window around each cut point
+        self.t_center = self.sr * self.x_center  # spacing of candidate cut points
+        self.t_max = self.sr * self.x_max  # below this duration, no cutting is needed
+        self.device = config.device
+
+    def get_f0(
+        self,
+        input_audio_path,
+        x,
+        p_len,
+        f0_up_key,
+        f0_method,
+        filter_radius,
+        inp_f0=None,
+    ):
+        global input_audio_path2wav
+        time_step = 
self.window / self.sr * 1000 + f0_min = 50 + f0_max = 1100 + f0_mel_min = 1127 * np.log(1 + f0_min / 700) + f0_mel_max = 1127 * np.log(1 + f0_max / 700) + if f0_method == "pm": + f0 = ( + parselmouth.Sound(x, self.sr) + .to_pitch_ac( + time_step=time_step / 1000, + voicing_threshold=0.6, + pitch_floor=f0_min, + pitch_ceiling=f0_max, + ) + .selected_array["frequency"] + ) + pad_size = (p_len - len(f0) + 1) // 2 + if pad_size > 0 or p_len - len(f0) - pad_size > 0: + f0 = np.pad( + f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" + ) + elif f0_method == "harvest": + input_audio_path2wav[input_audio_path] = x.astype(np.double) + f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10) + if filter_radius > 2: + f0 = signal.medfilt(f0, 3) + elif f0_method == "crepe": + model = "full" + # Pick a batch size that doesn't cause memory errors on your gpu + batch_size = 512 + # Compute pitch using first gpu + audio = torch.tensor(np.copy(x))[None].float() + f0, pd = torchcrepe.predict( + audio, + self.sr, + self.window, + f0_min, + f0_max, + model, + batch_size=batch_size, + device=self.device, + return_periodicity=True, + ) + pd = torchcrepe.filter.median(pd, 3) + f0 = torchcrepe.filter.mean(f0, 3) + f0[pd < 0.1] = 0 + f0 = f0[0].cpu().numpy() + elif f0_method == "rmvpe": + if hasattr(self, "model_rmvpe") == False: + from lib.rmvpe import RMVPE + + print("loading rmvpe model") + self.model_rmvpe = RMVPE( + "rmvpe.pt", is_half=self.is_half, device=self.device + ) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + f0 *= pow(2, f0_up_key / 12) + # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + tf0 = self.sr // self.window # 每秒f0点数 + if inp_f0 is not None: + delta_t = np.round( + (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1 + ).astype("int16") + replace_f0 = np.interp( + list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1] + ) + shape = f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)].shape[0] + f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[ + :shape + ] + # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + f0bak = f0.copy() + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / ( + f0_mel_max - f0_mel_min + ) + 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > 255] = 255 + f0_coarse = np.rint(f0_mel).astype(np.int) + return f0_coarse, f0bak # 1-0 + + def vc( + self, + model, + net_g, + sid, + audio0, + pitch, + pitchf, + times, + index, + big_npy, + index_rate, + version, + protect, + ): # ,file_index,file_big_npy + feats = torch.from_numpy(audio0) + if self.is_half: + feats = feats.half() + else: + feats = feats.float() + if feats.dim() == 2: # double channels + feats = feats.mean(-1) + assert feats.dim() == 1, feats.dim() + feats = feats.view(1, -1) + padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False) + + inputs = { + "source": feats.to(self.device), + "padding_mask": padding_mask, + "output_layer": 9 if version == "v1" else 12, + } + t0 = ttime() + with torch.no_grad(): + logits = model.extract_features(**inputs) + feats = model.final_proj(logits[0]) if version == "v1" else logits[0] + if protect < 0.5 and pitch != None and pitchf != None: + feats0 = feats.clone() + if ( + isinstance(index, type(None)) == False + and isinstance(big_npy, type(None)) == False + and index_rate != 0 + ): + npy = feats[0].cpu().numpy() + if self.is_half: + npy = npy.astype("float32") + + # _, I = index.search(npy, 
1) + # npy = big_npy[I.squeeze()] + + score, ix = index.search(npy, k=8) + weight = np.square(1 / score) + weight /= weight.sum(axis=1, keepdims=True) + npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1) + + if self.is_half: + npy = npy.astype("float16") + feats = ( + torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate + + (1 - index_rate) * feats + ) + + feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) + if protect < 0.5 and pitch != None and pitchf != None: + feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute( + 0, 2, 1 + ) + t1 = ttime() + p_len = audio0.shape[0] // self.window + if feats.shape[1] < p_len: + p_len = feats.shape[1] + if pitch != None and pitchf != None: + pitch = pitch[:, :p_len] + pitchf = pitchf[:, :p_len] + + if protect < 0.5 and pitch != None and pitchf != None: + pitchff = pitchf.clone() + pitchff[pitchf > 0] = 1 + pitchff[pitchf < 1] = protect + pitchff = pitchff.unsqueeze(-1) + feats = feats * pitchff + feats0 * (1 - pitchff) + feats = feats.to(feats0.dtype) + p_len = torch.tensor([p_len], device=self.device).long() + with torch.no_grad(): + if pitch != None and pitchf != None: + audio1 = ( + (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]) + .data.cpu() + .float() + .numpy() + ) + else: + audio1 = ( + (net_g.infer(feats, p_len, sid)[0][0, 0]).data.cpu().float().numpy() + ) + del feats, p_len, padding_mask + if torch.cuda.is_available(): + torch.cuda.empty_cache() + t2 = ttime() + times[0] += t1 - t0 + times[2] += t2 - t1 + return audio1 + + def pipeline( + self, + model, + net_g, + sid, + audio, + input_audio_path, + times, + f0_up_key, + f0_method, + file_index, + # file_big_npy, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + version, + protect, + f0_file=None, + ): + if ( + file_index != "" + # and file_big_npy != "" + # and os.path.exists(file_big_npy) == True + and os.path.exists(file_index) == True + and index_rate != 0 + ): + try: + index = faiss.read_index(file_index) + # big_npy = np.load(file_big_npy) + big_npy = index.reconstruct_n(0, index.ntotal) + except: + traceback.print_exc() + index = big_npy = None + else: + index = big_npy = None + audio = signal.filtfilt(bh, ah, audio) + audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect") + opt_ts = [] + if audio_pad.shape[0] > self.t_max: + audio_sum = np.zeros_like(audio) + for i in range(self.window): + audio_sum += audio_pad[i : i - self.window] + for t in range(self.t_center, audio.shape[0], self.t_center): + opt_ts.append( + t + - self.t_query + + np.where( + np.abs(audio_sum[t - self.t_query : t + self.t_query]) + == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min() + )[0][0] + ) + s = 0 + audio_opt = [] + t = None + t1 = ttime() + audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect") + p_len = audio_pad.shape[0] // self.window + inp_f0 = None + if hasattr(f0_file, "name") == True: + try: + with open(f0_file.name, "r") as f: + lines = f.read().strip("\n").split("\n") + inp_f0 = [] + for line in lines: + inp_f0.append([float(i) for i in line.split(",")]) + inp_f0 = np.array(inp_f0, dtype="float32") + except: + traceback.print_exc() + sid = torch.tensor(sid, device=self.device).unsqueeze(0).long() + pitch, pitchf = None, None + if if_f0 == 1: + pitch, pitchf = self.get_f0( + input_audio_path, + audio_pad, + p_len, + f0_up_key, + f0_method, + filter_radius, + inp_f0, + ) + pitch = pitch[:p_len] + pitchf = pitchf[:p_len] + if 
self.device == "mps": + pitchf = pitchf.astype(np.float32) + pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long() + pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float() + t2 = ttime() + times[1] += t2 - t1 + for t in opt_ts: + t = t // self.window * self.window + if if_f0 == 1: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[s : t + self.t_pad2 + self.window], + pitch[:, s // self.window : (t + self.t_pad2) // self.window], + pitchf[:, s // self.window : (t + self.t_pad2) // self.window], + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + else: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[s : t + self.t_pad2 + self.window], + None, + None, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + s = t + if if_f0 == 1: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + pitch[:, t // self.window :] if t is not None else pitch, + pitchf[:, t // self.window :] if t is not None else pitchf, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + else: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + None, + None, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + audio_opt = np.concatenate(audio_opt) + if rms_mix_rate != 1: + audio_opt = change_rms(audio, 16000, audio_opt, tgt_sr, rms_mix_rate) + if resample_sr >= 16000 and tgt_sr != resample_sr: + audio_opt = librosa.resample( + audio_opt, orig_sr=tgt_sr, target_sr=resample_sr + ) + audio_max = np.abs(audio_opt).max() / 0.99 + max_int16 = 32768 + if audio_max > 1: + max_int16 /= audio_max + audio_opt = (audio_opt * max_int16).astype(np.int16) + del pitch, pitchf, sid + if torch.cuda.is_available(): + torch.cuda.empty_cache() + return audio_opt diff --git a/AIMeiSheng/vc_infer_pipeline_org_embed.py b/AIMeiSheng/vc_infer_pipeline_org_embed.py new file mode 100644 index 0000000..bfda281 --- /dev/null +++ b/AIMeiSheng/vc_infer_pipeline_org_embed.py @@ -0,0 +1,760 @@ +import numpy as np, parselmouth, torch, pdb, sys, os +from time import time as ttime +import torch.nn.functional as F +import scipy.signal as signal +import pyworld, os, traceback, faiss, librosa, torchcrepe +from scipy import signal +from functools import lru_cache + +now_dir = os.getcwd() +sys.path.append(now_dir) + +bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000) + +input_audio_path2wav = {} +fidx = 0 + +import threading +import concurrent.futures + + +@lru_cache +def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period): + audio = input_audio_path2wav[input_audio_path] + f0, t = pyworld.harvest( + audio, + fs=fs, + f0_ceil=f0max, + f0_floor=f0min, + frame_period=frame_period, + ) + f0 = pyworld.stonemask(audio, f0, t, fs) + return f0 + + +def change_rms(data1, sr1, data2, sr2, rate): # 1是输入音频,2是输出音频,rate是2的占比 + # print(data1.max(),data2.max()) + rms1 = librosa.feature.rms( + y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2 + ) # 每半秒一个点 + rms2 = librosa.feature.rms(y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2) + rms1 = torch.from_numpy(rms1) + rms1 = F.interpolate( + rms1.unsqueeze(0), size=data2.shape[0], mode="linear" + ).squeeze() + rms2 = torch.from_numpy(rms2) + rms2 = F.interpolate( + rms2.unsqueeze(0), size=data2.shape[0], mode="linear" + ).squeeze() + rms2 = torch.max(rms2, 
torch.zeros_like(rms2) + 1e-6) + data2 *= ( + torch.pow(rms1, torch.tensor(1 - rate)) + * torch.pow(rms2, torch.tensor(rate - 1)) + ).numpy() + return data2 + + +class VC(object): + def __init__(self, tgt_sr, config): + self.x_pad, self.x_query, self.x_center, self.x_max, self.is_half = ( + config.x_pad, ##config会根据设备配置不通知如:3 + config.x_query, # 10 等于x_max-x_center)*2 + config.x_center, #60 + config.x_max, #65 + config.is_half, + ) + self.sr = 16000 # hubert输入采样率 + self.window = 160 # 每帧点数 + self.t_pad = self.sr * self.x_pad # 每条前后pad时间 + self.t_pad_tgt = tgt_sr * self.x_pad + self.t_pad2 = self.t_pad * 2 + self.t_query = self.sr * self.x_query # 查询切点前后查询时间, + self.t_center = self.sr * self.x_center # 查询切点位置 + self.t_max = self.sr * self.x_max # 免查询时长阈值 + self.device = config.device + + def get_f0( + self, + input_audio_path, + x, + p_len, + f0_up_key, + f0_method, + filter_radius, + inp_f0=None, + ): + global input_audio_path2wav + time_step = self.window / self.sr * 1000 + f0_min = 50 + f0_max = 1100 + f0_mel_min = 1127 * np.log(1 + f0_min / 700) + f0_mel_max = 1127 * np.log(1 + f0_max / 700) + if f0_method == "pm": + f0 = ( + parselmouth.Sound(x, self.sr) + .to_pitch_ac( + time_step=time_step / 1000, + voicing_threshold=0.6, + pitch_floor=f0_min, + pitch_ceiling=f0_max, + ) + .selected_array["frequency"] + ) + pad_size = (p_len - len(f0) + 1) // 2 + if pad_size > 0 or p_len - len(f0) - pad_size > 0: + f0 = np.pad( + f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" + ) + elif f0_method == "harvest": + input_audio_path2wav[input_audio_path] = x.astype(np.double) + f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10) + if filter_radius > 2: + f0 = signal.medfilt(f0, 3) + elif f0_method == "crepe": + model = "full" + # Pick a batch size that doesn't cause memory errors on your gpu + batch_size = 512 + # Compute pitch using first gpu + audio = torch.tensor(np.copy(x))[None].float() + f0, pd = torchcrepe.predict( + audio, + self.sr, + self.window, + f0_min, + f0_max, + model, + batch_size=batch_size, + device=self.device, + return_periodicity=True, + ) + pd = torchcrepe.filter.median(pd, 3) + f0 = torchcrepe.filter.mean(f0, 3) + f0[pd < 0.1] = 0 + f0 = f0[0].cpu().numpy() + elif f0_method == "rmvpe": + if hasattr(self, "model_rmvpe") == False: + from lib.rmvpe import RMVPE + + print("loading rmvpe model") + self.model_rmvpe = RMVPE( + "rmvpe.pt", is_half=self.is_half, device=self.device + ) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + else: ##for meisheng + self.model_rmvpe = f0_method + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + + ##这里读文件,更改pitch st fang + valid_f0 = f0[f0 > 50] + mean_pitch_cur = np.mean(valid_f0[:min(len(valid_f0),500)]) + + + #print("@@f0_up_key:",f0_up_key) + deta = 0 + if(f0_up_key > 50 ): + deta = -mean_pitch_cur + f0_up_key + + #print("$$$$$$$$$fangxxxxx pitch shift: ",deta) + f0_up_key = int(np.log2(deta/(mean_pitch_cur + 1) + 1) * 12)##方法2 fang + #if( abs(f0_up_key) < 3 ): + # f0_up_key = 0 + f0_up_key = max(min(12,f0_up_key),-12) + #print("f0_up_key: ",f0_up_key) + + f0 *= pow(2, f0_up_key / 12)#这块是音调更改 fang 我设置的0 + # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + tf0 = self.sr // self.window # 每秒f0点数 + if inp_f0 is not None: + delta_t = np.round( + (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1 + ).astype("int16") + replace_f0 = np.interp( + list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1] + ) + shape = f0[self.x_pad * tf0 : self.x_pad * tf0 + 
len(replace_f0)].shape[0] + f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[ + :shape + ] + # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + + + f0bak = f0.copy() + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / ( + f0_mel_max - f0_mel_min + ) + 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > 255] = 255 + f0_coarse = np.rint(f0_mel).astype(np.int) + return f0_coarse, f0bak # 1-0 + + def vc( + self, + model, + net_g, + sid, + audio0, + pitch, + pitchf, + times, + index, + big_npy, + index_rate, + version, + protect, + ): # ,file_index,file_big_npy + feats = torch.from_numpy(audio0) + if self.is_half: + feats = feats.half() + else: + feats = feats.float() + if feats.dim() == 2: # double channels + feats = feats.mean(-1) + assert feats.dim() == 1, feats.dim() + feats = feats.view(1, -1) + padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False) + #print("@@@feats: ",feats.shape) + #print("@@@padding_mask: ",padding_mask.shape) + inputs = { + "source": feats.to(self.device), + "padding_mask": padding_mask, + "output_layer": 9 if version == "v1" else 12, + #"output_layer": 6 if version == "v1" else 12, + } + t0 = ttime() + #''' + with torch.no_grad(): + logits = model.extract_features(**inputs) + feats = model.final_proj(logits[0]) if version == "v1" else logits[0]#为何v1要转化,维度问题??? fang + #''' + + #print("@@@feats: ",feats.shape) + ''' + global fidx + feats_name = f"./feats_{fidx}.pt" + fidx += 1 + torch.save(feats, feats_name) + feats = torch.load(feats_name) + #''' + + if protect < 0.5 and pitch != None and pitchf != None: + feats0 = feats.clone() + if ( + isinstance(index, type(None)) == False + and isinstance(big_npy, type(None)) == False + and index_rate != 0 + ): + npy = feats[0].cpu().numpy() + if self.is_half: + npy = npy.astype("float32") + + # _, I = index.search(npy, 1) + # npy = big_npy[I.squeeze()] + + score, ix = index.search(npy, k=8) + weight = np.square(1 / score) + weight /= weight.sum(axis=1, keepdims=True) + npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1) + + if self.is_half: + npy = npy.astype("float16") + feats = ( + torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate + + (1 - index_rate) * feats + )##基于index和实际音频的特征进行组合,作为输入 fang + + #print("@@@feats: ",feats.shape) + feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) + if protect < 0.5 and pitch != None and pitchf != None: + feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute( + 0, 2, 1 + )#feats0的维度1 插值增加一倍 fang + t1 = ttime() + p_len = audio0.shape[0] // self.window ##分帧求pitch fang + if feats.shape[1] < p_len: + p_len = feats.shape[1] + if pitch != None and pitchf != None: + pitch = pitch[:, :p_len] + pitchf = pitchf[:, :p_len] + + if protect < 0.5 and pitch != None and pitchf != None: + pitchff = pitchf.clone() + pitchff[pitchf > 0] = 1 + pitchff[pitchf < 1] = protect + pitchff = pitchff.unsqueeze(-1) + feats = feats * pitchff + feats0 * (1 - pitchff) + feats = feats.to(feats0.dtype) + p_len = torch.tensor([p_len], device=self.device).long() + #print("###feats:",feats.shape,"pitch:",pitch.shape,"p_len:",p_len) + with torch.no_grad(): + if pitch != None and pitchf != None: + audio1 = ( + (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]) + .data.cpu() + .float() + .numpy() + ) + else: + audio1 = ( + (net_g.infer(feats, p_len, sid)[0][0, 0]).data.cpu().float().numpy() + ) + del feats, p_len, padding_mask + if 
torch.cuda.is_available(): + torch.cuda.empty_cache() + t2 = ttime() + times[0] += t1 - t0 + times[2] += t2 - t1 + return audio1 + + def pipeline( + self, + model, + net_g, + sid, + audio,## input wav + input_audio_path, #input wav name + times, + f0_up_key, + f0_method,# f0 meathod + file_index, #index 路径 + # file_big_npy, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + version, + protect, + f0_file=None, + ): + if ( + file_index != "" #.index文件不为空 fang + # and file_big_npy != "" + # and os.path.exists(file_big_npy) == True + and os.path.exists(file_index) == True + and index_rate != 0 + ): + try: + index = faiss.read_index(file_index) + # big_npy = np.load(file_big_npy) + big_npy = index.reconstruct_n(0, index.ntotal) + except: + traceback.print_exc() + index = big_npy = None + else: + index = big_npy = None + #print("####audio 1:",audio.shape) + audio = signal.filtfilt(bh, ah, audio) + #print("####audio 2:",audio.shape) + audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect") + opt_ts = [] + + #print("###t_max:",self.t_max) + #print("###window:",self.window,"self.t_query:",self.t_query,"self.t_pad2:",self.t_pad2) + if audio_pad.shape[0] > self.t_max: + audio_sum = np.zeros_like(audio) + for i in range(self.window): + audio_sum += audio_pad[i : i - self.window]#这样算循环了,每个idx是过去一帧的值的和 fang + for t in range(self.t_center, audio.shape[0], self.t_center):#一分钟一帧?? fang + opt_ts.append( + t + - self.t_query + + np.where( + np.abs(audio_sum[t - self.t_query : t + self.t_query]) + == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min() + )[0][0] + )#返回[ t - self.t_query, t+self.t_query] 区间最小值位置的索引保存,fang + s = 0 + audio_opt = [] + t = None + t1 = ttime() + audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect") + p_len = audio_pad.shape[0] // self.window + inp_f0 = None + if hasattr(f0_file, "name") == True: + try: + with open(f0_file.name, "r") as f: + lines = f.read().strip("\n").split("\n") + inp_f0 = [] + for line in lines: + inp_f0.append([float(i) for i in line.split(",")]) + inp_f0 = np.array(inp_f0, dtype="float32") + except: + traceback.print_exc() + #sid = torch.tensor(sid, device=self.device).unsqueeze(0).long() + sid_embed = np.load(sid) + sid = torch.FloatTensor(sid_embed).to(self.device).half() + pitch, pitchf = None, None + if if_f0 == 1: + pitch, pitchf = self.get_f0( + input_audio_path, + audio_pad, + p_len, + f0_up_key, + f0_method, + filter_radius, + inp_f0, + ) + pitch = pitch[:p_len] + pitchf = pitchf[:p_len] + if self.device == "mps": + pitchf = pitchf.astype(np.float32) + pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long() + pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float() + + #print("&&&&pitch: ",pitchf) + t2 = ttime() + times[1] += t2 - t1 + #print("####len(audio_pad):",len(audio_pad)) + #print("###pitch:", pitch.shape) + for t in opt_ts: #分段推理每段音频,一段这里设置60s左右 fang + t = t // self.window * self.window + if if_f0 == 1: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[s : t + self.t_pad2 + self.window], + pitch[:, s // self.window : (t + self.t_pad2) // self.window], + pitchf[:, s // self.window : (t + self.t_pad2) // self.window], + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + else: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[s : t + self.t_pad2 + self.window], + None, + None, + times, + index, + big_npy, + index_rate, + version, + protect, + 
)[self.t_pad_tgt : -self.t_pad_tgt] + ) + s = t + if if_f0 == 1: ##后面是最后一段处理 fang + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + pitch[:, t // self.window :] if t is not None else pitch, + pitchf[:, t // self.window :] if t is not None else pitchf, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + else: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + None, + None, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + audio_opt = np.concatenate(audio_opt) + if rms_mix_rate != 1: + audio_opt = change_rms(audio, 16000, audio_opt, tgt_sr, rms_mix_rate) + if resample_sr >= 16000 and tgt_sr != resample_sr: + audio_opt = librosa.resample( + audio_opt, orig_sr=tgt_sr, target_sr=resample_sr + ) + audio_max = np.abs(audio_opt).max() / 0.99 + max_int16 = 32768 + if audio_max > 1: + max_int16 /= audio_max + audio_opt = (audio_opt * max_int16).astype(np.int16) + del pitch, pitchf, sid + if torch.cuda.is_available(): + torch.cuda.empty_cache() + return audio_opt + + def infer_core_fang(self,para1,para2,para3,idx, + model, + net_g, + sid, + times, + index, + big_npy, + index_rate, + version, + protect): + return [ self.vc( + model, + net_g, + sid, + para1, para2, para3, + # audio_pad[s: t + self.t_pad2 + self.window], + # pitch[:, s // self.window: (t + self.t_pad2) // self.window], + # pitchf[:, s // self.window: (t + self.t_pad2) // self.window], + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt: -self.t_pad_tgt], idx] + + def ThreadPool_process_core(self, func_process,params1,params2,params3, + model, + net_g, + sid, + # audio_pad[s: t + self.t_pad2 + self.window], + # pitch[:, s // self.window: (t + self.t_pad2) // self.window], + # pitchf[:, s // self.window: (t + self.t_pad2) // self.window], + times, + index, + big_npy, + index_rate, + version, + protect + ): + num_threads = 2 + futures = [] + sort_ret = {} + with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor: + for idx in range(len(params1)): + para1 = params1[idx] + para2 = params2[idx] + para3 = params3[idx] + ret = executor.submit(self.infer_core_fang,para1,para2,para3,idx, + model, + net_g, + sid, + times, + index, + big_npy, + index_rate, + version, + protect) + futures.append(ret) + + cnt = 0 + for future in concurrent.futures.as_completed(futures): + cnt += 1 + #print(f"process finised {cnt}, and index :{future.result()[1]}") + + #print(future.result()) # result + # print(future.result()[1]) ##index + sort_ret[str(future.result()[1])] = future.result()[0] + + + fea_list = [] + for idx in range(len(sort_ret)): + fea_list.append(sort_ret[str(idx)]) + + return fea_list + + def pipeline_mulprocess( + self, + model, + net_g, + sid, + audio, ## input wav + input_audio_path, # input wav name + times, + f0_up_key, + f0_method, # f0 meathod + file_index, # index 路径 + # file_big_npy, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + version, + protect, + f0_file=None, + ): + if ( + file_index != "" # .index文件不为空 fang + # and file_big_npy != "" + # and os.path.exists(file_big_npy) == True + and os.path.exists(file_index) == True + and index_rate != 0 + ): + try: + index = faiss.read_index(file_index) + # big_npy = np.load(file_big_npy) + big_npy = index.reconstruct_n(0, index.ntotal) + except: + traceback.print_exc() + index = big_npy = None + else: + index = big_npy = None + audio = 
signal.filtfilt(bh, ah, audio) + audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect") + opt_ts = [] + if audio_pad.shape[0] > self.t_max: + audio_sum = np.zeros_like(audio) + for i in range(self.window): + audio_sum += audio_pad[i: i - self.window] # 这样算循环了,每个idx是过去一帧的值的和 fang + for t in range(self.t_center, audio.shape[0], self.t_center): # 一分钟一帧?? fang + opt_ts.append( + t + - self.t_query + + np.where( + np.abs(audio_sum[t - self.t_query: t + self.t_query]) + == np.abs(audio_sum[t - self.t_query: t + self.t_query]).min() + )[0][0] + ) # 返回[ t - self.t_query, t+self.t_query] 区间最小值位置的索引保存,fang + s = 0 + + t = None + t1 = ttime() + audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect") + p_len = audio_pad.shape[0] // self.window + inp_f0 = None + if hasattr(f0_file, "name") == True: + try: + with open(f0_file.name, "r") as f: + lines = f.read().strip("\n").split("\n") + inp_f0 = [] + for line in lines: + inp_f0.append([float(i) for i in line.split(",")]) + inp_f0 = np.array(inp_f0, dtype="float32") + except: + traceback.print_exc() + # sid = torch.tensor(sid, device=self.device).unsqueeze(0).long() + sid_embed = np.load(sid) + sid = torch.FloatTensor(sid_embed).to(self.device).half() + pitch, pitchf = None, None + #''' + if if_f0 == 1: + pitch, pitchf = self.get_f0( + input_audio_path, + audio_pad, + p_len, + f0_up_key, + f0_method, + filter_radius, + inp_f0, + ) + pitch = pitch[:p_len] + pitchf = pitchf[:p_len] + if self.device == "mps": + pitchf = pitchf.astype(np.float32) + pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long() + pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float() + #''' + + ''' + pitch_name = "./pitch_pitchf.npz" + #np.savez(pitch_name, pitch = pitch.detach().cpu().numpy(), pitchf = pitchf.detach().cpu().numpy()) + npz_obj = np.load(pitch_name) #文件名的后缀为npz + pitch, pitchf = npz_obj['pitch'], npz_obj['pitchf'] + pitch = torch.tensor(pitch, device=self.device).long() + pitchf = torch.tensor(pitchf, device=self.device).float() + #''' + + t2 = ttime() + times[1] += t2 - t1 + + audio_opt = [] + audio_pad_list = [] + pitch_list = [] + pitchf_list = [] + + + for t in opt_ts: # 分段推理每段音频,一段这里设置60s左右 fang + t = t // self.window * self.window + audio_pad_list.append(audio_pad[s: t + self.t_pad2 + self.window]) + pitch_list.append(pitch[:, s // self.window: (t + self.t_pad2) // self.window]) + pitchf_list.append(pitchf[:, s // self.window: (t + self.t_pad2) // self.window]) + s = t + + audio_pad_list.append(audio_pad[t:]) + pitch_list.append(pitch[:, t // self.window:] if t is not None else pitch) + pitchf_list.append(pitchf[:, t // self.window:] if t is not None else pitchf) + + audio_opt = self.ThreadPool_process_core(self.infer_core_fang, audio_pad_list, pitch_list, pitchf_list, + model, + net_g, + sid, + times, + index, + big_npy, + index_rate, + version, + protect + ) + ''' + if if_f0 == 1: ##后面是最后一段处理 fang + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + pitch[:, t // self.window:] if t is not None else pitch, + pitchf[:, t // self.window:] if t is not None else pitchf, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt: -self.t_pad_tgt] + ) + else: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + None, + None, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt: -self.t_pad_tgt] + ) + #''' + audio_opt = np.concatenate(audio_opt) + if rms_mix_rate != 1: + audio_opt = change_rms(audio, 
16000, audio_opt, tgt_sr, rms_mix_rate) + if resample_sr >= 16000 and tgt_sr != resample_sr: + audio_opt = librosa.resample( + audio_opt, orig_sr=tgt_sr, target_sr=resample_sr + ) + audio_max = np.abs(audio_opt).max() / 0.99 + max_int16 = 32768 + if audio_max > 1: + max_int16 /= audio_max + audio_opt = (audio_opt * max_int16).astype(np.int16) + del pitch, pitchf, sid + if torch.cuda.is_available(): + torch.cuda.empty_cache() + return audio_opt diff --git a/AIMeiSheng/vc_infer_pipeline_org_embed_org.py b/AIMeiSheng/vc_infer_pipeline_org_embed_org.py new file mode 100644 index 0000000..f7e70ae --- /dev/null +++ b/AIMeiSheng/vc_infer_pipeline_org_embed_org.py @@ -0,0 +1,527 @@ +import numpy as np, parselmouth, torch, pdb, sys, os +from time import time as ttime +import torch.nn.functional as F +import scipy.signal as signal +import pyworld, os, traceback, faiss, librosa, torchcrepe +from scipy import signal +from functools import lru_cache + +now_dir = os.getcwd() +sys.path.append(now_dir) + +bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000) + +input_audio_path2wav = {} + + +@lru_cache +def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period): + audio = input_audio_path2wav[input_audio_path] + f0, t = pyworld.harvest( + audio, + fs=fs, + f0_ceil=f0max, + f0_floor=f0min, + frame_period=frame_period, + ) + f0 = pyworld.stonemask(audio, f0, t, fs) + return f0 + + +def change_rms(data1, sr1, data2, sr2, rate): # 1是输入音频,2是输出音频,rate是2的占比 + # print(data1.max(),data2.max()) + rms1 = librosa.feature.rms( + y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2 + ) # 每半秒一个点 + rms2 = librosa.feature.rms(y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2) + rms1 = torch.from_numpy(rms1) + rms1 = F.interpolate( + rms1.unsqueeze(0), size=data2.shape[0], mode="linear" + ).squeeze() + rms2 = torch.from_numpy(rms2) + rms2 = F.interpolate( + rms2.unsqueeze(0), size=data2.shape[0], mode="linear" + ).squeeze() + rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-6) + data2 *= ( + torch.pow(rms1, torch.tensor(1 - rate)) + * torch.pow(rms2, torch.tensor(rate - 1)) + ).numpy() + return data2 + + +class VC(object): + def __init__(self, tgt_sr, config): + self.x_pad, self.x_query, self.x_center, self.x_max, self.is_half = ( + config.x_pad, ##config会根据设备配置不通知如:3 + config.x_query, # 10 等于x_max-x_center)*2 + config.x_center, #60 + config.x_max, #65 + config.is_half, + ) + self.sr = 16000 # hubert输入采样率 + self.window = 160 # 每帧点数 + self.t_pad = self.sr * self.x_pad # 每条前后pad时间 + self.t_pad_tgt = tgt_sr * self.x_pad + self.t_pad2 = self.t_pad * 2 + self.t_query = self.sr * self.x_query # 查询切点前后查询时间, + self.t_center = self.sr * self.x_center # 查询切点位置 + self.t_max = self.sr * self.x_max # 免查询时长阈值 + self.device = config.device + + def get_f0( + self, + input_audio_path, + x, + p_len, + f0_up_key, + f0_method, + filter_radius, + inp_f0=None, + ): + global input_audio_path2wav + time_step = self.window / self.sr * 1000 + f0_min = 50 + f0_max = 1100 + f0_mel_min = 1127 * np.log(1 + f0_min / 700) + f0_mel_max = 1127 * np.log(1 + f0_max / 700) + if f0_method == "pm": + f0 = ( + parselmouth.Sound(x, self.sr) + .to_pitch_ac( + time_step=time_step / 1000, + voicing_threshold=0.6, + pitch_floor=f0_min, + pitch_ceiling=f0_max, + ) + .selected_array["frequency"] + ) + pad_size = (p_len - len(f0) + 1) // 2 + if pad_size > 0 or p_len - len(f0) - pad_size > 0: + f0 = np.pad( + f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" + ) + elif f0_method == "harvest": + 
input_audio_path2wav[input_audio_path] = x.astype(np.double) + f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10) + if filter_radius > 2: + f0 = signal.medfilt(f0, 3) + elif f0_method == "crepe": + model = "full" + # Pick a batch size that doesn't cause memory errors on your gpu + batch_size = 512 + # Compute pitch using first gpu + audio = torch.tensor(np.copy(x))[None].float() + f0, pd = torchcrepe.predict( + audio, + self.sr, + self.window, + f0_min, + f0_max, + model, + batch_size=batch_size, + device=self.device, + return_periodicity=True, + ) + pd = torchcrepe.filter.median(pd, 3) + f0 = torchcrepe.filter.mean(f0, 3) + f0[pd < 0.1] = 0 + f0 = f0[0].cpu().numpy() + elif f0_method == "rmvpe": + if hasattr(self, "model_rmvpe") == False: + from lib.rmvpe import RMVPE + + print("loading rmvpe model") + self.model_rmvpe = RMVPE( + "rmvpe.pt", is_half=self.is_half, device=self.device + ) + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + else: ##for meisheng + self.model_rmvpe = f0_method + f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03) + + ##这里读文件,更改pitch st fang + valid_f0 = f0[f0 > 50] + mean_pitch_cur = np.mean(valid_f0[:min(len(valid_f0),500)]) + + ''' + #npy_name="./logs/xusong_v1_org_version/pitch_statistics/pitch_avg_min_max.npy" + npy_name = "./logs/xusong_v2_org_version_女声part12666373958325510/pitch_statistics/pitch_avg_min_max_valid.npy" + npy_name ="./logs/xusong_v2_org_version_男声part4222124654101582/pitch_statistics/pitch_avg_min_max_valid.npy" + mean_pittch_his = np.load(npy_name) + + mean_pittch_his = [118.25333262609048, 83 , 166]#xiafan + #mean_pittch_his = [130.65693262609048, 50 , 179] #建利 + #mean_pittch_his = [252.67887925858173, 116 , 364 ] #wt + #mean_pittch_his = [287.5804867,193,421 ] #syz_yujian_voce_(Vocals)_9 + #mean_pittch_his = [197.2379,160,263] # changying.wav + #mean_pittch_his = [216.978,128,376]# xusong_long.wav + + print("npy:",mean_pittch_his) + #deta = -mean_pitch_cur + mean_pittch_his[0] + ''' + print("@@f0_up_key:",f0_up_key) + deta = 0 + if(f0_up_key > 50 ): + deta = -mean_pitch_cur + f0_up_key + + #deta = f0_up_key + #f0[f0>0] = f0[f0>0] + deta ## 方法1 fang + print("$$$$$$$$$fangxxxxx pitch shift: ",deta) + f0_up_key = int(np.log2(deta/(mean_pitch_cur + 1) + 1) * 12)##方法2 fang + #if( abs(f0_up_key) < 3 ): + # f0_up_key = 0 + f0_up_key = max(min(12,f0_up_key),-12) + print("f0_up_key: ",f0_up_key) + #print("deta : ",int(np.log2(deta/mean_pitch_cur + 1) * 12)) + #print("$$$$$$$$$fangxxxxx pitch f0_up_key: ",f0_up_key) + ##en + f0 *= pow(2, f0_up_key / 12)#这块是音调更改 fang 我设置的0 + # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + tf0 = self.sr // self.window # 每秒f0点数 + if inp_f0 is not None: + delta_t = np.round( + (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1 + ).astype("int16") + replace_f0 = np.interp( + list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1] + ) + shape = f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)].shape[0] + f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[ + :shape + ] + # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()])) + ''' + ##这里读文件,更改pitch st fang + mean_pitch_cur = np.mean(f0[f0 > 0]) + #npy_name="./logs/xusong_v1_org_version/pitch_statistics/pitch_avg_min_max.npy" + #npy_name="./logs/xusong_v1_org_version_taylor/pitch_statistics/pitch_avg_min_max.npy" + npy_name="./logs/xusong_v2_org_version_MaxVocal_ns_retrain_read_content_guodegang/pitch_statistics/pitch_avg_min_max_valid.npy" + 
#npy_name="./logs/xusong_v2_org_version_MaxVocal_ns_retrain_read_content/pitch_statistics/pitch_avg_min_max_valid.npy" + npy_name = "./logs/xusong_v2_org_version_女声part12666373958325510/pitch_statistics/pitch_avg_min_max_valid.npy" + mean_pittch_his = np.load(npy_name) + mean_phist,min_phist,max_phist = mean_pittch_his[0],mean_pittch_his[1],mean_pittch_his[2] + deta = mean_pitch_cur - mean_phist + #deta_lower = (min_phist - mean_phist)/2 + #deta_upper = (max_phist - mean_phist)/2 + + i#f(deta > deta_lower and deta < deta_upper): + # deta = 0 + #else: + if deta > 50 : + deta -= 40 + elif deta < -50: + deta -= -40 + else: + deta = 0 + + ###简单均值平移,全部平移 + f0[f0>0] = f0[f0>0] - deta + #print("mean_phist: ",mean_phist,"min_phist: ",min_phist,"max_phist: ",max_phist) + #if + print("npy_name: ",npy_name) + print("npy:",mean_pittch_his) + print("mean_pitch_cur: ",mean_pitch_cur ) + pitch_min = min(f0[f0>0]) + pitch_max = max(f0[f0>0]) + print("cur min:",pitch_min,"pitch_max: ",pitch_max) + print("$$$$$$$$$fangxxxxx pitch shift: ",deta) + ##en + #''' + + f0bak = f0.copy() + f0_mel = 1127 * np.log(1 + f0 / 700) + f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / ( + f0_mel_max - f0_mel_min + ) + 1 + f0_mel[f0_mel <= 1] = 1 + f0_mel[f0_mel > 255] = 255 + f0_coarse = np.rint(f0_mel).astype(np.int) + return f0_coarse, f0bak # 1-0 + + def vc( + self, + model, + net_g, + sid, + audio0, + pitch, + pitchf, + times, + index, + big_npy, + index_rate, + version, + protect, + ): # ,file_index,file_big_npy + feats = torch.from_numpy(audio0) + if self.is_half: + feats = feats.half() + else: + feats = feats.float() + if feats.dim() == 2: # double channels + feats = feats.mean(-1) + assert feats.dim() == 1, feats.dim() + feats = feats.view(1, -1) + padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False) + #print("@@@feats: ",feats.shape) + #print("@@@padding_mask: ",padding_mask.shape) + inputs = { + "source": feats.to(self.device), + "padding_mask": padding_mask, + "output_layer": 9 if version == "v1" else 12, + #"output_layer": 6 if version == "v1" else 12, + } + t0 = ttime() + with torch.no_grad(): + logits = model.extract_features(**inputs) + feats = model.final_proj(logits[0]) if version == "v1" else logits[0]#为何v1要转化,维度问题??? 
fang + if protect < 0.5 and pitch != None and pitchf != None: + feats0 = feats.clone() + if ( + isinstance(index, type(None)) == False + and isinstance(big_npy, type(None)) == False + and index_rate != 0 + ): + npy = feats[0].cpu().numpy() + if self.is_half: + npy = npy.astype("float32") + + # _, I = index.search(npy, 1) + # npy = big_npy[I.squeeze()] + + score, ix = index.search(npy, k=8) + weight = np.square(1 / score) + weight /= weight.sum(axis=1, keepdims=True) + npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1) + + if self.is_half: + npy = npy.astype("float16") + feats = ( + torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate + + (1 - index_rate) * feats + )##基于index和实际音频的特征进行组合,作为输入 fang + + feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1) + if protect < 0.5 and pitch != None and pitchf != None: + feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute( + 0, 2, 1 + )#feats0的维度1 插值增加一倍 fang + t1 = ttime() + p_len = audio0.shape[0] // self.window ##分帧求pitch fang + if feats.shape[1] < p_len: + p_len = feats.shape[1] + if pitch != None and pitchf != None: + pitch = pitch[:, :p_len] + pitchf = pitchf[:, :p_len] + + if protect < 0.5 and pitch != None and pitchf != None: + pitchff = pitchf.clone() + pitchff[pitchf > 0] = 1 + pitchff[pitchf < 1] = protect + pitchff = pitchff.unsqueeze(-1) + feats = feats * pitchff + feats0 * (1 - pitchff) + feats = feats.to(feats0.dtype) + p_len = torch.tensor([p_len], device=self.device).long() + with torch.no_grad(): + if pitch != None and pitchf != None: + audio1 = ( + (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0]) + .data.cpu() + .float() + .numpy() + ) + else: + audio1 = ( + (net_g.infer(feats, p_len, sid)[0][0, 0]).data.cpu().float().numpy() + ) + del feats, p_len, padding_mask + if torch.cuda.is_available(): + torch.cuda.empty_cache() + t2 = ttime() + times[0] += t1 - t0 + times[2] += t2 - t1 + return audio1 + + def pipeline( + self, + model, + net_g, + sid, + audio,## input wav + input_audio_path, #input wav name + times, + f0_up_key, + f0_method,# f0 meathod + file_index, #index 路径 + # file_big_npy, + index_rate, + if_f0, + filter_radius, + tgt_sr, + resample_sr, + rms_mix_rate, + version, + protect, + f0_file=None, + ): + if ( + file_index != "" #.index文件不为空 fang + # and file_big_npy != "" + # and os.path.exists(file_big_npy) == True + and os.path.exists(file_index) == True + and index_rate != 0 + ): + try: + index = faiss.read_index(file_index) + # big_npy = np.load(file_big_npy) + big_npy = index.reconstruct_n(0, index.ntotal) + except: + traceback.print_exc() + index = big_npy = None + else: + index = big_npy = None + audio = signal.filtfilt(bh, ah, audio) + audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect") + opt_ts = [] + if audio_pad.shape[0] > self.t_max: + audio_sum = np.zeros_like(audio) + for i in range(self.window): + audio_sum += audio_pad[i : i - self.window]#这样算循环了,每个idx是过去一帧的值的和 fang + for t in range(self.t_center, audio.shape[0], self.t_center):#一分钟一帧?? 
fang + opt_ts.append( + t + - self.t_query + + np.where( + np.abs(audio_sum[t - self.t_query : t + self.t_query]) + == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min() + )[0][0] + )#返回[ t - self.t_query, t+self.t_query] 区间最小值位置的索引保存,fang + s = 0 + audio_opt = [] + t = None + t1 = ttime() + audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect") + p_len = audio_pad.shape[0] // self.window + inp_f0 = None + if hasattr(f0_file, "name") == True: + try: + with open(f0_file.name, "r") as f: + lines = f.read().strip("\n").split("\n") + inp_f0 = [] + for line in lines: + inp_f0.append([float(i) for i in line.split(",")]) + inp_f0 = np.array(inp_f0, dtype="float32") + except: + traceback.print_exc() + #sid = torch.tensor(sid, device=self.device).unsqueeze(0).long() + sid_embed = np.load(sid) + sid = torch.FloatTensor(sid_embed).to(self.device).half() + pitch, pitchf = None, None + if if_f0 == 1: + pitch, pitchf = self.get_f0( + input_audio_path, + audio_pad, + p_len, + f0_up_key, + f0_method, + filter_radius, + inp_f0, + ) + pitch = pitch[:p_len] + pitchf = pitchf[:p_len] + if self.device == "mps": + pitchf = pitchf.astype(np.float32) + pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long() + pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float() + + #print("&&&&pitch: ",pitchf) + t2 = ttime() + times[1] += t2 - t1 + for t in opt_ts: #分段推理每段音频,一段这里设置60s左右 fang + t = t // self.window * self.window + if if_f0 == 1: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[s : t + self.t_pad2 + self.window], + pitch[:, s // self.window : (t + self.t_pad2) // self.window], + pitchf[:, s // self.window : (t + self.t_pad2) // self.window], + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + else: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[s : t + self.t_pad2 + self.window], + None, + None, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + s = t + if if_f0 == 1: ##后面是最后一段处理 fang + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + pitch[:, t // self.window :] if t is not None else pitch, + pitchf[:, t // self.window :] if t is not None else pitchf, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + else: + audio_opt.append( + self.vc( + model, + net_g, + sid, + audio_pad[t:], + None, + None, + times, + index, + big_npy, + index_rate, + version, + protect, + )[self.t_pad_tgt : -self.t_pad_tgt] + ) + audio_opt = np.concatenate(audio_opt) + if rms_mix_rate != 1: + audio_opt = change_rms(audio, 16000, audio_opt, tgt_sr, rms_mix_rate) + if resample_sr >= 16000 and tgt_sr != resample_sr: + audio_opt = librosa.resample( + audio_opt, orig_sr=tgt_sr, target_sr=resample_sr + ) + audio_max = np.abs(audio_opt).max() / 0.99 + max_int16 = 32768 + if audio_max > 1: + max_int16 /= audio_max + audio_opt = (audio_opt * max_int16).astype(np.int16) + del pitch, pitchf, sid + if torch.cuda.is_available(): + torch.cuda.empty_cache() + return audio_opt diff --git a/AIMeiSheng/venv.sh b/AIMeiSheng/venv.sh new file mode 100644 index 0000000..17f58bf --- /dev/null +++ b/AIMeiSheng/venv.sh @@ -0,0 +1 @@ +python3 -m venv .venv diff --git a/AIMeiSheng/voice_classification/README.MD b/AIMeiSheng/voice_classification/README.MD new file mode 100644 index 0000000..972f6a4 --- /dev/null +++ b/AIMeiSheng/voice_classification/README.MD @@ -0,0 +1,26 @@ +# 人声分类 + 
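+### Quick start
+A minimal loading sketch using the helpers in online/model.py (the paths are assumptions; unpack models.zip from online/readme.md into online/models/ first):
+```
+from model import MobileNetV2Gender, MusicVoiceV5Model, load_model
+
+# hypothetical weight locations after unpacking models.zip
+gender_model = load_model(MobileNetV2Gender, "models/gender_8k_ratev5_v6_adam.pth", "cpu")
+voice_model = load_model(MusicVoiceV5Model, "models/voice_10_v5.pth", "cpu")
+```
+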
+### Background
+
+    Infer the gender of a performer from their recordings.
+
+### Overview
+#### Code layout:
+```
+    ---online  code used for online serving
+    ---script  scripts used during development
+    ---train   training code
+```
+#### Processing flow
+First, fetch the datasets in script and process them into feature files; then train a model
+on those feature files in train. train also runs the model over the online dataset, and the
+resulting prediction files can be taken to script/music_voice_class/ana for parameter
+tuning and analysis.
+
+#### Dataset locations
+```
+Curated vocal datasets:
+1 Segment-level accompaniment-ratio dataset: https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/dataset/voice_music_dataset.zip
+2 Random online male/female voice dataset: https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/dataset/online_data_dataset.zip
+3 Curated-pool male/female voice dataset: editor-library instance of the music library, db=av_db, table=recording_gender
+```
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/online/__pycache__/mobilenet_v2_custom.cpython-38.pyc b/AIMeiSheng/voice_classification/online/__pycache__/mobilenet_v2_custom.cpython-38.pyc
new file mode 100644
index 0000000..ca86abc
Binary files /dev/null and b/AIMeiSheng/voice_classification/online/__pycache__/mobilenet_v2_custom.cpython-38.pyc differ
diff --git a/AIMeiSheng/voice_classification/online/__pycache__/model.cpython-38.pyc b/AIMeiSheng/voice_classification/online/__pycache__/model.cpython-38.pyc
new file mode 100644
index 0000000..7d262bc
Binary files /dev/null and b/AIMeiSheng/voice_classification/online/__pycache__/model.cpython-38.pyc differ
diff --git a/AIMeiSheng/voice_classification/online/__pycache__/voice_class_online_fang.cpython-38.pyc b/AIMeiSheng/voice_classification/online/__pycache__/voice_class_online_fang.cpython-38.pyc
new file mode 100644
index 0000000..0837d0e
Binary files /dev/null and b/AIMeiSheng/voice_classification/online/__pycache__/voice_class_online_fang.cpython-38.pyc differ
diff --git a/AIMeiSheng/voice_classification/online/common.py b/AIMeiSheng/voice_classification/online/common.py
new file mode 100644
index 0000000..af3487a
--- /dev/null
+++ b/AIMeiSheng/voice_classification/online/common.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+"""
+Pin processes to CPU cores.
+The script can be launched several times; each launch binds one set of cores,
+and no two launches bind the same core.
+Each process binds n automatically chosen cores, or a caller-supplied list of core ids.
+"""
+
+import time
+import psutil
+import os
+import sys
+import hashlib
+import fcntl
+
+"""
+Automatic discovery of idle cores
+"""
+
+
+def exec_cmd_ints(cmd):
+    """
+    Run cmd and collect the integers it prints, one per line.
+    :param cmd:
+    :return:
+    """
+    r = os.popen(cmd)
+    lines = r.readlines()
+    ids = []
+    for line in lines:
+        line = line.strip()
+        if line.isdigit():
+            id = int(float(line))
+            ids.append(id)
+    return ids
+
+
+def get_idle_kernel(n=1):
+    cur_id = os.getpid()
+    name = os.path.basename(sys.argv[0])
+    command = "ps -ef | grep {} |grep python | awk '{{print $2}}'".format(name)
+    print(command)
+    ids = exec_cmd_ints(command)
+
+    print(ids, cur_id)
+    # collect the cores already bound by other instances of this script
+    count = psutil.cpu_count()
+    used = [False] * (count // n)
+    command = "pidstat | grep {} | awk '{{print $(NF-1)}}'"
+    for i in range(0, len(ids)):
+        if cur_id != ids[i]:
+            cmd = command.format(ids[i])
+            kers = exec_cmd_ints(cmd)
+            for ker in kers:
+                ker = ker // n
+                if ker < len(used):  # guard against cores beyond the last full group
+                    used[ker] = True
+    print(used)
+    # pick the first free group of n cores
+    for i in range(0, len(used)):
+        if not used[i]:
+            res = []
+            cur_i = i * n
+            for idx in range(cur_i, cur_i + n):
+                if idx < count:
+                    res.append(idx)
+            return res
+    return [0]  # every core is taken; fall back to core 0 (a bare 0 would crash cpu_affinity)
+
+
+def bind_kernel(n=1, kernel=[]):
+    p = psutil.Process()
+
+    # take a file lock so concurrent launches don't pick the same core
+    name = hashlib.md5(os.path.basename(sys.argv[0]).encode('utf-8')).hexdigest()
+    name = os.path.join("/tmp", name + ".lock")
+    if not os.path.exists(name):
+        with open(name, "w") as f:
+            f.write("0")
+    file = open(name)
+    fcntl.flock(file.fileno(), fcntl.LOCK_EX)  # exclusive lock
+    print("lock file --- {}".format(name))
+    if len(kernel) > 0:
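+        # explicit core ids supplied by the caller take precedence over auto-detection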
+ kernels = kernel + else: + kernels = get_idle_kernel(n) + p.cpu_affinity(kernels) # 绑定特定核心 + print("bind_kernel", kernels) + file.close() # 释放锁 + print("unlock file --- {}".format(name)) + + +def calc_forever(): + for i in range(0, 10000): + time.sleep(1000) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/online/mobilenet_v2_custom.py b/AIMeiSheng/voice_classification/online/mobilenet_v2_custom.py new file mode 100644 index 0000000..57b1227 --- /dev/null +++ b/AIMeiSheng/voice_classification/online/mobilenet_v2_custom.py @@ -0,0 +1,142 @@ +""" +直接从代码库中拷贝出的代码 +目的: mobilenet_v2只允许输入图片的通道数为3,不满足要求,因此拷贝出来做修改 +""" + +from torch import nn + + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + padding = (kernel_size - 1) // 2 + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2Custom(nn.Module): + def __init__(self, num_classes=2, in_channel=1, width_mult=1.0, inverted_residual_setting=None, round_nearest=8): + """ + MobileNet V2 main class + + Args: + num_classes (int): Number of classes + width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount + inverted_residual_setting: Network structure + round_nearest (int): Round the number of channels in each layer to be a multiple of this number + Set to 1 to turn off rounding + """ + super(MobileNetV2Custom, self).__init__() + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + + if inverted_residual_setting is None: + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # only check the first element, assuming user knows t,c,n,s are required + if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: + raise ValueError("inverted_residual_setting should be non-empty " + "or a 4-element list, got {}".format(inverted_residual_setting)) + + # building first layer + input_channel = 
_make_divisible(input_channel * width_mult, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) + # 修改的地方,原来in_channel=3 + features = [ConvBNReLU(in_channel, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * width_mult, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + # building last several layers + features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) + # make it nn.Sequential + self.features = nn.Sequential(*features) + + # building classifier + self.classifier = nn.Sequential( + nn.Dropout(0.2), + nn.Linear(self.last_channel, num_classes), + ) + + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def forward(self, x): + x = self.features(x) + x = x.mean([2, 3]) + x = self.classifier(x) + return x diff --git a/AIMeiSheng/voice_classification/online/model.py b/AIMeiSheng/voice_classification/online/model.py new file mode 100644 index 0000000..c5e8adc --- /dev/null +++ b/AIMeiSheng/voice_classification/online/model.py @@ -0,0 +1,71 @@ +from mobilenet_v2_custom import MobileNetV2Custom +import torch +import torch.nn as nn + +MFCC_LEN = 80 +FRAME_LEN = 128 + + +class MobileNetV2Gender(MobileNetV2Custom): + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + return super(MobileNetV2Gender, self).forward(x) + + +class MusicVoiceV5Model(nn.Module): + def __init__(self): + super(MusicVoiceV5Model, self).__init__() + + def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + conv_bn(1, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + nn.AvgPool2d((4, 3)), + ) + self.fc = nn.Linear(1024, 2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.model(x) + x = x.view(-1, 1024) + x = self.fc(x) + return x + + +def load_model(model_type, model_path, device): + model = model_type() + params = torch.load(model_path, map_location=torch.device(device)) + model.load_state_dict(state_dict=params) + model.eval() + model.to(device) + return model diff --git a/AIMeiSheng/voice_classification/online/models/gender_8k_ratev5_v6_adam.pth b/AIMeiSheng/voice_classification/online/models/gender_8k_ratev5_v6_adam.pth new file mode 100644 index 0000000..6e7bfe8 Binary files /dev/null and b/AIMeiSheng/voice_classification/online/models/gender_8k_ratev5_v6_adam.pth differ diff --git 
a/AIMeiSheng/voice_classification/online/models/gender_8k_v6_adam.pth b/AIMeiSheng/voice_classification/online/models/gender_8k_v6_adam.pth
new file mode 100644
index 0000000..0f6b0d3
Binary files /dev/null and b/AIMeiSheng/voice_classification/online/models/gender_8k_v6_adam.pth differ
diff --git a/AIMeiSheng/voice_classification/online/models/voice_005_rec_v5.pth b/AIMeiSheng/voice_classification/online/models/voice_005_rec_v5.pth
new file mode 100644
index 0000000..1f7638a
Binary files /dev/null and b/AIMeiSheng/voice_classification/online/models/voice_005_rec_v5.pth differ
diff --git a/AIMeiSheng/voice_classification/online/models/voice_10_v5.pth b/AIMeiSheng/voice_classification/online/models/voice_10_v5.pth
new file mode 100644
index 0000000..cf9c103
Binary files /dev/null and b/AIMeiSheng/voice_classification/online/models/voice_10_v5.pth differ
diff --git a/AIMeiSheng/voice_classification/online/readme.md b/AIMeiSheng/voice_classification/online/readme.md
new file mode 100644
index 0000000..10a1f09
--- /dev/null
+++ b/AIMeiSheng/voice_classification/online/readme.md
@@ -0,0 +1,50 @@
+# Male/female voice classification
+
+```
+Models and what they do:
+---gender_8k_ratev5_v6_adam.pth  // male/female classifier for pure-vocal segments (trained on the 8 kHz pure-vocal dataset, mobilenet_v2, Adam optimizer)
+---gender_8k_v6_adam.pth         // male/female classifier for vocal-bearing segments (trained on the 8 kHz vocal-bearing dataset, mobilenet_v2, Adam optimizer)
+---voice_005_rec_v5.pth          // pure-vocal detector (400 hand-labeled songs; flags pure-vocal segments, with vocal-bearing segments from recordings as negatives; mobilenet_v1, SGD optimizer)
+---voice_10_v5.pth               // vocal-bearing detector (400 hand-labeled songs; flags segments that contain vocals; mobilenet_v1, SGD optimizer)
+Model download: https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/models.zip
+```
+
+# Files
+```
+---common.py               // CPU-core binding helpers
+---mobilenet_v2_custom.py  // model definition
+---model.py                // wrapper layer for loading/running the models
+---readme.md               // this file
+---voice_class_online.py   // runtime entry point
+```
+
+# Environment setup
+```
+cd /home/worker
+wget "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/bin/bin.zip"
+unzip bin.zip
+rm -f bin.zip
+export PATH=$PATH:/home/worker/bin  # add this to .zshrc
+sudo yum install libsndfile-devel
+
+# the rest can be installed by hand
+conda create -n voice_class python=3.7 -y
+conda activate voice_class
+pip3 install librosa
+pip3 install psutil
+pip3 install torch==1.5 torchvision torchaudio
+```
+
+# Usage
+```
+Download and unpack the models, then run as described in voice_class_online.py
+```
+
+# Note:
+The code currently pins itself to a single CPU core; start several processes in parallel, sized to the cores you have.
+
+# Benchmarks (no core limit, measured on the GPU-2 machine):
+20 online samples (10 male, 10 female)
+
+CPU: spend_time:tot=31.91|transcode=5.92|vb=3.12|gen_feature=3.5|predict=18.94
+GPU: spend_time:tot=15.64|transcode=6.34|vb=4.17|gen_feature=3.3|predict=1.443
diff --git a/AIMeiSheng/voice_classification/online/resource/female/4785074274851990.mp4 b/AIMeiSheng/voice_classification/online/resource/female/4785074274851990.mp4
new file mode 100644
index 0000000..9b225ee
Binary files /dev/null and b/AIMeiSheng/voice_classification/online/resource/female/4785074274851990.mp4 differ
diff --git a/AIMeiSheng/voice_classification/online/voice_class_online.py b/AIMeiSheng/voice_classification/online/voice_class_online.py
new file mode 100644
index 0000000..c101a20
--- /dev/null
+++ b/AIMeiSheng/voice_classification/online/voice_class_online.py
@@ -0,0 +1,421 @@
+"""
+Online male/female voice classification tool
+1 transcode to 16-bit mono
+2 loudness balancing
+3 model classification
+"""
+
+import os
+import sys
+import librosa
+import shutil
+import logging
+import time
+import torch.nn.functional as F
+import numpy as np
+from model import *
+# from common import bind_kernel
+
+logging.basicConfig(level=logging.INFO)
+
+os.environ["LRU_CACHE_CAPACITY"] = "1"
+
+# torch.set_num_threads(1)
+# bind_kernel(1)
+
+"""
+Module-level scratch state.
+"""
+
+transcode_time = 0
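+# Per-stage wall-clock accumulators (transcode -> loudness balancing -> MFCC
+# extraction -> model inference); each helper adds its elapsed time, and
+# test_all()/test_all_feature() print them in the "spend_time:tot=..." line.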
+vb_time = 0 +mfcc_time = 0 +predict_time = 0 + +""" +错误码 +""" +ERR_CODE_SUCCESS = 0 # 处理成功 +ERR_CODE_NO_FILE = -1 # 文件不存在 +ERR_CODE_TRANSCODE = -2 # 转码失败 +ERR_CODE_VOLUME_BALANCED = -3 # 均衡化失败 +ERR_CODE_FEATURE_TOO_SHORT = -4 # 特征文件太短 + +""" +常量 +""" + +FRAME_LEN = 128 +MFCC_LEN = 80 + +EBUR128_BIN = "/data/gpu_env_common/res/av_svc/bin/standard_audio_no_cut" +# EBUR128_BIN = "/Users/yangjianli/linux/opt/soft/bin/standard_audio_no_cut" +GENDER_FEMALE = 0 +GENDER_MALE = 1 +GENDER_OTHER = 2 +""" +通用函数 +""" + + +def exec_cmd(cmd): + ret = os.system(cmd) + if ret != 0: + return False + return True + + +""" +业务需要的函数 +""" + + +def get_one_mfcc(file_url): + st = time.time() + data, sr = librosa.load(file_url, sr=16000) + if len(data) < 512: + return [] + mfcc = librosa.feature.mfcc(y=data, sr=sr, n_fft=512, hop_length=256, n_mfcc=MFCC_LEN) + mfcc = mfcc.transpose() + print("get_one_mfcc:spend_time={}".format(time.time() - st)) + global mfcc_time + mfcc_time += time.time() - st + return mfcc + + +def volume_balanced(src, dst): + st = time.time() + cmd = "{} {} {}".format(EBUR128_BIN, src, dst) + logging.info(cmd) + exec_cmd(cmd) + if not os.path.exists(dst): + logging.error("volume_balanced:cmd={}".format(cmd)) + print("volume_balanced:spend_time={}".format(time.time() - st)) + + global vb_time + vb_time += time.time() - st + return os.path.exists(dst) + + +def transcode(src, dst): + st = time.time() + cmd = "ffmpeg -loglevel quiet -i {} -ar 16000 -ac 1 {}".format(src, dst) + logging.info(cmd) + exec_cmd(cmd) + if not os.path.exists(dst): + logging.error("transcode:cmd={}".format(cmd)) + print("transcode:spend_time={}".format(time.time() - st)) + global transcode_time + transcode_time += time.time() - st + return os.path.exists(dst) + + +class VoiceClass: + + def __init__(self, music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model): + """ + 四个模型 + :param music_voice_pure_model: 分辨纯净人声/其他 + :param music_voice_no_pure_model: 分辨有人声/其他 + :param gender_pure_model: 纯净人声分辨男女 + :param gender_no_pure_model: 有人声分辨男女 + """ + st = time.time() + self.device = "cpu" + self.batch_size = 256 + self.music_voice_pure_model = load_model(MusicVoiceV5Model, music_voice_pure_model, self.device) + self.music_voice_no_pure_model = load_model(MusicVoiceV5Model, music_voice_no_pure_model, self.device) + self.gender_pure_model = load_model(MobileNetV2Gender, gender_pure_model, self.device) + self.gender_no_pure_model = load_model(MobileNetV2Gender, gender_no_pure_model, self.device) + logging.info("load model ok ! 
spend_time={}".format(time.time() - st)) + + def batch_predict(self, model, features): + st = time.time() + scores = [] + with torch.no_grad(): + for i in range(0, len(features), self.batch_size): + cur_data = features[i:i + self.batch_size].to(self.device) + predicts = model(cur_data) + predicts_score = F.softmax(predicts, dim=1) + scores.extend(predicts_score.cpu().numpy()) + ret = np.array(scores) + global predict_time + predict_time += time.time() - st + return ret + + def predict_pure(self, filename, features): + scores = self.batch_predict(self.music_voice_pure_model, features) + new_features = [] + for idx, score in enumerate(scores): + if score[0] > 0.5: # 非人声 + continue + new_features.append(features[idx].numpy()) + + # 人声段太少,不能进行处理 + # 参数可以改 + new_feature_len = len(new_features) + new_feature_rate = len(new_features) / len(features) + if new_feature_len < 4 or new_feature_rate < 0.4: + logging.warning( + "filename={}|predict_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate) + ) + return GENDER_OTHER, -1 + new_features = torch.from_numpy(np.array(new_features)) + scores = self.batch_predict(self.gender_pure_model, new_features) + f_avg = sum(scores[:, 0]) / len(scores) + m_avg = sum(scores[:, 1]) / len(scores) + female_rate = f_avg / (f_avg + m_avg) + if female_rate > 0.65: + return GENDER_FEMALE, female_rate + if female_rate < 0.12: + return GENDER_MALE, female_rate + logging.warning( + "filename={}|predict_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate) + ) + return GENDER_OTHER, female_rate + + def predict_no_pure(self, filename, features): + scores = self.batch_predict(self.music_voice_no_pure_model, features) + new_features = [] + for idx, score in enumerate(scores): + if score[0] > 0.5: # 非人声 + continue + new_features.append(features[idx].numpy()) + + # 人声段太少,不能进行处理 + # 参数可以改 + new_feature_len = len(new_features) + new_feature_rate = len(new_features) / len(features) + if new_feature_len < 4 or new_feature_rate < 0.4: + logging.warning( + "filename={}|predict_no_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate) + ) + return GENDER_OTHER, -1 + new_features = torch.from_numpy(np.array(new_features)) + scores = self.batch_predict(self.gender_no_pure_model, new_features) + f_avg = sum(scores[:, 0]) / len(scores) + m_avg = sum(scores[:, 1]) / len(scores) + female_rate = f_avg / (f_avg + m_avg) + if female_rate > 0.75: + return GENDER_FEMALE, female_rate + if female_rate < 0.1: + return GENDER_MALE, female_rate + logging.warning( + "filename={}|predict_no_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate) + ) + return GENDER_OTHER, female_rate + + def predict(self, filename, features): + st = time.time() + new_features = [] + for i in range(FRAME_LEN, len(features), FRAME_LEN): + new_features.append(features[i - FRAME_LEN: i]) + new_features = torch.from_numpy(np.array(new_features)) + gender, rate = self.predict_pure(filename, new_features) + if gender == GENDER_OTHER: + logging.info("start no pure process...") + gender, rate = self.predict_no_pure(filename, new_features) + return gender, rate, False + print("predict|spend_time={}".format(time.time() - st)) + return gender, rate, True + + def process_one_logic(self, filename, file_path, cache_dir): + tmp_wav = os.path.join(cache_dir, "tmp.wav") + tmp_vb_wav = os.path.join(cache_dir, "tmp_vb.wav") + if not transcode(file_path, tmp_wav): + return ERR_CODE_TRANSCODE, None, None + if not volume_balanced(tmp_wav, 
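+            # tmp_vb_wav is the loudness-normalized copy written by the
+            # ebur128 binary; features are extracted from it, not from tmp.wav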
tmp_vb_wav):
+            return ERR_CODE_VOLUME_BALANCED, None, None
+        features = get_one_mfcc(tmp_vb_wav)
+        if len(features) < FRAME_LEN:
+            logging.error("feature too short|file_path={}".format(file_path))
+            return ERR_CODE_FEATURE_TOO_SHORT, None, None
+        return self.predict(filename, features)
+
+    def process_one(self, file_path):
+        base_dir = os.path.dirname(file_path)
+        filename = os.path.splitext(file_path)[0]
+        cache_dir = os.path.join(base_dir, filename + "_cache")
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+        os.makedirs(cache_dir)
+        ret = self.process_one_logic(filename, file_path, cache_dir)
+        shutil.rmtree(cache_dir)
+        return ret
+
+    def process(self, file_path):
+        gender, female_rate, is_pure = self.process_one(file_path)
+        logging.info("{}|gender={}|female_rate={}".format(file_path, gender, female_rate))
+        return gender, female_rate, is_pure
+
+    def process_by_feature(self, feature_file):
+        """
+        Classify directly from a saved feature file.
+        :param feature_file:
+        :return:
+        """
+        filename = os.path.splitext(feature_file)[0]
+        features = np.load(feature_file)
+        # predict() returns (gender, female_rate, is_pure); the original
+        # two-value unpacking raised ValueError here
+        gender, female_rate, _is_pure = self.predict(filename, features)
+        return gender, female_rate
+
+
+def test_all_feature():
+    import glob
+    base_dir = "/data/datasets/music_voice_dataset_full/feature_online_data_v3"
+    female = glob.glob(os.path.join(base_dir, "female/*feature.npy"))
+    male = glob.glob(os.path.join(base_dir, "male/*feature.npy"))
+    other = glob.glob(os.path.join(base_dir, "other/*feature.npy"))
+    model_path = "/data/jianli.yang/voice_classification/online/models"
+    music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth")
+    music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth")
+    gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth")
+    gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth")
+    vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model)
+
+    tot_st = time.time()
+    ret_map = {
+        0: {0: 0, 1: 0, 2: 0},
+        1: {0: 0, 1: 0, 2: 0},
+        2: {0: 0, 1: 0, 2: 0}
+    }
+    for file in female:
+        st = time.time()
+        print("------------------------------>>>>>")
+        gender, female_score = vc.process_by_feature(file)
+        ret_map[0][gender] += 1
+        if gender != 0:
+            print("err:female->{}|{}|{}".format(gender, file, female_score))
+        print("process|spend_tm=={}".format(time.time() - st))
+
+    for file in male:
+        st = time.time()
+        print("------------------------------>>>>>")
+        gender, female_score = vc.process_by_feature(file)
+        ret_map[1][gender] += 1
+        if gender != 1:
+            print("err:male->{}|{}|{}".format(gender, file, female_score))
+        print("process|spend_tm=={}".format(time.time() - st))
+
+    for file in other:
+        st = time.time()
+        print("------------------------------>>>>>")
+        gender, female_score = vc.process_by_feature(file)
+        ret_map[2][gender] += 1
+        if gender != 2:
+            print("err:other->{}|{}|{}".format(gender, file, female_score))
+        print("process|spend_tm=={}".format(time.time() - st))
+
+    global transcode_time, vb_time, mfcc_time, predict_time
+    print("spend_time:tot={}|transcode={}|vb={}|gen_feature={}|predict={}".format(time.time() - tot_st, transcode_time,
+                                                                                  vb_time, mfcc_time, predict_time))
+    f_f = ret_map[0][0]
+    f_m = ret_map[0][1]
+    f_o = ret_map[0][2]
+    m_f = ret_map[1][0]
+    m_m = ret_map[1][1]
+    m_o = ret_map[1][2]
+    o_f = ret_map[2][0]
+    o_m = ret_map[2][1]
+    o_o = ret_map[2][2]
+
+    print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o))
+    print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o))
+    print("om:{},of:{},oo:{}".format(o_m, o_f,
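+    # (o_o below closes the 3x3 confusion-matrix printout: rows are true
+    # labels f/m/o, columns are predictions)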
o_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f + o_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m + o_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + +def test_all(): + import glob + base_dir = "/data/datasets/music_voice_dataset_full/online_data_v3_top200" + female = glob.glob(os.path.join(base_dir, "female/*mp4")) + male = glob.glob(os.path.join(base_dir, "male/*mp4")) + other = glob.glob(os.path.join(base_dir, "other/*mp4")) + model_path = "/data/jianli.yang/voice_classification/online/models" + music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth") + music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth") + gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth") + gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth") + vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model) + + tot_st = time.time() + ret_map = { + 0: {0: 0, 1: 0, 2: 0}, + 1: {0: 0, 1: 0, 2: 0}, + 2: {0: 0, 1: 0, 2: 0} + } + for file in female: + st = time.time() + print("------------------------------>>>>>") + gender, female_score = vc.process(file) + ret_map[0][gender] += 1 + if gender != 0: + print("err:female->{}|{}|{}".format(gender, file, female_score)) + print("process|spend_tm=={}".format(time.time() - st)) + + for file in male: + st = time.time() + print("------------------------------>>>>>") + gender, female_score = vc.process(file) + ret_map[1][gender] += 1 + if gender != 1: + print("err:male->{}|{}|{}".format(gender, file, female_score)) + print("process|spend_tm=={}".format(time.time() - st)) + + for file in other: + st = time.time() + print("------------------------------>>>>>") + gender, female_score = vc.process(file) + ret_map[2][gender] += 1 + if gender != 2: + print("err:other->{}|{}|{}".format(gender, file, female_score)) + print("process|spend_tm=={}".format(time.time() - st)) + + global transcode_time, vb_time, mfcc_time, predict_time + print("spend_time:tot={}|transcode={}|vb={}|gen_feature={}|predict={}".format(time.time() - tot_st, transcode_time, + vb_time, mfcc_time, predict_time)) + f_f = ret_map[0][0] + f_m = ret_map[0][1] + f_o = ret_map[0][2] + m_f = ret_map[1][0] + m_m = ret_map[1][1] + m_o = ret_map[1][2] + o_f = ret_map[2][0] + o_m = ret_map[2][1] + o_o = ret_map[2][2] + + print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o)) + print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o)) + print("om:{},of:{},oo:{}".format(o_m, o_f, o_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f + o_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m + o_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + +if __name__ == "__main__": + # test_all() + # test_all_feature() + model_path = sys.argv[1] + voice_path = sys.argv[2] + music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth") + music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth") + gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth") + gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth") + vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model) + for i in range(0, 1): + st = time.time() + 
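+        # Example invocation (assumed paths; the models and a sample clip ship
+        # in this repo under online/models and online/resource):
+        #   python voice_class_online.py ./models resource/female/4785074274851990.mp4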
print("------------------------------>>>>>") + vc.process(voice_path) + print("process|spend_tm=={}".format(time.time() - st)) diff --git a/AIMeiSheng/voice_classification/online/voice_class_online_fang.py b/AIMeiSheng/voice_classification/online/voice_class_online_fang.py new file mode 100644 index 0000000..dca2cfe --- /dev/null +++ b/AIMeiSheng/voice_classification/online/voice_class_online_fang.py @@ -0,0 +1,423 @@ +""" +男女声分类在线工具 +1 转码为16bit单声道 +2 均衡化 +3 模型分类 +""" + +import os +import sys +import librosa +import shutil +import logging +import time +import torch.nn.functional as F +import numpy as np +from model import * +# from common import bind_kernel + +logging.basicConfig(level=logging.INFO) + +os.environ["LRU_CACHE_CAPACITY"] = "1" + +# torch.set_num_threads(1) +# bind_kernel(1) + +""" +临时用一下,全局使用的变量 +""" + +transcode_time = 0 +vb_time = 0 +mfcc_time = 0 +predict_time = 0 + +""" +错误码 +""" +ERR_CODE_SUCCESS = 0 # 处理成功 +ERR_CODE_NO_FILE = -1 # 文件不存在 +ERR_CODE_TRANSCODE = -2 # 转码失败 +ERR_CODE_VOLUME_BALANCED = -3 # 均衡化失败 +ERR_CODE_FEATURE_TOO_SHORT = -4 # 特征文件太短 + +""" +常量 +""" + +FRAME_LEN = 128 +MFCC_LEN = 80 + +EBUR128_BIN = "/data/gpu_env_common/res/av_svc/bin/standard_audio_no_cut" +# EBUR128_BIN = "/Users/yangjianli/linux/opt/soft/bin/standard_audio_no_cut" +GENDER_FEMALE = 0 +GENDER_MALE = 1 +GENDER_OTHER = 2 +""" +通用函数 +""" + + +def exec_cmd(cmd): + ret = os.system(cmd) + if ret != 0: + return False + return True + + +""" +业务需要的函数 +""" + + +def get_one_mfcc(file_url): + st = time.time() + data, sr = librosa.load(file_url, sr=16000) + if len(data) < 512: + return [] + mfcc = librosa.feature.mfcc(y=data, sr=sr, n_fft=512, hop_length=256, n_mfcc=MFCC_LEN) + mfcc = mfcc.transpose() + print("get_one_mfcc:spend_time={}".format(time.time() - st)) + global mfcc_time + mfcc_time += time.time() - st + return mfcc + + +def volume_balanced(src, dst): + st = time.time() + cmd = "{} {} {}".format(EBUR128_BIN, src, dst) + logging.info(cmd) + exec_cmd(cmd) + if not os.path.exists(dst): + logging.error("volume_balanced:cmd={}".format(cmd)) + print("volume_balanced:spend_time={}".format(time.time() - st)) + + global vb_time + vb_time += time.time() - st + return os.path.exists(dst) + + +def transcode(src, dst): + st = time.time() + cmd = "ffmpeg -loglevel quiet -i {} -ar 16000 -ac 1 {}".format(src, dst) + logging.info(cmd) + exec_cmd(cmd) + if not os.path.exists(dst): + logging.error("transcode:cmd={}".format(cmd)) + print("transcode:spend_time={}".format(time.time() - st)) + global transcode_time + transcode_time += time.time() - st + return os.path.exists(dst) + + +class VoiceClass: + + def __init__(self, music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model): + """ + 四个模型 + :param music_voice_pure_model: 分辨纯净人声/其他 + :param music_voice_no_pure_model: 分辨有人声/其他 + :param gender_pure_model: 纯净人声分辨男女 + :param gender_no_pure_model: 有人声分辨男女 + """ + st = time.time() + self.device = "cpu" + self.batch_size = 256 + self.music_voice_pure_model = load_model(MusicVoiceV5Model, music_voice_pure_model, self.device) + self.music_voice_no_pure_model = load_model(MusicVoiceV5Model, music_voice_no_pure_model, self.device) + self.gender_pure_model = load_model(MobileNetV2Gender, gender_pure_model, self.device) + self.gender_no_pure_model = load_model(MobileNetV2Gender, gender_no_pure_model, self.device) + logging.info("load model ok ! 
spend_time={}".format(time.time() - st)) + + def batch_predict(self, model, features): + st = time.time() + scores = [] + with torch.no_grad(): + for i in range(0, len(features), self.batch_size): + cur_data = features[i:i + self.batch_size].to(self.device) + predicts = model(cur_data) + predicts_score = F.softmax(predicts, dim=1) + scores.extend(predicts_score.cpu().numpy()) + ret = np.array(scores) + global predict_time + predict_time += time.time() - st + return ret + + def predict_pure(self, filename, features): + scores = self.batch_predict(self.music_voice_pure_model, features) + new_features = [] + for idx, score in enumerate(scores): + if score[0] > 0.5: # 非人声 + continue + new_features.append(features[idx].numpy()) + + # 人声段太少,不能进行处理 + # 参数可以改 + new_feature_len = len(new_features) + new_feature_rate = len(new_features) / len(features) + if new_feature_len < 4 or new_feature_rate < 0.4: + logging.warning( + "filename={}|predict_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate) + ) + return GENDER_OTHER, -1 + new_features = torch.from_numpy(np.array(new_features)) + scores = self.batch_predict(self.gender_pure_model, new_features) + f_avg = sum(scores[:, 0]) / len(scores) + m_avg = sum(scores[:, 1]) / len(scores) + female_rate = f_avg / (f_avg + m_avg) + if female_rate > 0.65: + return GENDER_FEMALE, female_rate + if female_rate < 0.12: + return GENDER_MALE, female_rate + logging.warning( + "filename={}|predict_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate) + ) + return GENDER_OTHER, female_rate + + def predict_no_pure(self, filename, features): + scores = self.batch_predict(self.music_voice_no_pure_model, features) + new_features = [] + for idx, score in enumerate(scores): + if score[0] > 0.5: # 非人声 + continue + new_features.append(features[idx].numpy()) + + # 人声段太少,不能进行处理 + # 参数可以改 + new_feature_len = len(new_features) + new_feature_rate = len(new_features) / len(features) + if new_feature_len < 4 or new_feature_rate < 0.4: + logging.warning( + "filename={}|predict_no_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate) + ) + return GENDER_OTHER, -1 + new_features = torch.from_numpy(np.array(new_features)) + scores = self.batch_predict(self.gender_no_pure_model, new_features) + f_avg = sum(scores[:, 0]) / len(scores) + m_avg = sum(scores[:, 1]) / len(scores) + female_rate = f_avg / (f_avg + m_avg) + if female_rate > 0.75: + return GENDER_FEMALE, female_rate + if female_rate < 0.1: + return GENDER_MALE, female_rate + logging.warning( + "filename={}|predict_no_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate) + ) + return GENDER_OTHER, female_rate + + def predict(self, filename, features): + st = time.time() + new_features = [] + for i in range(FRAME_LEN, len(features), FRAME_LEN): + new_features.append(features[i - FRAME_LEN: i]) + new_features = torch.from_numpy(np.array(new_features)) + gender, rate = self.predict_pure(filename, new_features) + if gender == GENDER_OTHER: + logging.info("start no pure process...") + gender, rate = self.predict_no_pure(filename, new_features) + return gender, rate, False + print("predict|spend_time={}".format(time.time() - st)) + return gender, rate, True + + def process_one_logic(self, filename, file_path, cache_dir): + tmp_wav = os.path.join(cache_dir, "tmp.wav") + tmp_vb_wav = os.path.join(cache_dir, "tmp_vb.wav") + if not transcode(file_path, tmp_wav): + return ERR_CODE_TRANSCODE, None, None + if not volume_balanced(tmp_wav, 
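+            # on any stage failure process_one_logic returns
+            # (err_code, None, None) in place of the usual
+            # (gender, female_rate, is_pure) triple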
tmp_vb_wav):
+            return ERR_CODE_VOLUME_BALANCED, None, None
+        features = get_one_mfcc(tmp_vb_wav)
+        if len(features) < FRAME_LEN:
+            logging.error("feature too short|file_path={}".format(file_path))
+            return ERR_CODE_FEATURE_TOO_SHORT, None, None
+        return self.predict(filename, features)
+
+    def process_one(self, file_path):
+        base_dir = os.path.dirname(file_path)
+        filename = os.path.splitext(file_path)[0]
+        print("filename:", filename)
+        cache_dir = os.path.join(base_dir, filename + "_cache")
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+        os.makedirs(cache_dir)
+        ret = self.process_one_logic(filename, file_path, cache_dir)
+        shutil.rmtree(cache_dir)
+        return ret
+
+    def process(self, file_path):
+        gender, female_rate, is_pure = self.process_one(file_path)
+        logging.info("{}|gender={}|female_rate={}".format(file_path, gender, female_rate))
+        return gender, female_rate, is_pure
+
+    def process_by_feature(self, feature_file):
+        """
+        Classify directly from a saved feature file.
+        :param feature_file:
+        :return:
+        """
+        filename = os.path.splitext(feature_file)[0]
+        features = np.load(feature_file)
+        # predict() returns (gender, female_rate, is_pure); the original
+        # two-value unpacking raised ValueError here
+        gender, female_rate, _is_pure = self.predict(filename, features)
+        return gender, female_rate
+
+
+def test_all_feature():
+    import glob
+    base_dir = "/data/datasets/music_voice_dataset_full/feature_online_data_v3"
+    female = glob.glob(os.path.join(base_dir, "female/*feature.npy"))
+    male = glob.glob(os.path.join(base_dir, "male/*feature.npy"))
+    other = glob.glob(os.path.join(base_dir, "other/*feature.npy"))
+    model_path = "/data/jianli.yang/voice_classification/online/models"
+    music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth")
+    music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth")
+    gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth")
+    gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth")
+    vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model)
+
+    tot_st = time.time()
+    ret_map = {
+        0: {0: 0, 1: 0, 2: 0},
+        1: {0: 0, 1: 0, 2: 0},
+        2: {0: 0, 1: 0, 2: 0}
+    }
+    for file in female:
+        st = time.time()
+        print("------------------------------>>>>>")
+        gender, female_score = vc.process_by_feature(file)
+        ret_map[0][gender] += 1
+        if gender != 0:
+            print("err:female->{}|{}|{}".format(gender, file, female_score))
+        print("process|spend_tm=={}".format(time.time() - st))
+
+    for file in male:
+        st = time.time()
+        print("------------------------------>>>>>")
+        gender, female_score = vc.process_by_feature(file)
+        ret_map[1][gender] += 1
+        if gender != 1:
+            print("err:male->{}|{}|{}".format(gender, file, female_score))
+        print("process|spend_tm=={}".format(time.time() - st))
+
+    for file in other:
+        st = time.time()
+        print("------------------------------>>>>>")
+        gender, female_score = vc.process_by_feature(file)
+        ret_map[2][gender] += 1
+        if gender != 2:
+            print("err:other->{}|{}|{}".format(gender, file, female_score))
+        print("process|spend_tm=={}".format(time.time() - st))
+
+    global transcode_time, vb_time, mfcc_time, predict_time
+    print("spend_time:tot={}|transcode={}|vb={}|gen_feature={}|predict={}".format(time.time() - tot_st, transcode_time,
+                                                                                  vb_time, mfcc_time, predict_time))
+    f_f = ret_map[0][0]
+    f_m = ret_map[0][1]
+    f_o = ret_map[0][2]
+    m_f = ret_map[1][0]
+    m_m = ret_map[1][1]
+    m_o = ret_map[1][2]
+    o_f = ret_map[2][0]
+    o_m = ret_map[2][1]
+    o_o = ret_map[2][2]
+
+    print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o))
+    print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o))
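+    # Rows of this printout are true labels (f/m/o), columns predictions; note
+    # that the "acc" figures computed below are per-class precision
+    # (TP / everything predicted as that class), not accuracy in the strict sense.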
print("om:{},of:{},oo:{}".format(o_m, o_f, o_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f + o_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m + o_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + +def test_all(): + import glob + base_dir = "/data/datasets/music_voice_dataset_full/online_data_v3_top200" + female = glob.glob(os.path.join(base_dir, "female/*mp4")) + male = glob.glob(os.path.join(base_dir, "male/*mp4")) + other = glob.glob(os.path.join(base_dir, "other/*mp4")) + model_path = "/data/jianli.yang/voice_classification/online/models" + music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth") + music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth") + gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth") + gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth") + vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model) + + tot_st = time.time() + ret_map = { + 0: {0: 0, 1: 0, 2: 0}, + 1: {0: 0, 1: 0, 2: 0}, + 2: {0: 0, 1: 0, 2: 0} + } + for file in female: + st = time.time() + print("------------------------------>>>>>") + gender, female_score = vc.process(file) + ret_map[0][gender] += 1 + if gender != 0: + print("err:female->{}|{}|{}".format(gender, file, female_score)) + print("process|spend_tm=={}".format(time.time() - st)) + + for file in male: + st = time.time() + print("------------------------------>>>>>") + gender, female_score = vc.process(file) + ret_map[1][gender] += 1 + if gender != 1: + print("err:male->{}|{}|{}".format(gender, file, female_score)) + print("process|spend_tm=={}".format(time.time() - st)) + + for file in other: + st = time.time() + print("------------------------------>>>>>") + gender, female_score = vc.process(file) + ret_map[2][gender] += 1 + if gender != 2: + print("err:other->{}|{}|{}".format(gender, file, female_score)) + print("process|spend_tm=={}".format(time.time() - st)) + + global transcode_time, vb_time, mfcc_time, predict_time + print("spend_time:tot={}|transcode={}|vb={}|gen_feature={}|predict={}".format(time.time() - tot_st, transcode_time, + vb_time, mfcc_time, predict_time)) + f_f = ret_map[0][0] + f_m = ret_map[0][1] + f_o = ret_map[0][2] + m_f = ret_map[1][0] + m_m = ret_map[1][1] + m_o = ret_map[1][2] + o_f = ret_map[2][0] + o_m = ret_map[2][1] + o_o = ret_map[2][2] + + print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o)) + print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o)) + print("om:{},of:{},oo:{}".format(o_m, o_f, o_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f + o_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m + o_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + +if __name__ == "__main__": + # test_all() + # test_all_feature() + model_path = sys.argv[1] + voice_path = sys.argv[2] + music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth") + music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth") + gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth") + gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth") + vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model) + for i in range(0, 1): + st = time.time() + 
print("------------------------------>>>>>") + gender, female_rate, is_pure = vc.process(voice_path) + print("process|spend_tm=={}".format(time.time() - st)) + print("gender:{}, female_rate:{},is_pure:{}".format(gender,female_rate,is_pure)) diff --git a/AIMeiSheng/voice_classification/script/README.MD b/AIMeiSheng/voice_classification/script/README.MD new file mode 100644 index 0000000..7266fe0 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/README.MD @@ -0,0 +1,46 @@ +# Script +本文件夹存放偶尔使用,与训练和预测无关的脚本 + +``` +---dataset 数据集处理 + 顺序: 先转码 & 构建id_gender.txt & 切分提取特征 + ---common.py 通用函数 + ---split_gen_mfcc.py 切分数据并提取mfcc + ---transcode.py 转码数据 + +---common.py 通用函数 +---country2av_area.py 将国家划分为人声分类所需要的大区 +---download_msg.py 下载人声分类所需要的数据 +---export_data.py 从精品池获取数据并将其写入到人声分类的数据库 +---download_origin_mp4.py 从线上随机获取一些干声数据 +---download_recording.py 给定文件夹,从文件夹中下载所有伴奏对应的作品 +---music_voice_class + ---ana 模型预测得到的段级别的结果,在这里确定投票参数 + ---find_best_thread_v2.py 分析一组模型的结果(人声+男女声) + ---find_best_thread_v4.py 分析两组模型的结果(纯人声+男女声,带人声+男女声) + ---standard_audio_no_cut 预处理音频(ebur128拉伸alimiter压限,需要编译) + ---format_simple_label2label.py 用于数据标注,详情看代码 + ---gen_acc_dataset.py **切割适量的伴奏用于补充构建数据集** + ---gen_dataset.py **利用现有的数据集标注结果进行切割构建分段(分段伴奏占比数据集_最外层的readme.txt有地址)** + ---gen_dataset_feature.py **利用上面提取到的分段提取特征** + ---gen_dataset_feature_v1.py 提取幅度谱特征_暂时不重要 + ---gen_dataset_files.py **对歌曲级别提取特征文件** + ---gen_dataset_files_v1.py 提取幅度谱特征_暂时不重要 + ---gen_err_dataset.py 获取错误的数据组成的数据集(不重要) + ---gen_file_dict.py **生成伴奏占人声的占比情况的字典(用于训练纯人声的模型使用)** + ---gen_pure_acc_gender_dataset.py **构建补充男女声训练使用的acc的数据集** + ---gen_pure_gender_dataset.py **构建男女声训练使用的数据集(这里需要纯人声或者带人声的模型做标注)** + ---gen_rec_dataset.py **利用干声标注好的分段数据去切割作品用来当作纯人声的负样本** + ---split_by_idx.py 不重要 + ---split_by_idx.py 不重要 +``` + +## 处理逻辑 +### 构建人声/伴奏数据集 +1. 下载数据集(使用链接下载即可) +2. gen_dataset.py 生成人声/伴奏数据集(根据需求决定伴奏的占比系数,需要看代码修改) +3. gen_dataset_feature.py 提取特征,用于训练 +### 构建男女声数据集 +1. 下载数据(使用download_msg.py) 下载男女声数据集 +2. 
+2. gen_pure_gender_dataset.py builds the male/female feature set
+Optionally mix in accompaniment and recording information as needed.
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/script/common.py b/AIMeiSheng/voice_classification/script/common.py
new file mode 100644
index 0000000..d387596
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/common.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+import pymysql
+import os
+
+
+def connect_db(host, port, user, passwd, db=""):
+    print("connect mysql host={} port={} user={} passwd={} db={}".format(host, port, user, passwd, db))
+    return pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db, read_timeout=3)
+
+
+def get_data_by_mysql(sql, banned_user_map):
+    db = connect_db(host=banned_user_map["host"], passwd=banned_user_map["passwd"], user=banned_user_map["user"],
+                    db=banned_user_map["db"], port=3306)
+    db_cursor = db.cursor()
+    if len(sql) < 100:
+        print("execute = {}".format(sql))
+    else:
+        print("execute = {}...".format(sql[:100]))
+
+    db_cursor.execute(sql)
+    res = db_cursor.fetchall()
+    db_cursor.close()
+    db.close()
+    print("res size={}".format(len(res)))
+    return res
+
+
+def write_data_to_mysql(sql, banned_user_map):
+    db = connect_db(host=banned_user_map["host"], passwd=banned_user_map["passwd"], user=banned_user_map["user"],
+                    db=banned_user_map["db"], port=3306)
+    db_cursor = db.cursor()
+    if len(sql) < 1000:
+        print("execute = {}".format(sql))
+    else:
+        print("execute = {}...".format(sql[:1000]))
+
+    db_cursor.execute(sql)
+    db.commit()
+    db_cursor.close()
+    db.close()
+
+
+def exec_cmd(cmd):
+    print(cmd)
+    ret = os.system(cmd)
+    if ret != 0:
+        return False
+    return True
+
+
+def exec_cmd_and_result(cmd):
+    print(cmd)
+    r = os.popen(cmd)
+    text = r.read()
+    r.close()
+    return text
+
+
+shard_map = {
+    "shard_sm_12": "shard02-r2.db.starmaker.co",
+    "shard_sm_13": "shard02-r2.db.starmaker.co",
+    "shard_sm_14": "shard02-r2.db.starmaker.co",
+    "shard_sm_15": "shard02-r2.db.starmaker.co",
+    "shard_sm_30": "shard02-r2.db.starmaker.co",
+    "shard_sm_31": "shard02-r2.db.starmaker.co",
+    "shard_sm_20": "shard02-r2.db.starmaker.co",
+    "shard_sm_21": "shard02-r2.db.starmaker.co",
+    "shard_sm_22": "shard03-r2.db.starmaker.co",
+    "shard_sm_23": "shard03-r2.db.starmaker.co",
+    "shard_sm_24": "shard03-r2.db.starmaker.co",
+    "shard_sm_25": "shard03-r2.db.starmaker.co",
+    "shard_sm_26": "shard03-r2.db.starmaker.co",
+    "shard_sm_27": "shard03-r2.db.starmaker.co",
+    "shard_sm_28": "shard03-r2.db.starmaker.co",
+    "shard_sm_29": "shard03-r2.db.starmaker.co",
+    "shard_sm_0": "shard00-r2.db.starmaker.co",
+    "shard_sm_1": "shard00-r2.db.starmaker.co",
+    "shard_sm_2": "shard00-r2.db.starmaker.co",
+    "shard_sm_3": "shard00-r2.db.starmaker.co",
+    "shard_sm_4": "shard00-r2.db.starmaker.co",
+    "shard_sm_5": "shard00-r2.db.starmaker.co",
+    "shard_sm_16": "shard00-r2.db.starmaker.co",
+    "shard_sm_17": "shard00-r2.db.starmaker.co",
+    "shard_sm_6": "shard01-r2.db.starmaker.co",
+    "shard_sm_7": "shard01-r2.db.starmaker.co",
+    "shard_sm_8": "shard01-r2.db.starmaker.co",
+    "shard_sm_9": "shard01-r2.db.starmaker.co",
+    "shard_sm_10": "shard01-r2.db.starmaker.co",
+    "shard_sm_11": "shard01-r2.db.starmaker.co",
+    "shard_sm_18": "shard01-r2.db.starmaker.co",
+    "shard_sm_19": "shard01-r2.db.starmaker.co",
+    "shard_sm_32": "shard04-r2.db.starmaker.co",
+    "shard_sm_33": "shard04-r2.db.starmaker.co",
+    "shard_sm_34": "shard04-r2.db.starmaker.co",
+    "shard_sm_35": "shard04-r2.db.starmaker.co",
+    "shard_sm_36": "shard04-r2.db.starmaker.co",
+    "shard_sm_37": "shard04-r2.db.starmaker.co",
+    "shard_sm_38":
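+    # shard_sm_0..39 map user-id shards onto five read replicas
+    # (shard00-r2 .. shard04-r2); get_shard_db() below derives the shard id
+    # from the high bits of the user id (user_id >> 48)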
"shard04-r2.db.starmaker.co", + "shard_sm_39": "shard04-r2.db.starmaker.co", + "name": "shard_sm_{}", + "port": 3306, + "user": "readonly", + "passwd": "JKw6woZgRXsveegL" +} + +banned_user_map = { + "host": "starmaker-device-r2.db.starmaker.co", + "user": "worker", + "passwd": "gRYppQtdTpP3nFzH", + "db": "sm_passport" +} + + +def get_shard_data_by_sql(sql, shared_id=None): + # shard_id = get_shard_db(user_id) + db_name = shard_map["name"].format(shared_id) + host = shard_map[db_name] + db = connect_db(host=host, passwd=shard_map["passwd"], user=shard_map["user"], db=db_name, port=3306) + db_cursor = db.cursor() + max_len = 1000 + if len(sql) < max_len: + print("execute = {}".format(sql)) + else: + print("execute = {}...".format(sql[:max_len])) + + db_cursor.execute(sql) + print("execute ....") + res = db_cursor.fetchall() + db_cursor.close() + db.close() + print("res size={}".format(len(res))) + return res + + +def get_shard_db(user_id): + return int(float(user_id)) >> 48 diff --git a/AIMeiSheng/voice_classification/script/country2av_area.py b/AIMeiSheng/voice_classification/script/country2av_area.py new file mode 100644 index 0000000..d7cd69a --- /dev/null +++ b/AIMeiSheng/voice_classification/script/country2av_area.py @@ -0,0 +1,30 @@ +""" +国家转音视频大区 +欧美区: AV_Area_US: Area_US +南美区: AV_Area_SA: Area_BR,Area_AR,Area_CO +东南亚: AV_Area_SEA: Area_ID, Area_MY,Area_PH,Area_TH,Area_VN +中 东: AV_Area_ME: Area_ME, Area_PK,Area_TR +印 度: AV_Area_IN: Area_IN +其他: AV_Area_OTHER: 其他大区 +""" + + +COUNTRY2AV_AREA_MAP = {} +AV_AREA2COUNTRY_MAP = { + "av_area_us": [["AT", "AU", "CA", "CH", "DE", "DK", "GB", "US", "NZ", "IE", "PL", "PT", "SE", "ZA"]], # US + "av_area_sa": [["BR"], ["AR", "UY", "PY", "BO", "SV"], ["CO", "EC", "PE", "VE", "CL", "DM"]], # BR,AR,CO + "av_area_sea": [["ID"], ["MY", "BN", "SG", "HK"], ["PH"], ["TH"], ["VN"]], # ID,MY, PH, TH, VN + "av_area_me": [ + ["AE", "BH", "DZ", "EG", "KW", "IL", "IQ", "IR", "JO", "LB", "LY", "MA", "QA", "OM", "SA", "SY", "TN", "YE"], + ["PK"], ["TR"]], # ME PK, TR + "av_area_in": [["AF", "BT", "IN", "LK", "MV", "NP"]] # IN +} + + +def get_area_by_country(country): + if len(COUNTRY2AV_AREA_MAP) == 0: + for k, v in AV_AREA2COUNTRY_MAP.items(): + for vv in v: + for ct in vv: + COUNTRY2AV_AREA_MAP[ct] = k + return COUNTRY2AV_AREA_MAP.get(country, "av_area_other") diff --git a/AIMeiSheng/voice_classification/script/dataset/common.py b/AIMeiSheng/voice_classification/script/dataset/common.py new file mode 100644 index 0000000..84916d2 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/common.py @@ -0,0 +1,221 @@ +# -*- coding: utf-8 -*- +import logging +import os +import pymysql +from datetime import datetime, timedelta +import subprocess +import logging +import multiprocessing as mp +import time + +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + +def write_file(filename, data): + with open(filename, 'w') as f: + for dt in data: + dt = str(dt).strip('\n') + f.write(str(dt)+"\n") + + +def read_file(filename): + res = [] + with open(filename, 'r') as f: + while True: + line = f.readline() + if not line: + break + res.append(line.strip()) + return res + + +def n_days_ago(n_time, days): + """ + + :param n_time: n_time => 20180719 + :param days: + :return: + """ + now_time = datetime.strptime(n_time, '%Y%m%d') + delta = timedelta(days=days) + n_days = now_time - delta + return n_days.strftime("%Y%m%d") + + +def connect_db(host="research-db-r1.starmaker.co", port=3306, user="root", 
passwd="Qrdl1130", db="rec"): + logging.info("connect mysql host={} port={} user={} passwd={} db={}".format(host, port, user, passwd, db)) + return pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db) + + +def get_data_by_sql(sql): + db = connect_db() + db_cursor = db.cursor() + if len(sql) < 100: + logging.info("execute = {}".format(sql)) + else: + logging.info("execute = {}...".format(sql[:100])) + + db_cursor.execute(sql) + res = db_cursor.fetchall() + db_cursor.close() + db.close() + logging.info("res size={}".format(len(res))) + return res + + +def get_recording_msg_batch(filename=None): + """ + 分批获取msg + 获取recording_id/user_id即可 + :return: + """ + rid_uid_label = [] + max_item = 100000 + ssql = "select r_id, r_user_id,sm_labels from recording where r_id > {} and sm_labels like \"%male%\" order by r_id asc limit {}" + current_id = 0 + while True: + res = get_data_by_sql(ssql.format(current_id, max_item)) + if len(res) == 0: + break + current_id = res[-1][0] + rid_uid_label.extend(res) + logging.info("------current_size size={}".format(len(rid_uid_label))) + # 写入文件 + if filename: + res_str = list(map(lambda x: ",".join(map(str, x)), rid_uid_label)) + write_file(filename, res_str) + return rid_uid_label + + +def parse_label(label): + label = str(label).lower() + gender = "female" + idx = label.find(gender) + if idx >= 0: + label = label.replace("female", "") + idx2 = label.find("male") + + # 抛弃同时存在男和女 + if idx2 >= 0: + return "" + return gender + + # 判断是否是男 + gender = "male" + idx = label.find(gender) + if idx >= 0: + return gender + return "" + + +def parse_labels(rid_uid_label, filename=None): + res = [] + for rid, uid, label in rid_uid_label: + gender = parse_label(label) + if "" != gender: + res.append((rid, uid, gender)) + + if filename: + res_str = list(map(lambda x: ",".join(map(str, x)), res)) + write_file(filename, res_str) + return res + + +def parse_line(x): + ss = str(x).strip().split(',') + return ss[0], ss[1], ",".join(ss[2:]) + + +def get_recording_cache(filename=None): + """ + 可以从缓存中取数据 + :param filename: + :return: + """ + if filename: + res = read_file(filename) + res = list(map(parse_line, res)) + + return res + return get_recording_msg_batch(filename) + + +def func_run_time(func): + def wrapper(*args, **kw): + local_time = time.time() + func(*args, **kw) + logging.info('current Function [%s] run time is %.2f' % (func.__name__, time.time() - local_time)) + return wrapper + + +def download_mp4(dir, recording_id): + """ + 1 下载干声文件 + 2 下载完之后重命名 + """ + file_path = os.path.join(dir, recording_id) + filename_download = file_path + ".download" + filename = file_path + ".mp4" + + if os.path.exists(filename_download): + os.unlink(filename_download) + + cmd = "coscmd -b starmaker-1256122840 download production/uploading/recordings/{}/origin_master.mp4 {}"\ + .format(recording_id, filename_download) + # logging.info("now:{}".format(cmd)) + ret = os.system(cmd) + if not ret: + cmd = "mv {} {}".format(filename_download, filename) + os.system(cmd) + return True + return False + + +class SimpleMultiProcesser: + """ + 多进程处理类 + 目的:单进程生产,多进程消费,且不需要返回值 + """ + def __init__(self, data_path, worker_num=1, timeout=10): + self._worker_num = worker_num + self._res = [] + self._timeout = timeout + self._data_path = data_path + @func_run_time + def load_data(self): + """ + 数据载入函数,需要返回一个list + :return: + """ + return [] + + @func_run_time + def processer(self, single_job): + """ + 处理list中单个数据的方法 + :param single_job: + :return: + """ + pass + + def task_error_callback(self, 
msg): + logging.error(msg) + + @func_run_time + def process(self): + tp_queue = self.load_data() + logging.info("process -- queue_size={} worker_num={} timeout={}".format(len(tp_queue), self._worker_num,self._timeout)) + res = [] + pool = mp.Pool(processes=self._worker_num) + while len(tp_queue) > 0: + job = tp_queue.pop() + ret = pool.apply_async(self.processer, args=(job, ), error_callback=self.task_error_callback) + res.append(ret) + pool.close() + pool.join() + + for i in res: + self._res.append(i.get(timeout=self._timeout)) + + def get_res_data(self): + return self._res \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/dataset/gen_id_gender.txt.py b/AIMeiSheng/voice_classification/script/dataset/gen_id_gender.txt.py new file mode 100644 index 0000000..f4d036c --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/gen_id_gender.txt.py @@ -0,0 +1,20 @@ +import glob +import sys +import os + + +def gen_id_gender(work_dir): + dirs = ["female", "male"] + out_files = [] + for dir in dirs: + files = glob.glob("{}/*mp4".format(os.path.join(work_dir, dir))) + for file in files: + id = file.split("/")[-1].split(".")[0] + out_files.append("{},{}".format(id, dir)) + with open(os.path.join(work_dir, "id_gender.txt"), "w") as f: + for out_file in out_files: + f.write(out_file + "\n") + + +if __name__ == "__main__": + gen_id_gender(sys.argv[1]) diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/CMakeLists.txt new file mode 100644 index 0000000..39ac106 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 2.8) +project(pre_process_voice) +set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) +set(CMAKE_MACOSX_RPATH 1) +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") + +#include_directories(ref) +#add_subdirectory(ref) + +file(GLOB_RECURSE SRC_SRC_DIR src/*cpp) +file(GLOB_RECURSE SRC_REF_CPP ref/*cpp) +file(GLOB_RECURSE SRC_REF_C ref/*c) + +include_directories(inc) + +# include ref -------- +include_directories(ref) +include_directories(ref/dpitch) +include_directories(ref/dpitch/inc) +include_directories(ref/ebur128) +include_directories(ref/ebur128/inc) +include_directories(ref/kiss_fft) +include_directories(ref/kiss_fft/inc) +include_directories(ref/resample2) +include_directories(ref/resample2/inc) +include_directories(ref/resample2/inc_common) +include_directories(ref/voice-detect) +include_directories(ref/voice-detect/inc) +include_directories(ref/waves) +include_directories(ref/waves/inc) +include_directories(ref/lib_json) +include_directories(ref/lib_json/inc) +include_directories(ref/st_lyric_parser) +include_directories(ref/st_lyric_parser/inc) +#-------------------------------- + + +add_library(pre_process_voice SHARED ${SRC_SRC_DIR} ${SRC_REF_CPP} ${SRC_REF_C}) +#add_executable(test test.cpp) +#target_link_libraries(pre_process_voice +# ${LIBRARY_OUTPUT_PATH}/libwaves.a +# ${LIBRARY_OUTPUT_PATH}/libebur128.a +# ${LIBRARY_OUTPUT_PATH}/libdpitch.a +# ${LIBRARY_OUTPUT_PATH}/libkiss_fft.a +# ${LIBRARY_OUTPUT_PATH}/libresample2.a +# ${LIBRARY_OUTPUT_PATH}/libvoice_detect.a +# ) +#IF (APPLE) +# target_link_libraries(test ${LIBRARY_OUTPUT_PATH}/libpre_process_voice.dylib) +#ELSE () +# target_link_libraries(test ${LIBRARY_OUTPUT_PATH}/libpre_process_voice.so) +#ENDIF () diff --git 
a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/inc/CPreProcessVoice.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/inc/CPreProcessVoice.h new file mode 100644 index 0000000..d10fd41 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/inc/CPreProcessVoice.h @@ -0,0 +1,83 @@ +// +// Created by yangjianli on 2020/4/9. +// + +#ifndef PRE_TREATMENT_PREPROCESSVOICE_H +#define PRE_TREATMENT_PREPROCESSVOICE_H + +#include "vector" +/** + * 人声数据预处理 + * 1 ebur128进行拉伸 + * 2 使用pitch进行切分 + */ + +struct VOICE_STATE; +struct VOICE_LINE +{ + int st_pos; + int ed_pos; + float st_time; + float ed_time; +}; +class STLyricParser; +struct ST_LINE; + +class CPreProcessVoice +{ +public: + CPreProcessVoice(); + ~CPreProcessVoice(); + +public: + int init(int sample_rate, int channel); + void uninit(); + int process(float* inbuf, int len); + int process_file(char* src_file, char* dst_file); + int split_voice_file(char* src_file,char* json_file, char* dst_dir); + void print_voice_state(); + void print_voice_line(); + +private: + int ebur128_whole(float* inbuf, const int len, double &gated_loudness, double &gain); + int apply_gain(float* inbuf, const int len, double gain); + int split_voice(float* inbuf, const int len); + int calc_voice_line(); +private: + int m_samle_rate; + int m_channel; + std::vector m_voice_state; + std::vector m_voice_line; + +private: + STLyricParser* m_lyric_parser; + std::vector m_lines; +}; + +// 封装C接口 +extern "C" +{ + CPreProcessVoice* cpv; + + // 创建对象 + void create_object() + { + cpv = new CPreProcessVoice(); + } + + // 销毁对象 + void destory_object() + { + if(cpv) + { + delete cpv; + } + } + + // 处理 + void split_voice_file(char* src_file,char* json_file, char* dst_dir) + { + cpv->split_voice_file(src_file,json_file, dst_dir); + } +} +#endif //PRE_TREATMENT_PREPROCESSVOICE_H diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/py/pre_process_voice.py b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/py/pre_process_voice.py new file mode 100644 index 0000000..cc51ee8 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/py/pre_process_voice.py @@ -0,0 +1,46 @@ +# -*- coding:utf8 -*- +import ctypes +import sys +import os + +# 指定动态连接库 +lib = ctypes.cdll.LoadLibrary('/opt/soft/libs/libpre_process_voice.so') + + +class PreProcessVoice(object): + def __init__(self): + lib.create_object() + lib.split_voice_file.restype = ctypes.c_int + + def process(self, src_file, dst_dir): + """ + 输入16000采样率 16bit 单声道wav文件地址 & 输出的文件夹目录 + :param wav_path: + :return: + """ + + b_src_file = bytes(src_file, encoding="utf8") + b_dst_dir = bytes(dst_dir, encoding="utf8") + # b_input_id = bytes(str(0), encoding="utf8") + ret = lib.split_voice_file(b_src_file, b_dst_dir) + return ret + + def transcode(self, src_media, dst_wav): + """ + 转码为标准数据 + """ + cmd = "ffmpeg -i {} -ar 8000 -ac 1 -acodec pcm_s16le {}".format(src_media, dst_wav) + ret = os.system(cmd) + return ret + + +if __name__ == "__main__": + if len(sys.argv) < 3: + print("please input python this.py xxx.wav xxx_dir\n") + exit(-1) + + wave_path = sys.argv[1] + dir = sys.argv[2] + ppv = PreProcessVoice() + ret = ppv.process(wave_path, dir) + print("errcode={}".format(ret)) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/CMakeLists.txt new file mode 100644 index 
0000000..7f94a64 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/CMakeLists.txt @@ -0,0 +1,8 @@ +add_subdirectory(ebur128) +add_subdirectory(waves) +add_subdirectory(kiss_fft) +add_subdirectory(resample2) +add_subdirectory(dpitch) +add_subdirectory(voice-detect) +add_subdirectory(lib_json) +add_subdirectory(st_lyric_parser) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/CMakeLists.txt new file mode 100644 index 0000000..54a4274 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/CMakeLists.txt @@ -0,0 +1,3 @@ +AUX_SOURCE_DIRECTORY(./src DIR_DPITCH_R_SRCS) +include_directories(./inc) +ADD_LIBRARY(dpitch ${DIR_DPITCH_R_SRCS}) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/inc/DPParam.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/inc/DPParam.h new file mode 100644 index 0000000..af2da73 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/inc/DPParam.h @@ -0,0 +1,39 @@ + +#ifndef _DP_PARAM_H_ +#define _DP_PARAM_H_ + +#include "DPitchDef.h" + +/* 全局参数和数据,所有线程共享,线程安全,单体模式 **/ +class CDPParam +{ +public: + static const CDPParam& GetInstance() + { + return m_oDPParamInst; + } + +public: + int m_nFFTNum; + int m_nNsampWindow; + int m_nHalfNsampWindow; + DP_DOUBLE* m_pWindow; + DP_DOUBLE* m_pWindowR; + DP_DOUBLE m_fDx; + int m_nSampPeriod; + int m_nHalfSampPeriod; + +private: + CDPParam(); + ~CDPParam(); + +private: + static CDPParam m_oDPParamInst; +}; + +/* 此函数为除fft外的热点,最好能优化 dahaowu log **/ +DP_DOUBLE NumInterpolateSinc(DP_DOUBLE y[], int nX, DP_DOUBLE x, int nMaxDepth); +void DRealFFT(DP_DOUBLE* pData, int nLength, int iSign); + + +#endif // _DP_PARAM_H_ diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/inc/DPitchDef.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/inc/DPitchDef.h new file mode 100644 index 0000000..efd553d --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/inc/DPitchDef.h @@ -0,0 +1,68 @@ + +#ifndef _DPITCH_DEF_H_ +#define _DPITCH_DEF_H_ + +#define DP_USE_DFFT 1 +#define DP_USE_KISS_FFT 0 + +#define DP_ONLINE 1 + +#if DP_ONLINE +#define DP_ONLINE_POOL 100 /* 在线保存 100 帧 dahaowu log **/ +#define DP_AUDIO_BUFFER_LEN (DP_NSAMP_WINDOW * 20) /* 数据buffer的长度 dahaowu this **/ +#else +#define DP_ONLINE_POOL 12000 /* 离线 最多 一分钟 dahaowu log **/ +#endif + +/* 采样率 dahaowu this **/ +#define DP_SAMPLE_RATE /*44100 16000*/ 16000 + +#if (DP_SAMPLE_RATE == 16000) +/* 帧长 16000=550 dahaowu this **/ +#define DP_NSAMP_WINDOW /*560 550*/ 560 + +/* 帧移 16000=160 dahaowu this **/ +#define DP_NSAMP_SHIFT /*80 160*/ 80 +#endif + +#if (DP_SAMPLE_RATE == 44100) +/* 帧长 16000=550 dahaowu this **/ +#define DP_NSAMP_WINDOW 1323 + +/* 帧移 16000=160 dahaowu this **/ +#define DP_NSAMP_SHIFT 1323 +#endif + +/* 进行一次路径选择的最小帧数 dahaowu this **/ +#define DP_DELAY_PATH_NUM 10 + +/* 一次输入的数据最好不要超过 DP_NSAMP_WINDOW-DP_NSAMP_SHIFT_X **/ +#define DP_NSAMP_SHIFT_X 0 + +/* 末尾需要不足的基频点个数 **/ +#define DP_TAIL_ADD (DP_NSAMP_WINDOW / DP_NSAMP_SHIFT) + +/* 候选最大数量 **/ +#define DP_MAX_CANDIDATES 10 + +/* 开始进行路径选择的最小帧数、Online基频提取时计算每帧数据当前数据仅受到此前20帧数据的影响 **/ +#define DP_BEG_PATH_NUM 20 + +/* 在线基频提取时最大帧数 **/ +#define DP_ONLINEF0BUFSIZE 1000 
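+/* DP_MINIMUM_PITCH and DP_CEILING below bound the pitch search (60-500 Hz),
+   which presumably covers the typical spoken and sung voice range. */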
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/inc/DPitchHandle.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/inc/DPitchHandle.h new file mode 100644 index 0000000..b69612f --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/inc/DPitchHandle.h @@ -0,0 +1,274 @@ + +#ifndef _DPITCH_HANDLE_H_ +#define _DPITCH_HANDLE_H_ + +#include <vector> +#include "DPitchDef.h" +#include "string.h" + +namespace DPitch +{ + class CPitcher + { + public: + CPitcher(); + ~CPitcher(); + + public: + /* Get pitch values online **/ + bool Process(short* pSample, int nSampleSize, float* pPitch, int& nFrame, bool bLast); + + /* Reset internal state before reuse **/ + void Reset(); + + private: + /* Initialize / uninitialize **/ + int Init(); + int Uninit(); + + /* Append new audio data and process the complete frames it yields **/ + bool AppendData(short* pSample, int nSampleSize); + + /* Core per-frame pitch computation **/ + bool ProcessFrame(DP_FLOAT* pFrame, int nSize); + + /* Determine the optimal pitch sequence **/ + bool SearchPath(bool bLast); + + private: + /* Pitch candidate **/ + class CPitchCandidate + { + public: + DP_DOUBLE fFrequency; /* candidate frequency **/ + DP_DOUBLE fStrength; /* candidate score **/ + + void operator = (const CPitchCandidate& oCand) + { + fFrequency = oCand.fFrequency; + fStrength = oCand.fStrength; + } + }; + + /* One pitch frame **/ + class CPitchFrame + { + public: + CPitchFrame() + : nCandidates(0) + , nIntensity(0) + , nBest(0) + {} + + public: + DP_DOUBLE nIntensity; /* energy ratio **/ + unsigned int nCandidates; /* number of candidates **/ + CPitchCandidate oCandidate[DP_MAX_CANDIDATES]; + unsigned int nBest; + }; + + private: + /* number of frames that already have candidates **/ + int m_nProcessedCount; + + /* number of frames whose path has been finalized **/ + int m_nPathedCount; + + /* optimization point: check how many of these actually need to be kept dahaowu log **/ + std::vector<CPitchFrame*> m_oFitchFrames; + + /* FFT size **/ + int m_nFFTNum; + + /* frame length and shift **/ + int m_nNsampWindow; + int m_nHalfNsampWindow; + + /* FFT buffer **/ + DP_DOUBLE* m_pFFTFrame; + + /* autocorrelation buffer **/ + DP_DOUBLE* m_pAutocorrelation; + + /* analysis window and its (normalized) autocorrelation **/ + DP_DOUBLE* m_pWindow; + DP_DOUBLE* m_pWindowR; + + /* cached constants **/ + float m_fLog2; + float m_fLog440; + + /* window for the energy computation **/ + int m_nSampPeriod; + int m_nHalfSampPeriod; + + private: + CPitchFrame* GetOnePitchFrame() + { + if ( m_nCount >= DP_ONLINE_POOL - 1 ) + { + m_nCount = 0; + } + return &m_pPitch[m_nCount++]; + } + int m_nCount; + CPitchFrame m_pPitch[DP_ONLINE_POOL]; + + /* Store samples into the buffer **/ + void AudioToFloat(short* pSample, DP_FLOAT* pAudio, int nSize) + { + for( int i = 0; i < nSize; i++ ) + { + pAudio[i] = pSample[i] / 32768.f; + } + } + +#if DP_ONLINE + private: + /* storage for the audio data **/ + DP_FLOAT m_AudioBuf[DP_AUDIO_BUFFER_LEN + DP_NSAMP_WINDOW - DP_NSAMP_SHIFT_X]; + int m_nBufValidHead; + int m_nBufValidTail; + + /* Copy data in **/ + bool BufAppendData(short* pSample, int nSampleSize) + { + /* how far past the end this write would go **/ + int nOver = nSampleSize + m_nBufValidTail - DP_AUDIO_BUFFER_LEN; + if( nOver <= 0 ) + { + /* appending stays within AUDIO_BUFFER_LEN **/ + AudioToFloat(pSample, m_AudioBuf + m_nBufValidTail, nSampleSize); /* store normally first **/ + DulpiTail(m_nBufValidTail, m_nBufValidTail + nSampleSize); /* should be a no-op here **/ + m_nBufValidTail += nSampleSize; + } + else + { + /* appending overflows AUDIO_BUFFER_LEN **/ + int nOver2 = nOver - (DP_NSAMP_WINDOW - DP_NSAMP_SHIFT_X); + if( nOver2 <= 0 ) + {
/* the overflow is no larger than one window-shift **/ + int nDulpLen = m_nBufValidTail + nSampleSize - DP_AUDIO_BUFFER_LEN; /* length that needs duplicating **/ + AudioToFloat(pSample, m_AudioBuf + m_nBufValidTail, nSampleSize); /* store normally first **/ + DulpiTail(m_nBufValidTail, m_nBufValidTail + nSampleSize); /* duplicate the part beyond AUDIO_BUFFER_LEN **/ + m_nBufValidTail = nDulpLen; + } + else + { + /* the overflow is larger than one window-shift **/ + int nDulpLen = DP_NSAMP_WINDOW - DP_NSAMP_SHIFT_X; /* length that needs duplicating **/ + int nLen1 = DP_AUDIO_BUFFER_LEN - m_nBufValidTail + nDulpLen; + AudioToFloat(pSample, m_AudioBuf+m_nBufValidTail, nLen1); + DulpiTail(DP_AUDIO_BUFFER_LEN, DP_AUDIO_BUFFER_LEN + nDulpLen); + m_nBufValidTail = nDulpLen; + + /* the remainder goes in after the tail **/ + AudioToFloat(pSample + nLen1, m_AudioBuf + m_nBufValidTail, nSampleSize - nLen1); + m_nBufValidTail += (nSampleSize - nLen1); + } + } + return true; + } + + /* Get the used length of the buffer **/ + int GetBufferUseLen() + { + if( m_nBufValidTail >= m_nBufValidHead ) + return m_nBufValidTail - m_nBufValidHead; + else + return m_nBufValidTail - m_nBufValidHead + DP_AUDIO_BUFFER_LEN; + } + + /* Copy the contiguous part first, then handle wrapping at the head, so a read never sees a partial frame; this logic feels clumsy and could be optimized **/ + bool DulpiTail(int nBeg, int nEnd) + { + if ( m_nBufValidTail < m_nBufValidHead ) + { + if( nBeg > DP_NSAMP_WINDOW - DP_NSAMP_SHIFT_X ) + return true; + int nDulpiLen = nEnd > DP_NSAMP_WINDOW - DP_NSAMP_SHIFT_X ? DP_NSAMP_WINDOW - DP_NSAMP_SHIFT_X - nBeg : nEnd - nBeg; + memcpy(m_AudioBuf + DP_AUDIO_BUFFER_LEN + nBeg, m_AudioBuf + nBeg, sizeof(DP_FLOAT) * nDulpiLen); + return true; + } + else + { + /* tail and head are in order; only the mirror region past the end needs duplicating **/ + if( nEnd < DP_AUDIO_BUFFER_LEN ) + return true; + if( nEnd > DP_AUDIO_BUFFER_LEN + DP_NSAMP_WINDOW - DP_NSAMP_SHIFT_X ) /* impossible **/ + return false; + int nDulpiPos = nBeg < DP_AUDIO_BUFFER_LEN ? DP_AUDIO_BUFFER_LEN : nBeg; + int nDulpiLen = nEnd - nDulpiPos; + memcpy(m_AudioBuf + nDulpiPos - DP_AUDIO_BUFFER_LEN, m_AudioBuf + nDulpiPos, sizeof(DP_FLOAT) * nDulpiLen); + return true; + } + } + + /* Return the start of one frame and consume one frame shift **/ + DP_FLOAT* GetCurFrame() + { + if( GetBufferUseLen() < DP_NSAMP_WINDOW ) + return 0; + + DP_FLOAT* pFrame = m_AudioBuf + m_nBufValidHead; + m_nBufValidHead += DP_NSAMP_SHIFT; + if( m_nBufValidHead >= DP_AUDIO_BUFFER_LEN ) + { + m_nBufValidHead = m_nBufValidHead - DP_AUDIO_BUFFER_LEN; + } + return pFrame; + } +#else + private: + short* m_pSample; + int m_nSampleSize; + int m_nThis; + DP_FLOAT m_pOneFrame[DP_NSAMP_WINDOW]; + + /* Copy data in **/ + bool BufAppendData(short* pSample, int nSampleSize) + { + m_pSample = pSample; + m_nSampleSize = nSampleSize; + return true; + } + + /* Return the start of one frame and consume one frame shift **/ + DP_FLOAT* GetCurFrame() + { + if ( m_nThis == 0 ) + { + AudioToFloat(m_pSample, m_pOneFrame, DP_NSAMP_WINDOW); + m_nThis += DP_NSAMP_WINDOW; + return m_pOneFrame; + } + else + { + if ( m_nSampleSize - m_nThis >= DP_NSAMP_WINDOW ) + { + if ( DP_NSAMP_WINDOW > DP_NSAMP_SHIFT ) + { + memcpy(m_pOneFrame, m_pOneFrame + DP_NSAMP_SHIFT, (DP_NSAMP_WINDOW - DP_NSAMP_SHIFT) * sizeof(DP_FLOAT)); + AudioToFloat(m_pSample + m_nThis, m_pOneFrame + (DP_NSAMP_WINDOW - DP_NSAMP_SHIFT), DP_NSAMP_SHIFT); + m_nThis += DP_NSAMP_SHIFT; + return m_pOneFrame; + } + else + { + AudioToFloat(m_pSample + m_nThis, m_pOneFrame, DP_NSAMP_WINDOW); + m_nThis += DP_NSAMP_SHIFT; + return m_pOneFrame; + } + } + else + { + return NULL; + } + } + } +#endif + }; +}; + +#endif // _DPITCH_HANDLE_H_
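The entire public surface of CPitcher is Process() plus Reset(). A minimal caller sketch, illustrative only and not from the repository, with error handling elided; it assumes 16 kHz mono 16-bit PCM already in memory and that DP_ONLINEF0BUFSIZE comfortably bounds the number of frames returned per call:

    // Hypothetical helper showing the intended CPitcher call pattern.
    #include <algorithm>
    #include <cstdio>
    #include <vector>
    #include "DPitchHandle.h"

    void extract_f0(short* pcm, int num_samples) {
        DPitch::CPitcher pitcher;
        std::vector<float> f0(DP_ONLINEF0BUFSIZE);
        for (int pos = 0; pos < num_samples; pos += DP_NSAMP_WINDOW) {
            int n = std::min(DP_NSAMP_WINDOW, num_samples - pos);
            bool last = (pos + n >= num_samples);
            int frames = 0;
            pitcher.Process(pcm + pos, n, f0.data(), frames, last);
            for (int i = 0; i < frames; i++)
                std::printf("%.1f\n", f0[i]); // 0.0 marks an unvoiced frame
        }
        pitcher.Reset(); // required before analysing another clip
    }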
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/src/DPParam.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/src/DPParam.cpp new file mode 100644 index 0000000..06ec4c7 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/src/DPParam.cpp @@ -0,0 +1,178 @@ + +//#include "STSInDef.h" +#include "DPParam.h" +#include "math.h" +#include "stdio.h" + +#define DP_PI 3.14159265358979323846 + +CDPParam CDPParam::m_oDPParamInst; + +CDPParam::CDPParam() +{ + m_fDx = (DP_DOUBLE)(1.0 / DP_SAMPLE_RATE); + + m_nSampPeriod = (int)floor(1 / m_fDx / DP_MINIMUM_PITCH); + m_nHalfSampPeriod = m_nSampPeriod / 2 + 1; + + m_nNsampWindow = DP_NSAMP_WINDOW; + m_nHalfNsampWindow = m_nNsampWindow / 2; + m_nNsampWindow = m_nHalfNsampWindow * 2; + + m_pWindow = new DP_DOUBLE[m_nNsampWindow]; /* one-time allocation dahaowu new **/ + + m_nFFTNum = 1; while (m_nFFTNum < m_nNsampWindow * (1 + 0.5)) m_nFFTNum *= 2; + + int i = 0; + + for (i = 0; i < m_nNsampWindow; i ++) + m_pWindow[i] = 0.5 - 0.5 * cos ((i+1) * 2 * DP_PI / (m_nNsampWindow + 1)); + + m_pWindowR = new DP_DOUBLE[m_nFFTNum]; /* one-time allocation dahaowu new **/ + + for (i = 0; i < m_nNsampWindow; i ++) m_pWindowR [i] = m_pWindow[i]; + for( ; i < m_nFFTNum; i++) m_pWindowR[i] = 0.0; + DRealFFT (m_pWindowR, m_nFFTNum, 1); + m_pWindowR [0] *= m_pWindowR [0]; + m_pWindowR [1] *= m_pWindowR [1]; + for (i = 2; i < m_nFFTNum; i += 2) { + m_pWindowR [i] = m_pWindowR [i] * m_pWindowR [i] + m_pWindowR [i+1] * m_pWindowR [i+1]; + m_pWindowR [i + 1] = 0.0; + } + DRealFFT (m_pWindowR, m_nFFTNum, -1); + for (i = 1; i < m_nNsampWindow; i ++) m_pWindowR [i] /= m_pWindowR [0]; + m_pWindowR [0] = 1.0; +} + +CDPParam::~CDPParam() +{ + if ( m_pWindow ) + { + delete[] m_pWindow; + m_pWindow = NULL; + } + + if ( m_pWindowR ) + { + delete[] m_pWindowR; + m_pWindowR = NULL; + } +} + +void DFFT(DP_DOUBLE* pData, int nLength, int iSign) +{ + long n = nLength << 1, mmax = 2, m, j = 0, i; + for (i = 0; i < n - 1; i += 2) { + if (j > i) { + DP_DOUBLE dum; + dum = pData [j], pData [j] = pData [i], pData [i] = dum; + dum = pData [j+1], pData [j+1] = pData [i+1], pData [i+1] = dum; + } + m = n >> 1; + while (m >= 1 && j + 1 > m) { j -= m; m >>= 1; } + j += m; + } + while (n > mmax) { + long istep = 2 * mmax; + DP_DOUBLE theta = 2 * DP_PI / (iSign * mmax); + DP_DOUBLE wr, wi, wtemp, wpr, wpi; + wtemp = sin (0.5 * theta); + wpr = -2.0 * wtemp * wtemp; + wpi = sin (theta); + wr = 1.0, wi = 0.0; + for (m = 0; m < mmax - 1; m += 2) { + for (i = m; i < n; i += istep) { + DP_DOUBLE tempr, tempi; + j = i + mmax; + tempr = wr * pData [j] - wi * pData [j+1], tempi = wr * pData [j+1] + wi * pData [j]; + pData [j] = pData [i] - tempr, pData [j+1] = pData [i+1] - tempi; + pData [i] += tempr, pData [i+1] += tempi; + } + wtemp = wr, wr = wr * wpr - wi * wpi + wr, wi = wi * wpr + wtemp * wpi + wi; + } + mmax = istep; + } +}
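DFFT above is the classic in-place radix-2 complex FFT, and DRealFFT below wraps it into a real-input transform; the bit-reversal and butterfly structure appear to follow the Numerical Recipes four1/realft conventions. If that reading is correct (an inference from the code, not a documented contract), the packed spectrum and the forward-then-inverse scaling are

    data = [ Re F_0, Re F_{N/2}, Re F_1, Im F_1, Re F_2, Im F_2, ... ]
    DRealFFT(DRealFFT(x, +1), -1) = (N/2) * x

which would explain why the CDPParam constructor squares data[0] and data[1] individually before pairing the remaining (Re, Im) bins, and why its final normalization by the zero-lag value lets any overall scale factor cancel.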
+void DRealFFT(DP_DOUBLE* pData, int nLength, int iSign) +{ + long i, i1, i2, i3, i4, np3; + DP_DOUBLE c1 = 0.5, c2, h1r, h1i, h2r, h2i; + DP_DOUBLE wr, wi, wpr, wpi, wtemp, theta; + theta = DP_PI / (DP_DOUBLE) (nLength >> 1); + if (iSign == 1) { c2 = -0.5; DFFT(pData, nLength >> 1, 1); } + else { c2 = 0.5; theta = - theta; } + wtemp = sin (0.5 * theta); + wpr = -2.0 * wtemp * wtemp; + wpi = sin (theta); + wr = 1.0 + wpr; + wi = wpi; + np3 = nLength + 1; + for (i = 1; i < nLength >> 2; i++) { + i4 = 1 + (i3 = np3 - (i2 = 1 + (i1 = i + i ))); + h1r = c1 * (pData [i1] + pData [i3]); + h1i = c1 * (pData [i2] - pData [i4]); + h2r = - c2 * (pData [i2] + pData [i4]); + h2i = c2 * (pData [i1] - pData [i3]); + pData [i1] = h1r + wr * h2r - wi * h2i; + pData [i2] = h1i + wr * h2i + wi * h2r; + pData [i3] = h1r - wr * h2r + wi * h2i; + pData [i4] = - h1i + wr * h2i + wi * h2r; + wr = (wtemp = wr) * wpr - wi * wpi + wr; + wi = wi * wpr + wtemp * wpi + wi; + } + if (iSign == 1) { + pData [0] = (h1r = pData [0]) + pData [1]; + pData [1] = h1r - pData [1]; + } else { + pData [0] = c1 * ((h1r = pData [0]) + pData [1]); + pData [1] = c1 * (h1r - pData [1]); + DFFT(pData, nLength >> 1, -1); + } +} + +DP_DOUBLE NumInterpolateSinc(DP_DOUBLE y[], int nX, DP_DOUBLE x, int nMaxDepth) +{ + long ix, midleft = (long)floor (x), midright = midleft + 1, left, right; + DP_DOUBLE result = 0.0, a, halfsina, aa, daa; + if (nX < 1) return -1; + if (x > nX) return y [nX]; + if (x < 1) return y [1]; + if (x == midleft) return y [midleft]; + /* 1 < x < nx && x not integer: interpolate. */ + if (nMaxDepth > midright - 1) nMaxDepth = midright - 1; + if (nMaxDepth > nX - midleft) nMaxDepth = nX - midleft; + if (nMaxDepth <= 0) return y [(long) floor (x + 0.5)]; + if (nMaxDepth == 1) return y [midleft] + (x - midleft) * (y [midright] - y [midleft]); + if (nMaxDepth == 2) { + DP_DOUBLE yl = y [midleft], yr = y [midright]; + DP_DOUBLE dyl = 0.5 * (yr - y [midleft - 1]), dyr = 0.5 * (y [midright + 1] - yl); + DP_DOUBLE fil = x - midleft, fir = midright - x; + return yl * fir + yr * fil - fil * fir * (0.5 * (dyr - dyl) + (fil - 0.5) * (dyl + dyr - 2 * (yr - yl))); + } + + left = midright - nMaxDepth, right = midleft + nMaxDepth; + a = DP_PI * (x - midleft); + halfsina = 0.5 * sin (a); + aa = a / (x - left + 1); + daa = DP_PI / (x - left + 1); + for (ix = midleft - 1; ix >= left - 1; ix --) { + DP_DOUBLE d = halfsina / a * (1.0 + cos (aa)); + result += y [ix] * d; + a += (DP_DOUBLE)DP_PI; + aa += daa; + halfsina = - halfsina; + } + a = DP_PI * (midright - x); + halfsina = 0.5 * sin (a); + aa = a / (right - x + 1); + daa = DP_PI / (right - x + 1); + for (ix = midright - 1; ix < right; ix ++) { + DP_DOUBLE d = halfsina / a * (1.0 + cos (aa)); + result += y [ix] * d; + a += (DP_DOUBLE)DP_PI; + aa += daa; + halfsina = - halfsina; + } + return result; +} \ No newline at end of file
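Tying the two dpitch source files together: the constructor builds the raised-cosine window w_i = 0.5 - 0.5*cos(2*pi*(i+1)/(N+1)) and stores in m_pWindowR the window's own autocorrelation, normalized so that r_w(0) = 1. ProcessFrame in DPitchHandle.cpp below then forms

    \hat{r}_x(\tau) = \frac{ r_{xw}(\tau) }{ r_{xw}(0)\, r_w(\tau) }

where r_{xw} is the autocorrelation of the windowed, mean-removed frame; dividing by r_w undoes the taper's bias on the lag strengths. This matches Boersma-style (Praat) autocorrelation pitch estimation; that attribution is a reading of the code, not a statement by the authors.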
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/src/DPitchHandle.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/src/DPitchHandle.cpp new file mode 100644 index 0000000..ab8b0da --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/dpitch/src/DPitchHandle.cpp @@ -0,0 +1,385 @@ + +//#include "STSInDef.h" +#include "DPitchHandle.h" +#include "DPParam.h" + +#include "math.h" +#include "stdio.h" +#include <algorithm> /* for std::max, used in SearchPath **/ + +#if DP_USE_KISS_FFT +#include "fftwrap.h" +#endif + +using namespace DPitch; + +#define NUMlog2e 1.4426950408889634074 +#define NUMlog2(x) (log (x) * NUMlog2e) + +#if DP_USE_KISS_FFT +void* g_pFFT = NULL; +#endif + +#define GLOBAL_PITCHER_PARAM (CDPParam::GetInstance()) + +CPitcher::CPitcher(void) +{ + Init(); +} + +CPitcher::~CPitcher(void) +{ + Uninit(); +} + +/* Get pitch values online **/ +bool CPitcher::Process(short* pSample, int nSampleSize, float* pPitch, int& nFrame, bool bLast) +{ + AppendData(pSample, nSampleSize); + + /* nothing newly processed, so skip the search **/ + if ( m_nProcessedCount == m_nPathedCount ) + { + nFrame = 0; + return true; + } + + int nPathFrame = m_nPathedCount; + SearchPath(bLast); + nFrame = m_nPathedCount - nPathFrame; + for ( int j = 0; j < nFrame; j++ ) + { + CPitchFrame* pFrame = m_oFitchFrames[j + nPathFrame]; + pPitch[j] = (pFrame->oCandidate[pFrame->nBest].fFrequency); + } + return true; +} + +/* Reset internal state before reuse **/ +void CPitcher::Reset() +{ + m_nPathedCount = 0; + m_nProcessedCount = 0; +#if DP_ONLINE + m_nBufValidHead = 0; + m_nBufValidTail = 0; +#else + m_pSample = NULL; + m_nSampleSize = 0; + m_nThis = 0; +#endif + m_nCount = 0; + memset(m_pPitch, 0, DP_ONLINE_POOL * sizeof(CPitchFrame)); + m_oFitchFrames.clear(); +} + +int CPitcher::Init() +{ + m_nCount = 0; + m_nPathedCount = 0; + m_nProcessedCount = 0; + +#if DP_ONLINE + m_nBufValidHead = 0; + m_nBufValidTail = 0; + memset(m_AudioBuf, 0, sizeof(m_AudioBuf)); +#else + m_pSample = NULL; + m_nSampleSize = 0; + m_nThis = 0; +#endif + + const CDPParam& param = GLOBAL_PITCHER_PARAM; + + m_nSampPeriod = param.m_nSampPeriod; + m_nHalfSampPeriod = param.m_nHalfSampPeriod; + m_nNsampWindow = param.m_nNsampWindow; + m_nHalfNsampWindow = param.m_nHalfNsampWindow; + m_pWindow = param.m_pWindow; + m_pWindowR = param.m_pWindowR; + m_nFFTNum = param.m_nFFTNum; + + m_fLog440 = log(440.0); + m_fLog2 = log(2.0); + +#if DP_USE_KISS_FFT + g_pFFT = fftwrap_init(m_nFFTNum); +#endif + + m_pFFTFrame = new DP_DOUBLE[m_nFFTNum]; /* one-time allocation dahaowu new **/ + m_pAutocorrelation = new DP_DOUBLE[2 * m_nNsampWindow + 1]; /* one-time allocation dahaowu new **/ + + return 0; +} + +int CPitcher::Uninit() +{ + m_oFitchFrames.clear(); + delete[] m_pFFTFrame; /* allocated with new[], so release with delete[] **/ + m_pFFTFrame = NULL; + delete[] m_pAutocorrelation; + m_pAutocorrelation = NULL; + +#if DP_USE_KISS_FFT + fftwrap_destroy(g_pFFT); +#endif + return 0; +} + +/* Feed data in and produce candidates **/ +bool CPitcher::AppendData(short* pSample, int nSampleSize) +{ + if( pSample == NULL || nSampleSize <= 0 ) + { + return false; + } + BufAppendData(pSample, nSampleSize); + DP_FLOAT* pFrame = GetCurFrame(); + while( pFrame ) + { + ProcessFrame(pFrame, DP_NSAMP_WINDOW); + pFrame = GetCurFrame(); + } + return true; +} + +/* Compute pitch candidates for one frame **/ +bool CPitcher::ProcessFrame(DP_FLOAT* pBuffer, int nSize) +{ + m_nProcessedCount ++; + DP_FLOAT* pAmplitude = pBuffer; + DP_DOUBLE* pHalfAutocorrelation = m_pAutocorrelation + m_nNsampWindow; + + CPitchFrame* pPitchFrame = GetOnePitchFrame(); + DP_DOUBLE localMean, localPeak; + long leftSample = m_nHalfNsampWindow, rightSample = leftSample + 1; + long startSample, endSample; + + localMean = 0.0; + startSample = rightSample - m_nHalfNsampWindow; + endSample = leftSample + m_nHalfNsampWindow; + + /* compute the DC component **/ + for ( int i = startSample - 1; i < endSample; i ++ ) + localMean += pAmplitude[i]; + localMean /= 2 * m_nHalfNsampWindow; + + //startSample = rightSample - m_nHalfNsampWindow; + //endSample = leftSample + m_nHalfNsampWindow; + + /* apply the window **/ + for (int j = 0, i = startSample - 1; j < m_nNsampWindow; j ++) + m_pFFTFrame [j] = (pAmplitude [i ++] - localMean) * m_pWindow[j]; + + /* zero the padding; could be optimized dahaowu log **/ + for (int j = m_nNsampWindow; j < m_nFFTNum; j ++) + m_pFFTFrame [j] = 0.0; + + /* compute the local peak energy **/ + localPeak = 0; + if ((startSample = m_nHalfNsampWindow + 1 - m_nHalfSampPeriod) < 0) + startSample = 1; + if ((endSample = m_nHalfNsampWindow + m_nHalfSampPeriod) > m_nNsampWindow) + endSample = m_nNsampWindow; + + for (int j = startSample - 1; j < endSample; j ++) + { + if (fabs (m_pFFTFrame [j]) > localPeak) + localPeak = fabs (m_pFFTFrame[j]); + } + pPitchFrame->nIntensity = localPeak > DP_GLOBAL_PEAK ? 1 : localPeak / DP_GLOBAL_PEAK;
+ + pPitchFrame->nCandidates = 1; + pPitchFrame->oCandidate[0].fFrequency = 0.0; + pPitchFrame->oCandidate[0].fStrength = 0.0; + + if ( localPeak == 0 ) + { + m_oFitchFrames.push_back(pPitchFrame); + return true; + } + + /* forward FFT **/ +#if DP_USE_DFFT + DRealFFT(m_pFFTFrame, m_nFFTNum, 1); +#endif + +#if DP_USE_KISS_FFT + fftwrap_fft(g_pFFT, m_pFFTFrame, m_pFFTFrame); +#endif + + /* power spectrum **/ + m_pFFTFrame [0] *= m_pFFTFrame [0]; + m_pFFTFrame [1] *= m_pFFTFrame [1]; + for (int i = 2; i < m_nFFTNum; i += 2) { + m_pFFTFrame [i] = m_pFFTFrame [i] * m_pFFTFrame [i] + m_pFFTFrame [i+1] * m_pFFTFrame [i+1]; + m_pFFTFrame [i + 1] = 0.0; + } + + /* inverse FFT, yielding the autocorrelation **/ +#if DP_USE_DFFT + DRealFFT (m_pFFTFrame, m_nFFTNum, -1); +#endif + +#if DP_USE_KISS_FFT + fftwrap_ifft(g_pFFT, m_pFFTFrame, m_pFFTFrame); +#endif + + /* windowed autocorrelation, normalized by the window autocorrelation **/ + pHalfAutocorrelation [0] = 1.0; + for ( int i = 1; i <= m_nHalfNsampWindow; i++ ) + pHalfAutocorrelation [- (i)] = pHalfAutocorrelation [i] = m_pFFTFrame [i + 1 - 1] / (m_pFFTFrame [0] * m_pWindowR [i + 1 - 1]); + + /* keep only the top candidates **/ + int nMax[DP_MAX_CANDIDATES]; + nMax [0] = 0; + + /* select candidate pitch points **/ + for ( int i = 1; i < m_nHalfNsampWindow; i ++ ) + { + /* find peaks **/ + if ( pHalfAutocorrelation [i] > 0.5 * DP_VOICING_THRESHOLD && + pHalfAutocorrelation [i] > pHalfAutocorrelation [i-1] && pHalfAutocorrelation [i] >= pHalfAutocorrelation [i+1] ) + { + /* compute the candidate score from the formula **/ + int place = -1; + DP_DOUBLE dr = 0.5 * (pHalfAutocorrelation [i+1] - pHalfAutocorrelation [i-1]), d2r = 2 * pHalfAutocorrelation [i] - pHalfAutocorrelation [i-1] - pHalfAutocorrelation [i+1]; + DP_DOUBLE fFrequencyOfMaximum = DP_SAMPLE_RATE / ((i) + dr / d2r); + long offset = - m_nHalfNsampWindow - 1; + DP_DOUBLE fStrengthOfMaximum = + NumInterpolateSinc (pHalfAutocorrelation + offset + 1, m_nHalfNsampWindow - offset, DP_SAMPLE_RATE / fFrequencyOfMaximum - offset, 30); + + if (fStrengthOfMaximum > 1.0) fStrengthOfMaximum = 1.0 / fStrengthOfMaximum; + + if (pPitchFrame->nCandidates < DP_MAX_CANDIDATES) { + { + place = pPitchFrame->nCandidates; + pPitchFrame->nCandidates++; + } + } else { + DP_DOUBLE weakest = 2; + int iweak; + for (iweak = 1; iweak < DP_MAX_CANDIDATES; iweak ++) { + DP_DOUBLE localStrength = pPitchFrame->oCandidate[iweak].fStrength - DP_OCTAVE_COST * + NUMlog2 (DP_MINIMUM_PITCH / pPitchFrame->oCandidate[iweak].fFrequency); + if (localStrength < weakest) + { + weakest = localStrength; + place = iweak; + } + } + if (fStrengthOfMaximum - DP_OCTAVE_COST * NUMlog2 (DP_MINIMUM_PITCH / fFrequencyOfMaximum) <= weakest) + place = -1; + } + if ( place >= 0 ) + { + pPitchFrame->oCandidate[place].fFrequency = fFrequencyOfMaximum; + pPitchFrame->oCandidate[place].fStrength = fStrengthOfMaximum; + nMax[place] = i; + } + } + } + m_oFitchFrames.push_back(pPitchFrame); + return true; +} + +/* Determine the optimal pitch sequence **/ +bool CPitcher::SearchPath(bool bLast) +{ + if ( !bLast ) + { + if ( m_nProcessedCount - m_nPathedCount < DP_DELAY_PATH_NUM ) + { + return true; + } + } + + int nAllCount = m_oFitchFrames.size(); + int nStartPos = std::max(0, m_nPathedCount - DP_BEG_PATH_NUM); + + int nProcCount = nAllCount - nStartPos; + + /* the first search needs at least 20 frames **/ + if( nProcCount < DP_BEG_PATH_NUM ) + { + return true; + } + + /* recomputed from scratch every time, which is wasteful and could be optimized dahaowu log **/ + DP_FLOAT (*pDelta)[DP_MAX_CANDIDATES] = new DP_FLOAT[nProcCount][DP_MAX_CANDIDATES]; /* one-time allocation dahaowu new **/ + int (*pPsi)[DP_MAX_CANDIDATES] = new int[nProcCount][DP_MAX_CANDIDATES]; /* one-time allocation dahaowu new **/ + + /* factor in the possibility of silence **/ + for( int i = nStartPos; i < nAllCount; i++ )
+ { + CPitchFrame* pFrame = m_oFitchFrames[i]; + DP_DOUBLE fUnvoicedStrength = 2 - pFrame->nIntensity / (DP_SILENCE_THRESHOLD / (1 + DP_VOICING_THRESHOLD)); /* unvoiced confidence **/ + fUnvoicedStrength = DP_VOICING_THRESHOLD + (fUnvoicedStrength > 0 ? fUnvoicedStrength : 0); + for ( unsigned int j = 0; j < pFrame->nCandidates; j++ ) + { + CPitchCandidate* pCandidate = &pFrame->oCandidate[j]; + bool bVoiceLess = pCandidate->fFrequency == 0 || pCandidate->fFrequency > DP_CEILING; + pDelta[i - nStartPos][j] = bVoiceLess ? fUnvoicedStrength : /* compute the new confidence **/ + pCandidate->fStrength - DP_OCTAVE_COST * NUMlog2 (DP_CEILING/pCandidate->fFrequency); + } + } + + DP_FLOAT fMaximum; /* best confidence **/ + int nPlace; + for ( int i = nStartPos + 1; i < nAllCount; i++ ) + { + CPitchFrame* pRevFrame = m_oFitchFrames[i - 1], *pCurFrame = m_oFitchFrames[i]; + DP_FLOAT* pRevDelta = pDelta[i - 1 - nStartPos], *pCurDelta = pDelta [i - nStartPos]; + int* pCurPsi = pPsi[i - nStartPos]; + /* best confidence reachable from the previous frame **/ + for ( unsigned int iCand2 = 0; iCand2 < pCurFrame -> nCandidates; iCand2 ++ ) + { + DP_DOUBLE f2 = pCurFrame -> oCandidate [iCand2].fFrequency; + fMaximum = -10; + nPlace = 0; + for ( unsigned int iCand1 = 0; iCand1 < pRevFrame -> nCandidates; iCand1 ++) { + DP_DOUBLE f1 = pRevFrame->oCandidate[iCand1].fFrequency; + DP_DOUBLE pTransitionCost; + bool bPreviousVoiceless = f1 <= 0 || f1 >= DP_CEILING; + bool bCurrentVoiceless = f2 <= 0 || f2 >= DP_CEILING; + if ( bPreviousVoiceless != bCurrentVoiceless ) + pTransitionCost = DP_VOICED_UNVOICED_COST; /* penalty for a voicing change **/ + else if ( bCurrentVoiceless ) + pTransitionCost = 0; + else + pTransitionCost = DP_OCTAVE_JUMP_COST * fabs(NUMlog2 (f1 / f2)); /* penalty for octave jumps **/ + DP_FLOAT fValue = pRevDelta[iCand1] - pTransitionCost + pCurDelta[iCand2]; /* new overall confidence **/ + if ( fValue > fMaximum ) + { + fMaximum = fValue; + nPlace = iCand1; + } + } + pCurDelta[iCand2] = fMaximum; + pCurPsi[iCand2] = nPlace; + } + } + + /* backtrack from the last frame **/ + fMaximum = pDelta[nAllCount - 1 - nStartPos][nPlace = 0]; + for ( unsigned int iCand = 0; iCand < m_oFitchFrames[nAllCount - 1]->nCandidates; iCand++ ) + { + if ( pDelta[nAllCount - 1 - nStartPos][iCand] > fMaximum ) + { + fMaximum = pDelta[nAllCount - 1 - nStartPos] [nPlace = iCand]; + } + } + + /* in the end only the very last frame remains provisional **/ + for ( int i = nAllCount - 1; i >= nStartPos; i-- ) + { + CPitchFrame* pFrame = m_oFitchFrames[i]; + pFrame->nBest = nPlace; + nPlace = pPsi[i - nStartPos][nPlace]; + } + + m_nPathedCount = m_nProcessedCount; + delete[] pDelta; + delete[] pPsi; + return true; +}
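SearchPath is a Viterbi pass over the per-frame candidate lists. Reading the costs straight out of the code: a candidate with frequency f counts as unvoiced when f <= 0 or f >= DP_CEILING, and the transition penalty between consecutive candidates f1, f2 is

    C(f1, f2) = 0.14                        if exactly one side is unvoiced (DP_VOICED_UNVOICED_COST)
              = 0                           if both are unvoiced
              = 0.35 * |log2(f1 / f2)|      if both are voiced (DP_OCTAVE_JUMP_COST)

with the recursion delta_t(j) = local_t(j) + max_i( delta_{t-1}(i) - C(f_i, f_j) ), followed by a backtrack through pPsi. Only the newest frames' choices stay provisional until more data, or the bLast flag, arrives.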
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/ebur128/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/ebur128/CMakeLists.txt new file mode 100644 index 0000000..18a5a86 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/ebur128/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(inc) +AUX_SOURCE_DIRECTORY(src DIR_EBUR128_SRCS) +add_library(ebur128 ${DIR_EBUR128_SRCS}) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/ebur128/inc/ebur128.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/ebur128/inc/ebur128.h new file mode 100644 index 0000000..faa66c6 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/ebur128/inc/ebur128.h @@ -0,0 +1,425 @@ +/* See COPYING file for copyright and license details. */ + +#ifndef EBUR128_H_ +#define EBUR128_H_ + +/** \file ebur128.h + * \brief libebur128 - a library for loudness measurement according to + * the EBU R128 standard. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define EBUR128_VERSION_MAJOR 1 +#define EBUR128_VERSION_MINOR 2 +#define EBUR128_VERSION_PATCH 4 + +#include <stddef.h> /* for size_t */ + +/** \enum channel + * Use these values when setting the channel map with ebur128_set_channel(). + * See definitions in ITU R-REC-BS 1770-4 + */ +enum channel { + EBUR128_UNUSED = 0, /**< unused channel (for example LFE channel) */ + EBUR128_LEFT = 1, + EBUR128_Mp030 = 1, /**< itu M+030 */ + EBUR128_RIGHT = 2, + EBUR128_Mm030 = 2, /**< itu M-030 */ + EBUR128_CENTER = 3, + EBUR128_Mp000 = 3, /**< itu M+000 */ + EBUR128_LEFT_SURROUND = 4, + EBUR128_Mp110 = 4, /**< itu M+110 */ + EBUR128_RIGHT_SURROUND = 5, + EBUR128_Mm110 = 5, /**< itu M-110 */ + EBUR128_DUAL_MONO, /**< a channel that is counted twice */ + EBUR128_MpSC, /**< itu M+SC */ + EBUR128_MmSC, /**< itu M-SC */ + EBUR128_Mp060, /**< itu M+060 */ + EBUR128_Mm060, /**< itu M-060 */ + EBUR128_Mp090, /**< itu M+090 */ + EBUR128_Mm090, /**< itu M-090 */ + EBUR128_Mp135, /**< itu M+135 */ + EBUR128_Mm135, /**< itu M-135 */ + EBUR128_Mp180, /**< itu M+180 */ + EBUR128_Up000, /**< itu U+000 */ + EBUR128_Up030, /**< itu U+030 */ + EBUR128_Um030, /**< itu U-030 */ + EBUR128_Up045, /**< itu U+045 */ + EBUR128_Um045, /**< itu U-045 */ + EBUR128_Up090, /**< itu U+090 */ + EBUR128_Um090, /**< itu U-090 */ + EBUR128_Up110, /**< itu U+110 */ + EBUR128_Um110, /**< itu U-110 */ + EBUR128_Up135, /**< itu U+135 */ + EBUR128_Um135, /**< itu U-135 */ + EBUR128_Up180, /**< itu U+180 */ + EBUR128_Tp000, /**< itu T+000 */ + EBUR128_Bp000, /**< itu B+000 */ + EBUR128_Bp045, /**< itu B+045 */ + EBUR128_Bm045 /**< itu B-045 */ +}; + +/** \enum error + * Error return values. + */ +enum error { + EBUR128_SUCCESS = 0, + EBUR128_ERROR_NOMEM, + EBUR128_ERROR_INVALID_MODE, + EBUR128_ERROR_INVALID_CHANNEL_INDEX, + EBUR128_ERROR_NO_CHANGE +}; + +/** \enum mode + * Use these values in ebur128_init (or'ed). Try to use the lowest possible + * modes that suit your needs, as performance will be better. + */ +enum mode { + /** can call ebur128_loudness_momentary */ + EBUR128_MODE_M = (1 << 0), + /** can call ebur128_loudness_shortterm */ + EBUR128_MODE_S = (1 << 1) | EBUR128_MODE_M, + /** can call ebur128_loudness_global_* and ebur128_relative_threshold */ + EBUR128_MODE_I = (1 << 2) | EBUR128_MODE_M, + /** can call ebur128_loudness_range */ + EBUR128_MODE_LRA = (1 << 3) | EBUR128_MODE_S, + /** can call ebur128_sample_peak */ + EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | EBUR128_MODE_M, + /** can call ebur128_true_peak */ + EBUR128_MODE_TRUE_PEAK = (1 << 5) | EBUR128_MODE_M + | EBUR128_MODE_SAMPLE_PEAK, + /** uses histogram algorithm to calculate loudness */ + EBUR128_MODE_HISTOGRAM = (1 << 6) +}; + +/** forward declaration of ebur128_state_internal */ +struct ebur128_state_internal; + +/** \brief Contains information about the state of a loudness measurement. + * + * You should not need to modify this struct directly. + */ +typedef struct { + int mode; /**< The current mode. */ + unsigned int channels; /**< The number of channels. */ + unsigned long samplerate; /**< The sample rate. */ + struct ebur128_state_internal* d; /**< Internal state. */ +} ebur128_state; + +/** \brief Get library version number. Do not pass null pointers here.
+ * + * @param major major version number of library + * @param minor minor version number of library + * @param patch patch version number of library + */ +void ebur128_get_version(int* major, int* minor, int* patch); + +/** \brief Initialize library state. + * + * @param channels the number of channels. + * @param samplerate the sample rate. + * @param mode see the mode enum for possible values. + * @return an initialized library state, or NULL on error. + */ +ebur128_state* ebur128_init(unsigned int channels, + unsigned long samplerate, + int mode); + +/** \brief Destroy library state. + * + * @param st pointer to a library state. + */ +void ebur128_destroy(ebur128_state** st); + +/** \brief Set channel type. + * + * The default is: + * - 0 -> EBUR128_LEFT + * - 1 -> EBUR128_RIGHT + * - 2 -> EBUR128_CENTER + * - 3 -> EBUR128_UNUSED + * - 4 -> EBUR128_LEFT_SURROUND + * - 5 -> EBUR128_RIGHT_SURROUND + * + * @param st library state. + * @param channel_number zero based channel index. + * @param value channel type from the "channel" enum. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_set_channel(ebur128_state* st, + unsigned int channel_number, + int value); + +/** \brief Change library parameters. + * + * Note that the channel map will be reset when setting a different number of + * channels. The current unfinished block will be lost. + * + * @param st library state. + * @param channels new number of channels. + * @param samplerate new sample rate. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be + * invalid and must be destroyed. + * - EBUR128_ERROR_NO_CHANGE if channels and sample rate were not changed. + */ +int ebur128_change_parameters(ebur128_state* st, + unsigned int channels, + unsigned long samplerate); + +/** \brief Set the maximum window duration. + * + * Set the maximum duration that will be used for ebur128_window_loudness(). + * Note that this destroys the current content of the audio buffer. + * + * @param st library state. + * @param window duration of the window in ms. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be + * invalid and must be destroyed. + * - EBUR128_ERROR_NO_CHANGE if window duration not changed. + */ +int ebur128_set_max_window(ebur128_state* st, unsigned long window); + +/** \brief Set the maximum history. + * + * Set the maximum history that will be stored for loudness integration. + * More history provides more accurate results, but requires more resources. + * + * Applies to ebur128_loudness_range() and ebur128_loudness_global() when + * EBUR128_MODE_HISTOGRAM is not set. + * + * Default is ULONG_MAX (at least ~50 days). + * Minimum is 3000ms for EBUR128_MODE_LRA and 400ms for EBUR128_MODE_M. + * + * @param st library state. + * @param history duration of history in ms. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NO_CHANGE if history not changed. + */ +int ebur128_set_max_history(ebur128_state* st, unsigned long history); + +/** \brief Add frames to be processed. + * + * @param st library state. + * @param src array of source frames. Channels must be interleaved. + * @param frames number of frames. Not number of samples! + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. 
+ */ +int ebur128_add_frames_short(ebur128_state* st, + const short* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_int(ebur128_state* st, + const int* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_float(ebur128_state* st, + const float* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_double(ebur128_state* st, + const double* src, + size_t frames); + +/** \brief Get global integrated loudness in LUFS. + * + * @param st library state. + * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set. + */ +int ebur128_loudness_global(ebur128_state* st, double* out); +/** \brief Get global integrated loudness in LUFS across multiple instances. + * + * @param sts array of library states. + * @param size length of sts + * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set. + */ +int ebur128_loudness_global_multiple(ebur128_state** sts, + size_t size, + double* out); + +/** \brief Get momentary loudness (last 400ms) in LUFS. + * + * @param st library state. + * @param out momentary loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + */ +int ebur128_loudness_momentary(ebur128_state* st, double* out); +/** \brief Get short-term loudness (last 3s) in LUFS. + * + * @param st library state. + * @param out short-term loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_S" has not been set. + */ +int ebur128_loudness_shortterm(ebur128_state* st, double* out); + +/** \brief Get loudness of the specified window in LUFS. + * + * window must not be larger than the current window set in st. + * The current window can be changed by calling ebur128_set_max_window(). + * + * @param st library state. + * @param window window in ms to calculate loudness. + * @param out loudness in LUFS. -HUGE_VAL if result is negative infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if window larger than current window in st. + */ +int ebur128_loudness_window(ebur128_state* st, + unsigned long window, + double* out); + +/** \brief Get loudness range (LRA) of programme in LU. + * + * Calculates loudness range according to EBU 3342. + * + * @param st library state. + * @param out loudness range (LRA) in LU. Will not be changed in case of + * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be + * returned in this case. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM in case of memory allocation error. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set. + */ +int ebur128_loudness_range(ebur128_state* st, double* out); +/** \brief Get loudness range (LRA) in LU across multiple instances. + * + * Calculates loudness range according to EBU 3342. + * + * @param sts array of library states. + * @param size length of sts + * @param out loudness range (LRA) in LU. Will not be changed in case of + * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be + * returned in this case. 
+ * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM in case of memory allocation error. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set. + */ +int ebur128_loudness_range_multiple(ebur128_state** sts, + size_t size, + double* out); + +/** \brief Get maximum sample peak from all frames that have been processed. + * + * The equation to convert to dBFS is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum sample peak in float format (1.0 is 0 dBFS) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum sample peak from the last call to add_frames(). + * + * The equation to convert to dBFS is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum sample peak in float format (1.0 is 0 dBFS) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_prev_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum true peak from all frames that have been processed. + * + * Uses an implementation defined algorithm to calculate the true peak. Do not + * try to compare resulting values across different versions of the library, + * as the algorithm may change. + * + * The current implementation uses a custom polyphase FIR interpolator to + * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for + * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz. + * + * The equation to convert to dBTP is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum true peak in float format (1.0 is 0 dBTP) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum true peak from the last call to add_frames(). + * + * Uses an implementation defined algorithm to calculate the true peak. Do not + * try to compare resulting values across different versions of the library, + * as the algorithm may change. + * + * The current implementation uses a custom polyphase FIR interpolator to + * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for + * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz. + * + * The equation to convert to dBTP is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum true peak in float format (1.0 is 0 dBTP) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_prev_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get relative threshold in LUFS. 
+ * + * @param st library state + * @param out relative threshold in LUFS. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not + * been set. + */ +int ebur128_relative_threshold(ebur128_state* st, double* out); +#ifdef __cplusplus +} +#endif + +#endif /* EBUR128_H_ */
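That closes the header. Before the implementation, a compact usage sketch of the API declared above; illustrative only, error handling elided, and the one-second silent buffer is just a stand-in for real interleaved samples (the integrated loudness of silence reports -HUGE_VAL):

    // Hypothetical driver for the ebur128 API declared above.
    #include <cstdio>
    #include <vector>
    #include "ebur128.h"

    int main() {
        const unsigned int channels = 2;
        const unsigned long rate = 48000;
        ebur128_state* st =
            ebur128_init(channels, rate, EBUR128_MODE_I | EBUR128_MODE_TRUE_PEAK);
        if (!st) return 1;

        std::vector<short> buf(channels * rate, 0);     // 1 s of interleaved audio
        ebur128_add_frames_short(st, buf.data(), rate); // count is frames, not samples

        double lufs = 0.0, peak = 0.0;
        ebur128_loudness_global(st, &lufs);             // needs EBUR128_MODE_I
        ebur128_true_peak(st, 0, &peak);                // channel 0, 1.0 == 0 dBTP
        std::printf("integrated: %f LUFS, true peak: %f\n", lufs, peak);

        ebur128_destroy(&st);                           // also NULLs the pointer
        return 0;
    }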
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/ebur128/src/ebur128.c b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/ebur128/src/ebur128.c new file mode 100644 index 0000000..6c10f1e --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/ebur128/src/ebur128.c @@ -0,0 +1,1333 @@ +/* See COPYING file for copyright and license details. */ + +#include "ebur128.h" + +#include <float.h> +#include <limits.h> +#include <math.h> /* You may have to define _USE_MATH_DEFINES if you use MSVC */ +#include <stdio.h> +#include <stdlib.h> + +/* This can be replaced by any BSD-like queue implementation. */ +#include <sys/queue.h> + +#define CHECK_ERROR(condition, errorcode, goto_point) \ + if ((condition)) { \ + errcode = (errorcode); \ + goto goto_point; \ + } + +STAILQ_HEAD(ebur128_double_queue, ebur128_dq_entry); +struct ebur128_dq_entry { + double z; + STAILQ_ENTRY(ebur128_dq_entry) entries; +}; + +#define ALMOST_ZERO 0.000001 + +typedef struct { /* Data structure for polyphase FIR interpolator */ + unsigned int factor; /* Interpolation factor of the interpolator */ + unsigned int taps; /* Taps (prefer odd to increase zero coeffs) */ + unsigned int channels; /* Number of channels */ + unsigned int delay; /* Size of delay buffer */ + struct { + unsigned int count; /* Number of coefficients in this subfilter */ + unsigned int* index; /* Delay index of corresponding filter coeff */ + double* coeff; /* List of subfilter coefficients */ + }* filter; /* List of subfilters (one for each factor) */ + float** z; /* List of delay buffers (one for each channel) */ + unsigned int zi; /* Current delay buffer index */ +} interpolator; + +struct ebur128_state_internal { + /** Filtered audio data (used as ring buffer). */ + double* audio_data; + /** Size of audio_data array. */ + size_t audio_data_frames; + /** Current index for audio_data. */ + size_t audio_data_index; + /** How many frames are needed for a gating block. Will correspond to 400ms + * of audio at initialization, and 100ms after the first block (75% overlap + * as specified in the 2011 revision of BS1770). */ + unsigned long needed_frames; + /** The channel map. Has as many elements as there are channels. */ + int* channel_map; + /** How many samples fit in 100ms (rounded). */ + unsigned long samples_in_100ms; + /** BS.1770 filter coefficients (nominator). */ + double b[5]; + /** BS.1770 filter coefficients (denominator). */ + double a[5]; + /** BS.1770 filter state. */ + double v[5][5]; + /** Linked list of block energies. */ + struct ebur128_double_queue block_list; + unsigned long block_list_max; + unsigned long block_list_size; + /** Linked list of 3s-block energies, used to calculate LRA. */ + struct ebur128_double_queue short_term_block_list; + unsigned long st_block_list_max; + unsigned long st_block_list_size; + int use_histogram; + unsigned long *block_energy_histogram; + unsigned long *short_term_block_energy_histogram; + /** Keeps track of when a new short term block is needed. */ + size_t short_term_frame_counter; + /** Maximum sample peak, one per channel */ + double* sample_peak; + double* prev_sample_peak; + /** Maximum true peak, one per channel */ + double* true_peak; + double* prev_true_peak; + interpolator* interp; + float* resampler_buffer_input; + size_t resampler_buffer_input_frames; + float* resampler_buffer_output; + size_t resampler_buffer_output_frames; + /** The maximum window duration in ms. */ + unsigned long window; + unsigned long history; +}; + +static double relative_gate = -10.0; + +/* Those will be calculated when initializing the library */ +static double relative_gate_factor; +static double minus_twenty_decibels; +static double histogram_energies[1000]; +static double histogram_energy_boundaries[1001]; + +static interpolator* interp_create(unsigned int taps, unsigned int factor, unsigned int channels) { + interpolator* interp = calloc(1, sizeof(interpolator)); + unsigned int j = 0; + + interp->taps = taps; + interp->factor = factor; + interp->channels = channels; + interp->delay = (interp->taps + interp->factor - 1) / interp->factor; + + /* Initialize the filter memory + * One subfilter per interpolation factor. */ + interp->filter = calloc(interp->factor, sizeof(*interp->filter)); + for (j = 0; j < interp->factor; j++) { + interp->filter[j].index = calloc(interp->delay, sizeof(unsigned int)); + interp->filter[j].coeff = calloc(interp->delay, sizeof(double)); + } + /* One delay buffer per channel. */ + interp->z = calloc(interp->channels, sizeof(float*)); + for (j = 0; j < interp->channels; j++) { + interp->z[j] = calloc( interp->delay, sizeof(float) ); + } + + /* Calculate the filter coefficients */ + for (j = 0; j < interp->taps; j++) { + /* Calculate sinc */ + double m = (double)j - (double)(interp->taps - 1) / 2.0; + double c = 1.0; + if (fabs(m) > ALMOST_ZERO) { + c = sin(m * M_PI / interp->factor) / (m * M_PI / interp->factor); + } + /* Apply Hanning window */ + c *= 0.5 * (1 - cos(2 * M_PI * j / (interp->taps - 1))); + + if (fabs(c) > ALMOST_ZERO) { /* Ignore any zero coeffs.
*/ + /* Put the coefficient into the correct subfilter */ + unsigned int f = j % interp->factor; + unsigned int t = interp->filter[f].count++; + interp->filter[f].coeff[t] = c; + interp->filter[f].index[t] = j / interp->factor; + } + } + return interp; +} + +static void interp_destroy(interpolator* interp) { + unsigned int j = 0; + if (!interp) { + return; + } + for (j = 0; j < interp->factor; j++) { + free(interp->filter[j].index); + free(interp->filter[j].coeff); + } + free(interp->filter); + for (j = 0; j < interp->channels; j++) { + free(interp->z[j]); + } + free(interp->z); + free(interp); +} + +static size_t interp_process(interpolator* interp, size_t frames, float* in, float* out) { + size_t frame = 0; + unsigned int chan = 0; + unsigned int f = 0; + unsigned int t = 0; + unsigned int out_stride = interp->channels * interp->factor; + float* outp = 0; + double acc = 0; + double c = 0; + + for (frame = 0; frame < frames; frame++) { + for (chan = 0; chan < interp->channels; chan++) { + /* Add sample to delay buffer */ + interp->z[chan][interp->zi] = *in++; + /* Apply coefficients */ + outp = out + chan; + for (f = 0; f < interp->factor; f++) { + acc = 0.0; + for (t = 0; t < interp->filter[f].count; t++) { + int i = (int)interp->zi - (int)interp->filter[f].index[t]; + if (i < 0) { + i += interp->delay; + } + c = interp->filter[f].coeff[t]; + acc += interp->z[chan][i] * c; + } + *outp = (float)acc; + outp += interp->channels; + } + } + out += out_stride; + interp->zi++; + if (interp->zi == interp->delay) { + interp->zi = 0; + } + } + + return frames * interp->factor; +} + +static void ebur128_init_filter(ebur128_state* st) { + int i, j; + + double f0 = 1681.974450955533; + double G = 3.999843853973347; + double Q = 0.7071752369554196; + + double K = tan(M_PI * f0 / (double) st->samplerate); + double Vh = pow(10.0, G / 20.0); + double Vb = pow(Vh, 0.4996667741545416); + + double pb[3] = {0.0, 0.0, 0.0}; + double pa[3] = {1.0, 0.0, 0.0}; + double rb[3] = {1.0, -2.0, 1.0}; + double ra[3] = {1.0, 0.0, 0.0}; + + double a0 = 1.0 + K / Q + K * K ; + pb[0] = (Vh + Vb * K / Q + K * K) / a0; + pb[1] = 2.0 * (K * K - Vh) / a0; + pb[2] = (Vh - Vb * K / Q + K * K) / a0; + pa[1] = 2.0 * (K * K - 1.0) / a0; + pa[2] = (1.0 - K / Q + K * K) / a0; + + /* fprintf(stderr, "%.14f %.14f %.14f %.14f %.14f\n", + b1[0], b1[1], b1[2], a1[1], a1[2]); */ + + f0 = 38.13547087602444; + Q = 0.5003270373238773; + K = tan(M_PI * f0 / (double) st->samplerate); + + ra[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K); + ra[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K); + + /* fprintf(stderr, "%.14f %.14f\n", a2[1], a2[2]); */ + + st->d->b[0] = pb[0] * rb[0]; + st->d->b[1] = pb[0] * rb[1] + pb[1] * rb[0]; + st->d->b[2] = pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0]; + st->d->b[3] = pb[1] * rb[2] + pb[2] * rb[1]; + st->d->b[4] = pb[2] * rb[2]; + + st->d->a[0] = pa[0] * ra[0]; + st->d->a[1] = pa[0] * ra[1] + pa[1] * ra[0]; + st->d->a[2] = pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0]; + st->d->a[3] = pa[1] * ra[2] + pa[2] * ra[1]; + st->d->a[4] = pa[2] * ra[2]; + + for (i = 0; i < 5; ++i) { + for (j = 0; j < 5; ++j) { + st->d->v[i][j] = 0.0; + } + } +} + +static int ebur128_init_channel_map(ebur128_state* st) { + size_t i; + st->d->channel_map = (int*) malloc(st->channels * sizeof(int)); + if (!st->d->channel_map) { + return EBUR128_ERROR_NOMEM; + } + if (st->channels == 4) { + st->d->channel_map[0] = EBUR128_LEFT; + st->d->channel_map[1] = EBUR128_RIGHT; + st->d->channel_map[2] = EBUR128_LEFT_SURROUND; + 
st->d->channel_map[3] = EBUR128_RIGHT_SURROUND; + } else if (st->channels == 5) { + st->d->channel_map[0] = EBUR128_LEFT; + st->d->channel_map[1] = EBUR128_RIGHT; + st->d->channel_map[2] = EBUR128_CENTER; + st->d->channel_map[3] = EBUR128_LEFT_SURROUND; + st->d->channel_map[4] = EBUR128_RIGHT_SURROUND; + } else { + for (i = 0; i < st->channels; ++i) { + switch (i) { + case 0: st->d->channel_map[i] = EBUR128_LEFT; break; + case 1: st->d->channel_map[i] = EBUR128_RIGHT; break; + case 2: st->d->channel_map[i] = EBUR128_CENTER; break; + case 3: st->d->channel_map[i] = EBUR128_UNUSED; break; + case 4: st->d->channel_map[i] = EBUR128_LEFT_SURROUND; break; + case 5: st->d->channel_map[i] = EBUR128_RIGHT_SURROUND; break; + default: st->d->channel_map[i] = EBUR128_UNUSED; break; + } + } + } + return EBUR128_SUCCESS; +} + +static int ebur128_init_resampler(ebur128_state* st) { + int errcode = EBUR128_SUCCESS; + + if (st->samplerate < 96000) { + st->d->interp = interp_create(49, 4, st->channels); + CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit) + } else if (st->samplerate < 192000) { + st->d->interp = interp_create(49, 2, st->channels); + CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit) + } else { + st->d->resampler_buffer_input = NULL; + st->d->resampler_buffer_output = NULL; + st->d->interp = NULL; + goto exit; + } + + st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4; + st->d->resampler_buffer_input = malloc(st->d->resampler_buffer_input_frames * + st->channels * + sizeof(float)); + CHECK_ERROR(!st->d->resampler_buffer_input, EBUR128_ERROR_NOMEM, free_interp) + + st->d->resampler_buffer_output_frames = + st->d->resampler_buffer_input_frames * + st->d->interp->factor; + st->d->resampler_buffer_output = malloc + (st->d->resampler_buffer_output_frames * + st->channels * + sizeof(float)); + CHECK_ERROR(!st->d->resampler_buffer_output, EBUR128_ERROR_NOMEM, free_input) + + return errcode; + +free_interp: + interp_destroy(st->d->interp); + st->d->interp = NULL; +free_input: + free(st->d->resampler_buffer_input); + st->d->resampler_buffer_input = NULL; +exit: + return errcode; +} + +static void ebur128_destroy_resampler(ebur128_state* st) { + free(st->d->resampler_buffer_input); + st->d->resampler_buffer_input = NULL; + free(st->d->resampler_buffer_output); + st->d->resampler_buffer_output = NULL; + interp_destroy(st->d->interp); + st->d->interp = NULL; +} + +void ebur128_get_version(int* major, int* minor, int* patch) { + *major = EBUR128_VERSION_MAJOR; + *minor = EBUR128_VERSION_MINOR; + *patch = EBUR128_VERSION_PATCH; +} + +ebur128_state* ebur128_init(unsigned int channels, + unsigned long samplerate, + int mode) { + int result; + int errcode; + ebur128_state* st; + unsigned int i; + size_t j; + + if (channels == 0 || samplerate < 5) { + return NULL; + } + + st = (ebur128_state*) malloc(sizeof(ebur128_state)); + CHECK_ERROR(!st, 0, exit) + st->d = (struct ebur128_state_internal*) + malloc(sizeof(struct ebur128_state_internal)); + CHECK_ERROR(!st->d, 0, free_state) + st->channels = channels; + errcode = ebur128_init_channel_map(st); + CHECK_ERROR(errcode, 0, free_internal) + + st->d->sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map) + st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_sample_peak, 0, free_sample_peak) + st->d->true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->true_peak, 0, free_prev_sample_peak) + 
st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_true_peak, 0, free_true_peak) + for (i = 0; i < channels; ++i) { + st->d->sample_peak[i] = 0.0; + st->d->prev_sample_peak[i] = 0.0; + st->d->true_peak[i] = 0.0; + st->d->prev_true_peak[i] = 0.0; + } + + st->d->use_histogram = mode & EBUR128_MODE_HISTOGRAM ? 1 : 0; + st->d->history = ULONG_MAX; + st->samplerate = samplerate; + st->d->samples_in_100ms = (st->samplerate + 5) / 10; + st->mode = mode; + if ((mode & EBUR128_MODE_S) == EBUR128_MODE_S) { + st->d->window = 3000; + } else if ((mode & EBUR128_MODE_M) == EBUR128_MODE_M) { + st->d->window = 400; + } else { + goto free_prev_true_peak; + } + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, 0, free_true_peak) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + ebur128_init_filter(st); + + if (st->d->use_histogram) { + st->d->block_energy_histogram = malloc(1000 * sizeof(unsigned long)); + CHECK_ERROR(!st->d->block_energy_histogram, 0, free_audio_data) + for (i = 0; i < 1000; ++i) { + st->d->block_energy_histogram[i] = 0; + } + } else { + st->d->block_energy_histogram = NULL; + } + if (st->d->use_histogram) { + st->d->short_term_block_energy_histogram = malloc(1000 * sizeof(unsigned long)); + CHECK_ERROR(!st->d->short_term_block_energy_histogram, 0, free_block_energy_histogram) + for (i = 0; i < 1000; ++i) { + st->d->short_term_block_energy_histogram[i] = 0; + } + } else { + st->d->short_term_block_energy_histogram = NULL; + } + STAILQ_INIT(&st->d->block_list); + st->d->block_list_size = 0; + st->d->block_list_max = st->d->history / 100; + STAILQ_INIT(&st->d->short_term_block_list); + st->d->st_block_list_size = 0; + st->d->st_block_list_max = st->d->history / 3000; + st->d->short_term_frame_counter = 0; + + result = ebur128_init_resampler(st); + CHECK_ERROR(result, 0, free_short_term_block_energy_histogram) + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + + /* initialize static constants */ + relative_gate_factor = pow(10.0, relative_gate / 10.0); + minus_twenty_decibels = pow(10.0, -20.0 / 10.0); + histogram_energy_boundaries[0] = pow(10.0, (-70.0 + 0.691) / 10.0); + if (st->d->use_histogram) { + for (i = 0; i < 1000; ++i) { + histogram_energies[i] = pow(10.0, ((double) i / 10.0 - 69.95 + 0.691) / 10.0); + } + for (i = 1; i < 1001; ++i) { + histogram_energy_boundaries[i] = pow(10.0, ((double) i / 10.0 - 70.0 + 0.691) / 10.0); + } + } + + return st; + +free_short_term_block_energy_histogram: + free(st->d->short_term_block_energy_histogram); +free_block_energy_histogram: + free(st->d->block_energy_histogram); +free_audio_data: + free(st->d->audio_data); +free_prev_true_peak: + free(st->d->prev_true_peak); +free_true_peak: + free(st->d->true_peak); +free_prev_sample_peak: + free(st->d->prev_sample_peak); +free_sample_peak: + free(st->d->sample_peak); +free_channel_map: + free(st->d->channel_map); +free_internal: + free(st->d); +free_state: + free(st); +exit: + 
return NULL; +} + +void ebur128_destroy(ebur128_state** st) { + struct ebur128_dq_entry* entry; + free((*st)->d->block_energy_histogram); + free((*st)->d->short_term_block_energy_histogram); + free((*st)->d->audio_data); + free((*st)->d->channel_map); + free((*st)->d->sample_peak); + free((*st)->d->prev_sample_peak); + free((*st)->d->true_peak); + free((*st)->d->prev_true_peak); + while (!STAILQ_EMPTY(&(*st)->d->block_list)) { + entry = STAILQ_FIRST(&(*st)->d->block_list); + STAILQ_REMOVE_HEAD(&(*st)->d->block_list, entries); + free(entry); + } + while (!STAILQ_EMPTY(&(*st)->d->short_term_block_list)) { + entry = STAILQ_FIRST(&(*st)->d->short_term_block_list); + STAILQ_REMOVE_HEAD(&(*st)->d->short_term_block_list, entries); + free(entry); + } + ebur128_destroy_resampler(*st); + free((*st)->d); + free(*st); + *st = NULL; +} + +static void ebur128_check_true_peak(ebur128_state* st, size_t frames) { + size_t c, i, frames_out; + + frames_out = interp_process(st->d->interp, frames, + st->d->resampler_buffer_input, + st->d->resampler_buffer_output); + + for (i = 0; i < frames_out; ++i) { + for (c = 0; c < st->channels; ++c) { + float val = st->d->resampler_buffer_output[i * st->channels + c]; + + if (val > st->d->prev_true_peak[c]) { + st->d->prev_true_peak[c] = val; + } else if (-val > st->d->prev_true_peak[c]) { + st->d->prev_true_peak[c] = -val; + } + } + } +} + +#ifdef __SSE2_MATH__ +#include +#define TURN_ON_FTZ \ + unsigned int mxcsr = _mm_getcsr(); \ + _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON); +#define TURN_OFF_FTZ _mm_setcsr(mxcsr); +#define FLUSH_MANUALLY +#else +#warning "manual FTZ is being used, please enable SSE2 (-msse2 -mfpmath=sse)" +#define TURN_ON_FTZ +#define TURN_OFF_FTZ +#define FLUSH_MANUALLY \ + st->d->v[ci][4] = fabs(st->d->v[ci][4]) < DBL_MIN ? 0.0 : st->d->v[ci][4]; \ + st->d->v[ci][3] = fabs(st->d->v[ci][3]) < DBL_MIN ? 0.0 : st->d->v[ci][3]; \ + st->d->v[ci][2] = fabs(st->d->v[ci][2]) < DBL_MIN ? 0.0 : st->d->v[ci][2]; \ + st->d->v[ci][1] = fabs(st->d->v[ci][1]) < DBL_MIN ? 0.0 : st->d->v[ci][1]; +#endif + +#define EBUR128_FILTER(type, min_scale, max_scale) \ +static void ebur128_filter_##type(ebur128_state* st, const type* src, \ + size_t frames) { \ + static double scaling_factor = \ + -((double) (min_scale)) > (double) (max_scale) ? 
\ + -((double) (min_scale)) : (double) (max_scale); \ + double* audio_data = st->d->audio_data + st->d->audio_data_index; \ + size_t i, c; \ + \ + TURN_ON_FTZ \ + \ + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK) { \ + for (c = 0; c < st->channels; ++c) { \ + double max = 0.0; \ + for (i = 0; i < frames; ++i) { \ + if (src[i * st->channels + c] > max) { \ + max = src[i * st->channels + c]; \ + } else if (-src[i * st->channels + c] > max) { \ + max = -1.0 * src[i * st->channels + c]; \ + } \ + } \ + max /= scaling_factor; \ + if (max > st->d->prev_sample_peak[c]) st->d->prev_sample_peak[c] = max; \ + } \ + } \ + if ((st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK && \ + st->d->interp) { \ + for (c = 0; c < st->channels; ++c) { \ + for (i = 0; i < frames; ++i) { \ + st->d->resampler_buffer_input[i * st->channels + c] = \ + (float) (src[i * st->channels + c] / scaling_factor); \ + } \ + } \ + ebur128_check_true_peak(st, frames); \ + } \ + for (c = 0; c < st->channels; ++c) { \ + int ci = st->d->channel_map[c] - 1; \ + if (ci < 0) continue; \ + else if (ci == EBUR128_DUAL_MONO - 1) ci = 0; /*dual mono */ \ + for (i = 0; i < frames; ++i) { \ + st->d->v[ci][0] = (double) (src[i * st->channels + c] / scaling_factor) \ + - st->d->a[1] * st->d->v[ci][1] \ + - st->d->a[2] * st->d->v[ci][2] \ + - st->d->a[3] * st->d->v[ci][3] \ + - st->d->a[4] * st->d->v[ci][4]; \ + audio_data[i * st->channels + c] = \ + st->d->b[0] * st->d->v[ci][0] \ + + st->d->b[1] * st->d->v[ci][1] \ + + st->d->b[2] * st->d->v[ci][2] \ + + st->d->b[3] * st->d->v[ci][3] \ + + st->d->b[4] * st->d->v[ci][4]; \ + st->d->v[ci][4] = st->d->v[ci][3]; \ + st->d->v[ci][3] = st->d->v[ci][2]; \ + st->d->v[ci][2] = st->d->v[ci][1]; \ + st->d->v[ci][1] = st->d->v[ci][0]; \ + } \ + FLUSH_MANUALLY \ + } \ + TURN_OFF_FTZ \ +} +EBUR128_FILTER(short, SHRT_MIN, SHRT_MAX) +EBUR128_FILTER(int, INT_MIN, INT_MAX) +EBUR128_FILTER(float, -1.0f, 1.0f) +EBUR128_FILTER(double, -1.0, 1.0) + +static double ebur128_energy_to_loudness(double energy) { + return 10 * (log(energy) / log(10.0)) - 0.691; +} + +static size_t find_histogram_index(double energy) { + size_t index_min = 0; + size_t index_max = 1000; + size_t index_mid; + + do { + index_mid = (index_min + index_max) / 2; + if (energy >= histogram_energy_boundaries[index_mid]) { + index_min = index_mid; + } else { + index_max = index_mid; + } + } while (index_max - index_min != 1); + + return index_min; +} + +static int ebur128_calc_gating_block(ebur128_state* st, size_t frames_per_block, + double* optional_output) { + size_t i, c; + double sum = 0.0; + double channel_sum; + for (c = 0; c < st->channels; ++c) { + if (st->d->channel_map[c] == EBUR128_UNUSED) { + continue; + } + channel_sum = 0.0; + if (st->d->audio_data_index < frames_per_block * st->channels) { + for (i = 0; i < st->d->audio_data_index / st->channels; ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + for (i = st->d->audio_data_frames - + (frames_per_block - + st->d->audio_data_index / st->channels); + i < st->d->audio_data_frames; ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + } else { + for (i = st->d->audio_data_index / st->channels - frames_per_block; + i < st->d->audio_data_index / st->channels; + ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + } + if (st->d->channel_map[c] == EBUR128_Mp110 || + 
st->d->channel_map[c] == EBUR128_Mm110 || + st->d->channel_map[c] == EBUR128_Mp060 || + st->d->channel_map[c] == EBUR128_Mm060 || + st->d->channel_map[c] == EBUR128_Mp090 || + st->d->channel_map[c] == EBUR128_Mm090) { + channel_sum *= 1.41; + } else if (st->d->channel_map[c] == EBUR128_DUAL_MONO) { + channel_sum *= 2.0; + } + sum += channel_sum; + } + sum /= (double) frames_per_block; + if (optional_output) { + *optional_output = sum; + return EBUR128_SUCCESS; + } else if (sum >= histogram_energy_boundaries[0]) { + if (st->d->use_histogram) { + ++st->d->block_energy_histogram[find_histogram_index(sum)]; + } else { + struct ebur128_dq_entry* block; + if (st->d->block_list_size == st->d->block_list_max) { + block = STAILQ_FIRST(&st->d->block_list); + STAILQ_REMOVE_HEAD(&st->d->block_list, entries); + } else { + block = (struct ebur128_dq_entry*) malloc(sizeof(struct ebur128_dq_entry)); + if (!block) { + return EBUR128_ERROR_NOMEM; + } + st->d->block_list_size++; + } + block->z = sum; + STAILQ_INSERT_TAIL(&st->d->block_list, block, entries); + } + return EBUR128_SUCCESS; + } else { + return EBUR128_SUCCESS; + } +} + +int ebur128_set_channel(ebur128_state* st, + unsigned int channel_number, + int value) { + if (channel_number >= st->channels) { + return 1; + } + if (value == EBUR128_DUAL_MONO && + (st->channels != 1 || channel_number != 0)) { + fprintf(stderr, "EBUR128_DUAL_MONO only works with mono files!\n"); + return 1; + } + st->d->channel_map[channel_number] = value; + return 0; +} + +int ebur128_change_parameters(ebur128_state* st, + unsigned int channels, + unsigned long samplerate) { + int errcode = EBUR128_SUCCESS; + size_t j; + + if (channels == 0 || samplerate < 5) { + return EBUR128_ERROR_NOMEM; + } + + if (channels == st->channels && + samplerate == st->samplerate) { + return EBUR128_ERROR_NO_CHANGE; + } + + free(st->d->audio_data); + st->d->audio_data = NULL; + + if (channels != st->channels) { + unsigned int i; + + free(st->d->channel_map); st->d->channel_map = NULL; + free(st->d->sample_peak); st->d->sample_peak = NULL; + free(st->d->prev_sample_peak); st->d->prev_sample_peak = NULL; + free(st->d->true_peak); st->d->true_peak = NULL; + free(st->d->prev_true_peak); st->d->prev_true_peak = NULL; + st->channels = channels; + + errcode = ebur128_init_channel_map(st); + CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit) + + st->d->sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->sample_peak, EBUR128_ERROR_NOMEM, exit) + st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_sample_peak, EBUR128_ERROR_NOMEM, exit) + st->d->true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->true_peak, EBUR128_ERROR_NOMEM, exit) + st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_true_peak, EBUR128_ERROR_NOMEM, exit) + for (i = 0; i < channels; ++i) { + st->d->sample_peak[i] = 0.0; + st->d->prev_sample_peak[i] = 0.0; + st->d->true_peak[i] = 0.0; + st->d->prev_true_peak[i] = 0.0; + } + } + if (samplerate != st->samplerate) { + st->samplerate = samplerate; + st->d->samples_in_100ms = (st->samplerate + 5) / 10; + ebur128_init_filter(st); + } + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + 
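/* Illustration (hypothetical numbers, not from the original source): at + * 48000 Hz with a 250 ms window, audio_data_frames = 48000 * 250 / 1000 = + * 12000, while samples_in_100ms = (48000 + 5) / 10 = 4800; 12000 % 4800 = + * 2400, so the count is rounded up to 14400, a whole number of 100 ms + * blocks. */ +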
st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + ebur128_destroy_resampler(st); + errcode = ebur128_init_resampler(st); + CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit) + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + /* reset short term frame counter */ + st->d->short_term_frame_counter = 0; + +exit: + return errcode; +} + +int ebur128_set_max_window(ebur128_state* st, unsigned long window) +{ + int errcode = EBUR128_SUCCESS; + size_t j; + + if ((st->mode & EBUR128_MODE_S) == EBUR128_MODE_S && window < 3000) { + window = 3000; + } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && window < 400) { + window = 400; + } + if (window == st->d->window) { + return EBUR128_ERROR_NO_CHANGE; + } + + st->d->window = window; + free(st->d->audio_data); + st->d->audio_data = NULL; + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + /* reset short term frame counter */ + st->d->short_term_frame_counter = 0; + +exit: + return errcode; +} + +int ebur128_set_max_history(ebur128_state* st, unsigned long history) +{ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA && history < 3000) { + history = 3000; + } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && history < 400) { + history = 400; + } + if (history == st->d->history) { + return EBUR128_ERROR_NO_CHANGE; + } + st->d->history = history; + st->d->block_list_max = st->d->history / 100; + st->d->st_block_list_max = st->d->history / 3000; + while (st->d->block_list_size > st->d->block_list_max) { + struct ebur128_dq_entry* block = STAILQ_FIRST(&st->d->block_list); + STAILQ_REMOVE_HEAD(&st->d->block_list, entries); + free(block); + st->d->block_list_size--; + } + while (st->d->st_block_list_size > st->d->st_block_list_max) { + struct ebur128_dq_entry* block = STAILQ_FIRST(&st->d->short_term_block_list); + STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); + free(block); + st->d->st_block_list_size--; + } + return EBUR128_SUCCESS; +} + +static int ebur128_energy_shortterm(ebur128_state* st, double* out); +#define EBUR128_ADD_FRAMES(type) \ +int ebur128_add_frames_##type(ebur128_state* st, \ + const type* src, size_t frames) { \ + size_t src_index = 0; \ + unsigned int c = 0; \ + for (c = 0; c < st->channels; c++) { \ + st->d->prev_sample_peak[c] = 0.0; \ + st->d->prev_true_peak[c] = 0.0; \ + } \ + while (frames > 0) { \ + if (frames >= st->d->needed_frames) { \ + ebur128_filter_##type(st, src + src_index, st->d->needed_frames); \ + src_index += st->d->needed_frames * st->channels; 
\ + frames -= st->d->needed_frames; \ + st->d->audio_data_index += st->d->needed_frames * st->channels; \ + /* calculate the new gating block */ \ + if ((st->mode & EBUR128_MODE_I) == EBUR128_MODE_I) { \ + if (ebur128_calc_gating_block(st, st->d->samples_in_100ms * 4, NULL)) {\ + return EBUR128_ERROR_NOMEM; \ + } \ + } \ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \ + st->d->short_term_frame_counter += st->d->needed_frames; \ + if (st->d->short_term_frame_counter == st->d->samples_in_100ms * 30) { \ + struct ebur128_dq_entry* block; \ + double st_energy; \ + if (ebur128_energy_shortterm(st, &st_energy) == EBUR128_SUCCESS && \ + st_energy >= histogram_energy_boundaries[0]) { \ + if (st->d->use_histogram) { \ + ++st->d->short_term_block_energy_histogram[ \ + find_histogram_index(st_energy)];\ + } else { \ + if (st->d->st_block_list_size == st->d->st_block_list_max) { \ + block = STAILQ_FIRST(&st->d->short_term_block_list); \ + STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); \ + } else { \ + block = (struct ebur128_dq_entry*) \ + malloc(sizeof(struct ebur128_dq_entry)); \ + if (!block) return EBUR128_ERROR_NOMEM; \ + st->d->st_block_list_size++; \ + } \ + block->z = st_energy; \ + STAILQ_INSERT_TAIL(&st->d->short_term_block_list, \ + block, entries); \ + } \ + } \ + st->d->short_term_frame_counter = st->d->samples_in_100ms * 20; \ + } \ + } \ + /* 100ms are needed for all blocks besides the first one */ \ + st->d->needed_frames = st->d->samples_in_100ms; \ + /* reset audio_data_index when buffer full */ \ + if (st->d->audio_data_index == st->d->audio_data_frames * st->channels) {\ + st->d->audio_data_index = 0; \ + } \ + } else { \ + ebur128_filter_##type(st, src + src_index, frames); \ + st->d->audio_data_index += frames * st->channels; \ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \ + st->d->short_term_frame_counter += frames; \ + } \ + st->d->needed_frames -= frames; \ + frames = 0; \ + } \ + } \ + for (c = 0; c < st->channels; c++) { \ + if (st->d->prev_sample_peak[c] > st->d->sample_peak[c]) { \ + st->d->sample_peak[c] = st->d->prev_sample_peak[c]; \ + } \ + if (st->d->prev_true_peak[c] > st->d->true_peak[c]) { \ + st->d->true_peak[c] = st->d->prev_true_peak[c]; \ + } \ + } \ + return EBUR128_SUCCESS; \ +} +EBUR128_ADD_FRAMES(short) +EBUR128_ADD_FRAMES(int) +EBUR128_ADD_FRAMES(float) +EBUR128_ADD_FRAMES(double) + +static int ebur128_calc_relative_threshold(ebur128_state* st, + size_t* above_thresh_counter, + double* relative_threshold) { + struct ebur128_dq_entry* it; + size_t i; + + if (st->d->use_histogram) { + for (i = 0; i < 1000; ++i) { + *relative_threshold += st->d->block_energy_histogram[i] * + histogram_energies[i]; + *above_thresh_counter += st->d->block_energy_histogram[i]; + } + } else { + STAILQ_FOREACH(it, &st->d->block_list, entries) { + ++*above_thresh_counter; + *relative_threshold += it->z; + } + } + + return EBUR128_SUCCESS; +} + +static int ebur128_gated_loudness(ebur128_state** sts, size_t size, + double* out) { + struct ebur128_dq_entry* it; + double gated_loudness = 0.0; + double relative_threshold = 0.0; + size_t above_thresh_counter = 0; + size_t i, j, start_index; + + for (i = 0; i < size; i++) { + if (sts[i] && (sts[i]->mode & EBUR128_MODE_I) != EBUR128_MODE_I) { + return EBUR128_ERROR_INVALID_MODE; + } + } + + for (i = 0; i < size; i++) { + if (!sts[i]) { + continue; + } + ebur128_calc_relative_threshold(sts[i], &above_thresh_counter, &relative_threshold); + } + if (!above_thresh_counter) { + *out = -HUGE_VAL; + return 
EBUR128_SUCCESS; + } + + relative_threshold /= (double)above_thresh_counter; + relative_threshold *= relative_gate_factor; + + above_thresh_counter = 0; + if (relative_threshold < histogram_energy_boundaries[0]) { + start_index = 0; + } else { + start_index = find_histogram_index(relative_threshold); + if (relative_threshold > histogram_energies[start_index]) { + ++start_index; + } + } + for (i = 0; i < size; i++) { + if (!sts[i]) { + continue; + } + if (sts[i]->d->use_histogram) { + for (j = start_index; j < 1000; ++j) { + gated_loudness += sts[i]->d->block_energy_histogram[j] * + histogram_energies[j]; + above_thresh_counter += sts[i]->d->block_energy_histogram[j]; + } + } else { + STAILQ_FOREACH(it, &sts[i]->d->block_list, entries) { + if (it->z >= relative_threshold) { + ++above_thresh_counter; + gated_loudness += it->z; + } + } + } + } + if (!above_thresh_counter) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + gated_loudness /= (double) above_thresh_counter; + *out = ebur128_energy_to_loudness(gated_loudness); + return EBUR128_SUCCESS; +} + +int ebur128_relative_threshold(ebur128_state* st, double* out) { + double relative_threshold = 0.0; + size_t above_thresh_counter = 0; + + if ((st->mode & EBUR128_MODE_I) != EBUR128_MODE_I) { + return EBUR128_ERROR_INVALID_MODE; + } + + ebur128_calc_relative_threshold(st, &above_thresh_counter, &relative_threshold); + + if (!above_thresh_counter) { + *out = -70.0; + return EBUR128_SUCCESS; + } + + relative_threshold /= (double)above_thresh_counter; + relative_threshold *= relative_gate_factor; + + *out = ebur128_energy_to_loudness(relative_threshold); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_global(ebur128_state* st, double* out) { + return ebur128_gated_loudness(&st, 1, out); +} + +int ebur128_loudness_global_multiple(ebur128_state** sts, size_t size, + double* out) { + return ebur128_gated_loudness(sts, size, out); +} + +static int ebur128_energy_in_interval(ebur128_state* st, + size_t interval_frames, + double* out) { + if (interval_frames > st->d->audio_data_frames) { + return EBUR128_ERROR_INVALID_MODE; + } + ebur128_calc_gating_block(st, interval_frames, out); + return EBUR128_SUCCESS; +} + +static int ebur128_energy_shortterm(ebur128_state* st, double* out) { + return ebur128_energy_in_interval(st, st->d->samples_in_100ms * 30, out); +} + +int ebur128_loudness_momentary(ebur128_state* st, double* out) { + double energy; + int error = ebur128_energy_in_interval(st, st->d->samples_in_100ms * 4, + &energy); + if (error) { + return error; + } else if (energy <= 0.0) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + *out = ebur128_energy_to_loudness(energy); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_shortterm(ebur128_state* st, double* out) { + double energy; + int error = ebur128_energy_shortterm(st, &energy); + if (error) { + return error; + } else if (energy <= 0.0) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + *out = ebur128_energy_to_loudness(energy); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_window(ebur128_state* st, + unsigned long window, + double* out) { + double energy; + size_t interval_frames = st->samplerate * window / 1000; + int error = ebur128_energy_in_interval(st, interval_frames, &energy); + if (error) { + return error; + } else if (energy <= 0.0) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + *out = ebur128_energy_to_loudness(energy); + return EBUR128_SUCCESS; +} + +static int ebur128_double_cmp(const void *p1, const void *p2) { + const double* d1 = (const double*) 
p1; + const double* d2 = (const double*) p2; + return (*d1 > *d2) - (*d1 < *d2); +} + +/* EBU - TECH 3342 */ +int ebur128_loudness_range_multiple(ebur128_state** sts, size_t size, + double* out) { + size_t i, j; + struct ebur128_dq_entry* it; + double* stl_vector; + size_t stl_size; + double* stl_relgated; + size_t stl_relgated_size; + double stl_power, stl_integrated; + /* High and low percentile energy */ + double h_en, l_en; + int use_histogram = 0; + + for (i = 0; i < size; ++i) { + if (sts[i]) { + if ((sts[i]->mode & EBUR128_MODE_LRA) != EBUR128_MODE_LRA) { + return EBUR128_ERROR_INVALID_MODE; + } + if (i == 0 && sts[i]->mode & EBUR128_MODE_HISTOGRAM) { + use_histogram = 1; + } else if (use_histogram != !!(sts[i]->mode & EBUR128_MODE_HISTOGRAM)) { + return EBUR128_ERROR_INVALID_MODE; + } + } + } + + if (use_histogram) { + unsigned long hist[1000] = { 0 }; + size_t percentile_low, percentile_high; + size_t index; + + stl_size = 0; + stl_power = 0.0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + for (j = 0; j < 1000; ++j) { + hist[j] += sts[i]->d->short_term_block_energy_histogram[j]; + stl_size += sts[i]->d->short_term_block_energy_histogram[j]; + stl_power += sts[i]->d->short_term_block_energy_histogram[j] + * histogram_energies[j]; + } + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + + stl_power /= stl_size; + stl_integrated = minus_twenty_decibels * stl_power; + + if (stl_integrated < histogram_energy_boundaries[0]) { + index = 0; + } else { + index = find_histogram_index(stl_integrated); + if (stl_integrated > histogram_energies[index]) { + ++index; + } + } + stl_size = 0; + for (j = index; j < 1000; ++j) { + stl_size += hist[j]; + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + + percentile_low = (size_t) ((stl_size - 1) * 0.1 + 0.5); + percentile_high = (size_t) ((stl_size - 1) * 0.95 + 0.5); + + stl_size = 0; + j = index; + while (stl_size <= percentile_low) { + stl_size += hist[j++]; + } + l_en = histogram_energies[j - 1]; + while (stl_size <= percentile_high) { + stl_size += hist[j++]; + } + h_en = histogram_energies[j - 1]; + *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en); + return EBUR128_SUCCESS; + + } else { + stl_size = 0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) { + ++stl_size; + } + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + stl_vector = (double*) malloc(stl_size * sizeof(double)); + if (!stl_vector) { + return EBUR128_ERROR_NOMEM; + } + + j = 0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) { + stl_vector[j] = it->z; + ++j; + } + } + qsort(stl_vector, stl_size, sizeof(double), ebur128_double_cmp); + stl_power = 0.0; + for (i = 0; i < stl_size; ++i) { + stl_power += stl_vector[i]; + } + stl_power /= (double) stl_size; + stl_integrated = minus_twenty_decibels * stl_power; + + stl_relgated = stl_vector; + stl_relgated_size = stl_size; + while (stl_relgated_size > 0 && *stl_relgated < stl_integrated) { + ++stl_relgated; + --stl_relgated_size; + } + + if (stl_relgated_size) { + h_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.95 + 0.5)]; + l_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.1 + 0.5)]; + free(stl_vector); + *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en); + return EBUR128_SUCCESS; + } else { + free(stl_vector); + *out = 0.0; + return 
EBUR128_SUCCESS; + } + } +} + +int ebur128_loudness_range(ebur128_state* st, double* out) { + return ebur128_loudness_range_multiple(&st, 1, out); +} + +int ebur128_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) != EBUR128_MODE_SAMPLE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_prev_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) != EBUR128_MODE_SAMPLE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->prev_sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->true_peak[channel_number] > st->d->sample_peak[channel_number] + ? st->d->true_peak[channel_number] + : st->d->sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_prev_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->prev_true_peak[channel_number] + > st->d->prev_sample_peak[channel_number] + ? st->d->prev_true_peak[channel_number] + : st->d->prev_sample_peak[channel_number]; + return EBUR128_SUCCESS; +} \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/CMakeLists.txt new file mode 100644 index 0000000..59f5980 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(inc) +AUX_SOURCE_DIRECTORY(src DIR_KISS_FFT_SRCS) +add_library(kiss_fft ${DIR_KISS_FFT_SRCS}) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/inc/_kiss_fft_guts.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/inc/_kiss_fft_guts.h new file mode 100644 index 0000000..7bf5762 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/inc/_kiss_fft_guts.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +/* kiss_fft.h + defines kiss_fft_scalar as either short or a float type + and defines + typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ +#include "kiss_fft.h" +#include <limits.h> + +#define MAXFACTORS 32 +/* e.g.
an fft of length 128 has 4 factors + as far as kissfft is concerned + 4*4*4*2 + */ + +struct kiss_fft_state{ + int nfft; + int inverse; + int factors[2*MAXFACTORS]; + kiss_fft_cpx twiddles[1]; +}; + +/* + Explanation of macros dealing with complex math: + + C_MUL(m,a,b) : m = a*b + C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise + C_SUB( res, a,b) : res = a - b + C_SUBFROM( res , a) : res -= a + C_ADDTO( res , a) : res += a + * */ +#ifdef FIXED_POINT +#include <stdint.h> +#if (FIXED_POINT==32) +# define FRACBITS 31 +# define SAMPPROD int64_t +#define SAMP_MAX INT32_MAX +#define SAMP_MIN INT32_MIN +#else +# define FRACBITS 15 +# define SAMPPROD int32_t +#define SAMP_MAX INT16_MAX +#define SAMP_MIN INT16_MIN +#endif + +#if defined(CHECK_OVERFLOW) +# define CHECK_OVERFLOW_OP(a,op,b) \ + if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \ + fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) ); } +#endif + + +# define smul(a,b) ( (SAMPPROD)(a)*(b) ) +# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS ) + +# define S_MUL(a,b) sround( smul(a,b) ) + +# define C_MUL(m,a,b) \ + do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \ + (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) + +# define DIVSCALAR(x,k) \ + (x) = sround( smul( x, SAMP_MAX/k ) ) + +# define C_FIXDIV(c,div) \ + do { DIVSCALAR( (c).r , div); \ + DIVSCALAR( (c).i , div); }while (0) + +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r = sround( smul( (c).r , s ) ) ;\ + (c).i = sround( smul( (c).i , s ) ) ; }while(0) + +#else /* not FIXED_POINT*/ + +# define S_MUL(a,b) ( (a)*(b) ) +#define C_MUL(m,a,b) \ + do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ + (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) +# define C_FIXDIV(c,div) /* NOOP */ +# define C_MULBYSCALAR( c, s ) \ + do{ (c).r *= (s);\ + (c).i *= (s); }while(0) +#endif + +#ifndef CHECK_OVERFLOW_OP +# define CHECK_OVERFLOW_OP(a,op,b) /* noop */ +#endif + +#define C_ADD( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,+,(b).r)\ + CHECK_OVERFLOW_OP((a).i,+,(b).i)\ + (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ + }while(0) +#define C_SUB( res, a,b)\ + do { \ + CHECK_OVERFLOW_OP((a).r,-,(b).r)\ + CHECK_OVERFLOW_OP((a).i,-,(b).i)\ + (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ + }while(0) +#define C_ADDTO( res , a)\ + do { \ + CHECK_OVERFLOW_OP((res).r,+,(a).r)\ + CHECK_OVERFLOW_OP((res).i,+,(a).i)\ + (res).r += (a).r; (res).i += (a).i;\ + }while(0) + +#define C_SUBFROM( res , a)\ + do {\ + CHECK_OVERFLOW_OP((res).r,-,(a).r)\ + CHECK_OVERFLOW_OP((res).i,-,(a).i)\ + (res).r -= (a).r; (res).i -= (a).i; \ + }while(0) + + +#ifdef FIXED_POINT +# define KISS_FFT_COS(phase) floor(.5+SAMP_MAX * cos (phase)) +# define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase)) +# define HALF_OF(x) ((x)>>1) +#elif defined(USE_SIMD) +# define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) +# define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) +# define HALF_OF(x) ((x)*_mm_set1_ps(.5)) +#else +# define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) +# define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) +# define HALF_OF(x) ((x)*.5) +#endif + +#define kf_cexp(x,phase) \ + do{ \ + (x)->r = KISS_FFT_COS(phase);\ + (x)->i = KISS_FFT_SIN(phase);\ + }while(0) + + +/* a debugging function */ +#define pcpx(c)\ + fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) ) + + +#ifdef KISS_FFT_USE_ALLOCA +// define this to allow use of
alloca instead of malloc for temporary buffers +// Temporary buffers are used in two cases: +// 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5 +// 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform. +#include <alloca.h> +#define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes) +#define KISS_FFT_TMP_FREE(ptr) +#else +#define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes) +#define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr) +#endif diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/inc/kiss_fft.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/inc/kiss_fft.h new file mode 100644 index 0000000..1ca0b45 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/inc/kiss_fft.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FFT_H +#define KISS_FFT_H + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <string.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + ATTENTION! + If you would like: + -- a utility that will handle the caching of fft objects + -- real-only (no imaginary time component ) FFT + -- a multi-dimensional FFT + -- a command-line utility to perform ffts + -- a command-line utility to perform fast-convolution filtering + + Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c + in the tools/ directory. +*/ + +#ifdef USE_SIMD +# include <xmmintrin.h> +# define kiss_fft_scalar __m128 +#define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16) +#define KISS_FFT_FREE _mm_free +#else +#define KISS_FFT_MALLOC malloc +#define KISS_FFT_FREE free +#endif + + +#ifdef FIXED_POINT +#include <stdint.h> +# if (FIXED_POINT == 32) +# define kiss_fft_scalar int32_t +# else +# define kiss_fft_scalar int16_t +# endif +#else +# ifndef kiss_fft_scalar +/* default is float */ +# define kiss_fft_scalar float +# endif +#endif + +typedef struct { + kiss_fft_scalar r; + kiss_fft_scalar i; +}kiss_fft_cpx; + +typedef struct kiss_fft_state* kiss_fft_cfg; + +/* + * kiss_fft_alloc + * + * Initialize an FFT (or IFFT) algorithm's cfg/state buffer. + * + * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL); + * + * The return value from fft_alloc is a cfg buffer used internally + * by the fft routine or NULL. + * + * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc. + * The returned value should be free()d when done to avoid memory leaks. + * + * The state can be placed in a user supplied buffer 'mem': + * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, + * then the function places the cfg in mem and the size used in *lenmem + * and returns mem. + * + * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), + * then the function returns NULL and places the minimum cfg + * buffer size in *lenmem. + * */ + +kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); + +/* + * kiss_fft(cfg,in_out_buf) + * + * Perform an FFT on a complex input buffer. + * for a forward FFT, + * fin should be f[0] , f[1] , ... ,f[nfft-1] + * fout will be F[0] , F[1] , ...
,F[nfft-1] + * Note that each element is complex and can be accessed like + f[k].r and f[k].i + * */ +void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); + +/* + A more generic version of the above function. It reads its input from every Nth sample. + * */ +void kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride); + +/* If kiss_fft_alloc allocated a buffer, it is one contiguous + buffer and can be simply free()d when no longer needed*/ +#define kiss_fft_free KISS_FFT_FREE + +/* + Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up + your compiler output to call this before you exit. +*/ +void kiss_fft_cleanup(void); + + +/* + * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) + */ +int kiss_fft_next_fast_size(int n); + +/* for real ffts, we need an even size */ +#define kiss_fftr_next_fast_size_real(n) \ + (kiss_fft_next_fast_size( ((n)+1)>>1)<<1) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/inc/kiss_fftr.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/inc/kiss_fftr.h new file mode 100644 index 0000000..588948d --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/inc/kiss_fftr.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#ifndef KISS_FTR_H +#define KISS_FTR_H + +#include "kiss_fft.h" +#ifdef __cplusplus +extern "C" { +#endif + + +/* + + Real optimized version can save about 45% cpu time vs. complex fft of a real seq. + + + + */ + +typedef struct kiss_fftr_state *kiss_fftr_cfg; + + +kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem); +/* + nfft must be even + + If you don't care to allocate space, use mem = lenmem = NULL +*/ + + +void kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); +/* + input timedata has nfft scalar points + output freqdata has nfft/2+1 complex points +*/ + +void kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); +/* + input freqdata has nfft/2+1 complex points + output timedata has nfft scalar points +*/ + +#define kiss_fftr_free KISS_FFT_FREE + +#ifdef __cplusplus +} +#endif +#endif diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/src/kiss_fft.c b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/src/kiss_fft.c new file mode 100644 index 0000000..b34879a --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/src/kiss_fft.c @@ -0,0 +1,402 @@ +/* + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + + +#include "_kiss_fft_guts.h" +/* The guts header contains all the multiplication and addition macros that are defined for + fixed or floating point complex numbers. It also declares the kf_ internal functions. 
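+ For example, in the default float build C_MUL(m,a,b) is a plain complex + multiply, while a FIXED_POINT build rounds each product through sround() + and scales every butterfly stage with C_FIXDIV to avoid overflow.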
+ */ + +static void kf_bfly2( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx * Fout2; + kiss_fft_cpx * tw1 = st->twiddles; + kiss_fft_cpx t; + Fout2 = Fout + m; + do{ + C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); + + C_MUL (t, *Fout2 , *tw1); + tw1 += fstride; + C_SUB( *Fout2 , *Fout , t ); + C_ADDTO( *Fout , t ); + ++Fout2; + ++Fout; + }while (--m); +} + +static void kf_bfly4( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + const size_t m + ) +{ + kiss_fft_cpx *tw1,*tw2,*tw3; + kiss_fft_cpx scratch[6]; + size_t k=m; + const size_t m2=2*m; + const size_t m3=3*m; + + + tw3 = tw2 = tw1 = st->twiddles; + + do { + C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); + + C_MUL(scratch[0],Fout[m] , *tw1 ); + C_MUL(scratch[1],Fout[m2] , *tw2 ); + C_MUL(scratch[2],Fout[m3] , *tw3 ); + + C_SUB( scratch[5] , *Fout, scratch[1] ); + C_ADDTO(*Fout, scratch[1]); + C_ADD( scratch[3] , scratch[0] , scratch[2] ); + C_SUB( scratch[4] , scratch[0] , scratch[2] ); + C_SUB( Fout[m2], *Fout, scratch[3] ); + tw1 += fstride; + tw2 += fstride*2; + tw3 += fstride*3; + C_ADDTO( *Fout , scratch[3] ); + + if(st->inverse) { + Fout[m].r = scratch[5].r - scratch[4].i; + Fout[m].i = scratch[5].i + scratch[4].r; + Fout[m3].r = scratch[5].r + scratch[4].i; + Fout[m3].i = scratch[5].i - scratch[4].r; + }else{ + Fout[m].r = scratch[5].r + scratch[4].i; + Fout[m].i = scratch[5].i - scratch[4].r; + Fout[m3].r = scratch[5].r - scratch[4].i; + Fout[m3].i = scratch[5].i + scratch[4].r; + } + ++Fout; + }while(--k); +} + +static void kf_bfly3( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + size_t m + ) +{ + size_t k=m; + const size_t m2 = 2*m; + kiss_fft_cpx *tw1,*tw2; + kiss_fft_cpx scratch[5]; + kiss_fft_cpx epi3; + epi3 = st->twiddles[fstride*m]; + + tw1=tw2=st->twiddles; + + do{ + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); + + C_MUL(scratch[1],Fout[m] , *tw1); + C_MUL(scratch[2],Fout[m2] , *tw2); + + C_ADD(scratch[3],scratch[1],scratch[2]); + C_SUB(scratch[0],scratch[1],scratch[2]); + tw1 += fstride; + tw2 += fstride*2; + + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); + + C_MULBYSCALAR( scratch[0] , epi3.i ); + + C_ADDTO(*Fout,scratch[3]); + + Fout[m2].r = Fout[m].r + scratch[0].i; + Fout[m2].i = Fout[m].i - scratch[0].r; + + Fout[m].r -= scratch[0].i; + Fout[m].i += scratch[0].r; + + ++Fout; + }while(--k); +} + +static void kf_bfly5( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m + ) +{ + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; + int u; + kiss_fft_cpx scratch[13]; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx *tw; + kiss_fft_cpx ya,yb; + ya = twiddles[fstride*m]; + yb = twiddles[fstride*2*m]; + + Fout0=Fout; + Fout1=Fout0+m; + Fout2=Fout0+2*m; + Fout3=Fout0+3*m; + Fout4=Fout0+4*m; + + tw=st->twiddles; + for ( u=0; u<m; ++u ) { + C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); + scratch[0] = *Fout0; + + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); + + C_ADD( scratch[7],scratch[1],scratch[4]); + C_SUB( scratch[10],scratch[1],scratch[4]); + C_ADD( scratch[8],scratch[2],scratch[3]); + C_SUB( scratch[9],scratch[2],scratch[3]); + + Fout0->r += scratch[7].r + scratch[8].r; + Fout0->i += scratch[7].i + scratch[8].i; + + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); + + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); + + C_SUB(*Fout1,scratch[5],scratch[6]); + C_ADD(*Fout4,scratch[5],scratch[6]); + + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); +
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); + + C_ADD(*Fout2,scratch[11],scratch[12]); + C_SUB(*Fout3,scratch[11],scratch[12]); + + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; + } +} + +/* perform the butterfly for one stage of a mixed radix FFT */ +static void kf_bfly_generic( + kiss_fft_cpx * Fout, + const size_t fstride, + const kiss_fft_cfg st, + int m, + int p + ) +{ + int u,k,q1,q; + kiss_fft_cpx * twiddles = st->twiddles; + kiss_fft_cpx t; + int Norig = st->nfft; + + kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p); + + for ( u=0; u<m; ++u ) { + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + scratch[q1] = Fout[ k ]; + C_FIXDIV(scratch[q1],p); + k += m; + } + + k=u; + for ( q1=0 ; q1<p ; ++q1 ) { + int twidx=0; + Fout[ k ] = scratch[0]; + for (q=1;q<p;++q ) { + twidx += fstride * k; + if (twidx>=Norig) twidx-=Norig; + C_MUL(t,scratch[q] , twiddles[twidx] ); + C_ADDTO( Fout[ k ] ,t); + } + k += m; + } + } + KISS_FFT_TMP_FREE(scratch); +} + +static +void kf_work( + kiss_fft_cpx * Fout, + const kiss_fft_cpx * f, + const size_t fstride, + int in_stride, + int * factors, + const kiss_fft_cfg st + ) +{ + kiss_fft_cpx * Fout_beg=Fout; + const int p=*factors++; /* the radix */ + const int m=*factors++; /* stage's fft length/p */ + const kiss_fft_cpx * Fout_end = Fout + p*m; + +#ifdef _OPENMP + // use openmp extensions at the + // top-level (not recursive) + if (fstride==1 && p<=5) + { + int k; + + // execute the p different work units in different threads +# pragma omp parallel for + for (k=0;k<p;++k) + kf_work( Fout +k*m, f+ fstride*in_stride*k, fstride*p, in_stride, factors, st); + // all threads have joined by this point + + for (k=0;k<p;++k) { + int twidx=0; + int j; + for (j=k*m;j<(k+1)*m;++j) { + kiss_fft_cpx t; + if (twidx>=st->nfft) twidx-=st->nfft; + C_MUL( t, Fout[j] , st->twiddles[twidx] ); + Fout[j] = t; + twidx += fstride*k; + } + } + return; + } +#endif + + if (m==1) { + do{ + *Fout = *f; + f += fstride*in_stride; + }while(++Fout != Fout_end ); + }else{ + do{ + // recursive call: + // DFT of size m*p performed by doing + // p instances of smaller DFTs of size m, + // each one takes a decimated version of the input + kf_work( Fout , f, fstride*p, in_stride, factors,st); + f += fstride*in_stride; + }while( (Fout += m) != Fout_end ); + } + + Fout=Fout_beg; + + // recombine the p smaller DFTs + switch (p) { + case 2: kf_bfly2(Fout,fstride,st,m); break; + case 3: kf_bfly3(Fout,fstride,st,m); break; + case 4: kf_bfly4(Fout,fstride,st,m); break; + case 5: kf_bfly5(Fout,fstride,st,m); break; + default: kf_bfly_generic(Fout,fstride,st,m,p); break; + } +} + +/* facbuf is populated by p1,m1,p2,m2, ... + where + p[i] * m[i] = m[i-1] + m0 = n */ +static +void kf_factor(int n,int * facbuf) +{ + int p=4; + double floor_sqrt; + floor_sqrt = floor( sqrt((double)n) ); + + /*factor out powers of 4, powers of 2, then any remaining primes */ + do { + while (n % p) { + switch (p) { + case 4: p = 2; break; + case 2: p = 3; break; + default: p += 2; break; + } + if (p > floor_sqrt) + p = n; /* no more factors, skip to end */ + } + n /= p; + *facbuf++ = p; + *facbuf++ = n; + } while (n > 1); +} + +/* + * + * User-callable function to allocate all necessary storage space for the fft. + * + * The return value is a contiguous block of memory, allocated with malloc. As such, + * it can be freed with free(), rather than a kiss_fft-specific function. + * */ +kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) +{ + kiss_fft_cfg st=NULL; + size_t memneeded = sizeof(struct kiss_fft_state) + + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/ + + if ( lenmem==NULL ) { + st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); + }else{ + if (mem != NULL && *lenmem >= memneeded) + st = (kiss_fft_cfg)mem; + *lenmem = memneeded; + } + if (st) { + int i; + st->nfft=nfft; + st->inverse = inverse_fft; + + for (i=0;i<nfft;++i) { + const double pi=3.141592653589793238462643383279502884197169399375105820974944; + double phase = -2*pi*i / nfft; + if (st->inverse) + phase *= -1; + kf_cexp(st->twiddles+i, phase ); + } + + kf_factor(nfft,st->factors); + } + return st; +} + + +void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) +{ + if (fin == fout) { + //NOTE: this is not really an in-place FFT algorithm.
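+ //(kf_work reads fin while it writes fout, so the two buffers must be distinct.)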
+ //It just performs an out-of-place FFT into a temp buffer + kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft); + kf_work(tmpbuf,fin,1,in_stride, st->factors,st); + memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); + KISS_FFT_TMP_FREE(tmpbuf); + }else{ + kf_work( fout, fin, 1,in_stride, st->factors,st ); + } +} + +void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) +{ + kiss_fft_stride(cfg,fin,fout,1); +} + + +void kiss_fft_cleanup(void) +{ + // nothing needed any more +} + +int kiss_fft_next_fast_size(int n) +{ + while(1) { + int m=n; + while ( (m%2) == 0 ) m/=2; + while ( (m%3) == 0 ) m/=3; + while ( (m%5) == 0 ) m/=5; + if (m<=1) + break; /* n is completely factorable by twos, threes, and fives */ + n++; + } + return n; +} diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/src/kiss_fftr.c b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/src/kiss_fftr.c new file mode 100644 index 0000000..0c7e731 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/kiss_fft/src/kiss_fftr.c @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft + * + * SPDX-License-Identifier: BSD-3-Clause + * See COPYING file for more information. + */ + +#include "kiss_fftr.h" +#include "_kiss_fft_guts.h" + +struct kiss_fftr_state{ + kiss_fft_cfg substate; + kiss_fft_cpx * tmpbuf; + kiss_fft_cpx * super_twiddles; +#ifdef USE_SIMD + void * pad; +#endif +}; + +kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem) +{ + int i; + kiss_fftr_cfg st = NULL; + size_t subsize = 0, memneeded; + + if (nfft & 1) { +// fprintf(stderr,"Real FFT optimization must be even.\n"); + return NULL; + } + nfft >>= 1; + + kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize); + memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2); + + if (lenmem == NULL) { + st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded); + } else { + if (*lenmem >= memneeded) + st = (kiss_fftr_cfg) mem; + *lenmem = memneeded; + } + if (!st) + return NULL; + + st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */ + st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); + st->super_twiddles = st->tmpbuf + nfft; + kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); + + for (i = 0; i < nfft/2; ++i) { + double phase = + -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5); + if (inverse_fft) + phase *= -1; + kf_cexp (st->super_twiddles+i,phase); + } + return st; +} + +void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) +{ + /* input buffer timedata is stored row-wise */ + int k,ncfft; + kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; + + if ( st->substate->inverse) { +// fprintf(stderr,"kiss fft usage error: improper alloc\n"); + exit(1); + } + + ncfft = st->substate->nfft; + + /*perform the parallel fft of two real signals packed in real,imag*/ + kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); + /* The real part of the DC element of the frequency spectrum in st->tmpbuf + * contains the sum of the even-numbered elements of the input time sequence + * The imag part is the sum of the odd-numbered elements + * + * The sum of tdc.r and tdc.i is the sum of the input time sequence. 
+ * yielding DC of input time sequence + * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... + * yielding Nyquist bin of input time sequence + */ + + tdc.r = st->tmpbuf[0].r; + tdc.i = st->tmpbuf[0].i; + C_FIXDIV(tdc,2); + CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); + CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); + freqdata[0].r = tdc.r + tdc.i; + freqdata[ncfft].r = tdc.r - tdc.i; +#ifdef USE_SIMD + freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); +#else + freqdata[ncfft].i = freqdata[0].i = 0; +#endif + + for ( k=1;k <= ncfft/2 ; ++k ) { + fpk = st->tmpbuf[k]; + fpnk.r = st->tmpbuf[ncfft-k].r; + fpnk.i = - st->tmpbuf[ncfft-k].i; + C_FIXDIV(fpk,2); + C_FIXDIV(fpnk,2); + + C_ADD( f1k, fpk , fpnk ); + C_SUB( f2k, fpk , fpnk ); + C_MUL( tw , f2k , st->super_twiddles[k-1]); + + freqdata[k].r = HALF_OF(f1k.r + tw.r); + freqdata[k].i = HALF_OF(f1k.i + tw.i); + freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); + freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); + } +} + +void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) +{ + /* input buffer timedata is stored row-wise */ + int k, ncfft; + + if (st->substate->inverse == 0) { +// fprintf (stderr, "kiss fft usage error: improper alloc\n"); + exit (1); + } + + ncfft = st->substate->nfft; + + st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r; + st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r; + C_FIXDIV(st->tmpbuf[0],2); + + for (k = 1; k <= ncfft / 2; ++k) { + kiss_fft_cpx fk, fnkc, fek, fok, tmp; + fk = freqdata[k]; + fnkc.r = freqdata[ncfft - k].r; + fnkc.i = -freqdata[ncfft - k].i; + C_FIXDIV( fk , 2 ); + C_FIXDIV( fnkc , 2 ); + + C_ADD (fek, fk, fnkc); + C_SUB (tmp, fk, fnkc); + C_MUL (fok, tmp, st->super_twiddles[k-1]); + C_ADD (st->tmpbuf[k], fek, fok); + C_SUB (st->tmpbuf[ncfft - k], fek, fok); +#ifdef USE_SIMD + st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); +#else + st->tmpbuf[ncfft - k].i *= -1; +#endif + } + kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); +} diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/CMakeLists.txt new file mode 100644 index 0000000..e1f115f --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(inc) +AUX_SOURCE_DIRECTORY(src DIR_LIB_JSON_SRCS) +add_library(lib_json ${DIR_LIB_JSON_SRCS}) diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/autolink.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/autolink.h new file mode 100644 index 0000000..37c9258 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/autolink.h @@ -0,0 +1,19 @@ +#ifndef JSON_AUTOLINK_H_INCLUDED +# define JSON_AUTOLINK_H_INCLUDED + +# include "config.h" + +# ifdef JSON_IN_CPPTL +# include <cpptl/cpptl_autolink.h> +# endif + +# if !defined(JSON_NO_AUTOLINK) && !defined(JSON_DLL_BUILD) && !defined(JSON_IN_CPPTL) +# define CPPTL_AUTOLINK_NAME "json" +# undef CPPTL_AUTOLINK_DLL +# ifdef JSON_DLL +# define CPPTL_AUTOLINK_DLL +# endif +# include "autolink.h" +# endif + +#endif // JSON_AUTOLINK_H_INCLUDED diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/config.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/config.h new file mode 100644 index 0000000..5d334cb --- /dev/null +++
b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/config.h @@ -0,0 +1,43 @@ +#ifndef JSON_CONFIG_H_INCLUDED +# define JSON_CONFIG_H_INCLUDED + +/// If defined, indicates that json library is embedded in CppTL library. +//# define JSON_IN_CPPTL 1 + +/// If defined, indicates that json may leverage CppTL library +//# define JSON_USE_CPPTL 1 +/// If defined, indicates that cpptl vector based map should be used instead of std::map +/// as Value container. +//# define JSON_USE_CPPTL_SMALLMAP 1 +/// If defined, indicates that Json specific container should be used +/// (hash table & simple deque container with customizable allocator). +/// THIS FEATURE IS STILL EXPERIMENTAL! +//# define JSON_VALUE_USE_INTERNAL_MAP 1 +/// Force usage of standard new/malloc based allocator instead of memory pool based allocator. +/// The memory pool allocator uses an optimization (initializing Value and ValueInternalLink +/// as if they were PODs) that may cause some validation tools to report errors. +/// Only has effects if JSON_VALUE_USE_INTERNAL_MAP is defined. +//# define JSON_USE_SIMPLE_INTERNAL_ALLOCATOR 1 + +/// If defined, indicates that Json uses exceptions to report invalid type manipulation +/// instead of C assert macro. +# define JSON_USE_EXCEPTION 1 + +# ifdef JSON_IN_CPPTL +# include <cpptl/config.h> +# ifndef JSON_USE_CPPTL +# define JSON_USE_CPPTL 1 +# endif +# endif + +# ifdef JSON_IN_CPPTL +# define JSON_API CPPTL_API +# elif defined(JSON_DLL_BUILD) +# define JSON_API __declspec(dllexport) +# elif defined(JSON_DLL) +# define JSON_API __declspec(dllimport) +# else +# define JSON_API +# endif + +#endif // JSON_CONFIG_H_INCLUDED diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/forwards.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/forwards.h new file mode 100644 index 0000000..d0ce830 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/forwards.h @@ -0,0 +1,39 @@ +#ifndef JSON_FORWARDS_H_INCLUDED +# define JSON_FORWARDS_H_INCLUDED + +# include "config.h" + +namespace Json { + + // writer.h + class FastWriter; + class StyledWriter; + + // reader.h + class Reader; + + // features.h + class Features; + + // value.h + typedef int Int; + typedef unsigned int UInt; + class StaticString; + class Path; + class PathArgument; + class Value; + class ValueIteratorBase; + class ValueIterator; + class ValueConstIterator; +#ifdef JSON_VALUE_USE_INTERNAL_MAP + class ValueAllocator; + class ValueMapAllocator; + class ValueInternalLink; + class ValueInternalArray; + class ValueInternalMap; +#endif // #ifdef JSON_VALUE_USE_INTERNAL_MAP + +} // namespace Json + + +#endif // JSON_FORWARDS_H_INCLUDED diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json.h new file mode 100644 index 0000000..4c269fc --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json.h @@ -0,0 +1,10 @@ +#ifndef JSON_JSON_H_INCLUDED +# define JSON_JSON_H_INCLUDED + +# include "autolink.h" +# include "value.h" +# include "reader.h" +# include "writer.h" +# include "lib_features.h" + +#endif // JSON_JSON_H_INCLUDED diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_batchallocator.h
b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_batchallocator.h new file mode 100644 index 0000000..87ea5ed --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_batchallocator.h @@ -0,0 +1,125 @@ +#ifndef JSONCPP_BATCHALLOCATOR_H_INCLUDED +# define JSONCPP_BATCHALLOCATOR_H_INCLUDED + +# include <stdlib.h> +# include <assert.h> + +# ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION + +namespace Json { + +/* Fast memory allocator. + * + * This memory allocator allocates memory for a batch of objects (specified by + * the page size, i.e. the number of objects in each page). + * + * It does not allow the destruction of a single object. All the allocated objects + * can be destroyed at once. The memory can be either released or reused for future + * allocation. + * + * The in-place new operator must be used to construct the object using the pointer + * returned by allocate. + */ +template<typename AllocatedType + ,const unsigned int objectPerAllocation> +class BatchAllocator +{ +public: + typedef AllocatedType Type; + + BatchAllocator( unsigned int objectsPerPage = 255 ) + : freeHead_( 0 ) + , objectsPerPage_( objectsPerPage ) + { +// printf( "Size: %d => %s\n", sizeof(AllocatedType), typeid(AllocatedType).name() ); + assert( sizeof(AllocatedType) * objectPerAllocation >= sizeof(AllocatedType *) ); // We must be able to store a slist in the object free space. + assert( objectsPerPage >= 16 ); + batches_ = allocateBatch( 0 ); // allocate a dummy page + currentBatch_ = batches_; + } + + ~BatchAllocator() + { + for ( BatchInfo *batch = batches_; batch; ) + { + BatchInfo *nextBatch = batch->next_; + free( batch ); + batch = nextBatch; + } + } + + /// allocate space for an array of objectPerAllocation objects. + /// @warning it is the responsibility of the caller to call the objects' constructors. + AllocatedType *allocate() + { + if ( freeHead_ ) // returns node from free list. + { + AllocatedType *object = freeHead_; + freeHead_ = *(AllocatedType **)object; + return object; + } + if ( currentBatch_->used_ == currentBatch_->end_ ) + { + currentBatch_ = currentBatch_->next_; + while ( currentBatch_ && currentBatch_->used_ == currentBatch_->end_ ) + currentBatch_ = currentBatch_->next_; + + if ( !currentBatch_ ) // no free batch found, allocate a new one + { + currentBatch_ = allocateBatch( objectsPerPage_ ); + currentBatch_->next_ = batches_; // insert at the head of the list + batches_ = currentBatch_; + } + } + AllocatedType *allocated = currentBatch_->used_; + currentBatch_->used_ += objectPerAllocation; + return allocated; + } + + /// Release the object. + /// @warning it is the responsibility of the caller to actually destruct the object. + void release( AllocatedType *object ) + { + assert( object != 0 ); + *(AllocatedType **)object = freeHead_; + freeHead_ = object; + } + +private: + struct BatchInfo + { + BatchInfo *next_; + AllocatedType *used_; + AllocatedType *end_; + AllocatedType buffer_[objectPerAllocation]; + }; + + // disabled copy constructor and assignment operator.
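+ // (Declared private and never defined: the pre-C++11 way to forbid copying, + // equivalent to '= delete'.) + // Usage sketch (hypothetical caller; Foo is not part of this header): + // BatchAllocator<Foo,1> allocator; + // Foo *f = new ( allocator.allocate() ) Foo(); // construct in place + // f->~Foo(); allocator.release( f ); // destruct, then recycle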
+ BatchAllocator( const BatchAllocator & ); + void operator =( const BatchAllocator &); + + static BatchInfo *allocateBatch( unsigned int objectsPerPage ) + { + const unsigned int mallocSize = sizeof(BatchInfo) - sizeof(AllocatedType)* objectPerAllocation + + sizeof(AllocatedType) * objectPerAllocation * objectsPerPage; + BatchInfo *batch = static_cast<BatchInfo*>( malloc( mallocSize ) ); + batch->next_ = 0; + batch->used_ = batch->buffer_; + batch->end_ = batch->buffer_ + objectsPerPage; + return batch; + } + + BatchInfo *batches_; + BatchInfo *currentBatch_; + /// Head of a single linked list within the allocated space of freed objects + AllocatedType *freeHead_; + unsigned int objectsPerPage_; +}; + + +} // namespace Json + +# endif // ifndef JSONCPP_DOC_INCLUDE_IMPLEMENTATION + +#endif // JSONCPP_BATCHALLOCATOR_H_INCLUDED + diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_internalarray.inl b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_internalarray.inl new file mode 100644 index 0000000..9b985d2 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_internalarray.inl @@ -0,0 +1,448 @@ +// included by json_value.cpp +// everything is within Json namespace + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueInternalArray +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueArrayAllocator::~ValueArrayAllocator() +{ +} + +// ////////////////////////////////////////////////////////////////// +// class DefaultValueArrayAllocator +// ////////////////////////////////////////////////////////////////// +#ifdef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR +class DefaultValueArrayAllocator : public ValueArrayAllocator +{ +public: // overridden from ValueArrayAllocator + virtual ~DefaultValueArrayAllocator() + { + } + + virtual ValueInternalArray *newArray() + { + return new ValueInternalArray(); + } + + virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other ) + { + return new ValueInternalArray( other ); + } + + virtual void destructArray( ValueInternalArray *array ) + { + delete array; + } + + virtual void reallocateArrayPageIndex( Value **&indexes, + ValueInternalArray::PageIndex &indexCount, + ValueInternalArray::PageIndex minNewIndexCount ) + { + ValueInternalArray::PageIndex newIndexCount = (indexCount*3)/2 + 1; + if ( minNewIndexCount > newIndexCount ) + newIndexCount = minNewIndexCount; + void *newIndexes = realloc( indexes, sizeof(Value*) * newIndexCount ); + if ( !newIndexes ) + throw std::bad_alloc(); + indexCount = newIndexCount; + indexes = static_cast<Value **>( newIndexes ); + } + virtual void releaseArrayPageIndex( Value **indexes, + ValueInternalArray::PageIndex indexCount ) + { + if ( indexes ) + free( indexes ); + } + + virtual Value *allocateArrayPage() + { + return static_cast<Value *>( malloc( sizeof(Value) * ValueInternalArray::itemsPerPage ) ); + } + + virtual void releaseArrayPage( Value *value ) + { + if ( value ) + free( value ); + } +}; + +#else // #ifdef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR +/// @todo make this thread-safe (lock when accessing batch allocator) +class DefaultValueArrayAllocator : public
ValueArrayAllocator +{ +public: // overridden from ValueArrayAllocator + virtual ~DefaultValueArrayAllocator() + { + } + + virtual ValueInternalArray *newArray() + { + ValueInternalArray *array = arraysAllocator_.allocate(); + new (array) ValueInternalArray(); // placement new + return array; + } + + virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other ) + { + ValueInternalArray *array = arraysAllocator_.allocate(); + new (array) ValueInternalArray( other ); // placement new + return array; + } + + virtual void destructArray( ValueInternalArray *array ) + { + if ( array ) + { + array->~ValueInternalArray(); + arraysAllocator_.release( array ); + } + } + + virtual void reallocateArrayPageIndex( Value **&indexes, + ValueInternalArray::PageIndex &indexCount, + ValueInternalArray::PageIndex minNewIndexCount ) + { + ValueInternalArray::PageIndex newIndexCount = (indexCount*3)/2 + 1; + if ( minNewIndexCount > newIndexCount ) + newIndexCount = minNewIndexCount; + void *newIndexes = realloc( indexes, sizeof(Value*) * newIndexCount ); + if ( !newIndexes ) + throw std::bad_alloc(); + indexCount = newIndexCount; + indexes = static_cast<Value **>( newIndexes ); + } + virtual void releaseArrayPageIndex( Value **indexes, + ValueInternalArray::PageIndex indexCount ) + { + if ( indexes ) + free( indexes ); + } + + virtual Value *allocateArrayPage() + { + return static_cast<Value *>( pagesAllocator_.allocate() ); + } + + virtual void releaseArrayPage( Value *value ) + { + if ( value ) + pagesAllocator_.release( value ); + } +private: + BatchAllocator<ValueInternalArray,1> arraysAllocator_; + BatchAllocator<Value,ValueInternalArray::itemsPerPage> pagesAllocator_; +}; +#endif // #ifdef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR + +static ValueArrayAllocator *&arrayAllocator() +{ + static DefaultValueArrayAllocator defaultAllocator; + static ValueArrayAllocator *arrayAllocator = &defaultAllocator; + return arrayAllocator; +} + +static struct DummyArrayAllocatorInitializer { + DummyArrayAllocatorInitializer() + { + arrayAllocator(); // ensure arrayAllocator() statics are initialized before main().
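+ // (The namespace-scope instance declared just below runs this constructor + // during static initialization, so the function-local statics inside + // arrayAllocator() are constructed before any other code can call it.)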
+ } +} dummyArrayAllocatorInitializer; + +// ////////////////////////////////////////////////////////////////// +// class ValueInternalArray +// ////////////////////////////////////////////////////////////////// +bool +ValueInternalArray::equals( const IteratorState &x, + const IteratorState &other ) +{ + return x.array_ == other.array_ + && x.currentItemIndex_ == other.currentItemIndex_ + && x.currentPageIndex_ == other.currentPageIndex_; +} + + +void +ValueInternalArray::increment( IteratorState &it ) +{ + JSON_ASSERT_MESSAGE( it.array_ && + (it.currentPageIndex_ - it.array_->pages_)*itemsPerPage + it.currentItemIndex_ + != it.array_->size_, + "ValueInternalArray::increment(): moving iterator beyond end" ); + ++(it.currentItemIndex_); + if ( it.currentItemIndex_ == itemsPerPage ) + { + it.currentItemIndex_ = 0; + ++(it.currentPageIndex_); + } +} + + +void +ValueInternalArray::decrement( IteratorState &it ) +{ + JSON_ASSERT_MESSAGE( it.array_ && it.currentPageIndex_ == it.array_->pages_ + && it.currentItemIndex_ == 0, + "ValueInternalArray::decrement(): moving iterator beyond end" ); + if ( it.currentItemIndex_ == 0 ) + { + it.currentItemIndex_ = itemsPerPage-1; + --(it.currentPageIndex_); + } + else + { + --(it.currentItemIndex_); + } +} + + +Value & +ValueInternalArray::unsafeDereference( const IteratorState &it ) +{ + return (*(it.currentPageIndex_))[it.currentItemIndex_]; +} + + +Value & +ValueInternalArray::dereference( const IteratorState &it ) +{ + JSON_ASSERT_MESSAGE( it.array_ && + (it.currentPageIndex_ - it.array_->pages_)*itemsPerPage + it.currentItemIndex_ + < it.array_->size_, + "ValueInternalArray::dereference(): dereferencing invalid iterator" ); + return unsafeDereference( it ); +} + +void +ValueInternalArray::makeBeginIterator( IteratorState &it ) const +{ + it.array_ = const_cast<ValueInternalArray *>( this ); + it.currentItemIndex_ = 0; + it.currentPageIndex_ = pages_; +} + + +void +ValueInternalArray::makeIterator( IteratorState &it, ArrayIndex index ) const +{ + it.array_ = const_cast<ValueInternalArray *>( this ); + it.currentItemIndex_ = index % itemsPerPage; + it.currentPageIndex_ = pages_ + index / itemsPerPage; +} + + +void +ValueInternalArray::makeEndIterator( IteratorState &it ) const +{ + makeIterator( it, size_ ); +} + + +ValueInternalArray::ValueInternalArray() + : pages_( 0 ) + , size_( 0 ) + , pageCount_( 0 ) +{ +} + + +ValueInternalArray::ValueInternalArray( const ValueInternalArray &other ) + : pages_( 0 ) + , pageCount_( 0 ) + , size_( other.size_ ) +{ + PageIndex minNewPages = other.size_ / itemsPerPage; + arrayAllocator()->reallocateArrayPageIndex( pages_, pageCount_, minNewPages ); + JSON_ASSERT_MESSAGE( pageCount_ >= minNewPages, + "ValueInternalArray::reserve(): bad reallocation" ); + IteratorState itOther; + other.makeBeginIterator( itOther ); + Value *value; + for ( ArrayIndex index = 0; index < size_; ++index, increment(itOther) ) + { + if ( index % itemsPerPage == 0 ) + { + PageIndex pageIndex = index / itemsPerPage; + value = arrayAllocator()->allocateArrayPage(); + pages_[pageIndex] = value; + } + new (value) Value( dereference( itOther ) ); + } +} + + +ValueInternalArray & +ValueInternalArray::operator =( const ValueInternalArray &other ) +{ + ValueInternalArray temp( other ); + swap( temp ); + return *this; +} + + +ValueInternalArray::~ValueInternalArray() +{ + // destroy all constructed items + IteratorState it; + IteratorState itEnd; + makeBeginIterator( it); + makeEndIterator( itEnd ); + for ( ; !equals(it,itEnd); increment(it) ) + { + Value *value = &dereference(it); +
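// Explicit destructor call: elements were constructed with placement new on + // allocator-owned pages, so the pages themselves are released separately below. +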
+      value->~Value();
+   }
+   // release all pages (round up: a partially filled last page must be released too)
+   PageIndex lastPageIndex = (size_ + itemsPerPage - 1) / itemsPerPage;
+   for ( PageIndex pageIndex = 0; pageIndex < lastPageIndex; ++pageIndex )
+      arrayAllocator()->releaseArrayPage( pages_[pageIndex] );
+   // release pages index
+   arrayAllocator()->releaseArrayPageIndex( pages_, pageCount_ );
+}
+
+
+void
+ValueInternalArray::swap( ValueInternalArray &other )
+{
+   Value **tempPages = pages_;
+   pages_ = other.pages_;
+   other.pages_ = tempPages;
+   ArrayIndex tempSize = size_;
+   size_ = other.size_;
+   other.size_ = tempSize;
+   PageIndex tempPageCount = pageCount_;
+   pageCount_ = other.pageCount_;
+   other.pageCount_ = tempPageCount;
+}
+
+void
+ValueInternalArray::clear()
+{
+   ValueInternalArray dummy;
+   swap( dummy );
+}
+
+
+void
+ValueInternalArray::resize( ArrayIndex newSize )
+{
+   if ( newSize == 0 )
+      clear();
+   else if ( newSize < size_ )
+   {
+      IteratorState it;
+      IteratorState itEnd;
+      makeIterator( it, newSize );
+      makeIterator( itEnd, size_ );
+      for ( ; !equals(it,itEnd); increment(it) )
+      {
+         Value *value = &dereference(it);
+         value->~Value();
+      }
+      PageIndex pageIndex = (newSize + itemsPerPage - 1) / itemsPerPage;
+      PageIndex lastPageIndex = (size_ + itemsPerPage - 1) / itemsPerPage; // round up, as in the destructor
+      for ( ; pageIndex < lastPageIndex; ++pageIndex )
+         arrayAllocator()->releaseArrayPage( pages_[pageIndex] );
+      size_ = newSize;
+   }
+   else if ( newSize > size_ )
+      resolveReference( newSize - 1 ); // highest valid index is newSize-1
+}
+
+
+void
+ValueInternalArray::makeIndexValid( ArrayIndex index )
+{
+   // Need to enlarge page index ?
+   if ( index >= pageCount_ * itemsPerPage )
+   {
+      PageIndex minNewPages = (index + 1) / itemsPerPage;
+      arrayAllocator()->reallocateArrayPageIndex( pages_, pageCount_, minNewPages );
+      JSON_ASSERT_MESSAGE( pageCount_ >= minNewPages, "ValueInternalArray::reserve(): bad reallocation" );
+   }
+
+   // Need to allocate new pages ?
+   ArrayIndex nextPageIndex =
+      (size_ % itemsPerPage) != 0 ? size_ - (size_%itemsPerPage) + itemsPerPage
+                                  : size_;
+   if ( nextPageIndex <= index )
+   {
+      PageIndex pageIndex = nextPageIndex / itemsPerPage;
+      PageIndex pageToAllocate = (index - nextPageIndex) / itemsPerPage + 1;
+      for ( ; pageToAllocate-- > 0; ++pageIndex )
+         pages_[pageIndex] = arrayAllocator()->allocateArrayPage();
+   }
+
+   // Initialize all new entries
+   IteratorState it;
+   IteratorState itEnd;
+   makeIterator( it, size_ );
+   size_ = index + 1;
+   makeIterator( itEnd, size_ );
+   for ( ; !equals(it,itEnd); increment(it) )
+   {
+      Value *value = &dereference(it);
+      new (value) Value(); // Construct a default value using placement new
+   }
+}
+
+Value &
+ValueInternalArray::resolveReference( ArrayIndex index )
+{
+   if ( index >= size_ )
+      makeIndexValid( index );
+   return pages_[index/itemsPerPage][index%itemsPerPage];
+}
+
+Value *
+ValueInternalArray::find( ArrayIndex index ) const
+{
+   if ( index >= size_ )
+      return 0;
+   return &(pages_[index/itemsPerPage][index%itemsPerPage]);
+}
+
+ValueInternalArray::ArrayIndex
+ValueInternalArray::size() const
+{
+   return size_;
+}
+
+int
+ValueInternalArray::distance( const IteratorState &x, const IteratorState &y )
+{
+   return indexOf(y) - indexOf(x);
+}
+
+
+ValueInternalArray::ArrayIndex
+ValueInternalArray::indexOf( const IteratorState &iterator )
+{
+   if ( !iterator.array_ )
+      return ArrayIndex(-1);
+   return ArrayIndex(
+      (iterator.currentPageIndex_ - iterator.array_->pages_) * itemsPerPage
+      + iterator.currentItemIndex_ );
+}
+
+
+int
+ValueInternalArray::compare( const ValueInternalArray &other ) const
+{
+   int sizeDiff( size_ - other.size_ );
+   if ( sizeDiff != 0 )
+      return sizeDiff;
+
+   for ( ArrayIndex index =0; index < size_; ++index )
+   {
+      int diff = pages_[index/itemsPerPage][index%itemsPerPage].compare(
+         other.pages_[index/itemsPerPage][index%itemsPerPage] );
+      if ( diff != 0 )
+         return diff;
+   }
+   return 0;
+}
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_internalmap.inl b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_internalmap.inl
new file mode 100644
index 0000000..1977148
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_internalmap.inl
@@ -0,0 +1,607 @@
+// included by json_value.cpp
+// everything is within Json namespace
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// class ValueInternalMap
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+
+/** \internal MUST be safely initialized using memset( this, 0, sizeof(ValueInternalLink) );
+ * This optimization is used by the fast allocator.
+ */
+ValueInternalLink::ValueInternalLink()
+   : previous_( 0 )
+   , next_( 0 )
+{
+}
+
+ValueInternalLink::~ValueInternalLink()
+{
+   for ( int index =0; index < itemPerLink; ++index )
+   {
+      if ( !items_[index].isItemAvailable() )
+      {
+         if ( !items_[index].isMemberNameStatic() )
+            free( keys_[index] );
+      }
+      else
+         break;
+   }
+}
+
+
+
+ValueMapAllocator::~ValueMapAllocator()
+{
+}
+
+#ifdef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR
+class DefaultValueMapAllocator : public ValueMapAllocator
+{
+public: // overridden from ValueMapAllocator
+   virtual ValueInternalMap *newMap()
+   {
+      return new ValueInternalMap();
+   }
+
+   virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other )
+   {
+      return new ValueInternalMap( other );
+   }
+
+   virtual void destructMap( ValueInternalMap *map )
+   {
+      delete map;
+   }
+
+   virtual ValueInternalLink *allocateMapBuckets( unsigned int size )
+   {
+      return new ValueInternalLink[size];
+   }
+
+   virtual void releaseMapBuckets( ValueInternalLink *links )
+   {
+      delete [] links;
+   }
+
+   virtual ValueInternalLink *allocateMapLink()
+   {
+      return new ValueInternalLink();
+   }
+
+   virtual void releaseMapLink( ValueInternalLink *link )
+   {
+      delete link;
+   }
+};
+#else
+/// @todo make this thread-safe (lock when accessing batch allocator)
+class DefaultValueMapAllocator : public ValueMapAllocator
+{
+public: // overridden from ValueMapAllocator
+   virtual ValueInternalMap *newMap()
+   {
+      ValueInternalMap *map = mapsAllocator_.allocate();
+      new (map) ValueInternalMap(); // placement new
+      return map;
+   }
+
+   virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other )
+   {
+      ValueInternalMap *map = mapsAllocator_.allocate();
+      new (map) ValueInternalMap( other ); // placement new
+      return map;
+   }
+
+   virtual void destructMap( ValueInternalMap *map )
+   {
+      if ( map )
+      {
+         map->~ValueInternalMap();
+         mapsAllocator_.release( map );
+      }
+   }
+
+   virtual ValueInternalLink *allocateMapBuckets( unsigned int size )
+   {
+      return new ValueInternalLink[size];
+   }
+
+   virtual void releaseMapBuckets( ValueInternalLink *links )
+   {
+      delete [] links;
+   }
+
+   virtual ValueInternalLink *allocateMapLink()
+   {
+      ValueInternalLink *link = linksAllocator_.allocate();
+      memset( link, 0, sizeof(ValueInternalLink) );
+      return link;
+   }
+
+   virtual void releaseMapLink( ValueInternalLink *link )
+   {
+      link->~ValueInternalLink();
+      linksAllocator_.release( link );
+   }
+private:
+   BatchAllocator<ValueInternalMap,1> mapsAllocator_;
+   BatchAllocator<ValueInternalLink,1> linksAllocator_;
+};
+#endif
+
+static ValueMapAllocator *&mapAllocator()
+{
+   static DefaultValueMapAllocator defaultAllocator;
+   static ValueMapAllocator *mapAllocator = &defaultAllocator;
+   return mapAllocator;
+}
+
+static struct DummyMapAllocatorInitializer {
+   DummyMapAllocatorInitializer()
+   {
+      mapAllocator(); // ensure mapAllocator() statics are initialized before main().
+   }
+} dummyMapAllocatorInitializer;
+
+
+
+// h(K) = value * K >> w ; with w = 32 & K prime w.r.t. 2^32.
+
+/*
+use linked list hash map.
+buckets array is a container.
+linked list element contains 6 key/values. (memory = (16+4) * 6 + 4 = 124)
+values have extra state: valid, available, deleted
+*/
+
+
+ValueInternalMap::ValueInternalMap()
+   : buckets_( 0 )
+   , tailLink_( 0 )
+   , bucketsSize_( 0 )
+   , itemCount_( 0 )
+{
+}
+
+
+ValueInternalMap::ValueInternalMap( const ValueInternalMap &other )
+   : buckets_( 0 )
+   , tailLink_( 0 )
+   , bucketsSize_( 0 )
+   , itemCount_( 0 )
+{
+   reserve( other.itemCount_ );
+   IteratorState it;
+   IteratorState itEnd;
+   other.makeBeginIterator( it );
+   other.makeEndIterator( itEnd );
+   for ( ; !equals(it,itEnd); increment(it) )
+   {
+      bool isStatic;
+      const char *memberName = key( it, isStatic );
+      const Value &aValue = value( it );
+      resolveReference(memberName, isStatic) = aValue;
+   }
+}
+
+
+ValueInternalMap &
+ValueInternalMap::operator =( const ValueInternalMap &other )
+{
+   ValueInternalMap dummy( other );
+   swap( dummy );
+   return *this;
+}
+
+
+ValueInternalMap::~ValueInternalMap()
+{
+   if ( buckets_ )
+   {
+      for ( BucketIndex bucketIndex =0; bucketIndex < bucketsSize_; ++bucketIndex )
+      {
+         ValueInternalLink *link = buckets_[bucketIndex].next_;
+         while ( link )
+         {
+            ValueInternalLink *linkToRelease = link;
+            link = link->next_;
+            mapAllocator()->releaseMapLink( linkToRelease );
+         }
+      }
+      mapAllocator()->releaseMapBuckets( buckets_ );
+   }
+}
+
+
+void
+ValueInternalMap::swap( ValueInternalMap &other )
+{
+   ValueInternalLink *tempBuckets = buckets_;
+   buckets_ = other.buckets_;
+   other.buckets_ = tempBuckets;
+   ValueInternalLink *tempTailLink = tailLink_;
+   tailLink_ = other.tailLink_;
+   other.tailLink_ = tempTailLink;
+   BucketIndex tempBucketsSize = bucketsSize_;
+   bucketsSize_ = other.bucketsSize_;
+   other.bucketsSize_ = tempBucketsSize;
+   BucketIndex tempItemCount = itemCount_;
+   itemCount_ = other.itemCount_;
+   other.itemCount_ = tempItemCount;
+}
+
+
+void
+ValueInternalMap::clear()
+{
+   ValueInternalMap dummy;
+   swap( dummy );
+}
+
+
+ValueInternalMap::BucketIndex
+ValueInternalMap::size() const
+{
+   return itemCount_;
+}
+
+bool
+ValueInternalMap::reserveDelta( BucketIndex growth )
+{
+   return reserve( itemCount_ + growth );
+}
+
+bool
+ValueInternalMap::reserve( BucketIndex newItemCount )
+{
+   if ( !buckets_ && newItemCount > 0 )
+   {
+      buckets_ = mapAllocator()->allocateMapBuckets( 1 );
+      bucketsSize_ = 1;
+      tailLink_ = &buckets_[0];
+   }
+// BucketIndex idealBucketCount = (newItemCount + ValueInternalLink::itemPerLink) / ValueInternalLink::itemPerLink;
+   return true;
+}
+
+
+const Value *
+ValueInternalMap::find( const char *key ) const
+{
+   if ( !bucketsSize_ )
+      return 0;
+   HashKey hashedKey = hash( key );
+   BucketIndex bucketIndex = hashedKey % bucketsSize_;
+   for ( const ValueInternalLink *current = &buckets_[bucketIndex];
+         current != 0;
+         current = current->next_ )
+   {
+      for ( BucketIndex index=0; index < ValueInternalLink::itemPerLink; ++index )
+      {
+         if ( current->items_[index].isItemAvailable() )
+            return 0;
+         if ( strcmp( key, current->keys_[index] ) == 0 )
+            return &current->items_[index];
+      }
+   }
+   return 0;
+}
+
+
+Value *
+ValueInternalMap::find( const char *key )
+{
+   const ValueInternalMap *constThis = this;
+   return const_cast<Value *>( constThis->find( key ) );
+}
+
+
+Value &
+ValueInternalMap::resolveReference( const char *key,
+                                    bool isStatic )
+{
+   HashKey hashedKey = hash( key );
+   if ( bucketsSize_ )
+   {
+      BucketIndex bucketIndex = hashedKey % bucketsSize_;
+      ValueInternalLink **previous = 0;
+      BucketIndex index;
+      for ( ValueInternalLink *current = &buckets_[bucketIndex];
+            current != 0;
+            previous = &current->next_, current = current->next_ )
+      {
+         for ( index=0; index < ValueInternalLink::itemPerLink; ++index )
+         {
+            if ( current->items_[index].isItemAvailable() )
+               return setNewItem( key, isStatic, current, index );
+            if ( strcmp( key, current->keys_[index] ) == 0 )
+               return current->items_[index];
+         }
+      }
+   }
+
+   reserveDelta( 1 );
+   return unsafeAdd( key, isStatic, hashedKey );
+}
+
+
+void
+ValueInternalMap::remove( const char *key )
+{
+   HashKey hashedKey = hash( key );
+   if ( !bucketsSize_ )
+      return;
+   BucketIndex bucketIndex = hashedKey % bucketsSize_;
+   for ( ValueInternalLink *link = &buckets_[bucketIndex];
+         link != 0;
+         link = link->next_ )
+   {
+      BucketIndex index;
+      for ( index =0; index < ValueInternalLink::itemPerLink; ++index )
+      {
+         if ( link->items_[index].isItemAvailable() )
+            return;
+         if ( strcmp( key, link->keys_[index] ) == 0 )
+         {
+            doActualRemove( link, index, bucketIndex );
+            return;
+         }
+      }
+   }
+}
+
+void
+ValueInternalMap::doActualRemove( ValueInternalLink *link,
+                                  BucketIndex index,
+                                  BucketIndex bucketIndex )
+{
+   // find the last item of the bucket and swap it with the 'removed' one.
+   // set the removed item's flag to 'available'.
+   // if the last page only contains 'available' items, then deallocate it (it's empty)
+   ValueInternalLink *&lastLink = getLastLinkInBucket( index );
+   BucketIndex lastItemIndex = 1; // a link can never be empty, so start at 1
+   for ( ;
+         lastItemIndex < ValueInternalLink::itemPerLink;
+         ++lastItemIndex ) // may be optimized with a dichotomic (binary) search
+   {
+      if ( lastLink->items_[lastItemIndex].isItemAvailable() )
+         break;
+   }
+
+   BucketIndex lastUsedIndex = lastItemIndex - 1;
+   Value *valueToDelete = &link->items_[index];
+   Value *valueToPreserve = &lastLink->items_[lastUsedIndex];
+   if ( valueToDelete != valueToPreserve )
+      valueToDelete->swap( *valueToPreserve );
+   if ( lastUsedIndex == 0 ) // page is now empty
+   {  // remove it from bucket linked list and delete it.
+      ValueInternalLink *linkPreviousToLast = lastLink->previous_;
+      if ( linkPreviousToLast != 0 ) // cannot delete the bucket's own link.
+      {
+         mapAllocator()->releaseMapLink( lastLink );
+         linkPreviousToLast->next_ = 0;
+         lastLink = linkPreviousToLast;
+      }
+   }
+   else
+   {
+      Value dummy;
+      valueToPreserve->swap( dummy ); // restore deleted to default Value.
+      valueToPreserve->setItemUsed( false );
+   }
+   --itemCount_;
+}
+
+
+ValueInternalLink *&
+ValueInternalMap::getLastLinkInBucket( BucketIndex bucketIndex )
+{
+   if ( bucketIndex == bucketsSize_ - 1 )
+      return tailLink_;
+   ValueInternalLink *&previous = buckets_[bucketIndex+1].previous_;
+   if ( !previous )
+      previous = &buckets_[bucketIndex];
+   return previous;
+}
+
+
+Value &
+ValueInternalMap::setNewItem( const char *key,
+                              bool isStatic,
+                              ValueInternalLink *link,
+                              BucketIndex index )
+{
+   char *duplicatedKey = valueAllocator()->makeMemberName( key );
+   ++itemCount_;
+   link->keys_[index] = duplicatedKey;
+   link->items_[index].setItemUsed();
+   link->items_[index].setMemberNameIsStatic( isStatic );
+   return link->items_[index]; // items already default constructed.
+}
+
+
+Value &
+ValueInternalMap::unsafeAdd( const char *key,
+                             bool isStatic,
+                             HashKey hashedKey )
+{
+   JSON_ASSERT_MESSAGE( bucketsSize_ > 0, "ValueInternalMap::unsafeAdd(): internal logic error." );
+   BucketIndex bucketIndex = hashedKey % bucketsSize_;
+   ValueInternalLink *&previousLink = getLastLinkInBucket( bucketIndex );
+   ValueInternalLink *link = previousLink;
+   BucketIndex index;
+   for ( index =0; index < ValueInternalLink::itemPerLink; ++index )
+   {
+      if ( link->items_[index].isItemAvailable() )
+         break;
+   }
+   if ( index == ValueInternalLink::itemPerLink ) // need to add a new page
+   {
+      ValueInternalLink *newLink = mapAllocator()->allocateMapLink();
+      index = 0;
+      link->next_ = newLink;
+      previousLink = newLink;
+      link = newLink;
+   }
+   return setNewItem( key, isStatic, link, index );
+}
+
+
+ValueInternalMap::HashKey
+ValueInternalMap::hash( const char *key ) const
+{
+   HashKey hash = 0;
+   while ( *key )
+      hash += *key++ * 37;
+   return hash;
+}
+
+
+int
+ValueInternalMap::compare( const ValueInternalMap &other ) const
+{
+   int sizeDiff( itemCount_ - other.itemCount_ );
+   if ( sizeDiff != 0 )
+      return sizeDiff;
+   // Strict order guarantee is required. Compare all keys FIRST, then compare values.
+   IteratorState it;
+   IteratorState itEnd;
+   makeBeginIterator( it );
+   makeEndIterator( itEnd );
+   for ( ; !equals(it,itEnd); increment(it) )
+   {
+      if ( !other.find( key( it ) ) )
+         return 1;
+   }
+
+   // All keys are equal, let's compare values
+   makeBeginIterator( it );
+   for ( ; !equals(it,itEnd); increment(it) )
+   {
+      const Value *otherValue = other.find( key( it ) );
+      int valueDiff = value(it).compare( *otherValue );
+      if ( valueDiff != 0 )
+         return valueDiff;
+   }
+   return 0;
+}
+
+
+void
+ValueInternalMap::makeBeginIterator( IteratorState &it ) const
+{
+   it.map_ = const_cast<ValueInternalMap *>( this );
+   it.bucketIndex_ = 0;
+   it.itemIndex_ = 0;
+   it.link_ = buckets_;
+}
+
+
+void
+ValueInternalMap::makeEndIterator( IteratorState &it ) const
+{
+   it.map_ = const_cast<ValueInternalMap *>( this );
+   it.bucketIndex_ = bucketsSize_;
+   it.itemIndex_ = 0;
+   it.link_ = 0;
+}
+
+
+bool
+ValueInternalMap::equals( const IteratorState &x, const IteratorState &other )
+{
+   return x.map_ == other.map_
+          && x.bucketIndex_ == other.bucketIndex_
+          && x.link_ == other.link_
+          && x.itemIndex_ == other.itemIndex_;
+}
+
+
+void
+ValueInternalMap::incrementBucket( IteratorState &iterator )
+{
+   ++iterator.bucketIndex_;
+   JSON_ASSERT_MESSAGE( iterator.bucketIndex_ <= iterator.map_->bucketsSize_,
+      "ValueInternalMap::increment(): attempting to iterate beyond end." );
+   if ( iterator.bucketIndex_ == iterator.map_->bucketsSize_ )
+      iterator.link_ = 0;
+   else
+      iterator.link_ = &(iterator.map_->buckets_[iterator.bucketIndex_]);
+   iterator.itemIndex_ = 0;
+}
+
+
+void
+ValueInternalMap::increment( IteratorState &iterator )
+{
+   JSON_ASSERT_MESSAGE( iterator.map_, "Attempting to iterate using an invalid iterator." );
+   ++iterator.itemIndex_;
+   if ( iterator.itemIndex_ == ValueInternalLink::itemPerLink )
+   {
+      JSON_ASSERT_MESSAGE( iterator.link_ != 0,
+         "ValueInternalMap::increment(): attempting to iterate beyond end." );
+      iterator.link_ = iterator.link_->next_;
+      if ( iterator.link_ == 0 )
+         incrementBucket( iterator );
+   }
+   else if ( iterator.link_->items_[iterator.itemIndex_].isItemAvailable() )
+   {
+      incrementBucket( iterator );
+   }
+}
+
+
+void
+ValueInternalMap::decrement( IteratorState &iterator )
+{
+   if ( iterator.itemIndex_ == 0 )
+   {
+      JSON_ASSERT_MESSAGE( iterator.map_, "Attempting to iterate using an invalid iterator." );
+      if ( iterator.link_ == &iterator.map_->buckets_[iterator.bucketIndex_] )
+      {
+         JSON_ASSERT_MESSAGE( iterator.bucketIndex_ > 0, "Attempting to iterate beyond beginning." );
+         --(iterator.bucketIndex_);
+      }
+      iterator.link_ = iterator.link_->previous_;
+      iterator.itemIndex_ = ValueInternalLink::itemPerLink - 1;
+   }
+}
+
+
+const char *
+ValueInternalMap::key( const IteratorState &iterator )
+{
+   JSON_ASSERT_MESSAGE( iterator.link_, "Attempting to iterate using an invalid iterator." );
+   return iterator.link_->keys_[iterator.itemIndex_];
+}
+
+const char *
+ValueInternalMap::key( const IteratorState &iterator, bool &isStatic )
+{
+   JSON_ASSERT_MESSAGE( iterator.link_, "Attempting to iterate using an invalid iterator." );
+   isStatic = iterator.link_->items_[iterator.itemIndex_].isMemberNameStatic();
+   return iterator.link_->keys_[iterator.itemIndex_];
+}
+
+
+Value &
+ValueInternalMap::value( const IteratorState &iterator )
+{
+   JSON_ASSERT_MESSAGE( iterator.link_, "Attempting to iterate using an invalid iterator." );
+   return iterator.link_->items_[iterator.itemIndex_];
+}
+
+
+int
+ValueInternalMap::distance( const IteratorState &x, const IteratorState &y )
+{
+   int offset = 0;
+   IteratorState it = x;
+   for ( ; !equals( it, y ); increment( it ) )
+      ++offset; // count each step; the distance was previously never accumulated
+   return offset;
+}
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_valueiterator.inl b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_valueiterator.inl
new file mode 100644
index 0000000..736e260
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/json_valueiterator.inl
@@ -0,0 +1,292 @@
+// included by json_value.cpp
+// everything is within Json namespace
+
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// class ValueIteratorBase
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+
+ValueIteratorBase::ValueIteratorBase()
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   : current_()
+   , isNull_( true )
+{
+}
+#else
+   : isArray_( true )
+   , isNull_( true )
+{
+   iterator_.array_ = ValueInternalArray::IteratorState();
+}
+#endif
+
+
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+ValueIteratorBase::ValueIteratorBase( const Value::ObjectValues::iterator &current )
+   : current_( current )
+   , isNull_( false )
+{
+}
+#else
+ValueIteratorBase::ValueIteratorBase( const ValueInternalArray::IteratorState &state )
+   : isArray_( true )
+{
+   iterator_.array_ = state;
+}
+
+
+ValueIteratorBase::ValueIteratorBase( const ValueInternalMap::IteratorState &state )
+   : isArray_( false )
+{
+   iterator_.map_ = state;
+}
+#endif
+
+Value &
+ValueIteratorBase::deref() const
+{
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   return current_->second;
+#else
+   if ( isArray_ )
+      return ValueInternalArray::dereference( iterator_.array_ );
+   return ValueInternalMap::value( iterator_.map_ );
+#endif
+}
+
+
+void
+ValueIteratorBase::increment()
+{
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   ++current_;
+#else
+   if ( isArray_ )
+      ValueInternalArray::increment( iterator_.array_ );
+   else // only touch the union member that is active
+      ValueInternalMap::increment( iterator_.map_ );
+#endif
+}
+
+
+void
+ValueIteratorBase::decrement()
+{
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   --current_;
+#else
+   if ( isArray_ )
+      ValueInternalArray::decrement( iterator_.array_ );
+   else
+      ValueInternalMap::decrement( iterator_.map_ );
+#endif
+}
+
+
+ValueIteratorBase::difference_type
+ValueIteratorBase::computeDistance( const SelfType &other ) const
+{
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+# ifdef JSON_USE_CPPTL_SMALLMAP
+   return current_ - other.current_;
+# else
+   // Iterators for null values are initialized using the default
+   // constructor, which initializes current_ to the default
+   // std::map::iterator. As begin() and end() are two instances
+   // of the default std::map::iterator, they cannot be compared.
+   // To allow this, we handle this comparison specifically.
+   if ( isNull_ && other.isNull_ )
+   {
+      return 0;
+   }
+
+
+   // Usage of std::distance is not portable (does not compile with Sun Studio 12 RogueWave STL,
+   // which is the one used by default).
+   // Using a portable hand-made version for non random iterator instead:
+   //   return difference_type( std::distance( current_, other.current_ ) );
+   difference_type myDistance = 0;
+   for ( Value::ObjectValues::iterator it = current_; it != other.current_; ++it )
+   {
+      ++myDistance;
+   }
+   return myDistance;
+# endif
+#else
+   if ( isArray_ )
+      return ValueInternalArray::distance( iterator_.array_, other.iterator_.array_ );
+   return ValueInternalMap::distance( iterator_.map_, other.iterator_.map_ );
+#endif
+}
+
+
+bool
+ValueIteratorBase::isEqual( const SelfType &other ) const
+{
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   if ( isNull_ )
+   {
+      return other.isNull_;
+   }
+   return current_ == other.current_;
+#else
+   if ( isArray_ )
+      return ValueInternalArray::equals( iterator_.array_, other.iterator_.array_ );
+   return ValueInternalMap::equals( iterator_.map_, other.iterator_.map_ );
+#endif
+}
+
+
+void
+ValueIteratorBase::copy( const SelfType &other )
+{
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   current_ = other.current_;
+#else
+   if ( isArray_ )
+      iterator_.array_ = other.iterator_.array_;
+   else // copy only the active union member
+      iterator_.map_ = other.iterator_.map_;
+#endif
+}
+
+
+Value
+ValueIteratorBase::key() const
+{
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   const Value::CZString czstring = (*current_).first;
+   if ( czstring.c_str() )
+   {
+      if ( czstring.isStaticString() )
+         return Value( StaticString( czstring.c_str() ) );
+      return Value( czstring.c_str() );
+   }
+   return Value( czstring.index() );
+#else
+   if ( isArray_ )
+      return Value( ValueInternalArray::indexOf( iterator_.array_ ) );
+   bool isStatic;
+   const char *memberName = ValueInternalMap::key( iterator_.map_, isStatic );
+   if ( isStatic )
+      return Value( StaticString( memberName ) );
+   return Value( memberName );
+#endif
+}
+
+
+UInt
+ValueIteratorBase::index() const
+{
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   const Value::CZString czstring = (*current_).first;
+   if ( !czstring.c_str() )
+      return czstring.index();
+   return Value::UInt( -1 );
+#else
+   if ( isArray_ )
+      return Value::UInt( ValueInternalArray::indexOf( iterator_.array_ ) );
+   return Value::UInt( -1 );
+#endif
+}
+
+
+const char *
+ValueIteratorBase::memberName() const
+{
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   const char *name = (*current_).first.c_str();
+   return name ? name : "";
name : ""; +#else + if ( !isArray_ ) + return ValueInternalMap::key( iterator_.map_ ); + return ""; +#endif +} + + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueConstIterator +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueConstIterator::ValueConstIterator() +{ +} + + +#ifndef JSON_VALUE_USE_INTERNAL_MAP +ValueConstIterator::ValueConstIterator( const Value::ObjectValues::iterator ¤t ) + : ValueIteratorBase( current ) +{ +} +#else +ValueConstIterator::ValueConstIterator( const ValueInternalArray::IteratorState &state ) + : ValueIteratorBase( state ) +{ +} + +ValueConstIterator::ValueConstIterator( const ValueInternalMap::IteratorState &state ) + : ValueIteratorBase( state ) +{ +} +#endif + +ValueConstIterator & +ValueConstIterator::operator =( const ValueIteratorBase &other ) +{ + copy( other ); + return *this; +} + + +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// class ValueIterator +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////////// + +ValueIterator::ValueIterator() +{ +} + + +#ifndef JSON_VALUE_USE_INTERNAL_MAP +ValueIterator::ValueIterator( const Value::ObjectValues::iterator ¤t ) + : ValueIteratorBase( current ) +{ +} +#else +ValueIterator::ValueIterator( const ValueInternalArray::IteratorState &state ) + : ValueIteratorBase( state ) +{ +} + +ValueIterator::ValueIterator( const ValueInternalMap::IteratorState &state ) + : ValueIteratorBase( state ) +{ +} +#endif + +ValueIterator::ValueIterator( const ValueConstIterator &other ) + : ValueIteratorBase( other ) +{ +} + +ValueIterator::ValueIterator( const ValueIterator &other ) + : ValueIteratorBase( other ) +{ +} + +ValueIterator & +ValueIterator::operator =( const SelfType &other ) +{ + copy( other ); + return *this; +} diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/lib_features.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/lib_features.h new file mode 100644 index 0000000..5a9adec --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/lib_features.h @@ -0,0 +1,42 @@ +#ifndef CPPTL_JSON_FEATURES_H_INCLUDED +# define CPPTL_JSON_FEATURES_H_INCLUDED + +# include "forwards.h" + +namespace Json { + + /** \brief Configuration passed to reader and writer. + * This configuration object can be used to force the Reader or Writer + * to behave in a standard conforming way. + */ + class JSON_API Features + { + public: + /** \brief A configuration that allows all features and assumes all strings are UTF-8. + * - C & C++ comments are allowed + * - Root object can be any JSON value + * - Assumes Value strings are encoded in UTF-8 + */ + static Features all(); + + /** \brief A configuration that is strictly compatible with the JSON specification. + * - Comments are forbidden. + * - Root object must be either an array or an object value. 
+       * - Assumes Value strings are encoded in UTF-8
+       */
+      static Features strictMode();
+
+      /** \brief Initialize the configuration like JsonConfig::allFeatures;
+       */
+      Features();
+
+      /// \c true if comments are allowed. Default: \c true.
+      bool allowComments_;
+
+      /// \c true if root must be either an array or an object value. Default: \c false.
+      bool strictRoot_;
+   };
+
+} // namespace Json
+
+#endif // CPPTL_JSON_FEATURES_H_INCLUDED
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/reader.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/reader.h
new file mode 100644
index 0000000..19ecf66
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/reader.h
@@ -0,0 +1,196 @@
+#ifndef CPPTL_JSON_READER_H_INCLUDED
+# define CPPTL_JSON_READER_H_INCLUDED
+
+# include "lib_features.h"
+# include "value.h"
+# include <deque>
+# include <stack>
+# include <string>
+# include <iostream>
+
+namespace Json {
+
+   /** \brief Unserialize a JSON document into a Value.
+    *
+    */
+   class JSON_API Reader
+   {
+   public:
+      typedef char Char;
+      typedef const Char *Location;
+
+      /** \brief Constructs a Reader allowing all features
+       * for parsing.
+       */
+      Reader();
+
+      /** \brief Constructs a Reader allowing the specified feature set
+       * for parsing.
+       */
+      Reader( const Features &features );
+
+      /** \brief Read a Value from a JSON document.
+       * \param document UTF-8 encoded string containing the document to read.
+       * \param root [out] Contains the root value of the document if it was
+       *             successfully parsed.
+       * \param collectComments \c true to collect comments and allow writing them back during
+       *                        serialization, \c false to discard comments.
+       *                        This parameter is ignored if Features::allowComments_
+       *                        is \c false.
+       * \return \c true if the document was successfully parsed, \c false if an error occurred.
+       */
+      bool parse( const std::string &document,
+                  Value &root,
+                  bool collectComments = true );
+
+      /** \brief Read a Value from a JSON document.
+       * \param beginDoc Pointer to the beginning of the UTF-8 encoded document to read.
+       * \param endDoc Pointer to the end of the UTF-8 encoded document to read.
+       * \param root [out] Contains the root value of the document if it was
+       *             successfully parsed.
+       * \param collectComments \c true to collect comments and allow writing them back during
+       *                        serialization, \c false to discard comments.
+       *                        This parameter is ignored if Features::allowComments_
+       *                        is \c false.
+       * \return \c true if the document was successfully parsed, \c false if an error occurred.
+       */
+      bool parse( const char *beginDoc, const char *endDoc,
+                  Value &root,
+                  bool collectComments = true );
+
+      /// \brief Parse from input stream.
+      /// \see Json::operator>>(std::istream&, Json::Value&).
+      bool parse( std::istream &is,
+                  Value &root,
+                  bool collectComments = true );
+
+      /** \brief Returns a user friendly string that lists errors in the parsed document.
+       * \return Formatted error message with the list of errors with their location in
+       *         the parsed document. An empty string is returned if no error occurred
+       *         during parsing.
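+       *
+       * A minimal error-handling sketch (illustrative only; \c doc stands in
+       * for any UTF-8 encoded JSON string):
+       * \code
+       * Json::Reader reader;
+       * Json::Value root;
+       * std::string doc = "{ \"key\": [1, 2, 3] }";
+       * if ( !reader.parse( doc, root ) )
+       *    std::cout << reader.getFormatedErrorMessages();
+       * \endcode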
+       */
+      std::string getFormatedErrorMessages() const;
+
+   private:
+      enum TokenType
+      {
+         tokenEndOfStream = 0,
+         tokenObjectBegin,
+         tokenObjectEnd,
+         tokenArrayBegin,
+         tokenArrayEnd,
+         tokenString,
+         tokenNumber,
+         tokenTrue,
+         tokenFalse,
+         tokenNull,
+         tokenArraySeparator,
+         tokenMemberSeparator,
+         tokenComment,
+         tokenError
+      };
+
+      class Token
+      {
+      public:
+         TokenType type_;
+         Location start_;
+         Location end_;
+      };
+
+      class ErrorInfo
+      {
+      public:
+         Token token_;
+         std::string message_;
+         Location extra_;
+      };
+
+      typedef std::deque<ErrorInfo> Errors;
+
+      bool expectToken( TokenType type, Token &token, const char *message );
+      bool readToken( Token &token );
+      void skipSpaces();
+      bool match( Location pattern,
+                  int patternLength );
+      bool readComment();
+      bool readCStyleComment();
+      bool readCppStyleComment();
+      bool readString();
+      void readNumber();
+      bool readValue();
+      bool readObject( Token &token );
+      bool readArray( Token &token );
+      bool decodeNumber( Token &token );
+      bool decodeString( Token &token );
+      bool decodeString( Token &token, std::string &decoded );
+      bool decodeDouble( Token &token );
+      bool decodeUnicodeCodePoint( Token &token,
+                                   Location &current,
+                                   Location end,
+                                   unsigned int &unicode );
+      bool decodeUnicodeEscapeSequence( Token &token,
+                                        Location &current,
+                                        Location end,
+                                        unsigned int &unicode );
+      bool addError( const std::string &message,
+                     Token &token,
+                     Location extra = 0 );
+      bool recoverFromError( TokenType skipUntilToken );
+      bool addErrorAndRecover( const std::string &message,
+                               Token &token,
+                               TokenType skipUntilToken );
+      void skipUntilSpace();
+      Value &currentValue();
+      Char getNextChar();
+      void getLocationLineAndColumn( Location location,
+                                     int &line,
+                                     int &column ) const;
+      std::string getLocationLineAndColumn( Location location ) const;
+      void addComment( Location begin,
+                       Location end,
+                       CommentPlacement placement );
+      void skipCommentTokens( Token &token );
+
+      typedef std::stack<Value *> Nodes;
+      Nodes nodes_;
+      Errors errors_;
+      std::string document_;
+      Location begin_;
+      Location end_;
+      Location current_;
+      Location lastValueEnd_;
+      Value *lastValue_;
+      std::string commentsBefore_;
+      Features features_;
+      bool collectComments_;
+   };
+
+   /** \brief Read from 'sin' into 'root'.
+
+    Always keep comments from the input JSON.
+
+    This can be used to read a file into a particular sub-object.
+    For example:
+    \code
+    Json::Value root;
+    cin >> root["dir"]["file"];
+    cout << root;
+    \endcode
+    Result:
+    \verbatim
+    {
+       "dir": {
+          "file": {
+             // The input stream JSON would be nested here.
+          }
+       }
+    }
+    \endverbatim
+    \throw std::exception on parse error.
+    \see Json::operator<<()
+    */
+   std::istream& operator>>( std::istream&, Value& );
+
+} // namespace Json
+
+#endif // CPPTL_JSON_READER_H_INCLUDED
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/value.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/value.h
new file mode 100644
index 0000000..aa25e3b
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/value.h
@@ -0,0 +1,1069 @@
+#ifndef CPPTL_JSON_H_INCLUDED
+# define CPPTL_JSON_H_INCLUDED
+
+# include "forwards.h"
+# include <string>
+# include <vector>
+
+# ifndef JSON_USE_CPPTL_SMALLMAP
+# include <map>
+# else
+# include <cpptl/smallmap.h>
+# endif
+# ifdef JSON_USE_CPPTL
+# include <cpptl/forwards.h>
+# endif
+
+/** \brief JSON (JavaScript Object Notation).
+ */
+namespace Json {
+
+   /** \brief Type of the value held by a Value object.
+    */
+   enum ValueType
+   {
+      nullValue = 0, ///< 'null' value
+      intValue,      ///< signed integer value
+      uintValue,     ///< unsigned integer value
+      realValue,     ///< double value
+      stringValue,   ///< UTF-8 string value
+      booleanValue,  ///< bool value
+      arrayValue,    ///< array value (ordered list)
+      objectValue    ///< object value (collection of name/value pairs).
+   };
+
+   enum CommentPlacement
+   {
+      commentBefore = 0,        ///< a comment placed on the line before a value
+      commentAfterOnSameLine,   ///< a comment just after a value on the same line
+      commentAfter,             ///< a comment on the line after a value (only make sense for root value)
+      numberOfCommentPlacement
+   };
+
+//# ifdef JSON_USE_CPPTL
+//   typedef CppTL::AnyEnumerator<const char *> EnumMemberNames;
+//   typedef CppTL::AnyEnumerator<const Value &> EnumValues;
+//# endif
+
+   /** \brief Lightweight wrapper to tag static string.
+    *
+    * Value constructor and objectValue member assignment take advantage of the
+    * StaticString and avoid the cost of string duplication when storing the
+    * string or the member name.
+    *
+    * Example of usage:
+    * \code
+    * Json::Value aValue( StaticString("some text") );
+    * Json::Value object;
+    * static const StaticString code("code");
+    * object[code] = 1234;
+    * \endcode
+    */
+   class JSON_API StaticString
+   {
+   public:
+      explicit StaticString( const char *czstring )
+         : str_( czstring )
+      {
+      }
+
+      operator const char *() const
+      {
+         return str_;
+      }
+
+      const char *c_str() const
+      {
+         return str_;
+      }
+
+   private:
+      const char *str_;
+   };
+
+   /** \brief Represents a JSON value.
+    *
+    * This class is a discriminated union wrapper that can represent a:
+    * - signed integer [range: Value::minInt - Value::maxInt]
+    * - unsigned integer (range: 0 - Value::maxUInt)
+    * - double
+    * - UTF-8 string
+    * - boolean
+    * - 'null'
+    * - an ordered list of Value
+    * - collection of name/value pairs (javascript object)
+    *
+    * The type of the held value is represented by a #ValueType and
+    * can be obtained using type().
+    *
+    * values of an #objectValue or #arrayValue can be accessed using operator[]() methods.
+    * Non const methods will automatically create a #nullValue element
+    * if it does not exist.
+    * The sequence of an #arrayValue will be automatically resized and initialized
+    * with #nullValue. resize() can be used to enlarge or truncate an #arrayValue.
+    *
+    * The get() methods can be used to obtain a default value in the case the required element
+    * does not exist.
+    *
+    * It is possible to iterate over the member names of an #objectValue using
+    * the getMemberNames() method.
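+    *
+    * A short usage sketch (illustrative; the member names are arbitrary):
+    * \code
+    * Json::Value root( Json::objectValue );
+    * root["name"] = "example";      // creates the member on first access
+    * root["list"][0u] = 7;          // 0u selects the array operator[]
+    * Json::Value missing = root.get( "absent", "default" );
+    * Json::Value::Members names = root.getMemberNames();
+    * \endcode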
+    */
+   class JSON_API Value
+   {
+      friend class ValueIteratorBase;
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+      friend class ValueInternalLink;
+      friend class ValueInternalMap;
+# endif
+   public:
+      typedef std::vector<std::string> Members;
+      typedef ValueIterator iterator;
+      typedef ValueConstIterator const_iterator;
+      typedef Json::UInt UInt;
+      typedef Json::Int Int;
+      typedef UInt ArrayIndex;
+
+      static const Value null;
+      static const Int minInt;
+      static const Int maxInt;
+      static const UInt maxUInt;
+
+   private:
+#ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+# ifndef JSON_VALUE_USE_INTERNAL_MAP
+      class CZString
+      {
+      public:
+         enum DuplicationPolicy
+         {
+            noDuplication = 0,
+            duplicate,
+            duplicateOnCopy
+         };
+         CZString( int index );
+         CZString( const char *cstr, DuplicationPolicy allocate );
+         CZString( const CZString &other );
+         ~CZString();
+         CZString &operator =( const CZString &other );
+         bool operator<( const CZString &other ) const;
+         bool operator==( const CZString &other ) const;
+         int index() const;
+         const char *c_str() const;
+         bool isStaticString() const;
+      private:
+         void swap( CZString &other );
+         const char *cstr_;
+         int index_;
+      };
+
+   public:
+# ifndef JSON_USE_CPPTL_SMALLMAP
+      typedef std::map<CZString, Value> ObjectValues;
+# else
+      typedef CppTL::SmallMap<CZString, Value> ObjectValues;
+# endif // ifndef JSON_USE_CPPTL_SMALLMAP
+# endif // ifndef JSON_VALUE_USE_INTERNAL_MAP
+#endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+
+   public:
+      /** \brief Create a default Value of the given type.
+
+        This is a very useful constructor.
+        To create an empty array, pass arrayValue.
+        To create an empty object, pass objectValue.
+        Another Value can then be set to this one by assignment.
+        This is useful since clear() and resize() will not alter types.
+
+        Examples:
+        \code
+        Json::Value null_value; // null
+        Json::Value arr_value(Json::arrayValue); // []
+        Json::Value obj_value(Json::objectValue); // {}
+        \endcode
+      */
+      Value( ValueType type = nullValue );
+      Value( Int value );
+      Value( UInt value );
+      Value( double value );
+      Value( const char *value );
+      Value( const char *beginValue, const char *endValue );
+      /** \brief Constructs a value from a static string.
+
+       * Like the other string value constructors, but does not duplicate the string for
+       * internal storage. The given string must remain alive after the call to this
+       * constructor.
+       * Example of usage:
+       * \code
+       * Json::Value aValue( StaticString("some text") );
+       * \endcode
+       */
+      Value( const StaticString &value );
+      Value( const std::string &value );
+# ifdef JSON_USE_CPPTL
+      Value( const CppTL::ConstString &value );
+# endif
+      Value( bool value );
+      Value( const Value &other );
+      ~Value();
+
+      Value &operator=( const Value &other );
+      /// Swap values.
+      /// \note Currently, comments are intentionally not swapped, for
+      /// both logic and efficiency.
+      void swap( Value &other );
+
+      ValueType type() const;
+
+      bool operator <( const Value &other ) const;
+      bool operator <=( const Value &other ) const;
+      bool operator >=( const Value &other ) const;
+      bool operator >( const Value &other ) const;
+
+      bool operator ==( const Value &other ) const;
+      bool operator !=( const Value &other ) const;
+
+      int compare( const Value &other );
+
+      const char *asCString() const;
+      std::string asString() const;
+# ifdef JSON_USE_CPPTL
+      CppTL::ConstString asConstString() const;
+# endif
+      Int asInt() const;
+      UInt asUInt() const;
+      double asDouble() const;
+      bool asBool() const;
+
+      bool isNull() const;
+      bool isBool() const;
+      bool isInt() const;
+      bool isUInt() const;
+      bool isIntegral() const;
+      bool isDouble() const;
+      bool isNumeric() const;
+      bool isString() const;
+      bool isArray() const;
+      bool isObject() const;
+
+      bool isConvertibleTo( ValueType other ) const;
+
+      /// Number of values in array or object
+      UInt size() const;
+
+      /// \brief Return true if empty array, empty object, or null;
+      /// otherwise, false.
+      bool empty() const;
+
+      /// Return isNull()
+      bool operator!() const;
+
+      /// Remove all object members and array elements.
+      /// \pre type() is arrayValue, objectValue, or nullValue
+      /// \post type() is unchanged
+      void clear();
+
+      /// Resize the array to size elements.
+      /// New elements are initialized to null.
+      /// May only be called on nullValue or arrayValue.
+      /// \pre type() is arrayValue or nullValue
+      /// \post type() is arrayValue
+      void resize( UInt size );
+
+      /// Access an array element (zero based index).
+      /// If the array contains fewer than index+1 elements, null values are inserted
+      /// in the array so that its size is index+1.
+      /// (You may need to say 'value[0u]' to get your compiler to distinguish
+      /// this from the operator[] which takes a string.)
+      Value &operator[]( UInt index );
+      /// Access an array element (zero based index)
+      /// (You may need to say 'value[0u]' to get your compiler to distinguish
+      /// this from the operator[] which takes a string.)
+      const Value &operator[]( UInt index ) const;
+      /// If the array contains at least index+1 elements, returns the element value,
+      /// otherwise returns defaultValue.
+      Value get( UInt index,
+                 const Value &defaultValue ) const;
+      /// Return true if index < size().
+      bool isValidIndex( UInt index ) const;
+      /// \brief Append value to array at the end.
+      ///
+      /// Equivalent to jsonvalue[jsonvalue.size()] = value;
+      Value &append( const Value &value );
+
+      /// Access an object value by name, create a null member if it does not exist.
+      Value &operator[]( const char *key );
+      /// Access an object value by name, returns null if there is no member with that name.
+      const Value &operator[]( const char *key ) const;
+      /// Access an object value by name, create a null member if it does not exist.
+      Value &operator[]( const std::string &key );
+      /// Access an object value by name, returns null if there is no member with that name.
+      const Value &operator[]( const std::string &key ) const;
+      /** \brief Access an object value by name, create a null member if it does not exist.
+
+       * If the object has no entry for that name, then the member name used to store
+       * the new entry is not duplicated.
+       * Example of use:
+       * \code
+       * Json::Value object;
+       * static const StaticString code("code");
+       * object[code] = 1234;
+       * \endcode
+       */
+      Value &operator[]( const StaticString &key );
+# ifdef JSON_USE_CPPTL
+      /// Access an object value by name, create a null member if it does not exist.
+      Value &operator[]( const CppTL::ConstString &key );
+      /// Access an object value by name, returns null if there is no member with that name.
+      const Value &operator[]( const CppTL::ConstString &key ) const;
+# endif
+      /// Return the member named key if it exists, defaultValue otherwise.
+      Value get( const char *key,
+                 const Value &defaultValue ) const;
+      /// Return the member named key if it exists, defaultValue otherwise.
+      Value get( const std::string &key,
+                 const Value &defaultValue ) const;
+# ifdef JSON_USE_CPPTL
+      /// Return the member named key if it exists, defaultValue otherwise.
+      Value get( const CppTL::ConstString &key,
+                 const Value &defaultValue ) const;
+# endif
+      /// \brief Remove and return the named member.
+      ///
+      /// Does nothing if it does not exist.
+      /// \return the removed Value, or null.
+      /// \pre type() is objectValue or nullValue
+      /// \post type() is unchanged
+      Value removeMember( const char* key );
+      /// Same as removeMember(const char*)
+      Value removeMember( const std::string &key );
+
+      /// Return true if the object has a member named key.
+      bool isMember( const char *key ) const;
+      /// Return true if the object has a member named key.
+      bool isMember( const std::string &key ) const;
+# ifdef JSON_USE_CPPTL
+      /// Return true if the object has a member named key.
+      bool isMember( const CppTL::ConstString &key ) const;
+# endif
+
+      /// \brief Return a list of the member names.
+      ///
+      /// If null, return an empty list.
+      /// \pre type() is objectValue or nullValue
+      /// \post if type() was nullValue, it remains nullValue
+      Members getMemberNames() const;
+
+//# ifdef JSON_USE_CPPTL
+//      EnumMemberNames enumMemberNames() const;
+//      EnumValues enumValues() const;
+//# endif
+
+      /// Comments must be //... or /* ... */
+      void setComment( const char *comment,
+                       CommentPlacement placement );
+      /// Comments must be //... or /* ... */
+      void setComment( const std::string &comment,
+                       CommentPlacement placement );
+      bool hasComment( CommentPlacement placement ) const;
+      /// Include delimiters and embedded newlines.
+      std::string getComment( CommentPlacement placement ) const;
+
+      std::string toStyledString() const;
+
+      const_iterator begin() const;
+      const_iterator end() const;
+
+      iterator begin();
+      iterator end();
+
+   private:
+      Value &resolveReference( const char *key,
+                               bool isStatic );
+
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+      inline bool isItemAvailable() const
+      {
+         return itemIsUsed_ == 0;
+      }
+
+      inline void setItemUsed( bool isUsed = true )
+      {
+         itemIsUsed_ = isUsed ? 1 : 0;
+      }
+
+      inline bool isMemberNameStatic() const
+      {
+         return memberNameIsStatic_ != 0; // non-zero means static, mirroring setMemberNameIsStatic()
+      }
+
+      inline void setMemberNameIsStatic( bool isStatic )
+      {
+         memberNameIsStatic_ = isStatic ? 1 : 0;
+      }
+# endif // # ifdef JSON_VALUE_USE_INTERNAL_MAP
+
+   private:
+      struct CommentInfo
+      {
+         CommentInfo();
+         ~CommentInfo();
+
+         void setComment( const char *text );
+
+         char *comment_;
+      };
+
+      //struct MemberNamesTransform
+      //{
+      //   typedef const char *result_type;
+      //   const char *operator()( const CZString &name ) const
+      //   {
+      //      return name.c_str();
+      //   }
+      //};
+
+      union ValueHolder
+      {
+         Int int_;
+         UInt uint_;
+         double real_;
+         bool bool_;
+         char *string_;
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+         ValueInternalArray *array_;
+         ValueInternalMap *map_;
+#else
+         ObjectValues *map_;
+# endif
+      } value_;
+      ValueType type_ : 8;
+      int allocated_ : 1; // Notes: if declared as bool, bitfield is useless.
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+      unsigned int itemIsUsed_ : 1;      // used by the ValueInternalMap container.
+      int memberNameIsStatic_ : 1;       // used by the ValueInternalMap container.
+# endif
+      CommentInfo *comments_;
+   };
+
+
+   /** \brief Experimental and untested: represents an element of the "path" to access a node.
+    */
+   class PathArgument
+   {
+   public:
+      friend class Path;
+
+      PathArgument();
+      PathArgument( UInt index );
+      PathArgument( const char *key );
+      PathArgument( const std::string &key );
+
+   private:
+      enum Kind
+      {
+         kindNone = 0,
+         kindIndex,
+         kindKey
+      };
+      std::string key_;
+      UInt index_;
+      Kind kind_;
+   };
+
+   /** \brief Experimental and untested: represents a "path" to access a node.
+    *
+    * Syntax:
+    * - "." => root node
+    * - ".[n]" => elements at index 'n' of root node (an array value)
+    * - ".name" => member named 'name' of root node (an object value)
+    * - ".name1.name2.name3"
+    * - ".[0][1][2].name1[3]"
+    * - ".%" => member name is provided as parameter
+    * - ".[%]" => index is provided as parameter
+    */
+   class Path
+   {
+   public:
+      Path( const std::string &path,
+            const PathArgument &a1 = PathArgument(),
+            const PathArgument &a2 = PathArgument(),
+            const PathArgument &a3 = PathArgument(),
+            const PathArgument &a4 = PathArgument(),
+            const PathArgument &a5 = PathArgument() );
+
+      const Value &resolve( const Value &root ) const;
+      Value resolve( const Value &root,
+                     const Value &defaultValue ) const;
+      /// Creates the "path" to access the specified node and returns a reference on the node.
+      Value &make( Value &root ) const;
+
+   private:
+      typedef std::vector<const PathArgument *> InArgs;
+      typedef std::vector<PathArgument> Args;
+
+      void makePath( const std::string &path,
+                     const InArgs &in );
+      void addPathInArg( const std::string &path,
+                         const InArgs &in,
+                         InArgs::const_iterator &itInArg,
+                         PathArgument::Kind kind );
+      void invalidPath( const std::string &path,
+                        int location );
+
+      Args args_;
+   };
+
+   /** \brief Experimental do not use: Allocator to customize member name and string value memory management done by Value.
+    *
+    * - makeMemberName() and releaseMemberName() are called to respectively duplicate and
+    *   free a Json::objectValue member name.
+    * - duplicateStringValue() and releaseStringValue() are called similarly to
+    *   duplicate and free a Json::stringValue value.
+    */
+   class ValueAllocator
+   {
+   public:
+      enum { unknown = (unsigned)-1 };
+
+      virtual ~ValueAllocator();
+
+      virtual char *makeMemberName( const char *memberName ) = 0;
+      virtual void releaseMemberName( char *memberName ) = 0;
+      virtual char *duplicateStringValue( const char *value,
+                                          unsigned int length = unknown ) = 0;
+      virtual void releaseStringValue( char *value ) = 0;
+   };
+
+#ifdef JSON_VALUE_USE_INTERNAL_MAP
+   /** \brief Allocator to customize Value internal map.
+    * Below is an example of a simple implementation (the default implementation
+    * actually uses a memory pool for speed).
+    * \code
+   class DefaultValueMapAllocator : public ValueMapAllocator
+   {
+   public: // overridden from ValueMapAllocator
+      virtual ValueInternalMap *newMap()
+      {
+         return new ValueInternalMap();
+      }
+
+      virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other )
+      {
+         return new ValueInternalMap( other );
+      }
+
+      virtual void destructMap( ValueInternalMap *map )
+      {
+         delete map;
+      }
+
+      virtual ValueInternalLink *allocateMapBuckets( unsigned int size )
+      {
+         return new ValueInternalLink[size];
+      }
+
+      virtual void releaseMapBuckets( ValueInternalLink *links )
+      {
+         delete [] links;
+      }
+
+      virtual ValueInternalLink *allocateMapLink()
+      {
+         return new ValueInternalLink();
+      }
+
+      virtual void releaseMapLink( ValueInternalLink *link )
+      {
+         delete link;
+      }
+   };
+    * \endcode
+    */
+   class JSON_API ValueMapAllocator
+   {
+   public:
+      virtual ~ValueMapAllocator();
+      virtual ValueInternalMap *newMap() = 0;
+      virtual ValueInternalMap *newMapCopy( const ValueInternalMap &other ) = 0;
+      virtual void destructMap( ValueInternalMap *map ) = 0;
+      virtual ValueInternalLink *allocateMapBuckets( unsigned int size ) = 0;
+      virtual void releaseMapBuckets( ValueInternalLink *links ) = 0;
+      virtual ValueInternalLink *allocateMapLink() = 0;
+      virtual void releaseMapLink( ValueInternalLink *link ) = 0;
+   };
+
+   /** \brief ValueInternalMap hash-map bucket chain link (for internal use only).
+    * \internal previous_ & next_ allow for bidirectional traversal.
+    */
+   class JSON_API ValueInternalLink
+   {
+   public:
+      enum { itemPerLink = 6 }; // sizeof(ValueInternalLink) = 128 on 32 bits architecture.
+      enum InternalFlags {
+         flagAvailable = 0,
+         flagUsed = 1
+      };
+
+      ValueInternalLink();
+
+      ~ValueInternalLink();
+
+      Value items_[itemPerLink];
+      char *keys_[itemPerLink];
+      ValueInternalLink *previous_;
+      ValueInternalLink *next_;
+   };
+
+
+   /** \brief A linked page based hash-table implementation used internally by Value.
+    * \internal ValueInternalMap is a traditional bucket-based hash-table, with a linked
+    * list in each bucket to handle collisions. There is an additional twist in that
+    * each node of the collision linked list is a page containing a fixed number of
+    * values. This provides a better compromise between memory usage and speed.
+    *
+    * Each bucket is made up of a chained list of ValueInternalLink. The last
+    * link of a given bucket can be found in the 'previous_' field of the following bucket.
+    * The last link of the last bucket is stored in tailLink_ as it has no following bucket.
+    * Only the last link of a bucket may contain 'available' items. The last link always
+    * contains at least one element, unless it is the bucket's very first link.
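+    *
+    * Illustrative look-up sketch mirroring find() (not part of the public API):
+    * \code
+    * BucketIndex bucketIndex = hash( key ) % bucketsSize_;
+    * for ( const ValueInternalLink *link = &buckets_[bucketIndex]; link != 0; link = link->next_ )
+    *    for ( BucketIndex i = 0; i < ValueInternalLink::itemPerLink; ++i )
+    *    {
+    *       if ( link->items_[i].isItemAvailable() )
+    *          return 0; // the first free slot ends the bucket's occupied run
+    *       if ( strcmp( key, link->keys_[i] ) == 0 )
+    *          return &link->items_[i]; // key match
+    *    }
+    * return 0;
+    * \endcode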
+    */
+   class JSON_API ValueInternalMap
+   {
+      friend class ValueIteratorBase;
+      friend class Value;
+   public:
+      typedef unsigned int HashKey;
+      typedef unsigned int BucketIndex;
+
+# ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+      struct IteratorState
+      {
+         IteratorState()
+            : map_(0)
+            , link_(0)
+            , itemIndex_(0)
+            , bucketIndex_(0)
+         {
+         }
+         ValueInternalMap *map_;
+         ValueInternalLink *link_;
+         BucketIndex itemIndex_;
+         BucketIndex bucketIndex_;
+      };
+# endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+
+      ValueInternalMap();
+      ValueInternalMap( const ValueInternalMap &other );
+      ValueInternalMap &operator =( const ValueInternalMap &other );
+      ~ValueInternalMap();
+
+      void swap( ValueInternalMap &other );
+
+      BucketIndex size() const;
+
+      void clear();
+
+      bool reserveDelta( BucketIndex growth );
+
+      bool reserve( BucketIndex newItemCount );
+
+      const Value *find( const char *key ) const;
+
+      Value *find( const char *key );
+
+      Value &resolveReference( const char *key,
+                               bool isStatic );
+
+      void remove( const char *key );
+
+      void doActualRemove( ValueInternalLink *link,
+                           BucketIndex index,
+                           BucketIndex bucketIndex );
+
+      ValueInternalLink *&getLastLinkInBucket( BucketIndex bucketIndex );
+
+      Value &setNewItem( const char *key,
+                         bool isStatic,
+                         ValueInternalLink *link,
+                         BucketIndex index );
+
+      Value &unsafeAdd( const char *key,
+                        bool isStatic,
+                        HashKey hashedKey );
+
+      HashKey hash( const char *key ) const;
+
+      int compare( const ValueInternalMap &other ) const;
+
+   private:
+      void makeBeginIterator( IteratorState &it ) const;
+      void makeEndIterator( IteratorState &it ) const;
+      static bool equals( const IteratorState &x, const IteratorState &other );
+      static void increment( IteratorState &iterator );
+      static void incrementBucket( IteratorState &iterator );
+      static void decrement( IteratorState &iterator );
+      static const char *key( const IteratorState &iterator );
+      static const char *key( const IteratorState &iterator, bool &isStatic );
+      static Value &value( const IteratorState &iterator );
+      static int distance( const IteratorState &x, const IteratorState &y );
+
+   private:
+      ValueInternalLink *buckets_;
+      ValueInternalLink *tailLink_;
+      BucketIndex bucketsSize_;
+      BucketIndex itemCount_;
+   };
+
+   /** \brief A simplified deque implementation used internally by Value.
+    * \internal
+    * It is based on a list of fixed-size "pages"; each page contains a fixed number of items.
+    * Instead of using a linked-list, an array of pointers is used for fast item look-up.
+    * Look-up for an element is as follows:
+    * - compute page index: pageIndex = itemIndex / itemsPerPage
+    * - look-up item in page: pages_[pageIndex][itemIndex % itemsPerPage]
+    *
+    * Insertion is amortized constant time (only the array containing the index of pointers
+    * needs to be reallocated when items are appended).
+    */
+   class JSON_API ValueInternalArray
+   {
+      friend class Value;
+      friend class ValueIteratorBase;
+   public:
+      enum { itemsPerPage = 8 }; // should be a power of 2 for fast divide and modulo.
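+      // Worked example of the look-up formula documented above (illustrative):
+      // with itemsPerPage = 8, item 13 lives on page 13 / 8 = 1 at slot
+      // 13 % 8 = 5, i.e. pages_[1][5].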
+      typedef Value::ArrayIndex ArrayIndex;
+      typedef unsigned int PageIndex;
+
+# ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+      struct IteratorState // Must be a POD
+      {
+         IteratorState()
+            : array_(0)
+            , currentPageIndex_(0)
+            , currentItemIndex_(0)
+         {
+         }
+         ValueInternalArray *array_;
+         Value **currentPageIndex_;
+         unsigned int currentItemIndex_;
+      };
+# endif // ifndef JSONCPP_DOC_EXCLUDE_IMPLEMENTATION
+
+      ValueInternalArray();
+      ValueInternalArray( const ValueInternalArray &other );
+      ValueInternalArray &operator =( const ValueInternalArray &other );
+      ~ValueInternalArray();
+      void swap( ValueInternalArray &other );
+
+      void clear();
+      void resize( ArrayIndex newSize );
+
+      Value &resolveReference( ArrayIndex index );
+
+      Value *find( ArrayIndex index ) const;
+
+      ArrayIndex size() const;
+
+      int compare( const ValueInternalArray &other ) const;
+
+   private:
+      static bool equals( const IteratorState &x, const IteratorState &other );
+      static void increment( IteratorState &iterator );
+      static void decrement( IteratorState &iterator );
+      static Value &dereference( const IteratorState &iterator );
+      static Value &unsafeDereference( const IteratorState &iterator );
+      static int distance( const IteratorState &x, const IteratorState &y );
+      static ArrayIndex indexOf( const IteratorState &iterator );
+      void makeBeginIterator( IteratorState &it ) const;
+      void makeEndIterator( IteratorState &it ) const;
+      void makeIterator( IteratorState &it, ArrayIndex index ) const;
+
+      void makeIndexValid( ArrayIndex index );
+
+      Value **pages_;
+      ArrayIndex size_;
+      PageIndex pageCount_;
+   };
+
+   /** \brief Experimental: do not use. Allocator to customize Value internal array.
+    * Below is an example of a simple implementation (the actual implementation uses a
+    * memory pool).
+      \code
+class DefaultValueArrayAllocator : public ValueArrayAllocator
+{
+public: // overridden from ValueArrayAllocator
+   virtual ~DefaultValueArrayAllocator()
+   {
+   }
+
+   virtual ValueInternalArray *newArray()
+   {
+      return new ValueInternalArray();
+   }
+
+   virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other )
+   {
+      return new ValueInternalArray( other );
+   }
+
+   virtual void destructArray( ValueInternalArray *array )
+   {
+      delete array;
+   }
+
+   virtual void reallocateArrayPageIndex( Value **&indexes,
+                                          ValueInternalArray::PageIndex &indexCount,
+                                          ValueInternalArray::PageIndex minNewIndexCount )
+   {
+      ValueInternalArray::PageIndex newIndexCount = (indexCount*3)/2 + 1;
+      if ( minNewIndexCount > newIndexCount )
+         newIndexCount = minNewIndexCount;
+      void *newIndexes = realloc( indexes, sizeof(Value*) * newIndexCount );
+      if ( !newIndexes )
+         throw std::bad_alloc();
+      indexCount = newIndexCount;
+      indexes = static_cast<Value **>( newIndexes );
+   }
+   virtual void releaseArrayPageIndex( Value **indexes,
+                                       ValueInternalArray::PageIndex indexCount )
+   {
+      if ( indexes )
+         free( indexes );
+   }
+
+   virtual Value *allocateArrayPage()
+   {
+      return static_cast<Value *>( malloc( sizeof(Value) * ValueInternalArray::itemsPerPage ) );
+   }
+
+   virtual void releaseArrayPage( Value *value )
+   {
+      if ( value )
+         free( value );
+   }
+};
+      \endcode
+    */
+   class JSON_API ValueArrayAllocator
+   {
+   public:
+      virtual ~ValueArrayAllocator();
+      virtual ValueInternalArray *newArray() = 0;
+      virtual ValueInternalArray *newArrayCopy( const ValueInternalArray &other ) = 0;
+      virtual void destructArray( ValueInternalArray *array ) = 0;
+      /** \brief Reallocate array page index.
+       * Reallocates the array of page pointers.
+       * \param indexes [input] pointer to the current index. May be \c NULL.
+       *                [output] pointer to the new index of at least
+       *                         \a minNewIndexCount pages.
+       * \param indexCount [input] current number of pages in the index.
+       *                   [output] number of pages the reallocated index can handle.
+       *                            \b MUST be >= \a minNewIndexCount.
+       * \param minNewIndexCount Minimum number of pages the new index must be able to
+       *                         handle.
+       */
+      virtual void reallocateArrayPageIndex( Value **&indexes,
+                                             ValueInternalArray::PageIndex &indexCount,
+                                             ValueInternalArray::PageIndex minNewIndexCount ) = 0;
+      virtual void releaseArrayPageIndex( Value **indexes,
+                                          ValueInternalArray::PageIndex indexCount ) = 0;
+      virtual Value *allocateArrayPage() = 0;
+      virtual void releaseArrayPage( Value *value ) = 0;
+   };
+#endif // #ifdef JSON_VALUE_USE_INTERNAL_MAP
+
+
+   /** \brief Base class for Value iterators.
+    *
+    */
+   class ValueIteratorBase
+   {
+   public:
+      typedef unsigned int size_t;
+      typedef int difference_type;
+      typedef ValueIteratorBase SelfType;
+
+      ValueIteratorBase();
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+      explicit ValueIteratorBase( const Value::ObjectValues::iterator &current );
+#else
+      ValueIteratorBase( const ValueInternalArray::IteratorState &state );
+      ValueIteratorBase( const ValueInternalMap::IteratorState &state );
+#endif
+
+      bool operator ==( const SelfType &other ) const
+      {
+         return isEqual( other );
+      }
+
+      bool operator !=( const SelfType &other ) const
+      {
+         return !isEqual( other );
+      }
+
+      difference_type operator -( const SelfType &other ) const
+      {
+         return computeDistance( other );
+      }
+
+      /// Return either the index or the member name of the referenced value as a Value.
+      Value key() const;
+
+      /// Return the index of the referenced Value. -1 if it is not an arrayValue.
+      UInt index() const;
+
+      /// Return the member name of the referenced Value. "" if it is not an objectValue.
+      const char *memberName() const;
+
+   protected:
+      Value &deref() const;
+
+      void increment();
+
+      void decrement();
+
+      difference_type computeDistance( const SelfType &other ) const;
+
+      bool isEqual( const SelfType &other ) const;
+
+      void copy( const SelfType &other );
+
+   private:
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+      Value::ObjectValues::iterator current_;
+      // Indicates that the iterator is for a null value.
+      bool isNull_;
+#else
+      union
+      {
+         ValueInternalArray::IteratorState array_;
+         ValueInternalMap::IteratorState map_;
+      } iterator_;
+      bool isArray_;
+#endif
+   };
+
+   /** \brief const iterator for object and array value.
+    *
+    */
+   class ValueConstIterator : public ValueIteratorBase
+   {
+      friend class Value;
+   public:
+      typedef unsigned int size_t;
+      typedef int difference_type;
+      typedef const Value &reference;
+      typedef const Value *pointer;
+      typedef ValueConstIterator SelfType;
+
+      ValueConstIterator();
+   private:
+      /*! \internal Used by Value to create an iterator.
+       */
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+      explicit ValueConstIterator( const Value::ObjectValues::iterator &current );
+#else
+      ValueConstIterator( const ValueInternalArray::IteratorState &state );
+      ValueConstIterator( const ValueInternalMap::IteratorState &state );
+#endif
+   public:
+      SelfType &operator =( const ValueIteratorBase &other );
+
+      SelfType operator++( int )
+      {
+         SelfType temp( *this );
+         ++*this;
+         return temp;
+      }
+
+      SelfType operator--( int )
+      {
+         SelfType temp( *this );
+         --*this;
+         return temp;
+      }
+
+      SelfType &operator--()
+      {
+         decrement();
+         return *this;
+      }
+
+      SelfType &operator++()
+      {
+         increment();
+         return *this;
+      }
+
+      reference operator *() const
+      {
+         return deref();
+      }
+   };
+
+
+   /** \brief Iterator for object and array value.
+    */
+   class ValueIterator : public ValueIteratorBase
+   {
+      friend class Value;
+   public:
+      typedef unsigned int size_t;
+      typedef int difference_type;
+      typedef Value &reference;
+      typedef Value *pointer;
+      typedef ValueIterator SelfType;
+
+      ValueIterator();
+      ValueIterator( const ValueConstIterator &other );
+      ValueIterator( const ValueIterator &other );
+   private:
+      /*! \internal Used by Value to create an iterator.
+       */
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+      explicit ValueIterator( const Value::ObjectValues::iterator &current );
+#else
+      ValueIterator( const ValueInternalArray::IteratorState &state );
+      ValueIterator( const ValueInternalMap::IteratorState &state );
+#endif
+   public:
+
+      SelfType &operator =( const SelfType &other );
+
+      SelfType operator++( int )
+      {
+         SelfType temp( *this );
+         ++*this;
+         return temp;
+      }
+
+      SelfType operator--( int )
+      {
+         SelfType temp( *this );
+         --*this;
+         return temp;
+      }
+
+      SelfType &operator--()
+      {
+         decrement();
+         return *this;
+      }
+
+      SelfType &operator++()
+      {
+         increment();
+         return *this;
+      }
+
+      reference operator *() const
+      {
+         return deref();
+      }
+   };
+
+
+} // namespace Json
+
+
+#endif // CPPTL_JSON_H_INCLUDED
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/writer.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/writer.h
new file mode 100644
index 0000000..47b9d13
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/inc/writer.h
@@ -0,0 +1,174 @@
+#ifndef JSON_WRITER_H_INCLUDED
+# define JSON_WRITER_H_INCLUDED
+
+# include "value.h"
+# include <vector>
+# include <string>
+# include <iosfwd>
+
+namespace Json {
+
+   class Value;
+
+   /** \brief Abstract class for writers.
+    */
+   class JSON_API Writer
+   {
+   public:
+      virtual ~Writer();
+
+      virtual std::string write( const Value &root ) = 0;
+   };
+
+   /** \brief Outputs a Value in JSON format without formatting (not human friendly).
+    *
+    * The JSON document is written on a single line. It is not intended for 'human' consumption,
+    * but may be useful to support features such as RPC where bandwidth is limited.
+    * \sa Reader, Value
+    */
+   class JSON_API FastWriter : public Writer
+   {
+   public:
+      FastWriter();
+      virtual ~FastWriter(){}
+
+      void enableYAMLCompatibility();
+
+   public: // overridden from Writer
+      virtual std::string write( const Value &root );
+
+   private:
+      void writeValue( const Value &value );
+
+      std::string document_;
+      bool yamlCompatiblityEnabled_;
+   };
+
+   /** \brief Writes a Value in JSON format in a human friendly way.
+    *
+    * The rules for line breaks and indentation are as follows:
+    * - Object value:
+    *     - if empty then print {} without indent and line break
+    *     - if not empty then print '{', line break & indent, print one value per line,
+    *       and then unindent, line break and print '}'.
+    * - Array value:
+    *     - if empty then print [] without indent and line break
+    *     - if the array contains neither object values nor non-empty arrays,
+    *       and all the values fit on one line, then print the array on a single line.
+    *     - otherwise, if the values do not fit on one line, or the array contains an
+    *       object or a non-empty array, then print one value per line.
+    *
+    * If the Value has comments then they are output according to their #CommentPlacement.
+    *
+    * \sa Reader, Value, Value::setComment()
+    */
+   class JSON_API StyledWriter: public Writer
+   {
+   public:
+      StyledWriter();
+      virtual ~StyledWriter(){}
+
+   public: // overridden from Writer
+      /** \brief Serialize a Value in JSON format.
+       * \param root Value to serialize.
+       * \return String containing the JSON document that represents the root value.
+       */
+      virtual std::string write( const Value &root );
+
+   private:
+      void writeValue( const Value &value );
+      void writeArrayValue( const Value &value );
+      bool isMultineArray( const Value &value );
+      void pushValue( const std::string &value );
+      void writeIndent();
+      void writeWithIndent( const std::string &value );
+      void indent();
+      void unindent();
+      void writeCommentBeforeValue( const Value &root );
+      void writeCommentAfterValueOnSameLine( const Value &root );
+      bool hasCommentForValue( const Value &value );
+      static std::string normalizeEOL( const std::string &text );
+
+      typedef std::vector<std::string> ChildValues;
+
+      ChildValues childValues_;
+      std::string document_;
+      std::string indentString_;
+      int rightMargin_;
+      int indentSize_;
+      bool addChildValues_;
+   };
+
+   /** \brief Writes a Value in JSON format in a human friendly way,
+        to a stream rather than to a string.
+    *
+    * The rules for line breaks and indentation are as follows:
+    * - Object value:
+    *     - if empty then print {} without indent and line break
+    *     - if not empty then print '{', line break & indent, print one value per line,
+    *       and then unindent, line break and print '}'.
+    * - Array value:
+    *     - if empty then print [] without indent and line break
+    *     - if the array contains neither object values nor non-empty arrays,
+    *       and all the values fit on one line, then print the array on a single line.
+    *     - otherwise, if the values do not fit on one line, or the array contains an
+    *       object or a non-empty array, then print one value per line.
+    *
+    * If the Value has comments then they are output according to their #CommentPlacement.
+    *
+    * \param indentation Each level will be indented by this amount extra.
+    * \sa Reader, Value, Value::setComment()
+    */
+   class JSON_API StyledStreamWriter
+   {
+   public:
+      StyledStreamWriter( std::string indentation="\t" );
+      ~StyledStreamWriter(){}
+
+   public:
+      /** \brief Serialize a Value in JSON format.
+       * \param out Stream to write to. (Can be ostringstream, e.g.)
+       * \param root Value to serialize.
+       * \note There is no point in deriving from Writer, since write() should not return a value.
+       */
+      void write( std::ostream &out, const Value &root );
+
+   private:
+      void writeValue( const Value &value );
+      void writeArrayValue( const Value &value );
+      bool isMultineArray( const Value &value );
+      void pushValue( const std::string &value );
+      void writeIndent();
+      void writeWithIndent( const std::string &value );
+      void indent();
+      void unindent();
+      void writeCommentBeforeValue( const Value &root );
+      void writeCommentAfterValueOnSameLine( const Value &root );
+      bool hasCommentForValue( const Value &value );
+      static std::string normalizeEOL( const std::string &text );
+
+      typedef std::vector<std::string> ChildValues;
+
+      ChildValues childValues_;
+      std::ostream* document_;
+      std::string indentString_;
+      int rightMargin_;
+      std::string indentation_;
+      bool addChildValues_;
+   };
+
+   std::string JSON_API valueToString( Int value );
+   std::string JSON_API valueToString( UInt value );
+   std::string JSON_API valueToString( double value );
+   std::string JSON_API valueToString( bool value );
+   std::string JSON_API valueToQuotedString( const char *value );
+
+   /// \brief Output using the StyledStreamWriter.
+   /// \see Json::operator>>()
+   std::ostream& operator<<( std::ostream&, const Value &root );
+
+} // namespace Json
+
+
+
+#endif // JSON_WRITER_H_INCLUDED
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/json_reader.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/json_reader.cpp
new file mode 100644
index 0000000..cee3c34
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/json_reader.cpp
@@ -0,0 +1,885 @@
+#include "../inc/reader.h"
+#include "../inc/value.h"
+#include <utility>
+#include <cstdio>
+#include <cassert>
+#include <cstring>
+#include <iostream>
+#include <stdexcept>
+
+#if _MSC_VER >= 1400 // VC++ 8.0
+#pragma warning( disable : 4996 )   // disable warning about strdup being deprecated.
+#endif
+
+namespace Json {
+
+// Implementation of class Features
+// ////////////////////////////////
+
+Features::Features()
+   : allowComments_( true )
+   , strictRoot_( false )
+{
+}
+
+
+Features
+Features::all()
+{
+   return Features();
+}
+
+
+Features
+Features::strictMode()
+{
+   Features features;
+   features.allowComments_ = false;
+   features.strictRoot_ = true;
+   return features;
+}
+
+// Implementation of class Reader
+// ////////////////////////////////
+
+
+static inline bool
+in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4 )
+{
+   return c == c1  ||  c == c2  ||  c == c3  ||  c == c4;
+}
+
+static inline bool
+in( Reader::Char c, Reader::Char c1, Reader::Char c2, Reader::Char c3, Reader::Char c4, Reader::Char c5 )
+{
+   return c == c1  ||  c == c2  ||  c == c3  ||  c == c4  ||  c == c5;
+}
+
+
+static bool
+containsNewLine( Reader::Location begin,
+                 Reader::Location end )
+{
+   for ( ;begin < end; ++begin )
+      if ( *begin == '\n'  ||  *begin == '\r' )
+         return true;
+   return false;
+}
+
+static std::string codePointToUTF8(unsigned int cp)
+{
+   std::string result;
+
+   // based on description from http://en.wikipedia.org/wiki/UTF-8
+
+   if (cp <= 0x7f)
+   {
+      result.resize(1);
+      result[0] = static_cast<char>(cp);
+   }
+   else if (cp <= 0x7FF)
+   {
+      result.resize(2);
+      result[1] = static_cast<char>(0x80 | (0x3f & cp));
+      result[0] = static_cast<char>(0xC0 | (0x1f & (cp >> 6)));
+   }
+   else if (cp <= 0xFFFF)
+   {
+      result.resize(3);
+      result[2] = static_cast<char>(0x80 | (0x3f & cp));
+      result[1] = 0x80 | static_cast<char>((0x3f & (cp >> 6)));
+      result[0] = 0xE0 | static_cast<char>((0xf & (cp >> 12)));
+   }
+   else if (cp <= 0x10FFFF)
+   {
+      result.resize(4);
+      result[3] = static_cast<char>(0x80 | (0x3f & cp));
+      result[2] = static_cast<char>(0x80 | (0x3f & (cp >> 6)));
+      result[1] = static_cast<char>(0x80 | (0x3f & (cp >> 12)));
+      result[0] = static_cast<char>(0xF0 | (0x7 & (cp >> 18)));
+   }
+
+   return result;
+}
+
+
+// Class Reader
+// //////////////////////////////////////////////////////////////////
+
+Reader::Reader()
+   : features_( Features::all() )
+{
+}
+
+
+Reader::Reader( const Features &features )
+   : features_( features )
+{
+}
+
+
+bool
+Reader::parse( const std::string &document,
+               Value &root,
+               bool collectComments )
+{
+   document_ = document;
+   const char *begin = document_.c_str();
+   const char *end = begin + document_.length();
+   return parse( begin, end, root, collectComments );
+}
+
+
+bool
+Reader::parse( std::istream& sin,
+               Value &root,
+               bool collectComments )
+{
+   //std::istream_iterator<char> begin(sin);
+   //std::istream_iterator<char> end;
+   // Those would allow streamed input from a file, if parse() were a
+   // template function.
+
+   // Since std::string is reference-counted, this at least does not
+   // create an extra copy.
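+   // Read the whole stream in one call: with (char)EOF as the delimiter,
+   // getline consumes everything up to end-of-stream (no such byte
+   // normally occurs in a text document).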
+   std::string doc;
+   std::getline(sin, doc, (char)EOF);
+   return parse( doc, root, collectComments );
+}
+
+bool
+Reader::parse( const char *beginDoc, const char *endDoc,
+               Value &root,
+               bool collectComments )
+{
+   if ( !features_.allowComments_ )
+   {
+      collectComments = false;
+   }
+
+   begin_ = beginDoc;
+   end_ = endDoc;
+   collectComments_ = collectComments;
+   current_ = begin_;
+   lastValueEnd_ = 0;
+   lastValue_ = 0;
+   commentsBefore_ = "";
+   errors_.clear();
+   while ( !nodes_.empty() )
+      nodes_.pop();
+   nodes_.push( &root );
+
+   bool successful = readValue();
+   Token token;
+   skipCommentTokens( token );
+   if ( collectComments_  &&  !commentsBefore_.empty() )
+      root.setComment( commentsBefore_, commentAfter );
+   if ( features_.strictRoot_ )
+   {
+      if ( !root.isArray()  &&  !root.isObject() )
+      {
+         // Set error location to start of doc, ideally should be first token found in doc
+         token.type_ = tokenError;
+         token.start_ = beginDoc;
+         token.end_ = endDoc;
+         addError( "A valid JSON document must be either an array or an object value.",
+                   token );
+         return false;
+      }
+   }
+   return successful;
+}
+
+
+bool
+Reader::readValue()
+{
+   Token token;
+   skipCommentTokens( token );
+   bool successful = true;
+
+   if ( collectComments_  &&  !commentsBefore_.empty() )
+   {
+      currentValue().setComment( commentsBefore_, commentBefore );
+      commentsBefore_ = "";
+   }
+
+
+   switch ( token.type_ )
+   {
+   case tokenObjectBegin:
+      successful = readObject( token );
+      break;
+   case tokenArrayBegin:
+      successful = readArray( token );
+      break;
+   case tokenNumber:
+      successful = decodeNumber( token );
+      break;
+   case tokenString:
+      successful = decodeString( token );
+      break;
+   case tokenTrue:
+      currentValue() = true;
+      break;
+   case tokenFalse:
+      currentValue() = false;
+      break;
+   case tokenNull:
+      currentValue() = Value();
+      break;
+   default:
+      return addError( "Syntax error: value, object or array expected.", token );
+   }
+
+   if ( collectComments_ )
+   {
+      lastValueEnd_ = current_;
+      lastValue_ = &currentValue();
+   }
+
+   return successful;
+}
+
+
+void
+Reader::skipCommentTokens( Token &token )
+{
+   if ( features_.allowComments_ )
+   {
+      do
+      {
+         readToken( token );
+      }
+      while ( token.type_ == tokenComment );
+   }
+   else
+   {
+      readToken( token );
+   }
+}
+
+
+bool
+Reader::expectToken( TokenType type, Token &token, const char *message )
+{
+   readToken( token );
+   if ( token.type_ != type )
+      return addError( message, token );
+   return true;
+}
+
+
+bool
+Reader::readToken( Token &token )
+{
+   skipSpaces();
+   token.start_ = current_;
+   Char c = getNextChar();
+   bool ok = true;
+   switch ( c )
+   {
+   case '{':
+      token.type_ = tokenObjectBegin;
+      break;
+   case '}':
+      token.type_ = tokenObjectEnd;
+      break;
+   case '[':
+      token.type_ = tokenArrayBegin;
+      break;
+   case ']':
+      token.type_ = tokenArrayEnd;
+      break;
+   case '"':
+      token.type_ = tokenString;
+      ok = readString();
+      break;
+   case '/':
+      token.type_ = tokenComment;
+      ok = readComment();
+      break;
+   case '0':
+   case '1':
+   case '2':
+   case '3':
+   case '4':
+   case '5':
+   case '6':
+   case '7':
+   case '8':
+   case '9':
+   case '-':
+      token.type_ = tokenNumber;
+      readNumber();
+      break;
+   case 't':
+      token.type_ = tokenTrue;
+      ok = match( "rue", 3 );
+      break;
+   case 'f':
+      token.type_ = tokenFalse;
+      ok = match( "alse", 4 );
+      break;
+   case 'n':
+      token.type_ = tokenNull;
+      ok = match( "ull", 3 );
+      break;
+   case ',':
+      token.type_ = tokenArraySeparator;
+      break;
+   case ':':
+      token.type_ = tokenMemberSeparator;
+      break;
+   case 0:
+      token.type_ = tokenEndOfStream;
+      break;
+   default:
+      ok =
false; + break; + } + if ( !ok ) + token.type_ = tokenError; + token.end_ = current_; + return true; +} + + +void +Reader::skipSpaces() +{ + while ( current_ != end_ ) + { + Char c = *current_; + if ( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) + ++current_; + else + break; + } +} + + +bool +Reader::match( Location pattern, + int patternLength ) +{ + if ( end_ - current_ < patternLength ) + return false; + int index = patternLength; + while ( index-- ) + if ( current_[index] != pattern[index] ) + return false; + current_ += patternLength; + return true; +} + + +bool +Reader::readComment() +{ + Location commentBegin = current_ - 1; + Char c = getNextChar(); + bool successful = false; + if ( c == '*' ) + successful = readCStyleComment(); + else if ( c == '/' ) + successful = readCppStyleComment(); + if ( !successful ) + return false; + + if ( collectComments_ ) + { + CommentPlacement placement = commentBefore; + if ( lastValueEnd_ && !containsNewLine( lastValueEnd_, commentBegin ) ) + { + if ( c != '*' || !containsNewLine( commentBegin, current_ ) ) + placement = commentAfterOnSameLine; + } + + addComment( commentBegin, current_, placement ); + } + return true; +} + + +void +Reader::addComment( Location begin, + Location end, + CommentPlacement placement ) +{ + assert( collectComments_ ); + if ( placement == commentAfterOnSameLine ) + { + assert( lastValue_ != 0 ); + lastValue_->setComment( std::string( begin, end ), placement ); + } + else + { + if ( !commentsBefore_.empty() ) + commentsBefore_ += "\n"; + commentsBefore_ += std::string( begin, end ); + } +} + + +bool +Reader::readCStyleComment() +{ + while ( current_ != end_ ) + { + Char c = getNextChar(); + if ( c == '*' && *current_ == '/' ) + break; + } + return getNextChar() == '/'; +} + + +bool +Reader::readCppStyleComment() +{ + while ( current_ != end_ ) + { + Char c = getNextChar(); + if ( c == '\r' || c == '\n' ) + break; + } + return true; +} + + +void +Reader::readNumber() +{ + while ( current_ != end_ ) + { + if ( !(*current_ >= '0' && *current_ <= '9') && + !in( *current_, '.', 'e', 'E', '+', '-' ) ) + break; + ++current_; + } +} + +bool +Reader::readString() +{ + Char c = 0; + while ( current_ != end_ ) + { + c = getNextChar(); + if ( c == '\\' ) + getNextChar(); + else if ( c == '"' ) + break; + } + return c == '"'; +} + + +bool +Reader::readObject( Token &tokenStart ) +{ + Token tokenName; + std::string name; + currentValue() = Value( objectValue ); + while ( readToken( tokenName ) ) + { + bool initialTokenOk = true; + while ( tokenName.type_ == tokenComment && initialTokenOk ) + initialTokenOk = readToken( tokenName ); + if ( !initialTokenOk ) + break; + if ( tokenName.type_ == tokenObjectEnd && name.empty() ) // empty object + return true; + if ( tokenName.type_ != tokenString ) + break; + + name = ""; + if ( !decodeString( tokenName, name ) ) + return recoverFromError( tokenObjectEnd ); + + Token colon; + if ( !readToken( colon ) || colon.type_ != tokenMemberSeparator ) + { + return addErrorAndRecover( "Missing ':' after object member name", + colon, + tokenObjectEnd ); + } + Value &value = currentValue()[ name ]; + nodes_.push( &value ); + bool ok = readValue(); + nodes_.pop(); + if ( !ok ) // error already set + return recoverFromError( tokenObjectEnd ); + + Token comma; + if ( !readToken( comma ) + || ( comma.type_ != tokenObjectEnd && + comma.type_ != tokenArraySeparator && + comma.type_ != tokenComment ) ) + { + return addErrorAndRecover( "Missing ',' or '}' in object declaration", + comma, + tokenObjectEnd ); 
+      }
+      bool finalizeTokenOk = true;
+      while ( comma.type_ == tokenComment &&
+              finalizeTokenOk )
+         finalizeTokenOk = readToken( comma );
+      if ( comma.type_ == tokenObjectEnd )
+         return true;
+   }
+   return addErrorAndRecover( "Missing '}' or object member name",
+                              tokenName,
+                              tokenObjectEnd );
+}
+
+
+bool
+Reader::readArray( Token &tokenStart )
+{
+   currentValue() = Value( arrayValue );
+   skipSpaces();
+   if ( *current_ == ']' ) // empty array
+   {
+      Token endArray;
+      readToken( endArray );
+      return true;
+   }
+   int index = 0;
+   while ( true )
+   {
+      Value &value = currentValue()[ index++ ];
+      nodes_.push( &value );
+      bool ok = readValue();
+      nodes_.pop();
+      if ( !ok ) // error already set
+         return recoverFromError( tokenArrayEnd );
+
+      Token token;
+      // Accept Comment after last item in the array.
+      ok = readToken( token );
+      while ( token.type_ == tokenComment  &&  ok )
+      {
+         ok = readToken( token );
+      }
+      bool badTokenType = ( token.type_ != tokenArraySeparator  &&
+                            token.type_ != tokenArrayEnd );
+      if ( !ok  ||  badTokenType )
+      {
+         return addErrorAndRecover( "Missing ',' or ']' in array declaration",
+                                    token,
+                                    tokenArrayEnd );
+      }
+      if ( token.type_ == tokenArrayEnd )
+         break;
+   }
+   return true;
+}
+
+
+bool
+Reader::decodeNumber( Token &token )
+{
+   bool isDouble = false;
+   for ( Location inspect = token.start_; inspect != token.end_; ++inspect )
+   {
+      isDouble = isDouble
+                 ||  in( *inspect, '.', 'e', 'E', '+' )
+                 ||  ( *inspect == '-'  &&  inspect != token.start_ );
+   }
+   if ( isDouble )
+      return decodeDouble( token );
+   Location current = token.start_;
+   bool isNegative = *current == '-';
+   if ( isNegative )
+      ++current;
+   Value::UInt threshold = (isNegative ? Value::UInt(-Value::minInt)
+                                       : Value::maxUInt) / 10;
+   Value::UInt value = 0;
+   while ( current < token.end_ )
+   {
+      Char c = *current++;
+      if ( c < '0'  ||  c > '9' )
+         return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
+      if ( value >= threshold )
+         return decodeDouble( token );
+      value = value * 10 + Value::UInt(c - '0');
+   }
+   if ( isNegative )
+      currentValue() = -Value::Int( value );
+   else if ( value <= Value::UInt(Value::maxInt) )
+      currentValue() = Value::Int( value );
+   else
+      currentValue() = value;
+   return true;
+}
+
+
+bool
+Reader::decodeDouble( Token &token )
+{
+   double value = 0;
+   const int bufferSize = 32;
+   int count;
+   int length = int(token.end_ - token.start_);
+   if ( length <= bufferSize )
+   {
+      Char buffer[bufferSize+1];
+      memcpy( buffer, token.start_, length );
+      buffer[length] = 0;
+      count = sscanf( buffer, "%lf", &value );
+   }
+   else
+   {
+      std::string buffer( token.start_, token.end_ );
+      count = sscanf( buffer.c_str(), "%lf", &value );
+   }
+
+   if ( count != 1 )
+      return addError( "'" + std::string( token.start_, token.end_ ) + "' is not a number.", token );
+   currentValue() = value;
+   return true;
+}
+
+
+bool
+Reader::decodeString( Token &token )
+{
+   std::string decoded;
+   if ( !decodeString( token, decoded ) )
+      return false;
+   currentValue() = decoded;
+   return true;
+}
+
+
+bool
+Reader::decodeString( Token &token, std::string &decoded )
+{
+   decoded.reserve( token.end_ - token.start_ - 2 );
+   Location current = token.start_ + 1; // skip '"'
+   Location end = token.end_ - 1;       // do not include '"'
+   while ( current != end )
+   {
+      Char c = *current++;
+      if ( c == '"' )
+         break;
+      else if ( c == '\\' )
+      {
+         if ( current == end )
+            return addError( "Empty escape sequence in string", token, current );
+         Char escape = *current++;
+         switch ( escape )
+         {
+         case '"':
decoded += '"'; break; + case '/': decoded += '/'; break; + case '\\': decoded += '\\'; break; + case 'b': decoded += '\b'; break; + case 'f': decoded += '\f'; break; + case 'n': decoded += '\n'; break; + case 'r': decoded += '\r'; break; + case 't': decoded += '\t'; break; + case 'u': + { + unsigned int unicode; + if ( !decodeUnicodeCodePoint( token, current, end, unicode ) ) + return false; + decoded += codePointToUTF8(unicode); + } + break; + default: + return addError( "Bad escape sequence in string", token, current ); + } + } + else + { + decoded += c; + } + } + return true; +} + +bool +Reader::decodeUnicodeCodePoint( Token &token, + Location ¤t, + Location end, + unsigned int &unicode ) +{ + + if ( !decodeUnicodeEscapeSequence( token, current, end, unicode ) ) + return false; + if (unicode >= 0xD800 && unicode <= 0xDBFF) + { + // surrogate pairs + if (end - current < 6) + return addError( "additional six characters expected to parse unicode surrogate pair.", token, current ); + unsigned int surrogatePair; + if (*(current++) == '\\' && *(current++)== 'u') + { + if (decodeUnicodeEscapeSequence( token, current, end, surrogatePair )) + { + unicode = 0x10000 + ((unicode & 0x3FF) << 10) + (surrogatePair & 0x3FF); + } + else + return false; + } + else + return addError( "expecting another \\u token to begin the second half of a unicode surrogate pair", token, current ); + } + return true; +} + +bool +Reader::decodeUnicodeEscapeSequence( Token &token, + Location ¤t, + Location end, + unsigned int &unicode ) +{ + if ( end - current < 4 ) + return addError( "Bad unicode escape sequence in string: four digits expected.", token, current ); + unicode = 0; + for ( int index =0; index < 4; ++index ) + { + Char c = *current++; + unicode *= 16; + if ( c >= '0' && c <= '9' ) + unicode += c - '0'; + else if ( c >= 'a' && c <= 'f' ) + unicode += c - 'a' + 10; + else if ( c >= 'A' && c <= 'F' ) + unicode += c - 'A' + 10; + else + return addError( "Bad unicode escape sequence in string: hexadecimal digit expected.", token, current ); + } + return true; +} + + +bool +Reader::addError( const std::string &message, + Token &token, + Location extra ) +{ + ErrorInfo info; + info.token_ = token; + info.message_ = message; + info.extra_ = extra; + errors_.push_back( info ); + return false; +} + + +bool +Reader::recoverFromError( TokenType skipUntilToken ) +{ + int errorCount = int(errors_.size()); + Token skip; + while ( true ) + { + if ( !readToken(skip) ) + errors_.resize( errorCount ); // discard errors caused by recovery + if ( skip.type_ == skipUntilToken || skip.type_ == tokenEndOfStream ) + break; + } + errors_.resize( errorCount ); + return false; +} + + +bool +Reader::addErrorAndRecover( const std::string &message, + Token &token, + TokenType skipUntilToken ) +{ + addError( message, token ); + return recoverFromError( skipUntilToken ); +} + + +Value & +Reader::currentValue() +{ + return *(nodes_.top()); +} + + +Reader::Char +Reader::getNextChar() +{ + if ( current_ == end_ ) + return 0; + return *current_++; +} + + +void +Reader::getLocationLineAndColumn( Location location, + int &line, + int &column ) const +{ + Location current = begin_; + Location lastLineStart = current; + line = 0; + while ( current < location && current != end_ ) + { + Char c = *current++; + if ( c == '\r' ) + { + if ( *current == '\n' ) + ++current; + lastLineStart = current; + ++line; + } + else if ( c == '\n' ) + { + lastLineStart = current; + ++line; + } + } + // column & line start at 1 + column = int(location - lastLineStart) 
+ 1;
+   ++line;
+}
+
+
+std::string
+Reader::getLocationLineAndColumn( Location location ) const
+{
+   int line, column;
+   getLocationLineAndColumn( location, line, column );
+   char buffer[18+16+16+1];
+   sprintf( buffer, "Line %d, Column %d", line, column );
+   return buffer;
+}
+
+
+std::string
+Reader::getFormatedErrorMessages() const
+{
+   std::string formattedMessage;
+   for ( Errors::const_iterator itError = errors_.begin();
+         itError != errors_.end();
+         ++itError )
+   {
+      const ErrorInfo &error = *itError;
+      formattedMessage += "* " + getLocationLineAndColumn( error.token_.start_ ) + "\n";
+      formattedMessage += "  " + error.message_ + "\n";
+      if ( error.extra_ )
+         formattedMessage += "See " + getLocationLineAndColumn( error.extra_ ) + " for detail.\n";
+   }
+   return formattedMessage;
+}
+
+
+std::istream& operator>>( std::istream &sin, Value &root )
+{
+    Json::Reader reader;
+    bool ok = reader.parse(sin, root, true);
+    //JSON_ASSERT( ok );
+    if (!ok) throw std::runtime_error(reader.getFormatedErrorMessages());
+    return sin;
+}
+
+
+} // namespace Json
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/json_value.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/json_value.cpp
new file mode 100644
index 0000000..9fd32e0
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/json_value.cpp
@@ -0,0 +1,1718 @@
+#include <iostream>
+#include "../inc/value.h"
+#include "../inc/writer.h"
+#include <utility>
+#include <stdexcept>
+#include <cstring>
+#include <cassert>
+#ifdef JSON_USE_CPPTL
+# include <cpptl/conststring.h>
+#endif
+#include <cstddef>    // size_t
+#ifndef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR
+# include "../inc/json_batchallocator.h"
+#endif // #ifndef JSON_USE_SIMPLE_INTERNAL_ALLOCATOR
+
+#define JSON_ASSERT_UNREACHABLE assert( false )
+#define JSON_ASSERT( condition ) assert( condition );  // @todo <= change this into an exception throw
+#define JSON_ASSERT_MESSAGE( condition, message ) if (!( condition )) throw std::runtime_error( message );
+
+namespace Json {
+
+const Value Value::null;
+const Int Value::minInt = Int( ~(UInt(-1)/2) );
+const Int Value::maxInt = Int( UInt(-1)/2 );
+const UInt Value::maxUInt = UInt(-1);
+
+// A "safe" implementation of strdup. Allow null pointer to be passed.
+// Also avoid warning on msvc80.
+//
+//inline char *safeStringDup( const char *czstring )
+//{
+//   if ( czstring )
+//   {
+//      const size_t length = (unsigned int)( strlen(czstring) + 1 );
+//      char *newString = static_cast<char *>( malloc( length ) );
+//      memcpy( newString, czstring, length );
+//      return newString;
+//   }
+//   return 0;
+//}
+//
+//inline char *safeStringDup( const std::string &str )
+//{
+//   if ( !str.empty() )
+//   {
+//      const size_t length = str.length();
+//      char *newString = static_cast<char *>( malloc( length + 1 ) );
+//      memcpy( newString, str.c_str(), length );
+//      newString[length] = 0;
+//      return newString;
+//   }
+//   return 0;
+//}
+
+ValueAllocator::~ValueAllocator()
+{
+}
+
+class DefaultValueAllocator : public ValueAllocator
+{
+public:
+   virtual ~DefaultValueAllocator()
+   {
+   }
+
+   virtual char *makeMemberName( const char *memberName )
+   {
+      return duplicateStringValue( memberName );
+   }
+
+   virtual void releaseMemberName( char *memberName )
+   {
+      releaseStringValue( memberName );
+   }
+
+   virtual char *duplicateStringValue( const char *value,
+                                       unsigned int length = unknown )
+   {
+      //@todo investigate this old optimization
+      //if ( !value  ||  value[0] == 0 )
+      //   return 0;
+
+      if ( length == unknown )
+         length = (unsigned int)strlen(value);
+      char *newString = static_cast<char *>( malloc( length + 1 ) );
+      memcpy( newString, value, length );
+      newString[length] = 0;
+      return newString;
+   }
+
+   virtual void releaseStringValue( char *value )
+   {
+      if ( value )
+         free( value );
+   }
+};
+
+static ValueAllocator *&valueAllocator()
+{
+   static DefaultValueAllocator defaultAllocator;
+   static ValueAllocator *valueAllocator = &defaultAllocator;
+   return valueAllocator;
+}
+
+static struct DummyValueAllocatorInitializer {
+   DummyValueAllocatorInitializer()
+   {
+      valueAllocator();      // ensure valueAllocator() statics are initialized before main().
+   }
+} dummyValueAllocatorInitializer;
+
+
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// ValueInternals...
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+#ifdef JSON_VALUE_USE_INTERNAL_MAP
+# include "json_internalarray.inl"
+# include "json_internalmap.inl"
+#endif // JSON_VALUE_USE_INTERNAL_MAP
+
+# include "../inc/json_valueiterator.inl"
+
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// class Value::CommentInfo
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+
+
+Value::CommentInfo::CommentInfo()
+   : comment_( 0 )
+{
+}
+
+Value::CommentInfo::~CommentInfo()
+{
+   if ( comment_ )
+      valueAllocator()->releaseStringValue( comment_ );
+}
+
+
+void
+Value::CommentInfo::setComment( const char *text )
+{
+   if ( comment_ )
+      valueAllocator()->releaseStringValue( comment_ );
+   JSON_ASSERT( text );
+   JSON_ASSERT_MESSAGE( text[0]=='\0' || text[0]=='/', "Comments must start with /");
+   // It seems that /**/ style comments are acceptable as well.
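+   // Store a private copy of the text; the previous comment, if any, was
+   // released above.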
+   comment_ = valueAllocator()->duplicateStringValue( text );
+}
+
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// class Value::CZString
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+# ifndef JSON_VALUE_USE_INTERNAL_MAP
+
+// Notes: when a string is stored, index_ indicates whether the string
+// was allocated.
+
+Value::CZString::CZString( int index )
+   : cstr_( 0 )
+   , index_( index )
+{
+}
+
+Value::CZString::CZString( const char *cstr, DuplicationPolicy allocate )
+   : cstr_( allocate == duplicate ? valueAllocator()->makeMemberName(cstr)
+                                  : cstr )
+   , index_( allocate )
+{
+}
+
+Value::CZString::CZString( const CZString &other )
+: cstr_( other.index_ != noDuplication  &&  other.cstr_ != 0
+                ? valueAllocator()->makeMemberName( other.cstr_ )
+                : other.cstr_ )
+   , index_( other.cstr_ ? (other.index_ == noDuplication ? noDuplication : duplicate)
+                         : other.index_ )
+{
+}
+
+Value::CZString::~CZString()
+{
+   if ( cstr_  &&  index_ == duplicate )
+      valueAllocator()->releaseMemberName( const_cast<char *>( cstr_ ) );
+}
+
+void
+Value::CZString::swap( CZString &other )
+{
+   std::swap( cstr_, other.cstr_ );
+   std::swap( index_, other.index_ );
+}
+
+Value::CZString &
+Value::CZString::operator =( const CZString &other )
+{
+   CZString temp( other );
+   swap( temp );
+   return *this;
+}
+
+bool
+Value::CZString::operator<( const CZString &other ) const
+{
+   if ( cstr_ )
+      return strcmp( cstr_, other.cstr_ ) < 0;
+   return index_ < other.index_;
+}
+
+bool
+Value::CZString::operator==( const CZString &other ) const
+{
+   if ( cstr_ )
+      return strcmp( cstr_, other.cstr_ ) == 0;
+   return index_ == other.index_;
+}
+
+
+int
+Value::CZString::index() const
+{
+   return index_;
+}
+
+
+const char *
+Value::CZString::c_str() const
+{
+   return cstr_;
+}
+
+bool
+Value::CZString::isStaticString() const
+{
+   return index_ == noDuplication;
+}
+
+#endif // ifndef JSON_VALUE_USE_INTERNAL_MAP
+
+
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// class Value::Value
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+// //////////////////////////////////////////////////////////////////
+
+/*! \internal Default constructor initialization must be equivalent to:
+ * memset( this, 0, sizeof(Value) )
+ * This optimization is used in ValueInternalMap fast allocator.
+ */
+Value::Value( ValueType type )
+   : type_( type )
+   , allocated_( 0 )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   switch ( type )
+   {
+   case nullValue:
+      break;
+   case intValue:
+   case uintValue:
+      value_.int_ = 0;
+      break;
+   case realValue:
+      value_.real_ = 0.0;
+      break;
+   case stringValue:
+      value_.string_ = 0;
+      break;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+   case objectValue:
+      value_.map_ = new ObjectValues();
+      break;
+#else
+   case arrayValue:
+      value_.array_ = arrayAllocator()->newArray();
+      break;
+   case objectValue:
+      value_.map_ = mapAllocator()->newMap();
+      break;
+#endif
+   case booleanValue:
+      value_.bool_ = false;
+      break;
+   default:
+      JSON_ASSERT_UNREACHABLE;
+   }
+}
+
+
+Value::Value( Int value )
+   : type_( intValue )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   value_.int_ = value;
+}
+
+
+Value::Value( UInt value )
+   : type_( uintValue )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   value_.uint_ = value;
+}
+
+Value::Value( double value )
+   : type_( realValue )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   value_.real_ = value;
+}
+
+Value::Value( const char *value )
+   : type_( stringValue )
+   , allocated_( true )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   value_.string_ = valueAllocator()->duplicateStringValue( value );
+}
+
+
+Value::Value( const char *beginValue,
+              const char *endValue )
+   : type_( stringValue )
+   , allocated_( true )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   value_.string_ = valueAllocator()->duplicateStringValue( beginValue,
+                                                            UInt(endValue - beginValue) );
+}
+
+
+Value::Value( const std::string &value )
+   : type_( stringValue )
+   , allocated_( true )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   value_.string_ = valueAllocator()->duplicateStringValue( value.c_str(),
+                                                            (unsigned int)value.length() );
+
+}
+
+Value::Value( const StaticString &value )
+   : type_( stringValue )
+   , allocated_( false )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   value_.string_ = const_cast<char *>( value.c_str() );
+}
+
+
+# ifdef JSON_USE_CPPTL
+Value::Value( const CppTL::ConstString &value )
+   : type_( stringValue )
+   , allocated_( true )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   value_.string_ = valueAllocator()->duplicateStringValue( value, value.length() );
+}
+# endif
+
+Value::Value( bool value )
+   : type_( booleanValue )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   value_.bool_ = value;
+}
+
+
+Value::Value( const Value &other )
+   : type_( other.type_ )
+   , comments_( 0 )
+# ifdef JSON_VALUE_USE_INTERNAL_MAP
+   , itemIsUsed_( 0 )
+#endif
+{
+   switch ( type_ )
+   {
+   case nullValue:
+   case intValue:
+   case uintValue:
+   case realValue:
+   case booleanValue:
+      value_ = other.value_;
+      break;
+   case stringValue:
+      if ( other.value_.string_ )
+      {
+         value_.string_ = valueAllocator()->duplicateStringValue( other.value_.string_ );
+         allocated_ = true;
+      }
+      else
+         value_.string_ = 0;
+      break;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   case arrayValue:
+   case objectValue:
+      value_.map_ = new ObjectValues( *other.value_.map_ );
+      break;
+#else
+   case arrayValue:
+      value_.array_ = arrayAllocator()->newArrayCopy(
*other.value_.array_ ); + break; + case objectValue: + value_.map_ = mapAllocator()->newMapCopy( *other.value_.map_ ); + break; +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + if ( other.comments_ ) + { + comments_ = new CommentInfo[numberOfCommentPlacement]; + for ( int comment =0; comment < numberOfCommentPlacement; ++comment ) + { + const CommentInfo &otherComment = other.comments_[comment]; + if ( otherComment.comment_ ) + comments_[comment].setComment( otherComment.comment_ ); + } + } +} + + +Value::~Value() +{ + switch ( type_ ) + { + case nullValue: + case intValue: + case uintValue: + case realValue: + case booleanValue: + break; + case stringValue: + if ( allocated_ ) + valueAllocator()->releaseStringValue( value_.string_ ); + break; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + delete value_.map_; + break; +#else + case arrayValue: + arrayAllocator()->destructArray( value_.array_ ); + break; + case objectValue: + mapAllocator()->destructMap( value_.map_ ); + break; +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + + if ( comments_ ) + delete[] comments_; +} + +Value & +Value::operator=( const Value &other ) +{ + Value temp( other ); + swap( temp ); + return *this; +} + +void +Value::swap( Value &other ) +{ + ValueType temp = type_; + type_ = other.type_; + other.type_ = temp; + std::swap( value_, other.value_ ); + int temp2 = allocated_; + allocated_ = other.allocated_; + other.allocated_ = temp2; +} + +ValueType +Value::type() const +{ + return type_; +} + + +int +Value::compare( const Value &other ) +{ + /* + int typeDelta = other.type_ - type_; + switch ( type_ ) + { + case nullValue: + + return other.type_ == type_; + case intValue: + if ( other.type_.isNumeric() + case uintValue: + case realValue: + case booleanValue: + break; + case stringValue, + break; + case arrayValue: + delete value_.array_; + break; + case objectValue: + delete value_.map_; + default: + JSON_ASSERT_UNREACHABLE; + } + */ + return 0; // unreachable +} + +bool +Value::operator <( const Value &other ) const +{ + int typeDelta = type_ - other.type_; + if ( typeDelta ) + return typeDelta < 0 ? 
true : false; + switch ( type_ ) + { + case nullValue: + return false; + case intValue: + return value_.int_ < other.value_.int_; + case uintValue: + return value_.uint_ < other.value_.uint_; + case realValue: + return value_.real_ < other.value_.real_; + case booleanValue: + return value_.bool_ < other.value_.bool_; + case stringValue: + return ( value_.string_ == 0 && other.value_.string_ ) + || ( other.value_.string_ + && value_.string_ + && strcmp( value_.string_, other.value_.string_ ) < 0 ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + { + int delta = int( value_.map_->size() - other.value_.map_->size() ); + if ( delta ) + return delta < 0; + return (*value_.map_) < (*other.value_.map_); + } +#else + case arrayValue: + return value_.array_->compare( *(other.value_.array_) ) < 0; + case objectValue: + return value_.map_->compare( *(other.value_.map_) ) < 0; +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable +} + +bool +Value::operator <=( const Value &other ) const +{ + return !(other > *this); +} + +bool +Value::operator >=( const Value &other ) const +{ + return !(*this < other); +} + +bool +Value::operator >( const Value &other ) const +{ + return other < *this; +} + +bool +Value::operator ==( const Value &other ) const +{ + //if ( type_ != other.type_ ) + // GCC 2.95.3 says: + // attempt to take address of bit-field structure member `Json::Value::type_' + // Beats me, but a temp solves the problem. + int temp = other.type_; + if ( type_ != temp ) + return false; + switch ( type_ ) + { + case nullValue: + return true; + case intValue: + return value_.int_ == other.value_.int_; + case uintValue: + return value_.uint_ == other.value_.uint_; + case realValue: + return value_.real_ == other.value_.real_; + case booleanValue: + return value_.bool_ == other.value_.bool_; + case stringValue: + return ( value_.string_ == other.value_.string_ ) + || ( other.value_.string_ + && value_.string_ + && strcmp( value_.string_, other.value_.string_ ) == 0 ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + return value_.map_->size() == other.value_.map_->size() + && (*value_.map_) == (*other.value_.map_); +#else + case arrayValue: + return value_.array_->compare( *(other.value_.array_) ) == 0; + case objectValue: + return value_.map_->compare( *(other.value_.map_) ) == 0; +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable +} + +bool +Value::operator !=( const Value &other ) const +{ + return !( *this == other ); +} + +const char * +Value::asCString() const +{ + JSON_ASSERT( type_ == stringValue ); + return value_.string_; +} + + +std::string +Value::asString() const +{ + switch ( type_ ) + { + case nullValue: + return ""; + case stringValue: + return value_.string_ ? value_.string_ : ""; + case booleanValue: + return value_.bool_ ? 
"true" : "false"; + case intValue: + case uintValue: + case realValue: + case arrayValue: + case objectValue: + JSON_ASSERT_MESSAGE( false, "Type is not convertible to string" ); + default: + JSON_ASSERT_UNREACHABLE; + } + return ""; // unreachable +} + +# ifdef JSON_USE_CPPTL +CppTL::ConstString +Value::asConstString() const +{ + return CppTL::ConstString( asString().c_str() ); +} +# endif + +Value::Int +Value::asInt() const +{ + switch ( type_ ) + { + case nullValue: + return 0; + case intValue: + return value_.int_; + case uintValue: + JSON_ASSERT_MESSAGE( value_.uint_ < (unsigned)maxInt, "integer out of signed integer range" ); + return value_.uint_; + case realValue: + JSON_ASSERT_MESSAGE( value_.real_ >= minInt && value_.real_ <= maxInt, "Real out of signed integer range" ); + return Int( value_.real_ ); + case booleanValue: + return value_.bool_ ? 1 : 0; + case stringValue: + case arrayValue: + case objectValue: + JSON_ASSERT_MESSAGE( false, "Type is not convertible to int" ); + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable; +} + +Value::UInt +Value::asUInt() const +{ + switch ( type_ ) + { + case nullValue: + return 0; + case intValue: + JSON_ASSERT_MESSAGE( value_.int_ >= 0, "Negative integer can not be converted to unsigned integer" ); + return value_.int_; + case uintValue: + return value_.uint_; + case realValue: + JSON_ASSERT_MESSAGE( value_.real_ >= 0 && value_.real_ <= maxUInt, "Real out of unsigned integer range" ); + return UInt( value_.real_ ); + case booleanValue: + return value_.bool_ ? 1 : 0; + case stringValue: + case arrayValue: + case objectValue: + JSON_ASSERT_MESSAGE( false, "Type is not convertible to uint" ); + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable; +} + +double +Value::asDouble() const +{ + switch ( type_ ) + { + case nullValue: + return 0.0; + case intValue: + return value_.int_; + case uintValue: + return value_.uint_; + case realValue: + return value_.real_; + case booleanValue: + return value_.bool_ ? 
1.0 : 0.0; + case stringValue: + case arrayValue: + case objectValue: + JSON_ASSERT_MESSAGE( false, "Type is not convertible to double" ); + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable; +} + +bool +Value::asBool() const +{ + switch ( type_ ) + { + case nullValue: + return false; + case intValue: + case uintValue: + return value_.int_ != 0; + case realValue: + return value_.real_ != 0.0; + case booleanValue: + return value_.bool_; + case stringValue: + return value_.string_ && value_.string_[0] != 0; + case arrayValue: + case objectValue: + return value_.map_->size() != 0; + default: + JSON_ASSERT_UNREACHABLE; + } + return false; // unreachable; +} + + +bool +Value::isConvertibleTo( ValueType other ) const +{ + switch ( type_ ) + { + case nullValue: + return true; + case intValue: + return ( other == nullValue && value_.int_ == 0 ) + || other == intValue + || ( other == uintValue && value_.int_ >= 0 ) + || other == realValue + || other == stringValue + || other == booleanValue; + case uintValue: + return ( other == nullValue && value_.uint_ == 0 ) + || ( other == intValue && value_.uint_ <= (unsigned)maxInt ) + || other == uintValue + || other == realValue + || other == stringValue + || other == booleanValue; + case realValue: + return ( other == nullValue && value_.real_ == 0.0 ) + || ( other == intValue && value_.real_ >= minInt && value_.real_ <= maxInt ) + || ( other == uintValue && value_.real_ >= 0 && value_.real_ <= maxUInt ) + || other == realValue + || other == stringValue + || other == booleanValue; + case booleanValue: + return ( other == nullValue && value_.bool_ == false ) + || other == intValue + || other == uintValue + || other == realValue + || other == stringValue + || other == booleanValue; + case stringValue: + return other == stringValue + || ( other == nullValue && (!value_.string_ || value_.string_[0] == 0) ); + case arrayValue: + return other == arrayValue + || ( other == nullValue && value_.map_->size() == 0 ); + case objectValue: + return other == objectValue + || ( other == nullValue && value_.map_->size() == 0 ); + default: + JSON_ASSERT_UNREACHABLE; + } + return false; // unreachable; +} + + +/// Number of values in array or object +Value::UInt +Value::size() const +{ + switch ( type_ ) + { + case nullValue: + case intValue: + case uintValue: + case realValue: + case booleanValue: + case stringValue: + return 0; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: // size of the array is highest index + 1 + if ( !value_.map_->empty() ) + { + ObjectValues::const_iterator itLast = value_.map_->end(); + --itLast; + return (*itLast).first.index()+1; + } + return 0; + case objectValue: + return Int( value_.map_->size() ); +#else + case arrayValue: + return Int( value_.array_->size() ); + case objectValue: + return Int( value_.map_->size() ); +#endif + default: + JSON_ASSERT_UNREACHABLE; + } + return 0; // unreachable; +} + + +bool +Value::empty() const +{ + if ( isNull() || isArray() || isObject() ) + return size() == 0u; + else + return false; +} + + +bool +Value::operator!() const +{ + return isNull(); +} + + +void +Value::clear() +{ + JSON_ASSERT( type_ == nullValue || type_ == arrayValue || type_ == objectValue ); + + switch ( type_ ) + { +#ifndef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + case objectValue: + value_.map_->clear(); + break; +#else + case arrayValue: + value_.array_->clear(); + break; + case objectValue: + value_.map_->clear(); + break; +#endif + default: + break; + } +} + +void +Value::resize( UInt newSize ) +{ + 
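+   // Only null and array values can be resized; a null value is first
+   // promoted to an empty array. With the std::map representation, growing
+   // just touches index newSize - 1, which default-creates that element.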
JSON_ASSERT( type_ == nullValue || type_ == arrayValue ); + if ( type_ == nullValue ) + *this = Value( arrayValue ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + UInt oldSize = size(); + if ( newSize == 0 ) + clear(); + else if ( newSize > oldSize ) + (*this)[ newSize - 1 ]; + else + { + for ( UInt index = newSize; index < oldSize; ++index ) + value_.map_->erase( index ); + assert( size() == newSize ); + } +#else + value_.array_->resize( newSize ); +#endif +} + + +Value & +Value::operator[]( UInt index ) +{ + JSON_ASSERT( type_ == nullValue || type_ == arrayValue ); + if ( type_ == nullValue ) + *this = Value( arrayValue ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + CZString key( index ); + ObjectValues::iterator it = value_.map_->lower_bound( key ); + if ( it != value_.map_->end() && (*it).first == key ) + return (*it).second; + + ObjectValues::value_type defaultValue( key, null ); + it = value_.map_->insert( it, defaultValue ); + return (*it).second; +#else + return value_.array_->resolveReference( index ); +#endif +} + + +const Value & +Value::operator[]( UInt index ) const +{ + JSON_ASSERT( type_ == nullValue || type_ == arrayValue ); + if ( type_ == nullValue ) + return null; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + CZString key( index ); + ObjectValues::const_iterator it = value_.map_->find( key ); + if ( it == value_.map_->end() ) + return null; + return (*it).second; +#else + Value *value = value_.array_->find( index ); + return value ? *value : null; +#endif +} + + +Value & +Value::operator[]( const char *key ) +{ + return resolveReference( key, false ); +} + + +Value & +Value::resolveReference( const char *key, + bool isStatic ) +{ + JSON_ASSERT( type_ == nullValue || type_ == objectValue ); + if ( type_ == nullValue ) + *this = Value( objectValue ); +#ifndef JSON_VALUE_USE_INTERNAL_MAP + CZString actualKey( key, isStatic ? CZString::noDuplication + : CZString::duplicateOnCopy ); + ObjectValues::iterator it = value_.map_->lower_bound( actualKey ); + if ( it != value_.map_->end() && (*it).first == actualKey ) + return (*it).second; + + ObjectValues::value_type defaultValue( actualKey, null ); + it = value_.map_->insert( it, defaultValue ); + Value &value = (*it).second; + return value; +#else + return value_.map_->resolveReference( key, isStatic ); +#endif +} + + +Value +Value::get( UInt index, + const Value &defaultValue ) const +{ + const Value *value = &((*this)[index]); + return value == &null ? defaultValue : *value; +} + + +bool +Value::isValidIndex( UInt index ) const +{ + return index < size(); +} + + + +const Value & +Value::operator[]( const char *key ) const +{ + JSON_ASSERT( type_ == nullValue || type_ == objectValue ); + if ( type_ == nullValue ) + return null; +#ifndef JSON_VALUE_USE_INTERNAL_MAP + CZString actualKey( key, CZString::noDuplication ); + ObjectValues::const_iterator it = value_.map_->find( actualKey ); + if ( it == value_.map_->end() ) + return null; + return (*it).second; +#else + const Value *value = value_.map_->find( key ); + return value ? 
*value : null;
+#endif
+}
+
+
+Value &
+Value::operator[]( const std::string &key )
+{
+   return (*this)[ key.c_str() ];
+}
+
+
+const Value &
+Value::operator[]( const std::string &key ) const
+{
+   return (*this)[ key.c_str() ];
+}
+
+Value &
+Value::operator[]( const StaticString &key )
+{
+   return resolveReference( key, true );
+}
+
+
+# ifdef JSON_USE_CPPTL
+Value &
+Value::operator[]( const CppTL::ConstString &key )
+{
+   return (*this)[ key.c_str() ];
+}
+
+
+const Value &
+Value::operator[]( const CppTL::ConstString &key ) const
+{
+   return (*this)[ key.c_str() ];
+}
+# endif
+
+
+Value &
+Value::append( const Value &value )
+{
+   return (*this)[size()] = value;
+}
+
+
+Value
+Value::get( const char *key,
+            const Value &defaultValue ) const
+{
+   const Value *value = &((*this)[key]);
+   return value == &null ? defaultValue : *value;
+}
+
+
+Value
+Value::get( const std::string &key,
+            const Value &defaultValue ) const
+{
+   return get( key.c_str(), defaultValue );
+}
+
+Value
+Value::removeMember( const char* key )
+{
+   JSON_ASSERT( type_ == nullValue  ||  type_ == objectValue );
+   if ( type_ == nullValue )
+      return null;
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   CZString actualKey( key, CZString::noDuplication );
+   ObjectValues::iterator it = value_.map_->find( actualKey );
+   if ( it == value_.map_->end() )
+      return null;
+   Value old(it->second);
+   value_.map_->erase(it);
+   return old;
+#else
+   Value *value = value_.map_->find( key );
+   if (value){
+      Value old(*value);
+      value_.map_->remove( key );
+      return old;
+   } else {
+      return null;
+   }
+#endif
+}
+
+Value
+Value::removeMember( const std::string &key )
+{
+   return removeMember( key.c_str() );
+}
+
+# ifdef JSON_USE_CPPTL
+Value
+Value::get( const CppTL::ConstString &key,
+            const Value &defaultValue ) const
+{
+   return get( key.c_str(), defaultValue );
+}
+# endif
+
+bool
+Value::isMember( const char *key ) const
+{
+   const Value *value = &((*this)[key]);
+   return value != &null;
+}
+
+
+bool
+Value::isMember( const std::string &key ) const
+{
+   return isMember( key.c_str() );
+}
+
+
+# ifdef JSON_USE_CPPTL
+bool
+Value::isMember( const CppTL::ConstString &key ) const
+{
+   return isMember( key.c_str() );
+}
+#endif
+
+Value::Members
+Value::getMemberNames() const
+{
+   JSON_ASSERT( type_ == nullValue  ||  type_ == objectValue );
+   if ( type_ == nullValue )
+       return Value::Members();
+   Members members;
+   members.reserve( value_.map_->size() );
+#ifndef JSON_VALUE_USE_INTERNAL_MAP
+   ObjectValues::const_iterator it = value_.map_->begin();
+   ObjectValues::const_iterator itEnd = value_.map_->end();
+   for ( ; it != itEnd; ++it )
+      members.push_back( std::string( (*it).first.c_str() ) );
+#else
+   ValueInternalMap::IteratorState it;
+   ValueInternalMap::IteratorState itEnd;
+   value_.map_->makeBeginIterator( it );
+   value_.map_->makeEndIterator( itEnd );
+   for ( ; !ValueInternalMap::equals( it, itEnd ); ValueInternalMap::increment(it) )
+      members.push_back( std::string( ValueInternalMap::key( it ) ) );
+#endif
+   return members;
+}
+//
+//# ifdef JSON_USE_CPPTL
+//EnumMemberNames
+//Value::enumMemberNames() const
+//{
+//   if ( type_ == objectValue )
+//   {
+//      return CppTL::Enum::any( CppTL::Enum::transform(
+//         CppTL::Enum::keys( *(value_.map_), CppTL::Type<const CZString &>() ),
+//         MemberNamesTransform() ) );
+//   }
+//   return EnumMemberNames();
+//}
+//
+//
+//EnumValues
+//Value::enumValues() const
+//{
+//   if ( type_ == objectValue  ||  type_ == arrayValue )
+//      return CppTL::Enum::anyValues( *(value_.map_),
+//                                     CppTL::Type<const Value &>() );
+//   return EnumValues();
+//}
+//
+//#
endif + + +bool +Value::isNull() const +{ + return type_ == nullValue; +} + + +bool +Value::isBool() const +{ + return type_ == booleanValue; +} + + +bool +Value::isInt() const +{ + return type_ == intValue; +} + + +bool +Value::isUInt() const +{ + return type_ == uintValue; +} + + +bool +Value::isIntegral() const +{ + return type_ == intValue + || type_ == uintValue + || type_ == booleanValue; +} + + +bool +Value::isDouble() const +{ + return type_ == realValue; +} + + +bool +Value::isNumeric() const +{ + return isIntegral() || isDouble(); +} + + +bool +Value::isString() const +{ + return type_ == stringValue; +} + + +bool +Value::isArray() const +{ + return type_ == nullValue || type_ == arrayValue; +} + + +bool +Value::isObject() const +{ + return type_ == nullValue || type_ == objectValue; +} + + +void +Value::setComment( const char *comment, + CommentPlacement placement ) +{ + if ( !comments_ ) + comments_ = new CommentInfo[numberOfCommentPlacement]; + comments_[placement].setComment( comment ); +} + + +void +Value::setComment( const std::string &comment, + CommentPlacement placement ) +{ + setComment( comment.c_str(), placement ); +} + + +bool +Value::hasComment( CommentPlacement placement ) const +{ + return comments_ != 0 && comments_[placement].comment_ != 0; +} + +std::string +Value::getComment( CommentPlacement placement ) const +{ + if ( hasComment(placement) ) + return comments_[placement].comment_; + return ""; +} + + +std::string +Value::toStyledString() const +{ + StyledWriter writer; + return writer.write( *this ); +} + + +Value::const_iterator +Value::begin() const +{ + switch ( type_ ) + { +#ifdef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + if ( value_.array_ ) + { + ValueInternalArray::IteratorState it; + value_.array_->makeBeginIterator( it ); + return const_iterator( it ); + } + break; + case objectValue: + if ( value_.map_ ) + { + ValueInternalMap::IteratorState it; + value_.map_->makeBeginIterator( it ); + return const_iterator( it ); + } + break; +#else + case arrayValue: + case objectValue: + if ( value_.map_ ) + return const_iterator( value_.map_->begin() ); + break; +#endif + default: + break; + } + return const_iterator(); +} + +Value::const_iterator +Value::end() const +{ + switch ( type_ ) + { +#ifdef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + if ( value_.array_ ) + { + ValueInternalArray::IteratorState it; + value_.array_->makeEndIterator( it ); + return const_iterator( it ); + } + break; + case objectValue: + if ( value_.map_ ) + { + ValueInternalMap::IteratorState it; + value_.map_->makeEndIterator( it ); + return const_iterator( it ); + } + break; +#else + case arrayValue: + case objectValue: + if ( value_.map_ ) + return const_iterator( value_.map_->end() ); + break; +#endif + default: + break; + } + return const_iterator(); +} + + +Value::iterator +Value::begin() +{ + switch ( type_ ) + { +#ifdef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + if ( value_.array_ ) + { + ValueInternalArray::IteratorState it; + value_.array_->makeBeginIterator( it ); + return iterator( it ); + } + break; + case objectValue: + if ( value_.map_ ) + { + ValueInternalMap::IteratorState it; + value_.map_->makeBeginIterator( it ); + return iterator( it ); + } + break; +#else + case arrayValue: + case objectValue: + if ( value_.map_ ) + return iterator( value_.map_->begin() ); + break; +#endif + default: + break; + } + return iterator(); +} + +Value::iterator +Value::end() +{ + switch ( type_ ) + { +#ifdef JSON_VALUE_USE_INTERNAL_MAP + case arrayValue: + if ( 
value_.array_ ) + { + ValueInternalArray::IteratorState it; + value_.array_->makeEndIterator( it ); + return iterator( it ); + } + break; + case objectValue: + if ( value_.map_ ) + { + ValueInternalMap::IteratorState it; + value_.map_->makeEndIterator( it ); + return iterator( it ); + } + break; +#else + case arrayValue: + case objectValue: + if ( value_.map_ ) + return iterator( value_.map_->end() ); + break; +#endif + default: + break; + } + return iterator(); +} + + +// class PathArgument +// ////////////////////////////////////////////////////////////////// + +PathArgument::PathArgument() + : kind_( kindNone ) +{ +} + + +PathArgument::PathArgument( Value::UInt index ) + : index_( index ) + , kind_( kindIndex ) +{ +} + + +PathArgument::PathArgument( const char *key ) + : key_( key ) + , kind_( kindKey ) +{ +} + + +PathArgument::PathArgument( const std::string &key ) + : key_( key.c_str() ) + , kind_( kindKey ) +{ +} + +// class Path +// ////////////////////////////////////////////////////////////////// + +Path::Path( const std::string &path, + const PathArgument &a1, + const PathArgument &a2, + const PathArgument &a3, + const PathArgument &a4, + const PathArgument &a5 ) +{ + InArgs in; + in.push_back( &a1 ); + in.push_back( &a2 ); + in.push_back( &a3 ); + in.push_back( &a4 ); + in.push_back( &a5 ); + makePath( path, in ); +} + + +void +Path::makePath( const std::string &path, + const InArgs &in ) +{ + const char *current = path.c_str(); + const char *end = current + path.length(); + InArgs::const_iterator itInArg = in.begin(); + while ( current != end ) + { + if ( *current == '[' ) + { + ++current; + if ( *current == '%' ) + addPathInArg( path, in, itInArg, PathArgument::kindIndex ); + else + { + Value::UInt index = 0; + for ( ; current != end && *current >= '0' && *current <= '9'; ++current ) + index = index * 10 + Value::UInt(*current - '0'); + args_.push_back( index ); + } + if ( current == end || *current++ != ']' ) + invalidPath( path, int(current - path.c_str()) ); + } + else if ( *current == '%' ) + { + addPathInArg( path, in, itInArg, PathArgument::kindKey ); + ++current; + } + else if ( *current == '.' ) + { + ++current; + } + else + { + const char *beginName = current; + while ( current != end && !strchr( "[.", *current ) ) + ++current; + args_.push_back( std::string( beginName, current ) ); + } + } +} + + +void +Path::addPathInArg( const std::string &path, + const InArgs &in, + InArgs::const_iterator &itInArg, + PathArgument::Kind kind ) +{ + if ( itInArg == in.end() ) + { + // Error: missing argument %d + } + else if ( (*itInArg)->kind_ != kind ) + { + // Error: bad argument type + } + else + { + args_.push_back( **itInArg ); + } +} + + +void +Path::invalidPath( const std::string &path, + int location ) +{ + // Error: invalid path. +} + + +const Value & +Path::resolve( const Value &root ) const +{ + const Value *node = &root; + for ( Args::const_iterator it = args_.begin(); it != args_.end(); ++it ) + { + const PathArgument &arg = *it; + if ( arg.kind_ == PathArgument::kindIndex ) + { + if ( !node->isArray() || node->isValidIndex( arg.index_ ) ) + { + // Error: unable to resolve path (array value expected at position... + } + node = &((*node)[arg.index_]); + } + else if ( arg.kind_ == PathArgument::kindKey ) + { + if ( !node->isObject() ) + { + // Error: unable to resolve path (object value expected at position...) + } + node = &((*node)[arg.key_]); + if ( node == &Value::null ) + { + // Error: unable to resolve path (object has no member named '' at position...) 
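+            // Editorial usage sketch, not part of the original sources:
+            // error reporting here is left as a TODO upstream, so a missing
+            // member simply yields Value::null. With the grammar accepted by
+            // makePath() below, a literal path such as ".settings.users[0]"
+            // walks root["settings"]["users"][0]:
+            //
+            //   Json::Path path( ".settings.users[0]" );
+            //   const Json::Value &v = path.resolve( root );
+            //
+            // "%" placeholders in the path string are filled from the extra
+            // PathArgument parameters of the Path constructor.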
+         }
+      }
+   }
+   return *node;
+}
+
+
+Value
+Path::resolve( const Value &root,
+               const Value &defaultValue ) const
+{
+   const Value *node = &root;
+   for ( Args::const_iterator it = args_.begin(); it != args_.end(); ++it )
+   {
+      const PathArgument &arg = *it;
+      if ( arg.kind_ == PathArgument::kindIndex )
+      {
+         // An out-of-range index cannot be resolved, so fall back to the default.
+         if ( !node->isArray()  ||  !node->isValidIndex( arg.index_ ) )
+            return defaultValue;
+         node = &((*node)[arg.index_]);
+      }
+      else if ( arg.kind_ == PathArgument::kindKey )
+      {
+         if ( !node->isObject() )
+            return defaultValue;
+         node = &((*node)[arg.key_]);
+         if ( node == &Value::null )
+            return defaultValue;
+      }
+   }
+   return *node;
+}
+
+
+Value &
+Path::make( Value &root ) const
+{
+   Value *node = &root;
+   for ( Args::const_iterator it = args_.begin(); it != args_.end(); ++it )
+   {
+      const PathArgument &arg = *it;
+      if ( arg.kind_ == PathArgument::kindIndex )
+      {
+         if ( !node->isArray() )
+         {
+            // Error: node is not an array at position ...
+         }
+         node = &((*node)[arg.index_]);
+      }
+      else if ( arg.kind_ == PathArgument::kindKey )
+      {
+         if ( !node->isObject() )
+         {
+            // Error: node is not an object at position...
+         }
+         node = &((*node)[arg.key_]);
+      }
+   }
+   return *node;
+}
+
+
+} // namespace Json
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/json_writer.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/json_writer.cpp
new file mode 100644
index 0000000..18491d0
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/json_writer.cpp
@@ -0,0 +1,829 @@
+#include <json/writer.h>
+#include <utility>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <iostream>
+#include <sstream>
+#include <iomanip>
+
+#if _MSC_VER >= 1400 // VC++ 8.0
+#pragma warning( disable : 4996 )   // disable warning about strdup being deprecated.
+#endif
+
+namespace Json {
+
+static bool isControlCharacter(char ch)
+{
+   return ch > 0 && ch <= 0x1F;
+}
+
+static bool containsControlCharacter( const char* str )
+{
+   while ( *str )
+   {
+      if ( isControlCharacter( *(str++) ) )
+         return true;
+   }
+   return false;
+}
+static void uintToString( unsigned int value,
+                          char *&current )
+{
+   *--current = 0;
+   do
+   {
+      *--current = (value % 10) + '0';
+      value /= 10;
+   }
+   while ( value != 0 );
+}
+
+std::string valueToString( Int value )
+{
+   char buffer[32];
+   char *current = buffer + sizeof(buffer);
+   bool isNegative = value < 0;
+   if ( isNegative )
+      value = -value;
+   uintToString( UInt(value), current );
+   if ( isNegative )
+      *--current = '-';
+   assert( current >= buffer );
+   return current;
+}
+
+
+std::string valueToString( UInt value )
+{
+   char buffer[32];
+   char *current = buffer + sizeof(buffer);
+   uintToString( value, current );
+   assert( current >= buffer );
+   return current;
+}
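+// ---------------------------------------------------------------------------
+// Editorial sketch, not part of the upstream jsoncpp sources: the integer
+// helpers above format a number by writing its digits backwards from the end
+// of a scratch buffer, so no separate reversal pass is needed. A minimal
+// standalone equivalent (buffer size and value are illustrative):
+//
+//   char buffer[32];
+//   char *current = buffer + sizeof(buffer);
+//   unsigned int value = 12045;
+//   *--current = 0;                        // write the NUL terminator first
+//   do {
+//      *--current = (value % 10) + '0';    // emit least-significant digit
+//      value /= 10;
+//   } while ( value != 0 );
+//   assert( strcmp( current, "12045" ) == 0 );   // digits come out in order
+// ---------------------------------------------------------------------------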
+std::string valueToString( double value )
+{
+   char buffer[32];
+#if defined(_MSC_VER) && defined(__STDC_SECURE_LIB__) // Use secure version with visual studio 2005 to avoid warning.
+   sprintf_s(buffer, sizeof(buffer), "%#.16g", value);
+#else
+   sprintf(buffer, "%#.16g", value);
+#endif
+   char* ch = buffer + strlen(buffer) - 1;
+   if (*ch != '0') return buffer; // nothing to truncate, so save time
+   while(ch > buffer && *ch == '0'){
+      --ch;
+   }
+   char* last_nonzero = ch;
+   while(ch >= buffer){
+      switch(*ch){
+      case '0':
+      case '1':
+      case '2':
+      case '3':
+      case '4':
+      case '5':
+      case '6':
+      case '7':
+      case '8':
+      case '9':
+         --ch;
+         continue;
+      case '.':
+         // Truncate zeroes to save bytes in output, but keep one.
+         *(last_nonzero+2) = '\0';
+         return buffer;
+      default:
+         return buffer;
+      }
+   }
+   return buffer;
+}
+
+
+std::string valueToString( bool value )
+{
+   return value ? "true" : "false";
+}
+
+std::string valueToQuotedString( const char *value )
+{
+   // Not sure how to handle unicode...
+   if (strpbrk(value, "\"\\\b\f\n\r\t") == NULL && !containsControlCharacter( value ))
+      return std::string("\"") + value + "\"";
+   // We have to walk value and escape any special characters.
+   // Appending to std::string is not efficient, but this should be rare.
+   // (Note: forward slashes are *not* rare, but I am not escaping them.)
+   unsigned maxsize = strlen(value)*2 + 3; // allescaped+quotes+NULL
+   std::string result;
+   result.reserve(maxsize); // to avoid lots of mallocs
+   result += "\"";
+   for (const char* c=value; *c != 0; ++c)
+   {
+      switch(*c)
+      {
+         case '\"':
+            result += "\\\"";
+            break;
+         case '\\':
+            result += "\\\\";
+            break;
+         case '\b':
+            result += "\\b";
+            break;
+         case '\f':
+            result += "\\f";
+            break;
+         case '\n':
+            result += "\\n";
+            break;
+         case '\r':
+            result += "\\r";
+            break;
+         case '\t':
+            result += "\\t";
+            break;
+         //case '/':
+            // Even though \/ is considered a legal escape in JSON, a bare
+            // slash is also legal, so I see no reason to escape it.
+            // (I hope I am not misunderstanding something.
+            // blep notes: actually escaping \/ may be useful in javascript to avoid </
+            // sequence.
+            // Should add a flag to allow this compatibility mode and prevent this
+            // sequence from occurring.
+         default:
+            if ( isControlCharacter( *c ) )
+            {
+               // Control characters have no single-character escape, so emit
+               // them as \uXXXX.
+               std::ostringstream oss;
+               oss << "\\u" << std::hex << std::uppercase << std::setfill('0')
+                   << std::setw(4) << static_cast<int>(*c);
+               result += oss.str();
+            }
+            else
+            {
+               result += *c;
+            }
+            break;
+      }
+   }
+   result += "\"";
+   return result;
+}
+
+// Class Writer
+// //////////////////////////////////////////////////////////////////
+Writer::~Writer()
+{
+}
+
+
+// Class FastWriter
+// //////////////////////////////////////////////////////////////////
+
+FastWriter::FastWriter()
+   : yamlCompatiblityEnabled_( false )
+{
+}
+
+
+void
+FastWriter::enableYAMLCompatibility()
+{
+   yamlCompatiblityEnabled_ = true;
+}
+
+
+std::string
+FastWriter::write( const Value &root )
+{
+   document_ = "";
+   writeValue( root );
+   document_ += "\n";
+   return document_;
+}
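+// Editorial usage sketch, not part of the original sources: FastWriter emits
+// the most compact single-line rendering (plus the trailing newline appended
+// in write() above), e.g.
+//
+//   Json::Value root;
+//   root["ok"] = true;
+//   Json::FastWriter writer;
+//   std::string compact = writer.write( root );   // {"ok":true}\n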
": " + : ":"; + writeValue( value[name] ); + } + document_ += "}"; + } + break; + } +} + + +// Class StyledWriter +// ////////////////////////////////////////////////////////////////// + +StyledWriter::StyledWriter() + : rightMargin_( 74 ) + , indentSize_( 3 ) +{ +} + + +std::string +StyledWriter::write( const Value &root ) +{ + document_ = ""; + addChildValues_ = false; + indentString_ = ""; + writeCommentBeforeValue( root ); + writeValue( root ); + writeCommentAfterValueOnSameLine( root ); + document_ += "\n"; + return document_; +} + + +void +StyledWriter::writeValue( const Value &value ) +{ + switch ( value.type() ) + { + case nullValue: + pushValue( "null" ); + break; + case intValue: + pushValue( valueToString( value.asInt() ) ); + break; + case uintValue: + pushValue( valueToString( value.asUInt() ) ); + break; + case realValue: + pushValue( valueToString( value.asDouble() ) ); + break; + case stringValue: + pushValue( valueToQuotedString( value.asCString() ) ); + break; + case booleanValue: + pushValue( valueToString( value.asBool() ) ); + break; + case arrayValue: + writeArrayValue( value); + break; + case objectValue: + { + Value::Members members( value.getMemberNames() ); + if ( members.empty() ) + pushValue( "{}" ); + else + { + writeWithIndent( "{" ); + indent(); + Value::Members::iterator it = members.begin(); + while ( true ) + { + const std::string &name = *it; + const Value &childValue = value[name]; + writeCommentBeforeValue( childValue ); + writeWithIndent( valueToQuotedString( name.c_str() ) ); + document_ += " : "; + writeValue( childValue ); + if ( ++it == members.end() ) + { + writeCommentAfterValueOnSameLine( childValue ); + break; + } + document_ += ","; + writeCommentAfterValueOnSameLine( childValue ); + } + unindent(); + writeWithIndent( "}" ); + } + } + break; + } +} + + +void +StyledWriter::writeArrayValue( const Value &value ) +{ + unsigned size = value.size(); + if ( size == 0 ) + pushValue( "[]" ); + else + { + bool isArrayMultiLine = isMultineArray( value ); + if ( isArrayMultiLine ) + { + writeWithIndent( "[" ); + indent(); + bool hasChildValue = !childValues_.empty(); + unsigned index =0; + while ( true ) + { + const Value &childValue = value[index]; + writeCommentBeforeValue( childValue ); + if ( hasChildValue ) + writeWithIndent( childValues_[index] ); + else + { + writeIndent(); + writeValue( childValue ); + } + if ( ++index == size ) + { + writeCommentAfterValueOnSameLine( childValue ); + break; + } + document_ += ","; + writeCommentAfterValueOnSameLine( childValue ); + } + unindent(); + writeWithIndent( "]" ); + } + else // output on a single line + { + assert( childValues_.size() == size ); + document_ += "[ "; + for ( unsigned index =0; index < size; ++index ) + { + if ( index > 0 ) + document_ += ", "; + document_ += childValues_[index]; + } + document_ += " ]"; + } + } +} + + +bool +StyledWriter::isMultineArray( const Value &value ) +{ + int size = value.size(); + bool isMultiLine = size*3 >= rightMargin_ ; + childValues_.clear(); + for ( int index =0; index < size && !isMultiLine; ++index ) + { + const Value &childValue = value[index]; + isMultiLine = isMultiLine || + ( (childValue.isArray() || childValue.isObject()) && + childValue.size() > 0 ); + } + if ( !isMultiLine ) // check if line length > max line length + { + childValues_.reserve( size ); + addChildValues_ = true; + int lineLength = 4 + (size-1)*2; // '[ ' + ', '*n + ' ]' + for ( int index =0; index < size && !isMultiLine; ++index ) + { + writeValue( value[index] ); + lineLength += 
int( childValues_[index].length() ); + isMultiLine = isMultiLine && hasCommentForValue( value[index] ); + } + addChildValues_ = false; + isMultiLine = isMultiLine || lineLength >= rightMargin_; + } + return isMultiLine; +} + + +void +StyledWriter::pushValue( const std::string &value ) +{ + if ( addChildValues_ ) + childValues_.push_back( value ); + else + document_ += value; +} + + +void +StyledWriter::writeIndent() +{ + if ( !document_.empty() ) + { + char last = document_[document_.length()-1]; + if ( last == ' ' ) // already indented + return; + if ( last != '\n' ) // Comments may add new-line + document_ += '\n'; + } + document_ += indentString_; +} + + +void +StyledWriter::writeWithIndent( const std::string &value ) +{ + writeIndent(); + document_ += value; +} + + +void +StyledWriter::indent() +{ + indentString_ += std::string( indentSize_, ' ' ); +} + + +void +StyledWriter::unindent() +{ + assert( int(indentString_.size()) >= indentSize_ ); + indentString_.resize( indentString_.size() - indentSize_ ); +} + + +void +StyledWriter::writeCommentBeforeValue( const Value &root ) +{ + if ( !root.hasComment( commentBefore ) ) + return; + document_ += normalizeEOL( root.getComment( commentBefore ) ); + document_ += "\n"; +} + + +void +StyledWriter::writeCommentAfterValueOnSameLine( const Value &root ) +{ + if ( root.hasComment( commentAfterOnSameLine ) ) + document_ += " " + normalizeEOL( root.getComment( commentAfterOnSameLine ) ); + + if ( root.hasComment( commentAfter ) ) + { + document_ += "\n"; + document_ += normalizeEOL( root.getComment( commentAfter ) ); + document_ += "\n"; + } +} + + +bool +StyledWriter::hasCommentForValue( const Value &value ) +{ + return value.hasComment( commentBefore ) + || value.hasComment( commentAfterOnSameLine ) + || value.hasComment( commentAfter ); +} + + +std::string +StyledWriter::normalizeEOL( const std::string &text ) +{ + std::string normalized; + normalized.reserve( text.length() ); + const char *begin = text.c_str(); + const char *end = begin + text.length(); + const char *current = begin; + while ( current != end ) + { + char c = *current++; + if ( c == '\r' ) // mac or dos EOL + { + if ( *current == '\n' ) // convert dos EOL + ++current; + normalized += '\n'; + } + else // handle unix EOL & other char + normalized += c; + } + return normalized; +} + + +// Class StyledStreamWriter +// ////////////////////////////////////////////////////////////////// + +StyledStreamWriter::StyledStreamWriter( std::string indentation ) + : document_(NULL) + , rightMargin_( 74 ) + , indentation_( indentation ) +{ +} + + +void +StyledStreamWriter::write( std::ostream &out, const Value &root ) +{ + document_ = &out; + addChildValues_ = false; + indentString_ = ""; + writeCommentBeforeValue( root ); + writeValue( root ); + writeCommentAfterValueOnSameLine( root ); + *document_ << "\n"; + document_ = NULL; // Forget the stream, for safety. 
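+   // Editorial usage sketch, not part of the original sources: this writer
+   // is driven through an ostream rather than returning a string, e.g.
+   //
+   //   Json::Value root;
+   //   root["name"] = "demo";
+   //   Json::StyledStreamWriter writer( "   " );   // indent unit per level
+   //   writer.write( std::cout, root );
+   //
+   // The operator<<(std::ostream&, const Value&) defined later in this file
+   // wraps exactly this call with a default-constructed writer.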
+} + + +void +StyledStreamWriter::writeValue( const Value &value ) +{ + switch ( value.type() ) + { + case nullValue: + pushValue( "null" ); + break; + case intValue: + pushValue( valueToString( value.asInt() ) ); + break; + case uintValue: + pushValue( valueToString( value.asUInt() ) ); + break; + case realValue: + pushValue( valueToString( value.asDouble() ) ); + break; + case stringValue: + pushValue( valueToQuotedString( value.asCString() ) ); + break; + case booleanValue: + pushValue( valueToString( value.asBool() ) ); + break; + case arrayValue: + writeArrayValue( value); + break; + case objectValue: + { + Value::Members members( value.getMemberNames() ); + if ( members.empty() ) + pushValue( "{}" ); + else + { + writeWithIndent( "{" ); + indent(); + Value::Members::iterator it = members.begin(); + while ( true ) + { + const std::string &name = *it; + const Value &childValue = value[name]; + writeCommentBeforeValue( childValue ); + writeWithIndent( valueToQuotedString( name.c_str() ) ); + *document_ << " : "; + writeValue( childValue ); + if ( ++it == members.end() ) + { + writeCommentAfterValueOnSameLine( childValue ); + break; + } + *document_ << ","; + writeCommentAfterValueOnSameLine( childValue ); + } + unindent(); + writeWithIndent( "}" ); + } + } + break; + } +} + + +void +StyledStreamWriter::writeArrayValue( const Value &value ) +{ + unsigned size = value.size(); + if ( size == 0 ) + pushValue( "[]" ); + else + { + bool isArrayMultiLine = isMultineArray( value ); + if ( isArrayMultiLine ) + { + writeWithIndent( "[" ); + indent(); + bool hasChildValue = !childValues_.empty(); + unsigned index =0; + while ( true ) + { + const Value &childValue = value[index]; + writeCommentBeforeValue( childValue ); + if ( hasChildValue ) + writeWithIndent( childValues_[index] ); + else + { + writeIndent(); + writeValue( childValue ); + } + if ( ++index == size ) + { + writeCommentAfterValueOnSameLine( childValue ); + break; + } + *document_ << ","; + writeCommentAfterValueOnSameLine( childValue ); + } + unindent(); + writeWithIndent( "]" ); + } + else // output on a single line + { + assert( childValues_.size() == size ); + *document_ << "[ "; + for ( unsigned index =0; index < size; ++index ) + { + if ( index > 0 ) + *document_ << ", "; + *document_ << childValues_[index]; + } + *document_ << " ]"; + } + } +} + + +bool +StyledStreamWriter::isMultineArray( const Value &value ) +{ + int size = value.size(); + bool isMultiLine = size*3 >= rightMargin_ ; + childValues_.clear(); + for ( int index =0; index < size && !isMultiLine; ++index ) + { + const Value &childValue = value[index]; + isMultiLine = isMultiLine || + ( (childValue.isArray() || childValue.isObject()) && + childValue.size() > 0 ); + } + if ( !isMultiLine ) // check if line length > max line length + { + childValues_.reserve( size ); + addChildValues_ = true; + int lineLength = 4 + (size-1)*2; // '[ ' + ', '*n + ' ]' + for ( int index =0; index < size && !isMultiLine; ++index ) + { + writeValue( value[index] ); + lineLength += int( childValues_[index].length() ); + isMultiLine = isMultiLine && hasCommentForValue( value[index] ); + } + addChildValues_ = false; + isMultiLine = isMultiLine || lineLength >= rightMargin_; + } + return isMultiLine; +} + + +void +StyledStreamWriter::pushValue( const std::string &value ) +{ + if ( addChildValues_ ) + childValues_.push_back( value ); + else + *document_ << value; +} + + +void +StyledStreamWriter::writeIndent() +{ + /* + Some comments in this method would have been nice. 
;-) + + if ( !document_.empty() ) + { + char last = document_[document_.length()-1]; + if ( last == ' ' ) // already indented + return; + if ( last != '\n' ) // Comments may add new-line + *document_ << '\n'; + } + */ + *document_ << '\n' << indentString_; +} + + +void +StyledStreamWriter::writeWithIndent( const std::string &value ) +{ + writeIndent(); + *document_ << value; +} + + +void +StyledStreamWriter::indent() +{ + indentString_ += indentation_; +} + + +void +StyledStreamWriter::unindent() +{ + assert( indentString_.size() >= indentation_.size() ); + indentString_.resize( indentString_.size() - indentation_.size() ); +} + + +void +StyledStreamWriter::writeCommentBeforeValue( const Value &root ) +{ + if ( !root.hasComment( commentBefore ) ) + return; + *document_ << normalizeEOL( root.getComment( commentBefore ) ); + *document_ << "\n"; +} + + +void +StyledStreamWriter::writeCommentAfterValueOnSameLine( const Value &root ) +{ + if ( root.hasComment( commentAfterOnSameLine ) ) + *document_ << " " + normalizeEOL( root.getComment( commentAfterOnSameLine ) ); + + if ( root.hasComment( commentAfter ) ) + { + *document_ << "\n"; + *document_ << normalizeEOL( root.getComment( commentAfter ) ); + *document_ << "\n"; + } +} + + +bool +StyledStreamWriter::hasCommentForValue( const Value &value ) +{ + return value.hasComment( commentBefore ) + || value.hasComment( commentAfterOnSameLine ) + || value.hasComment( commentAfter ); +} + + +std::string +StyledStreamWriter::normalizeEOL( const std::string &text ) +{ + std::string normalized; + normalized.reserve( text.length() ); + const char *begin = text.c_str(); + const char *end = begin + text.length(); + const char *current = begin; + while ( current != end ) + { + char c = *current++; + if ( c == '\r' ) // mac or dos EOL + { + if ( *current == '\n' ) // convert dos EOL + ++current; + normalized += '\n'; + } + else // handle unix EOL & other char + normalized += c; + } + return normalized; +} + + +std::ostream& operator<<( std::ostream &sout, const Value &root ) +{ + Json::StyledStreamWriter writer; + writer.write(sout, root); + return sout; +} + + +} // namespace Json diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/sconscript b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/sconscript new file mode 100644 index 0000000..f6520d1 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/lib_json/src/sconscript @@ -0,0 +1,8 @@ +Import( 'env buildLibrary' ) + +buildLibrary( env, Split( """ + json_reader.cpp + json_value.cpp + json_writer.cpp + """ ), + 'json' ) diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/CMakeLists.txt new file mode 100644 index 0000000..92352b7 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(inc inc_common) +AUX_SOURCE_DIRECTORY(./src DIR_TO_RESAMPLE_SRCS) +ADD_LIBRARY(resample2 ${DIR_TO_RESAMPLE_SRCS}) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/CResample2.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/CResample2.h new file mode 100644 index 0000000..11b682e --- /dev/null +++ 
b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/CResample2.h @@ -0,0 +1,41 @@ +#ifndef __CRESAMPLE_H_ +#define __CRESAMPLE_H_ + +#define BUFFER_LEN (4096*10) //max input buffer len is 4096 samples + +//convert parameter +// #define KALA_RESAMPLE_SRC_SINC_BEST_QUALITY 0 +// #define KALA_RESAMPLE_SRC_SINC_MEDIUM_QUALITY 1 +// #define KALA_RESAMPLE_SRC_SINC_FASTEST 2 +// #define KALA_RESAMPLE_SRC_ZERO_ORDER_HOLD 3 +#define KALA_RESAMPLE_SRC_LINEAR 4 + +// only for pcm 16 bit samples +class CResample2 +{ +public: + CResample2(); + +public: + int init(int src_sample_rate, int dst_sample_rate, int channel, int max_input_size, int* max_output_size); + void reset(); + void uninit(); + int process(char* pSrc, int src_size, char* pDst); + //should input convert parameter + //int setType(int convert); + +private: + float apply_gain(float * data, long frames, int channels, float max, float gain); + +private: + void *m_src_state; + double m_src_ratio; + int m_convert; + int m_channles; + float m_gain; + float m_max; +}; + + +#endif + diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/fastest_coeffs.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/fastest_coeffs.h new file mode 100644 index 0000000..49b8399 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/fastest_coeffs.h @@ -0,0 +1,2505 @@ +/* +** Copyright (C) 2002-2011 Erik de Castro Lopo +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +*/ + +/* +** This code is part of Secret Rabbit Code aka libsamplerate. A commercial +** use license for this code is available, please see: +** http://www.mega-nerd.com/SRC/procedure.html +*/ + +/* +** f = make_filter (8, 128, 100.3) ; +** Pass band width : 0.0039062 (should be 0.0039062) +** Stop band atten. 
: 100.71 dB +** -3dB band width : 0.484 +** half length : 2463 +** increment : 128 +*/ + +static const struct fastest_coeffs_s +{ int increment ; + coeff_t coeffs [2464] ; +} fastest_coeffs = +{ 128, +{ + 8.31472372954840555082e-01, + 8.31414005540308198583e-01, + 8.31238918266223869580e-01, + 8.30947156036480505392e-01, + 8.30538793675450581766e-01, + 8.30013935904800659316e-01, + 8.29372717311066987023e-01, + 8.28615302303967515840e-01, + 8.27741885065490623496e-01, + 8.26752689489751890761e-01, + 8.25647969113678215081e-01, + 8.24428007038499943704e-01, + 8.23093115842108757896e-01, + 8.21643637482293187624e-01, + 8.20079943190897053817e-01, + 8.18402433358933589780e-01, + 8.16611537412689103554e-01, + 8.14707713680854150873e-01, + 8.12691449252757824873e-01, + 8.10563259827706050764e-01, + 8.08323689555523805517e-01, + 8.05973310868314363198e-01, + 8.03512724303517833491e-01, + 8.00942558318331943035e-01, + 7.98263469095534694553e-01, + 7.95476140340800830231e-01, + 7.92581283071560838138e-01, + 7.89579635397499868255e-01, + 7.86471962292734527722e-01, + 7.83259055359786127148e-01, + 7.79941732585400893107e-01, + 7.76520838088307852054e-01, + 7.72997241859018080490e-01, + 7.69371839491718167992e-01, + 7.65645551908390675777e-01, + 7.61819325075220210586e-01, + 7.57894129711408459649e-01, + 7.53870960990470018181e-01, + 7.49750838234153449413e-01, + 7.45534804599028211314e-01, + 7.41223926755909090502e-01, + 7.36819294562192195208e-01, + 7.32322020727209643809e-01, + 7.27733240470738174110e-01, + 7.23054111174766811487e-01, + 7.18285812028632841830e-01, + 7.13429543667664534112e-01, + 7.08486527805442301009e-01, + 7.03458006859804640953e-01, + 6.98345243572719653891e-01, + 6.93149520624175785599e-01, + 6.87872140240182283755e-01, + 6.82514423795047564525e-01, + 6.77077711408058502407e-01, + 6.71563361534684655219e-01, + 6.65972750552474845875e-01, + 6.60307272341742135247e-01, + 6.54568337861228477514e-01, + 6.48757374718860524432e-01, + 6.42875826737744904271e-01, + 6.36925153517562181449e-01, + 6.30906829991492501541e-01, + 6.24822345978837789815e-01, + 6.18673205733470954470e-01, + 6.12460927488293727095e-01, + 6.06187042995817604307e-01, + 5.99853097065060292259e-01, + 5.93460647094893878339e-01, + 5.87011262603992944875e-01, + 5.80506524757569142281e-01, + 5.73948025891025337408e-01, + 5.67337369030688098981e-01, + 5.60676167411809700525e-01, + 5.53966043993961543279e-01, + 5.47208630974010734604e-01, + 5.40405569296826038261e-01, + 5.33558508163880174102e-01, + 5.26669104539922661168e-01, + 5.19739022657876970079e-01, + 5.12769933522119303326e-01, + 5.05763514410336290084e-01, + 4.98721448374081555155e-01, + 4.91645423738241937883e-01, + 4.84537133599546865348e-01, + 4.77398275324308896117e-01, + 4.70230550045545592219e-01, + 4.63035662159660077464e-01, + 4.55815318822846149427e-01, + 4.48571229447379538069e-01, + 4.41305105197960123586e-01, + 4.34018658488283970431e-01, + 4.26713602477997000495e-01, + 4.19391650570203500248e-01, + 4.12054515909689722530e-01, + 4.04703910882034223473e-01, + 3.97341546613763640927e-01, + 3.89969132473721613596e-01, + 3.82588375575806771689e-01, + 3.75200980283257823356e-01, + 3.67808647714624070701e-01, + 3.60413075251609871241e-01, + 3.53015956048925771960e-01, + 3.45618978546330835044e-01, + 3.38223825983006376461e-01, + 3.30832175914426429575e-01, + 3.23445699731881031180e-01, + 3.16066062184803764357e-01, + 3.08694920906066150312e-01, + 3.01333925940378832831e-01, + 2.93984719275965256102e-01, + 2.86648934379644393378e-01, + 
2.79328195735489559492e-01, + 2.72024118387182545220e-01, + 2.64738307484245039003e-01, + 2.57472357832259801658e-01, + 2.50227853447243409057e-01, + 2.43006367114305704691e-01, + 2.35809459950733935063e-01, + 2.28638680973647728800e-01, + 2.21495566672345989279e-01, + 2.14381640585498134399e-01, + 2.07298412883298144305e-01, + 2.00247379954717363848e-01, + 1.93230023999986955108e-01, + 1.86247812628430653437e-01, + 1.79302198461779749294e-01, + 1.72394618743085786816e-01, + 1.65526494951356295537e-01, + 1.58699232422028796430e-01, + 1.51914219973401071195e-01, + 1.45172829539132269838e-01, + 1.38476415806921215879e-01, + 1.31826315863480453272e-01, + 1.25223848845901208904e-01, + 1.18670315599523901184e-01, + 1.12166998342411894374e-01, + 1.05715160336527447260e-01, + 9.93160455657086521652e-02, + 9.29708784205405536216e-02, + 8.66808633902153846673e-02, + 8.04471847614677826321e-02, + 7.42710063246745516574e-02, + 6.81534710872001986415e-02, + 6.20957009940759641076e-02, + 5.60987966560835549235e-02, + 5.01638370853247708703e-02, + 4.42918794383505357026e-02, + 3.84839587669171534490e-02, + 3.27410877764400740086e-02, + 2.70642565922108620236e-02, + 2.14544325334371267788e-02, + 1.59125598951669576520e-02, + 1.04395597381551803740e-02, + 5.03632968672305773861e-03, +-2.96256265336385191805e-04, +-5.55734794075828358179e-03, +-1.07461191566687631893e-02, +-1.58617678942645466689e-02, +-2.09035164602743607498e-02, +-2.58706116401622790435e-02, +-3.07623248430414844568e-02, +-3.55779522382659724178e-02, +-4.03168148836769782428e-02, +-4.49782588454727128013e-02, +-4.95616553096875425699e-02, +-5.40664006852556791594e-02, +-5.84919166986474642345e-02, +-6.28376504800633867154e-02, +-6.71030746411782619276e-02, +-7.12876873444269476554e-02, +-7.53910123638282386738e-02, +-7.94125991373483691715e-02, +-8.33520228108008270906e-02, +-8.72088842732959695914e-02, +-9.09828101842390379872e-02, +-9.46734529918955292072e-02, +-9.82804909435327500589e-02, +-1.01803628087157427284e-01, +-1.05242594264867719844e-01, +-1.08597145097841310535e-01, +-1.11867061962988789681e-01, +-1.15052151961296145188e-01, +-1.18152247877890054228e-01, +-1.21167208133862752684e-01, +-1.24096916729885473063e-01, +-1.26941283181660202750e-01, +-1.29700242447243679900e-01, +-1.32373754846295377252e-01, +-1.34961805971292009287e-01, +-1.37464406590764143257e-01, +-1.39881592544604443917e-01, +-1.42213424631507739937e-01, +-1.44459988488595730827e-01, +-1.46621394463294696386e-01, +-1.48697777477524800682e-01, +-1.50689296884269657850e-01, +-1.52596136316595465399e-01, +-1.54418503529190731527e-01, +-1.56156630232500315270e-01, +-1.57810771919529219121e-01, +-1.59381207685401427021e-01, +-1.60868240039743037872e-01, +-1.62272194711985145998e-01, +-1.63593420449666626659e-01, +-1.64832288809824062392e-01, +-1.65989193943563151379e-01, +-1.67064552373901109572e-01, +-1.68058802766975601273e-01, +-1.68972405696717037360e-01, +-1.69805843403086798027e-01, +-1.70559619543971530131e-01, +-1.71234258940853617537e-01, +-1.71830307318344255307e-01, +-1.72348331037702334756e-01, +-1.72788916824434257702e-01, +-1.73152671490098081231e-01, +-1.73440221648409775845e-01, +-1.73652213425782242506e-01, +-1.73789312166397952319e-01, +-1.73852202131942051855e-01, +-1.73841586196111674845e-01, +-1.73758185534021086793e-01, +-1.73602739306629005878e-01, +-1.73376004340306061335e-01, +-1.73078754801670009478e-01, +-1.72711781867818603420e-01, +-1.72275893392080048372e-01, +-1.71771913565416961545e-01, +-1.71200682573611373538e-01, 
+-1.70563056250360139954e-01, +-1.69859905726417126370e-01, +-1.69092117074913228514e-01, +-1.68260590952989147473e-01, +-1.67366242239875284703e-01, +-1.66409999671557895518e-01, +-1.65392805472166642966e-01, +-1.64315614982222552021e-01, +-1.63179396283883837437e-01, +-1.61985129823331186483e-01, +-1.60733808030429803360e-01, +-1.59426434935813571281e-01, +-1.58064025785527417778e-01, +-1.56647606653372045704e-01, +-1.55178214051094831571e-01, +-1.53656894536566474008e-01, +-1.52084704320088470730e-01, +-1.50462708868975059140e-01, +-1.48791982510548842500e-01, +-1.47073608033699704256e-01, +-1.45308676289147314931e-01, +-1.43498285788550977715e-01, +-1.41643542302611558092e-01, +-1.39745558458309881988e-01, +-1.37805453335422323224e-01, +-1.35824352062461073398e-01, +-1.33803385412180564362e-01, +-1.31743689396791985313e-01, +-1.29646404863030306753e-01, +-1.27512677087215337002e-01, +-1.25343655370452389253e-01, +-1.23140492634104758984e-01, +-1.20904345015691472298e-01, +-1.18636371465341922127e-01, +-1.16337733342949820048e-01, +-1.14009594016166518338e-01, +-1.11653118459372716065e-01, +-1.09269472853762789066e-01, +-1.06859824188683741331e-01, +-1.04425339864360325337e-01, +-1.01967187296145456177e-01, +-9.94865335204263567803e-02, +-9.69845448023236023083e-02, +-9.44623862453117940641e-02, +-9.19212214028948121358e-02, +-8.93622118924671249296e-02, +-8.67865170114848205607e-02, +-8.41952933560805999447e-02, +-8.15896944422443981537e-02, +-7.89708703296961439522e-02, +-7.63399672485739477779e-02, +-7.36981272290610500697e-02, +-7.10464877340710454501e-02, +-6.83861812951113146042e-02, +-6.57183351514422919859e-02, +-6.30440708926501142129e-02, +-6.03645041047437408421e-02, +-5.76807440198948140342e-02, +-5.49938931699267691267e-02, +-5.23050470436661057994e-02, +-4.96152937482609926456e-02, +-4.69257136745778041798e-02, +-4.42373791667729082677e-02, +-4.15513541961495605492e-02, +-3.88686940393953503370e-02, +-3.61904449613011935938e-02, +-3.35176439020573244121e-02, +-3.08513181692228674602e-02, +-2.81924851344595717162e-02, +-2.55421519351213023585e-02, +-2.29013151807887539724e-02, +-2.02709606648342685609e-02, +-1.76520630811025022733e-02, +-1.50455857457888787787e-02, +-1.24524803245954687053e-02, +-9.87368656524285036313e-03, +-7.31013203541311037958e-03, +-4.76273186619807602227e-03, +-2.23238850112297869746e-03, + 2.80008549183706099625e-04, + 2.77358294660976899965e-03, + 5.24747175940274562800e-03, + 7.70082569017439908660e-03, + 1.01328092980087648006e-02, + 1.25426012146140665460e-02, + 1.49293943544662570388e-02, + 1.72923961188884665885e-02, + 1.96308285940195309527e-02, + 2.19439287426209730936e-02, + 2.42309485896793734561e-02, + 2.64911554017603391442e-02, + 2.87238318600733545660e-02, + 3.09282762272103349532e-02, + 3.31038025075217068327e-02, + 3.52497406010981520486e-02, + 3.73654364513253609004e-02, + 3.94502521859858221176e-02, + 4.15035662518817155542e-02, + 4.35247735429537541130e-02, + 4.55132855218787699125e-02, + 4.74685303351244439196e-02, + 4.93899529214478216765e-02, + 5.12770151138242716304e-02, + 5.31291957347935772660e-02, + 5.49459906852194576721e-02, + 5.67269130264521220797e-02, + 5.84714930558940249039e-02, + 6.01792783759655322551e-02, + 6.18498339564735599705e-02, + 6.34827421903864652641e-02, + 6.50776029430226859995e-02, + 6.66340335946605799577e-02, + 6.81516690765814614483e-02, + 6.96301619005592065115e-02, + 7.10691821818139612965e-02, + 7.24684176554465098175e-02, + 7.38275736863740761340e-02, + 7.51463732727930683319e-02, + 
7.64245570431912463194e-02, + 7.76618832469397474272e-02, + 7.88581277384926976337e-02, + 8.00130839552289779837e-02, + 8.11265628889681067459e-02, + 8.21983930512013155623e-02, + 8.32284204320703352442e-02, + 8.42165084531432683868e-02, + 8.51625379140240473808e-02, + 8.60664069328434949702e-02, + 8.69280308806818224898e-02, + 8.77473423099686122839e-02, + 8.85242908769151987114e-02, + 8.92588432580306151420e-02, + 8.99509830607803234637e-02, + 9.06007107284422380511e-02, + 9.12080434392217309636e-02, + 9.17730149996878741270e-02, + 9.22956757325926607782e-02, + 9.27760923591415126443e-02, + 9.32143478757788968014e-02, + 9.36105414255621187669e-02, + 9.39647881641913207407e-02, + 9.42772191207702781046e-02, + 9.45479810533706027664e-02, + 9.47772362994778183598e-02, + 9.49651626213951355338e-02, + 9.51119530466846413441e-02, + 9.52178157037280176178e-02, + 9.52829736524876819148e-02, + 9.53076647105531166160e-02, + 9.52921412745576373871e-02, + 9.52366701370536278271e-02, + 9.51415322989309503177e-02, + 9.50070227774735681647e-02, + 9.48334504101390751707e-02, + 9.46211376541590265532e-02, + 9.43704203820504156086e-02, + 9.40816476731309581094e-02, + 9.37551816011396865758e-02, + 9.33913970180541563870e-02, + 9.29906813342047527948e-02, + 9.25534342947849225647e-02, + 9.20800677528557931506e-02, + 9.15710054389489019888e-02, + 9.10266827273659706599e-02, + 9.04475463992783224043e-02, + 8.98340544027328158361e-02, + 8.91866756096650198371e-02, + 8.85058895700238101867e-02, + 8.77921862631190763615e-02, + 8.70460658462897246546e-02, + 8.62680384010083983748e-02, + 8.54586236765221690659e-02, + 8.46183508311429133375e-02, + 8.37477581712920277068e-02, + 8.28473928884114751980e-02, + 8.19178107938471483651e-02, + 8.09595760518180135312e-02, + 7.99732609105757996648e-02, + 7.89594454318716387764e-02, + 7.79187172188340326784e-02, + 7.68516711423724852015e-02, + 7.57589090662164482692e-02, + 7.46410395707000073884e-02, + 7.34986776754032733461e-02, + 7.23324445607601979047e-02, + 7.11429672887474440213e-02, + 6.99308785227581580779e-02, + 6.86968162467783832748e-02, + 6.74414234839716131287e-02, + 6.61653480147834510694e-02, + 6.48692420946761771905e-02, + 6.35537621716019962559e-02, + 6.22195686033254202751e-02, + 6.08673253747022482973e-02, + 5.94976998150253330588e-02, + 5.81113623155428762890e-02, + 5.67089860472591994478e-02, + 5.52912466791220663653e-02, + 5.38588220967053943333e-02, + 5.24123921214928872869e-02, + 5.09526382308646275110e-02, + 4.94802432788957607945e-02, + 4.79958912180662375380e-02, + 4.65002668219884549017e-02, + 4.49940554092515265783e-02, + 4.34779425684853407241e-02, + 4.19526138847447563340e-02, + 4.04187546673120054463e-02, + 3.88770496790168534895e-02, + 3.73281828671714888124e-02, + 3.57728370962169389680e-02, + 3.42116938821758476141e-02, + 3.26454331290065291604e-02, + 3.10747328669506231447e-02, + 2.95002689929673225788e-02, + 2.79227150133440210622e-02, + 2.63427417885741359249e-02, + 2.47610172805882329528e-02, + 2.31782063024293799591e-02, + 2.15949702704538760989e-02, + 2.00119669591453143431e-02, + 1.84298502586232419709e-02, + 1.68492699349288496680e-02, + 1.52708713931675090641e-02, + 1.36952954435869880129e-02, + 1.21231780706691841254e-02, + 1.05551502053105091677e-02, + 8.99183750016553651196e-03, + 7.43386010822696258193e-03, + 5.88183246471273707412e-03, + 4.33636307232945251988e-03, + 2.79805428998205086427e-03, + 1.26750212499337003291e-03, +-2.54703971099550386531e-04, +-1.76798130311027175757e-03, +-3.27175412906725469539e-03, 
+-4.76545385331804925710e-03, +-6.24851921581533794464e-03, +-7.72039647752874400727e-03, +-9.18053960192777122884e-03, +-1.06284104324833178490e-02, +-1.20634788661366718077e-02, +-1.34852230226875247771e-02, +-1.48931294100519973078e-02, +-1.62866930853476296615e-02, +-1.76654178117594401476e-02, +-1.90288162111466874205e-02, +-2.03764099123495759369e-02, +-2.17077296951579609696e-02, +-2.30223156299061669505e-02, +-2.43197172126588360974e-02, +-2.55994934959561624976e-02, +-2.68612132150869431513e-02, +-2.81044549098614510063e-02, +-2.93288070418574950415e-02, +-3.05338681071131295974e-02, +-3.17192467442452205595e-02, +-3.28845618379712614776e-02, +-3.40294426180154721551e-02, +-3.51535287533818185945e-02, +-3.62564704419792716017e-02, +-3.73379284955845242022e-02, +-3.83975744201309962533e-02, +-3.94350904913155775322e-02, +-4.04501698255130062720e-02, +-4.14425164459938585870e-02, +-4.24118453444415760556e-02, +-4.33578825377650758921e-02, +-4.42803651202084772032e-02, +-4.51790413107587551789e-02, +-4.60536704958539877541e-02, +-4.69040232673985507672e-02, +-4.77298814560914094751e-02, +-4.85310381600771723054e-02, +-4.93072977689298017068e-02, +-5.00584759829825892696e-02, +-5.07843998280173986037e-02, +-5.14849076653303427964e-02, +-5.21598491971914657306e-02, +-5.28090854677170859488e-02, +-5.34324888591782357072e-02, +-5.40299430837655400572e-02, +-5.46013431708381041796e-02, +-5.51465954496810906171e-02, +-5.56656175277993395256e-02, +-5.61583382647804357779e-02, +-5.66246977417538960298e-02, +-5.70646472264832865795e-02, +-5.74781491341238848225e-02, +-5.78651769836829588112e-02, +-5.82257153502198851469e-02, +-5.85597598128258789441e-02, +-5.88673168984241990120e-02, +-5.91484040214318093631e-02, +-5.94030494193287308957e-02, +-5.96312920841784027681e-02, +-5.98331816901454746627e-02, +-6.00087785170606569096e-02, +-6.01581533700810480725e-02, +-6.02813874954959694197e-02, +-6.03785724927326447609e-02, +-6.04498102226119424230e-02, +-6.04952127119116611631e-02, +-6.05149020542914278797e-02, +-6.05090103076376881197e-02, +-6.04776793878847099273e-02, +-6.04210609593744951695e-02, +-6.03393163218124903291e-02, +-6.02326162938837256222e-02, +-6.01011410935896536745e-02, +-5.99450802153716350018e-02, +-5.97646323040843391317e-02, +-5.95600050258849322837e-02, +-5.93314149361059764431e-02, +-5.90790873441773764507e-02, +-5.88032561756684640786e-02, +-5.85041638315173181950e-02, +-5.81820610445198463379e-02, +-5.78372067331465664064e-02, +-5.74698678527617162759e-02, +-5.70803192443151696800e-02, +-5.66688434805820984153e-02, +-5.62357307100216502471e-02, +-5.57812784983319834287e-02, +-5.53057916677746758127e-02, +-5.48095821343453915020e-02, +-5.42929687428649263015e-02, +-5.37562771000702349644e-02, +-5.31998394057807341695e-02, +-5.26239942822169029513e-02, +-5.20290866015511582754e-02, +-5.14154673117670768523e-02, +-5.07834932609073572141e-02, +-5.01335270197884388943e-02, +-4.94659367032617980353e-02, +-4.87810957901005926018e-02, +-4.80793829415919610204e-02, +-4.73611818189140221236e-02, +-4.66268808993793651418e-02, +-4.58768732916221277929e-02, +-4.51115565498113532672e-02, +-4.43313324869706107401e-02, +-4.35366069874822472774e-02, +-4.27277898188581847783e-02, +-4.19052944428566706558e-02, +-4.10695378260253277092e-02, +-4.02209402497498702544e-02, +-3.93599251198885058400e-02, +-3.84869187760717781921e-02, +-3.76023503007467674308e-02, +-3.67066513280452297319e-02, +-3.58002558525536487832e-02, +-3.48836000380640318119e-02, +-3.39571220263849699039e-02, 
+-3.30212617462878818553e-02, +-3.20764607226682249563e-02, +-3.11231618859974003277e-02, +-3.01618093821427596390e-02, +-2.91928483826300218251e-02, +-2.82167248954252464221e-02, +-2.72338855763107207109e-02, +-2.62447775409285488646e-02, +-2.52498481775659533444e-02, +-2.42495449607560524530e-02, +-2.32443152657647901516e-02, +-2.22346061840382018537e-02, +-2.12208643396787077773e-02, +-2.02035357070221716080e-02, +-1.91830654293842946256e-02, +-1.81598976390459701524e-02, +-1.71344752785447841659e-02, +-1.61072399233397958729e-02, +-1.50786316059164128556e-02, +-1.40490886413957953571e-02, +-1.30190474547137412242e-02, +-1.19889424094323342185e-02, +-1.09592056382471266657e-02, +-9.93026687525074697183e-03, +-8.90255329001433948211e-03, +-7.87648932354562125724e-03, +-6.85249652618241146540e-03, +-5.83099339747908569642e-03, +-4.81239522814202146106e-03, +-3.79711394406930576734e-03, +-2.78555795254968683455e-03, +-1.77813199067227692071e-03, +-7.75236976000132386663e-04, + 2.22730140442126654798e-04, + 1.21537651881706244492e-03, + 2.20231357271108733539e-03, + 3.18315710891246220898e-03, + 4.15752746468348553799e-03, + 5.12504964248380791986e-03, + 6.08535344210042478813e-03, + 7.03807359014245199208e-03, + 7.98284986685961206465e-03, + 8.91932723024580452476e-03, + 9.84715593738785290034e-03, + 1.07659916630240357766e-02, + 1.16754956152756248638e-02, + 1.25753346485176220604e-02, + 1.34651813733560731662e-02, + 1.43447142636787781933e-02, + 1.52136177607511777904e-02, + 1.60715823743268690360e-02, + 1.69183047807457617728e-02, + 1.77534879179936204430e-02, + 1.85768410776981605925e-02, + 1.93880799940382604618e-02, + 2.01869269295435888045e-02, + 2.09731107577651766649e-02, + 2.17463670427963037812e-02, + 2.25064381156266125894e-02, + 2.32530731473125917841e-02, + 2.39860282189490944815e-02, + 2.47050663884288181082e-02, + 2.54099577539762186418e-02, + 2.61004795144461655687e-02, + 2.67764160263764816605e-02, + 2.74375588577874841845e-02, + 2.80837068387202806741e-02, + 2.87146661085097808230e-02, + 2.93302501597869115513e-02, + 2.99302798792087168533e-02, + 3.05145835849139068774e-02, + 3.10829970607048658437e-02, + 3.16353635869560598226e-02, + 3.21715339682534032240e-02, + 3.26913665577675052742e-02, + 3.31947272783659833029e-02, + 3.36814896404726560331e-02, + 3.41515347566807569990e-02, + 3.46047513531298478462e-02, + 3.50410357776568884280e-02, + 3.54602920047340924858e-02, + 3.58624316372060172875e-02, + 3.62473739048404727803e-02, + 3.66150456597097023748e-02, + 3.69653813684179058385e-02, + 3.72983231011940682964e-02, + 3.76138205178691634178e-02, + 3.79118308507581658340e-02, + 3.81923188844700278732e-02, + 3.84552569326661666804e-02, + 3.87006248117945095277e-02, + 3.89284098118221136287e-02, + 3.91386066639944005252e-02, + 3.93312175056476295842e-02, + 3.95062518421033306848e-02, + 3.96637265056755394799e-02, + 3.98036656118202977761e-02, + 3.99261005124597820326e-02, + 4.00310697465144360585e-02, + 4.01186189876763035778e-02, + 4.01888009894591641258e-02, + 4.02416755275608953313e-02, + 4.02773093395744422041e-02, + 4.02957760620868618573e-02, + 4.02971561652026855072e-02, + 4.02815368845340013304e-02, + 4.02490121506946865737e-02, + 4.01996825163432602857e-02, + 4.01336550808131173329e-02, + 4.00510434123766412284e-02, + 3.99519674681838021790e-02, + 3.98365535119223901361e-02, + 3.97049340292425986809e-02, + 3.95572476409943238340e-02, + 3.93936390143226622396e-02, + 3.92142587716682866628e-02, + 3.90192633977227906761e-02, + 3.88088151443859719070e-02, + 
3.85830819337740632546e-02, + 3.83422372593309676581e-02, + 3.80864600850902706997e-02, + 3.78159347431409609275e-02, + 3.75308508293468318096e-02, + 3.72314030973733209318e-02, + 3.69177913510723085255e-02, + 3.65902203352790472701e-02, + 3.62488996250740352911e-02, + 3.58940435135636018438e-02, + 3.55258708982338911042e-02, + 3.51446051659309519066e-02, + 3.47504740765239503175e-02, + 3.43437096453047957523e-02, + 3.39245480241803926136e-02, + 3.34932293817127510471e-02, + 3.30499977820627663383e-02, + 3.25951010628938789293e-02, + 3.21287907122915217251e-02, + 3.16513217447548164674e-02, + 3.11629525763171093267e-02, + 3.06639448988514501382e-02, + 3.01545635536184866710e-02, + 2.96350764041116987446e-02, + 2.91057542082603579181e-02, + 2.85668704900414009706e-02, + 2.80187014105628129368e-02, + 2.74615256386703497637e-02, + 2.68956242211381771345e-02, + 2.63212804524964143205e-02, + 2.57387797445546746833e-02, + 2.51484094956766456030e-02, + 2.45504589598617914414e-02, + 2.39452191156906725455e-02, + 2.33329825351894608321e-02, + 2.27140432526683408443e-02, + 2.20886966335908999093e-02, + 2.14572392435271874778e-02, + 2.08199687172471933905e-02, + 2.01771836280079629178e-02, + 1.95291833570884962312e-02, + 1.88762679636269269101e-02, + 1.82187380548123403767e-02, + 1.75568946564845403124e-02, + 1.68910390841945853846e-02, + 1.62214728147774996103e-02, + 1.55484973584896369464e-02, + 1.48724141317607399387e-02, + 1.41935243306124080076e-02, + 1.35121288047925294795e-02, + 1.28285279326754275003e-02, + 1.21430214969758445281e-02, + 1.14559085613274869858e-02, + 1.07674873477713456404e-02, + 1.00780551152029641815e-02, + 9.38790803882408146641e-03, + 8.69734109064560119429e-03, + 8.00664792108640895052e-03, + 7.31612074171312902482e-03, + 6.62605020916498532735e-03, + 5.93672531030635993593e-03, + 5.24843324865020312286e-03, + 4.56145933209378684481e-03, + 3.87608686200798923521e-03, + 3.19259702372048361982e-03, + 2.51126877843176705626e-03, + 1.83237875660391988202e-03, + 1.15620115285868549186e-03, + 4.83007622422852007059e-04, +-1.86932820843070034112e-04, +-8.53353904797455329115e-04, +-1.51599219771675255281e-03, +-2.17458720530792556924e-03, +-2.82888146600037857989e-03, +-3.47862064448672828401e-03, +-4.12355362347965707925e-03, +-4.76343259365718217635e-03, +-5.39801314176371720144e-03, +-6.02705433684159932323e-03, +-6.65031881456398799024e-03, +-7.26757285964317947813e-03, +-7.87858648628854928153e-03, +-8.48313351669007821576e-03, +-9.08099165750268083608e-03, +-9.67194257431004678072e-03, +-1.02557719640449674509e-02, +-1.08322696253466653482e-02, +-1.14012295268339416271e-02, +-1.19624498732761111452e-02, +-1.25157331696445651287e-02, +-1.30608862830260651078e-02, +-1.35977205023845738180e-02, +-1.41260515961539080687e-02, +-1.46456998676501564532e-02, +-1.51564902082884610246e-02, +-1.56582521485937077588e-02, +-1.61508199069943896020e-02, +-1.66340324363880263936e-02, +-1.71077334684716746149e-02, +-1.75717715558275228149e-02, +-1.80260001117568194329e-02, +-1.84702774478586080609e-02, +-1.89044668093441003975e-02, +-1.93284364080869922042e-02, +-1.97420594534034529732e-02, +-2.01452141805614354242e-02, +-2.05377838770183090977e-02, +-2.09196569063852221004e-02, +-2.12907267301215390176e-02, +-2.16508919269584217127e-02, +-2.20000562100566773860e-02, +-2.23381284419012192399e-02, +-2.26650226469371808558e-02, +-2.29806580219539050014e-02, +-2.32849589442222955349e-02, +-2.35778549773940013234e-02, +-2.38592808751701725145e-02, +-2.41291765827496146324e-02, 
+-2.43874872360661625048e-02, +-2.46341631588262027774e-02, +-2.48691598573592027865e-02, +-2.50924380132932847709e-02, +-2.53039634740697960691e-02, +-2.55037072413113186098e-02, +-2.56916454570593408291e-02, +-2.58677593878966008423e-02, +-2.60320354069717534162e-02, +-2.61844649739453247395e-02, +-2.63250446128731642459e-02, +-2.64537758880496950975e-02, +-2.65706653778289558776e-02, +-2.66757246464459155111e-02, +-2.67689702138592805492e-02, +-2.68504235236379437679e-02, +-2.69201109089152179621e-02, +-2.69780635564342181898e-02, +-2.70243174687087896191e-02, +-2.70589134243261995871e-02, +-2.70818969364167577707e-02, +-2.70933182093176481986e-02, +-2.70932320934577017257e-02, +-2.70816980384915410862e-02, +-2.70587800447114543156e-02, +-2.70245466127663376554e-02, +-2.69790706917171427270e-02, +-2.69224296254590607369e-02, +-2.68547050975419879237e-02, +-2.67759830744198866481e-02, +-2.66863537471611969587e-02, +-2.65859114716531889921e-02, +-2.64747547073322930800e-02, +-2.63529859544745573285e-02, +-2.62207116900796607939e-02, +-2.60780423023825730366e-02, +-2.59250920240284947471e-02, +-2.57619788639449828760e-02, +-2.55888245379471308827e-02, +-2.54057543981124761556e-02, +-2.52128973609604678519e-02, +-2.50103858344739478359e-02, +-2.47983556439997539222e-02, +-2.45769459570643403201e-02, +-2.43462992071435090080e-02, +-2.41065610164222128564e-02, +-2.38578801175844575078e-02, +-2.36004082746693114037e-02, +-2.33343002030331689300e-02, +-2.30597134884559483436e-02, +-2.27768085054302904524e-02, +-2.24857483346725776918e-02, +-2.21866986798954189675e-02, +-2.18798277838799307138e-02, +-2.15653063438876642366e-02, +-2.12433074264517691987e-02, +-2.09140063815867055519e-02, +-2.05775807564556566243e-02, +-2.02342102085360346642e-02, +-1.98840764183222142025e-02, +-1.95273630016047500257e-02, +-1.91642554213670816832e-02, +-1.87949408993371563925e-02, +-1.84196083272362247374e-02, +-1.80384481777610752862e-02, +-1.76516524153425696797e-02, +-1.72594144067167720724e-02, +-1.68619288313498413845e-02, +-1.64593915917550098760e-02, +-1.60519997237402040069e-02, +-1.56399513066264282679e-02, +-1.52234453734734331148e-02, +-1.48026818213531103502e-02, +-1.43778613217079923037e-02, +-1.39491852308316760523e-02, +-1.35168555005115483686e-02, +-1.30810745888681710658e-02, +-1.26420453714316226301e-02, +-1.21999710524887047813e-02, +-1.17550550767402828961e-02, +-1.13075010413035727252e-02, +-1.08575126080952908542e-02, +-1.04052934166326063736e-02, +-9.95104699728536351566e-03, +-9.49497668501652312967e-03, +-9.03728553364356763933e-03, +-8.57817623065582068875e-03, +-8.11785101262214349449e-03, +-7.65651158122056946231e-03, +-7.19435901992488725798e-03, +-6.73159371137851351291e-03, +-6.26841525548942068990e-03, +-5.80502238827697216589e-03, +-5.34161290150089295564e-03, +-4.87838356310490647849e-03, +-4.41553003850264462471e-03, +-3.95324681273798422126e-03, +-3.49172711354636287548e-03, +-3.03116283534747218975e-03, +-2.57174446419663202748e-03, +-2.11366100372138449731e-03, +-1.65709990207213789248e-03, +-1.20224697991074881177e-03, +-7.49286359465203312402e-04, +-2.98400394673150758020e-04, + 1.50230397559290287587e-04, + 5.96427404960260163468e-04, + 1.04001398633389997676e-03, + 1.48081553681653948010e-03, + 1.91865955192711671630e-03, + 2.35337569038958404136e-03, + 2.78479583570576333731e-03, + 3.21275415646031688166e-03, + 3.63708716533605539573e-03, + 4.05763377682291995208e-03, + 4.47423536360066955581e-03, + 4.88673581157838838457e-03, + 5.29498157357465894235e-03, + 
5.69882172162047926506e-03, + 6.09810799787139853900e-03, + 6.49269486411187517899e-03, + 6.88243954983998491859e-03, + 7.26720209891677272618e-03, + 7.64684541476874993227e-03, + 8.02123530413159993580e-03, + 8.39024051932213063565e-03, + 8.75373279902990839019e-03, + 9.11158690761618844656e-03, + 9.46368067291306243327e-03, + 9.80989502251233651264e-03, + 1.01501140185368699670e-02, + 1.04842248908878447194e-02, + 1.08121180689596009528e-02, + 1.11336872118183785596e-02, + 1.14488292368375710328e-02, + 1.17574443467867335855e-02, + 1.20594360553697797084e-02, + 1.23547112112087492664e-02, + 1.26431800202723137322e-02, + 1.29247560667452802280e-02, + 1.31993563323394361153e-02, + 1.34669012140451026943e-02, + 1.37273145403230718842e-02, + 1.39805235857388930609e-02, + 1.42264590840399576116e-02, + 1.44650552396788801418e-02, + 1.46962497377853603536e-02, + 1.49199837525900817770e-02, + 1.51362019543059365262e-02, + 1.53448525144697818512e-02, + 1.55458871097522988158e-02, + 1.57392609242401407266e-02, + 1.59249326501989980909e-02, + 1.61028644873237487822e-02, + 1.62730221404839558996e-02, + 1.64353748159745995105e-02, + 1.65898952162792344411e-02, + 1.67365595333573702330e-02, + 1.68753474404654685292e-02, + 1.70062420825228405308e-02, + 1.71292300650343690127e-02, + 1.72443014415816948948e-02, + 1.73514496998961910423e-02, + 1.74506717465267233158e-02, + 1.75419678901157470585e-02, + 1.76253418232991503067e-02, + 1.77008006032431768062e-02, + 1.77683546308354950449e-02, + 1.78280176285450023266e-02, + 1.78798066169677284665e-02, + 1.79237418900749095885e-02, + 1.79598469891815541721e-02, + 1.79881486756524357207e-02, + 1.80086769023645003329e-02, + 1.80214647839439801036e-02, + 1.80265485657978320744e-02, + 1.80239675919585257136e-02, + 1.80137642717629609113e-02, + 1.79959840453853894826e-02, + 1.79706753482452019632e-02, + 1.79378895743111561878e-02, + 1.78976810383233188306e-02, + 1.78501069369546815080e-02, + 1.77952273089348571300e-02, + 1.77331049941585293384e-02, + 1.76638055918014250101e-02, + 1.75873974174670689996e-02, + 1.75039514593883366311e-02, + 1.74135413337067820883e-02, + 1.73162432388551425222e-02, + 1.72121359090659648006e-02, + 1.71013005670323306462e-02, + 1.69838208757447130248e-02, + 1.68597828895295613616e-02, + 1.67292750043147309125e-02, + 1.65923879071472879509e-02, + 1.64492145249898746862e-02, + 1.62998499728209574056e-02, + 1.61443915010654574782e-02, + 1.59829384423819872985e-02, + 1.58155921578329479449e-02, + 1.56424559824643004402e-02, + 1.54636351703211580993e-02, + 1.52792368389266484952e-02, + 1.50893699132506348831e-02, + 1.48941450691946284529e-02, + 1.46936746766213478105e-02, + 1.44880727419542387757e-02, + 1.42774548503756936596e-02, + 1.40619381076500047506e-02, + 1.38416410815988405458e-02, + 1.36166837432563775367e-02, + 1.33871874077307433104e-02, + 1.31532746747999255282e-02, + 1.29150693692685249875e-02, + 1.26726964811125480254e-02, + 1.24262821054400597609e-02, + 1.21759533822933443264e-02, + 1.19218384363212748234e-02, + 1.16640663163469111840e-02, + 1.14027669348586990772e-02, + 1.11380710074510391738e-02, + 1.08701099922405512027e-02, + 1.05990160292857588803e-02, + 1.03249218800347264402e-02, + 1.00479608668283364181e-02, + 9.76826681248407595326e-03, + 9.48597397998680001707e-03, + 9.20121701231205180171e-03, + 8.91413087240663405686e-03, + 8.62485078335300560382e-03, + 8.33351216874106057175e-03, + 8.04025059327335284154e-03, + 7.74520170362733365033e-03, + 7.44850116959968472363e-03, + 7.15028462555652392224e-03, + 
6.85068761221313375642e-03, + 6.54984551876693164157e-03, + 6.24789352540736173808e-03, + 5.94496654622468298501e-03, + 5.64119917254172174859e-03, + 5.33672561668945780872e-03, + 5.03167965625017643561e-03, + 4.72619457878821046942e-03, + 4.42040312709122713147e-03, + 4.11443744494245557813e-03, + 3.80842902344421868274e-03, + 3.50250864791438413365e-03, + 3.19680634537424174582e-03, + 2.89145133264915015631e-03, + 2.58657196509964968506e-03, + 2.28229568600325869593e-03, + 1.97874897660506266980e-03, + 1.67605730685465247574e-03, + 1.37434508684857771554e-03, + 1.07373561899400072825e-03, + 7.74351050912206037222e-04, + 4.76312329096932108620e-04, + 1.79739153344913828647e-04, +-1.15250068026150436743e-04, +-4.08538262157430215240e-04, +-7.00009734810518881830e-04, +-9.89550212697529359140e-04, +-1.27704688496522110984e-03, +-1.56238844381914230262e-03, +-1.84546512427596291067e-03, +-2.12616874302977649017e-03, +-2.40439273642179809562e-03, +-2.68003219750039467159e-03, +-2.95298391216083011210e-03, +-3.22314639435426720723e-03, +-3.49041992035452591087e-03, +-3.75470656207426648626e-03, +-4.01591021941965966441e-03, +-4.27393665167596914500e-03, +-4.52869350791463860101e-03, +-4.78009035641408387002e-03, +-5.02803871308742881402e-03, +-5.27245206890878791856e-03, +-5.51324591633307794364e-03, +-5.75033777470175880286e-03, +-5.98364721463032038506e-03, +-6.21309588137129026331e-03, +-6.43860751714846711591e-03, +-6.66010798245885143193e-03, +-6.87752527633716734257e-03, +-7.09078955558135361203e-03, +-7.29983315293484570641e-03, +-7.50459059422442856246e-03, +-7.70499861445137022159e-03, +-7.90099617283428028169e-03, +-8.09252446680348673513e-03, +-8.27952694494581836748e-03, +-8.46194931890021165288e-03, +-8.63973957420479179992e-03, +-8.81284798009584514900e-03, +-8.98122709826090423468e-03, +-9.14483179054685624276e-03, +-9.30361922562642808254e-03, +-9.45754888462495800494e-03, +-9.60658256571109842037e-03, +-9.75068438765514661215e-03, +-9.88982079235872779677e-03, +-1.00239605463608785763e-02, +-1.01530747413246837108e-02, +-1.02771367935108499936e-02, +-1.03961224422430293518e-02, +-1.05100097473716045521e-02, +-1.06187790857425311958e-02, +-1.07224131466778661165e-02, +-1.08208969264758890494e-02, +-1.09142177219381259629e-02, +-1.10023651229317290939e-02, +-1.10853310039956218930e-02, +-1.11631095149994884197e-02, +-1.12356970708646971419e-02, +-1.13030923403568215463e-02, +-1.13652962339602110059e-02, +-1.14223118908440956359e-02, +-1.14741446649318026840e-02, +-1.15208021100836454503e-02, +-1.15622939644049946284e-02, +-1.15986321336910645080e-02, +-1.16298306740207010868e-02, +-1.16559057735113307669e-02, +-1.16768757332475214827e-02, +-1.16927609473963332182e-02, +-1.17035838825226608945e-02, +-1.17093690561177760784e-02, +-1.17101430143551586693e-02, +-1.17059343090872795129e-02, +-1.16967734740980097013e-02, +-1.16826930006248379257e-02, +-1.16637273121658596037e-02, +-1.16399127385864407935e-02, +-1.16112874895409699111e-02, +-1.15778916272246922003e-02, +-1.15397670384720374415e-02, +-1.14969574062164479888e-02, +-1.14495081803284975280e-02, +-1.13974665478479546959e-02, +-1.13408814026266253211e-02, +-1.12798033143984600957e-02, +-1.12142844972935168402e-02, +-1.11443787778127377519e-02, +-1.10701415622809114236e-02, +-1.09916298037944538957e-02, +-1.09089019686816925125e-02, +-1.08220180024931385970e-02, +-1.07310392955389764802e-02, +-1.06360286479915983754e-02, +-1.05370502345710423397e-02, +-1.04341695688310136247e-02, +-1.03274534670632443106e-02, 
+-1.02169700118386209270e-02, +-1.01027885152025192345e-02, +-9.98497948154308812008e-03, +-9.86361457015006402871e-03, +-9.73876655748246930488e-03, +-9.61050929916365190286e-03, +-9.47891769172138146105e-03, +-9.34406763409175583623e-03, +-9.20603598890469380922e-03, +-9.06490054356958417647e-03, +-8.92073997117914622990e-03, +-8.77363379124968326139e-03, +-8.62366233031589164704e-03, +-8.47090668239862398803e-03, +-8.31544866936306283078e-03, +-8.15737080118616487978e-03, +-7.99675623615058242533e-03, +-7.83368874098351944402e-03, +-7.66825265095798756787e-03, +-7.50053282997436773782e-03, +-7.33061463064018075525e-03, +-7.15858385436481461928e-03, +-6.98452671148786126409e-03, +-6.80852978145714965441e-03, +-6.63067997307481386826e-03, +-6.45106448482760802543e-03, +-6.26977076531890029770e-03, +-6.08688647381931853542e-03, +-5.90249944095203298716e-03, +-5.71669762953000513278e-03, +-5.52956909556100162373e-03, +-5.34120194943696596085e-03, +-5.15168431732329797079e-03, +-4.96110430276443595266e-03, +-4.76954994852103134756e-03, +-4.57710919865432410564e-03, +-4.38386986087277181340e-03, +-4.18991956915663876782e-03, +-3.99534574667439676410e-03, +-3.80023556900675307108e-03, +-3.60467592769156538676e-03, +-3.40875339410503987864e-03, +-3.21255418369197943973e-03, +-3.01616412055992575564e-03, +-2.81966860245005685598e-03, +-2.62315256609809257030e-03, +-2.42670045299875130826e-03, +-2.23039617558575898118e-03, +-2.03432308384080993632e-03, +-1.83856393234277533909e-03, +-1.64320084776991355742e-03, +-1.44831529686655904529e-03, +-1.25398805488530435195e-03, +-1.06029917451672204415e-03, +-8.67327955316482155854e-04, +-6.75152913641518712638e-04, +-4.83851753104545291573e-04, +-2.93501335557769932588e-04, +-1.04177652615230481180e-04, + 8.40442022771478958144e-05, + 2.71090061213828637746e-04, + 4.56886708636217294885e-04, + 6.41361907564611910364e-04, + 8.24444425246958221068e-04, + 1.00606405821750295726e-03, + 1.18615165675600578790e-03, + 1.36463914874257485378e-03, + 1.54145956289825905236e-03, + 1.71654705140769636706e-03, + 1.88983691191461173828e-03, + 2.06126560888645086675e-03, + 2.23077079434063144103e-03, + 2.39829132792830895110e-03, + 2.56376729636941056573e-03, + 2.72714003223500402184e-03, + 2.88835213207216685155e-03, + 3.04734747386685260462e-03, + 3.20407123384176817371e-03, + 3.35846990258462183704e-03, + 3.51049130050470068257e-03, + 3.66008459261367522647e-03, + 3.80720030262936314294e-03, + 3.95179032639856198800e-03, + 4.09380794463911311387e-03, + 4.23320783499702736619e-03, + 4.36994608342004212803e-03, + 4.50398019484403704799e-03, + 4.63526910319382156461e-03, + 4.76377318069614620610e-03, + 4.88945424650618146178e-03, + 5.01227557464674778470e-03, + 5.13220190126144337750e-03, + 5.24919943118207308480e-03, + 5.36323584381190321402e-03, + 5.47428029832571112767e-03, + 5.58230343818897148389e-03, + 5.68727739499729628703e-03, + 5.78917579163970574818e-03, + 5.88797374478673089110e-03, + 5.98364786670789981782e-03, + 6.07617626642060343345e-03, + 6.16553855017385084303e-03, + 6.25171582127166582804e-03, + 6.33469067923863194541e-03, + 6.41444721833308011821e-03, + 6.49097102541174898749e-03, + 6.56424917715103632687e-03, + 6.63427023662958338657e-03, + 6.70102424927795491810e-03, + 6.76450273820044644529e-03, + 6.82469869887525251023e-03, + 6.88160659323871527759e-03, + 6.93522234316026366108e-03, + 6.98554332331408935064e-03, + 7.03256835345506155222e-03, + 7.07629769010476809138e-03, + 7.11673301765615093362e-03, + 7.15387743890304877992e-03, + 
7.18773546500291789924e-03, + 7.21831300488032408247e-03, + 7.24561735407938580650e-03, + 7.26965718307318129604e-03, + 7.29044252503875406940e-03, + 7.30798476310635155423e-03, + 7.32229661709144288850e-03, + 7.33339212971884264747e-03, + 7.34128665234775375920e-03, + 7.34599683020745793799e-03, + 7.34754058715258225737e-03, + 7.34593710994830336597e-03, + 7.34120683209452638829e-03, + 7.33337141719967496728e-03, + 7.32245374191355016119e-03, + 7.30847787843014878861e-03, + 7.29146907657012011139e-03, + 7.27145374545387114529e-03, + 7.24845943477565521351e-03, + 7.22251481568945107037e-03, + 7.19364966131744686118e-03, + 7.16189482689201083881e-03, + 7.12728222954231872138e-03, + 7.08984482773655864257e-03, + 7.04961660039112210374e-03, + 7.00663252565801673161e-03, + 6.96092855940177307472e-03, + 6.91254161337735619636e-03, + 6.86150953312070904788e-03, + 6.80787107556324582597e-03, + 6.75166588638215301593e-03, + 6.69293447709806265528e-03, + 6.63171820193170571955e-03, + 6.56805923443159328512e-03, + 6.50200054388410785683e-03, + 6.43358587151825807998e-03, + 6.36285970651646794888e-03, + 6.28986726184373092646e-03, + 6.21465444990643503531e-03, + 6.13726785805332464285e-03, + 6.05775472392990760317e-03, + 5.97616291069856791357e-03, + 5.89254088213594148099e-03, + 5.80693767761965816410e-03, + 5.71940288701587758180e-03, + 5.62998662548002196115e-03, + 5.53873950818146131014e-03, + 5.44571262496510149348e-03, + 5.35095751496040238082e-03, + 5.25452614115022934027e-03, + 5.15647086491062122543e-03, + 5.05684442053339500839e-03, + 4.95569988974256699088e-03, + 4.85309067621648645985e-03, + 4.74907048012647350216e-03, + 4.64369327270371719946e-03, + 4.53701327084515566163e-03, + 4.42908491176951992635e-03, + 4.31996282773485212186e-03, + 4.20970182082771107734e-03, + 4.09835683783572966160e-03, + 3.98598294521311340144e-03, + 3.87263530415101094040e-03, + 3.75836914576165720403e-03, + 3.64323974638825019007e-03, + 3.52730240304928790995e-03, + 3.41061240902878646739e-03, + 3.29322502962129748730e-03, + 3.17519547804233142826e-03, + 3.05657889151338601694e-03, + 2.93743030753160130203e-03, + 2.81780464033296821486e-03, + 2.69775665755896121301e-03, + 2.57734095713514719389e-03, + 2.45661194437134461702e-03, + 2.33562380929147129019e-03, + 2.21443050420279223534e-03, + 2.09308572151161147862e-03, + 1.97164287179554201940e-03, + 1.85015506213867531038e-03, + 1.72867507473943343883e-03, + 1.60725534579748128607e-03, + 1.48594794468843234732e-03, + 1.36480455343317803527e-03, + 1.24387644646943291808e-03, + 1.12321447073277739387e-03, + 1.00286902605367005473e-03, + 8.82890045877847201225e-04, + 7.63326978315998568199e-04, + 6.44228767529792380013e-04, + 5.25643835459782418976e-04, + 4.07620063901896608968e-04, + 2.90204776937506045247e-04, + 1.73444723723805766706e-04, + 5.73860616484244659592e-05, +-5.79256601447129809831e-05, +-1.72445516855139978872e-04, +-2.86129223744603178401e-04, +-3.98933151471683798521e-04, +-5.10814341036775051638e-04, +-6.21730518333987194034e-04, +-7.31640108305292775383e-04, +-8.40502248693749103720e-04, +-9.48276803391555388537e-04, +-1.05492437538016840815e-03, +-1.16040631925863960139e-03, +-1.26468475335793167046e-03, +-1.36772257143744795961e-03, +-1.46948345396231690001e-03, +-1.56993187895820992227e-03, +-1.66903313244173671925e-03, +-1.76675331842487395514e-03, +-1.86305936849075486246e-03, +-1.95791905094055886799e-03, +-2.05130097950870097026e-03, +-2.14317462164644121497e-03, +-2.23351030637205421117e-03, +-2.32227923168745655630e-03, 
+-2.40945347156019864382e-03, +-2.49500598247101752769e-03, +-2.57891060952624437755e-03, +-2.66114209213571661222e-03, +-2.74167606925580006200e-03, +-2.82048908419860059130e-03, +-2.89755858900738426376e-03, +-2.97286294839969704451e-03, +-3.04638144327833843006e-03, +-3.11809427381250833106e-03, +-3.18798256208930414976e-03, +-3.25602835433819615824e-03, +-3.32221462272949338845e-03, +-3.38652526674922864716e-03, +-3.44894511415224942069e-03, +-3.50945992149601917673e-03, +-3.56805637425754813494e-03, +-3.62472208653601482564e-03, +-3.67944560034401835918e-03, +-3.73221638449032109761e-03, +-3.78302483305743104600e-03, +-3.83186226347697993233e-03, +-3.87872091420681094562e-03, +-3.92359394201303914723e-03, +-3.96647541886111301701e-03, +-4.00736032841956212047e-03, +-4.04624456218095809173e-03, +-4.08312491520368761599e-03, +-4.11799908147960237043e-03, +-4.15086564893147550587e-03, +-4.18172409404541419592e-03, +-4.21057477614258761356e-03, +-4.23741893129556731340e-03, +-4.26225866589410255086e-03, +-4.28509694986558761776e-03, +-4.30593760955555893838e-03, +-4.32478532027361865092e-03, +-4.34164559851025946141e-03, +-4.35652479383043678834e-03, +-4.36943008044940375129e-03, +-4.38036944849687272935e-03, +-4.38935169497548655082e-03, +-4.39638641441941844384e-03, +-4.40148398925969356471e-03, +-4.40465557990201740657e-03, +-4.40591311452397972614e-03, +-4.40526927859759102890e-03, +-4.40273750414399336200e-03, +-4.39833195872687317957e-03, +-4.39206753419116562726e-03, +-4.38395983515392842489e-03, +-4.37402516725410826781e-03, +-4.36228052516814471251e-03, +-4.34874358039834783829e-03, +-4.33343266884099378305e-03, +-4.31636677814128347924e-03, +-4.29756553484226828249e-03, +-4.27704919133478460302e-03, +-4.25483861261575103258e-03, +-4.23095526286197242544e-03, +-4.20542119182673330285e-03, +-4.17825902106652539991e-03, +-4.14949193000528453179e-03, +-4.11914364184333848390e-03, +-4.08723840931864319803e-03, +-4.05380100032764340012e-03, +-4.01885668341308427420e-03, +-3.98243121312639048598e-03, +-3.94455081527187324114e-03, +-3.90524217204034265055e-03, +-3.86453240703949710971e-03, +-3.82244907022857112119e-03, +-3.77902012276458334344e-03, +-3.73427392176791782957e-03, +-3.68823920501411533363e-03, +-3.64094507555994461798e-03, +-3.59242098631046497328e-03, +-3.54269672453505143905e-03, +-3.49180239633924225512e-03, +-3.43976841109999795926e-03, +-3.38662546587152699790e-03, +-3.33240452976901182711e-03, +-3.27713682833726460339e-03, +-3.22085382791172753977e-03, +-3.16358721997866921757e-03, +-3.10536890554190866259e-03, +-3.04623097950279270868e-03, +-2.98620571506079835605e-03, +-2.92532554814109750294e-03, +-2.86362306185669341502e-03, +-2.80113097101106212072e-03, +-2.73788210664884781517e-03, +-2.67390940066071719841e-03, +-2.60924587044905360173e-03, +-2.54392460366138209102e-03, +-2.47797874299740972931e-03, +-2.41144147109654920225e-03, +-2.34434599551184728525e-03, +-2.27672553377682423265e-03, +-2.20861329857108918892e-03, +-2.14004248299099744321e-03, +-2.07104624593109319652e-03, +-2.00165769758243626206e-03, +-1.93190988505322330110e-03, +-1.86183577811795423693e-03, +-1.79146825509988766485e-03, +-1.72084008889326943795e-03, +-1.64998393312966343087e-03, +-1.57893230849462818527e-03, +-1.50771758919912264932e-03, +-1.43637198961127387532e-03, +-1.36492755105304883201e-03, +-1.29341612876705033125e-03, +-1.22186937905778840625e-03, +-1.15031874661246304344e-03, +-1.07879545200534983015e-03, +-1.00733047939059738862e-03, +-9.35954564387392199190e-04, 
+-8.64698182161781326270e-04, +-7.93591535709335898878e-04, +-7.22664544342259342231e-04, +-6.51946832385152269008e-04, +-5.81467718082645003419e-04, +-5.11256202723101406751e-04, +-4.41340959980976682391e-04, +-3.71750325482115011495e-04, +-3.02512286594312090893e-04, +-2.33654472446860898751e-04, +-1.65204144181543805632e-04, +-9.71881854382950145007e-05, +-2.96330930778333087529e-05, + 3.74350318557888840534e-05, + 1.03990492930665522531e-04, + 1.70008006877860913661e-04, + 2.35462711895771908792e-04, + 3.00330175705594570618e-04, + 3.64586403356421419591e-04, + 4.28207844777551787079e-04, + 4.91171402077186748311e-04, + 5.53454436585029511952e-04, + 6.15034775638214118408e-04, + 6.75890719108685204992e-04, + 7.36001045671158494674e-04, + 7.95345018810736208159e-04, + 8.53902392568785988461e-04, + 9.11653417026953884206e-04, + 9.68578843528107927274e-04, + 1.02465992963409239201e-03, + 1.07987844381954517861e-03, + 1.13421666990184857697e-03, + 1.18765741120669863183e-03, + 1.24018399446962118877e-03, + 1.29178027347317181048e-03, + 1.34243063242041153760e-03, + 1.39211998904448393775e-03, + 1.44083379745542361494e-03, + 1.48855805072399061464e-03, + 1.53527928320389963832e-03, + 1.58098457259274222282e-03, + 1.62566154173287799080e-03, + 1.66929836015300469068e-03, + 1.71188374535185877309e-03, + 1.75340696382495094506e-03, + 1.79385783183615881223e-03, + 1.83322671593499951850e-03, + 1.87150453322187351919e-03, + 1.90868275136241117781e-03, + 1.94475338835288632730e-03, + 1.97970901203875114180e-03, + 2.01354273938795765367e-03, + 2.04624823552140497340e-03, + 2.07781971250251560127e-03, + 2.10825192788842950831e-03, + 2.13754018304492627786e-03, + 2.16568032122775241954e-03, + 2.19266872543273649843e-03, + 2.21850231601753312277e-03, + 2.24317854809751244041e-03, + 2.26669540871874157159e-03, + 2.28905141381085521987e-03, + 2.31024560492300908704e-03, + 2.33027754574556848419e-03, + 2.34914731842113299123e-03, + 2.36685551964774167771e-03, + 2.38340325657774047483e-03, + 2.39879214251553785076e-03, + 2.41302429241780800814e-03, + 2.42610231819949092105e-03, + 2.43802932384921434983e-03, + 2.44880890035772524199e-03, + 2.45844512046309356806e-03, + 2.46694253321619750424e-03, + 2.47430615837057783779e-03, + 2.48054148060029584083e-03, + 2.48565444354972896537e-03, + 2.48965144371932805070e-03, + 2.49253932419119233338e-03, + 2.49432536819870756192e-03, + 2.49501729254406187306e-03, + 2.49462324086802481049e-03, + 2.49315177677595679884e-03, + 2.49061187682437026880e-03, + 2.48701292337218105022e-03, + 2.48236469730101589823e-03, + 2.47667737060877201499e-03, + 2.46996149888082565729e-03, + 2.46222801364318543901e-03, + 2.45348821460201170497e-03, + 2.44375376177385136151e-03, + 2.43303666751100867299e-03, + 2.42134928842652587602e-03, + 2.40870431722312039469e-03, + 2.39511477443066006163e-03, + 2.38059400005651662299e-03, + 2.36515564515335941637e-03, + 2.34881366330883401689e-03, + 2.33158230206161314751e-03, + 2.31347609424828503169e-03, + 2.29450984928561332182e-03, + 2.27469864439260342423e-03, + 2.25405781575686164908e-03, + 2.23260294964972971082e-03, + 2.21034987349466626252e-03, + 2.18731464689328946802e-03, + 2.16351355261355186160e-03, + 2.13896308754443764677e-03, + 2.11367995362164717843e-03, + 2.08768104872860673846e-03, + 2.06098345757712827012e-03, + 2.03360444257223585765e-03, + 2.00556143466523338278e-03, + 1.97687202419953838434e-03, + 1.94755395175337346625e-03, + 1.91762509898370150235e-03, + 1.88710347947544372464e-03, + 1.85600722960030926380e-03, + 
1.82435459938927956557e-03, + 1.79216394342290039413e-03, + 1.75945371174334418141e-03, + 1.72624244079242210488e-03, + 1.69254874437936271916e-03, + 1.65839130468247841071e-03, + 1.62378886328829906069e-03, + 1.58876021227249563002e-03, + 1.55332418532590395277e-03, + 1.51749964892962602506e-03, + 1.48130549358308134279e-03, + 1.44476062508820820261e-03, + 1.40788395589385340219e-03, + 1.37069439650360901800e-03, + 1.33321084695073701146e-03, + 1.29545218834344489506e-03, + 1.25743727448406820806e-03, + 1.21918492356532437090e-03, + 1.18071390994699786103e-03, + 1.14204295601606406994e-03, + 1.10319072413361980722e-03, + 1.06417580867146756643e-03, + 1.02501672814153568618e-03, + 9.85731917420752321024e-04, + 9.46339720074761028661e-04, + 9.06858380782653099826e-04, + 8.67306037865940180828e-04, + 8.27700715924154601863e-04, + 7.88060318579825755218e-04, + 7.48402621335140681366e-04, + 7.08745264542995573667e-04, + 6.69105746494616255432e-04, + 6.29501416626196083105e-04, + 5.89949468846577432593e-04, + 5.50466934988384673337e-04, + 5.11070678384569518186e-04, + 4.71777387572266644969e-04, + 4.32603570126133903009e-04, + 3.93565546622840108093e-04, + 3.54679444738640930936e-04, + 3.15961193481461178213e-04, + 2.77426517559644606348e-04, + 2.39090931888283949464e-04, + 2.00969736235134577339e-04, + 1.63078010007227617185e-04, + 1.25430607179671989044e-04, + 8.80421513676910678318e-05, + 5.09270310433270975443e-05, + 1.40993948977051206995e-05, +-2.24268526499689963992e-05, +-5.86380557959355767418e-05, +-9.45208115468090933828e-05, +-1.30061973765318488264e-04, +-1.65248657065980185041e-04, +-2.00068240560348736649e-04, +-2.34508371450769772170e-04, +-2.68556968472767403858e-04, +-3.02202225185153135114e-04, +-3.35432613107714699702e-04, +-3.68236884706315019053e-04, +-4.00604076224976460123e-04, +-4.32523510365022737217e-04, +-4.63984798811095579957e-04, +-4.94977844604242624843e-04, +-5.25492844361963827582e-04, +-5.55520290345540247376e-04, +-5.85050972374773735341e-04, +-6.14075979590610184985e-04, +-6.42586702065795259929e-04, +-6.70574832264203037069e-04, +-6.98032366349193725621e-04, +-7.24951605341828033552e-04, +-7.51325156129156322866e-04, +-7.77145932323820387393e-04, +-8.02407154975299293648e-04, +-8.27102353133903219450e-04, +-8.51225364268258727699e-04, +-8.74770334537431802288e-04, +-8.97731718918501797773e-04, +-9.20104281190827605862e-04, +-9.41883093778050233848e-04, +-9.63063537449165905750e-04, +-9.83641300879760935652e-04, +-1.00361238007481306557e-03, +-1.02297307765449128208e-03, +-1.04172000200423238610e-03, +-1.05985006629069969386e-03, +-1.07736048734502422482e-03, +-1.09424878441514257174e-03, +-1.11051277778848088776e-03, +-1.12615058728708443934e-03, +-1.14116063063659394818e-03, +-1.15554162171100749217e-03, +-1.16929256865495575027e-03, +-1.18241277188544584124e-03, +-1.19490182197486941203e-03, +-1.20675959741726535404e-03, +-1.21798626227982297217e-03, +-1.22858226374166065384e-03, +-1.23854832952187280184e-03, +-1.24788546519901061629e-03, +-1.25659495142406364872e-03, +-1.26467834102919943727e-03, +-1.27213745603428412742e-03, +-1.27897438455362244883e-03, +-1.28519147760499177768e-03, +-1.29079134582336268501e-03, +-1.29577685608154116216e-03, +-1.30015112802015094483e-03, +-1.30391753048923449490e-03, +-1.30707967790388737521e-03, +-1.30964142651636395419e-03, +-1.31160687060702277167e-03, +-1.31298033859659005196e-03, +-1.31376638908218385492e-03, +-1.31396980679961724973e-03, +-1.31359559851441281067e-03, +-1.31264898884408520024e-03, 
+-1.31113541601419526274e-03, +-1.30906052755070225271e-03, +-1.30643017591116055581e-03, +-1.30325041405731484843e-03, +-1.29952749097163636725e-03, +-1.29526784712039586857e-03, +-1.29047810986578645812e-03, +-1.28516508882973415524e-03, +-1.27933577121191956356e-03, +-1.27299731706460124078e-03, +-1.26615705452684046764e-03, +-1.25882247502064623448e-03, +-1.25100122841167137044e-03, +-1.24270111813698665680e-03, +-1.23393009630251375940e-03, +-1.22469625875265661526e-03, +-1.21500784011470538623e-03, +-1.20487320882053006339e-03, +-1.19430086210808427544e-03, +-1.18329942100529298017e-03, +-1.17187762529875026674e-03, +-1.16004432848981092231e-03, +-1.14780849274044756403e-03, +-1.13517918381147498753e-03, +-1.12216556599544479457e-03, +-1.10877689704675665670e-03, +-1.09502252311132830878e-03, +-1.08091187365826806380e-03, +-1.06645445641585442931e-03, +-1.05165985231426076241e-03, +-1.03653771043727446592e-03, +-1.02109774298538512859e-03, +-1.00534972025243104707e-03, +-9.89303465618205854493e-04, +-9.72968850559070386023e-04, +-9.56355789678956110487e-04, +-9.39474235762757228438e-04, +-9.22334174854466432199e-04, +-9.04945621361946477522e-04, +-8.87318613190598128385e-04, +-8.69463206907894578697e-04, +-8.51389472940857513876e-04, +-8.33107490808391681729e-04, +-8.14627344390523211622e-04, +-7.95959117236444131487e-04, +-7.77112887913182157074e-04, +-7.58098725396869001399e-04, +-7.38926684508335205812e-04, +-7.19606801394884719458e-04, +-7.00149089059903611230e-04, +-6.80563532942129469604e-04, +-6.60860086546161931871e-04, +-6.41048667125920334890e-04, +-6.21139151422514545521e-04, +-6.01141371458319205383e-04, +-5.81065110388481308873e-04, +-5.60920098411564179222e-04, +-5.40716008740645473485e-04, +-5.20462453636329636791e-04, +-5.00168980502928035153e-04, +-4.79845068049252442000e-04, +-4.59500122515179752151e-04, +-4.39143473965308969373e-04, +-4.18784372650737749064e-04, +-3.98431985440370181519e-04, +-3.78095392322528038760e-04, +-3.57783582978218623466e-04, +-3.37505453426840712917e-04, +-3.17269802745462131330e-04, +-2.97085329862539673351e-04, +-2.76960630426857884294e-04, +-2.56904193752789059696e-04, +-2.36924399842341755311e-04, +-2.17029516485020210204e-04, +-1.97227696436049984184e-04, +-1.77526974673721080374e-04, +-1.57935265736364822877e-04, +-1.38460361139671443579e-04, +-1.19109926874780676581e-04, +-9.98915009877096604120e-05, +-8.08124912404658787314e-05, +-6.18801728543761411219e-05, +-4.31016863358901726119e-05, +-2.44840353852607290751e-05, +-6.03408488822807525549e-06, + 1.22414410087942923482e-05, + 3.03359607401630166279e-05, + 4.82430370786643698930e-05, + 6.59563788094303331657e-05, + 8.34698423199615796228e-05, + 1.00777433106333784039e-04, + 1.17873307195481496850e-04, + 1.34751772483674884346e-04, + 1.51407289991337552218e-04, + 1.67834475034225882872e-04, + 1.84028098311239825778e-04, + 1.99983086909011169297e-04, + 2.15694525223608961004e-04, + 2.31157655799539194473e-04, + 2.46367880086458945071e-04, + 2.61320759113890506518e-04, + 2.76012014084471178285e-04, + 2.90437526885940853344e-04, + 3.04593340522622661477e-04, + 3.18475659466672001308e-04, + 3.32080849929764377465e-04, + 3.45405440055728798956e-04, + 3.58446120034815369788e-04, + 3.71199742140139986053e-04, + 3.83663320687053425832e-04, + 3.95834031916090828447e-04, + 4.07709213800306149479e-04, + 4.19286365777668133727e-04, + 4.30563148409382728376e-04, + 4.41537382964913725693e-04, + 4.52207050934656925797e-04, + 4.62570293470961546242e-04, + 4.72625410758635230730e-04, + 
4.82370861315684551149e-04, + 4.91805261225328150740e-04, + 5.00927383300294468485e-04, + 5.09736156180340470138e-04, + 5.18230663364083014855e-04, + 5.26410142176160977465e-04, + 5.34273982670858136115e-04, + 5.41821726473215733132e-04, + 5.49053065558819199977e-04, + 5.55967840973343140065e-04, + 5.62566041493084596781e-04, + 5.68847802227587348751e-04, + 5.74813403165606124842e-04, + 5.80463267665585255090e-04, + 5.85797960891953992807e-04, + 5.90818188198352634011e-04, + 5.95524793459205619046e-04, + 5.99918757350793364382e-04, + 6.04001195583171897330e-04, + 6.07773357084216074402e-04, + 6.11236622137123529822e-04, + 6.14392500472671181100e-04, + 6.17242629317575641247e-04, + 6.19788771400296852575e-04, + 6.22032812915646266330e-04, + 6.23976761449542352556e-04, + 6.25622743865282679096e-04, + 6.26973004152734866845e-04, + 6.28029901241785953013e-04, + 6.28795906781460817864e-04, + 6.29273602886090081127e-04, + 6.29465679849935955126e-04, + 6.29374933831637162034e-04, + 6.29004264509917614644e-04, + 6.28356672711923979618e-04, + 6.27435258015602670238e-04, + 6.26243216327530610385e-04, + 6.24783837437580067548e-04, + 6.23060502551831571320e-04, + 6.21076681805128117189e-04, + 6.18835931754663313292e-04, + 6.16341892855999597296e-04, + 6.13598286922899423257e-04, + 6.10608914572360016171e-04, + 6.07377652656219848941e-04, + 6.03908451680720608987e-04, + 6.00205333215381750360e-04, + 5.96272387292547546150e-04, + 5.92113769798962869678e-04, + 5.87733699860716260477e-04, + 5.83136457222883410056e-04, + 5.78326379625196070923e-04, + 5.73307860175058681823e-04, + 5.68085344719199331932e-04, + 5.62663329215264671791e-04, + 5.57046357104629347515e-04, + 5.51239016687691835844e-04, + 5.45245938502918582949e-04, + 5.39071792710859535452e-04, + 5.32721286484389919269e-04, + 5.26199161406370888652e-04, + 5.19510190875949965511e-04, + 5.12659177524647379116e-04, + 5.05650950643461694588e-04, + 4.98490363622070692395e-04, + 4.91182291401325650700e-04, + 4.83731627940122419548e-04, + 4.76143283697776802978e-04, + 4.68422183132952726740e-04, + 4.60573262220249240485e-04, + 4.52601465985464203410e-04, + 4.44511746060588389465e-04, + 4.36309058259498746754e-04, + 4.27998360175392566791e-04, + 4.19584608800889610671e-04, + 4.11072758171783069880e-04, + 4.02467757035333936928e-04, + 3.93774546544049656484e-04, + 3.84998057975832470683e-04, + 3.76143210481313700240e-04, + 3.67214908859312392543e-04, + 3.58218041361136620669e-04, + 3.49157477524604336044e-04, + 3.40038066038525694287e-04, + 3.30864632638427479810e-04, + 3.21641978034209344382e-04, + 3.12374875870502510550e-04, + 3.03068070720368721877e-04, + 2.93726276113044930307e-04, + 2.84354172596324122988e-04, + 2.74956405834251703824e-04, + 2.65537584740686090270e-04, + 2.56102279649332684359e-04, + 2.46655020520732712157e-04, + 2.37200295186851598233e-04, + 2.27742547633623577692e-04, + 2.18286176322046336163e-04, + 2.08835532548216017655e-04, + 1.99394918842769620351e-04, + 1.89968587410093434423e-04, + 1.80560738607737464008e-04, + 1.71175519466368710516e-04, + 1.61817022250571095152e-04, + 1.52489283060860649410e-04, + 1.43196280477158617963e-04, + 1.33941934244020521144e-04, + 1.24730103997818122337e-04, + 1.15564588036148766046e-04, + 1.06449122129624024050e-04, + 9.73873783762444747338e-05, + 8.83829640984437603628e-05, + 7.94394207830501368397e-05, + 7.05602230641164422306e-05, + 6.17487777488439845697e-05, + 5.30084228865277819307e-05, + 4.43424268807018922721e-05, + 3.57539876443504868529e-05, + 2.72462317982963840894e-05, + 
1.88222139126731892130e-05, + 1.04849157914673770899e-05, + 2.23724580001398245952e-06, +-5.91796176458841909860e-06, +-1.39779472603022615363e-05, +-2.19400262443108254274e-05, +-2.98015899747312779436e-05, +-3.75601058699388188874e-05, +-4.52131179258346812058e-05, +-5.27582470911579511194e-05, +-6.01931916011169823111e-05, +-6.75157272696155086192e-05, +-7.47237077402686759994e-05, +-8.18150646965978259242e-05, +-8.87878080315813176041e-05, +-9.56400259770174112402e-05, +-1.02369885192932295447e-04, +-1.08975630817447491838e-04, +-1.15455586477445233109e-04, +-1.21808154260462635915e-04, +-1.28031814648165977134e-04, +-1.34125126411867404445e-04, +-1.40086726470483718130e-04, +-1.45915329711446663599e-04, +-1.51609728774925367391e-04, +-1.57168793801964925671e-04, +-1.62591472146915872604e-04, +-1.67876788054762332352e-04, +-1.73023842303760092562e-04, +-1.78031811814023872061e-04, +-1.82899949222519527144e-04, +-1.87627582425063416539e-04, +-1.92214114085866635134e-04, +-1.96659021115234305434e-04, +-2.00961854115964548605e-04, +-2.05122236799067813760e-04, +-2.09139865369424642018e-04, +-2.13014507881963919287e-04, +-2.16746003569016719827e-04, +-2.20334262139455170628e-04, +-2.23779263050288623272e-04, +-2.27081054751326671731e-04, +-2.30239753903598189230e-04, +-2.33255544572162539776e-04, +-2.36128677394024743099e-04, +-2.38859468721759089912e-04, +-2.41448299743606627134e-04, +-2.43895615580664562507e-04, +-2.46201924361897988517e-04, +-2.48367796277648787261e-04, +-2.50393862612362249915e-04, +-2.52280814757210175570e-04, +-2.54029403203337271477e-04, +-2.55640436516424906718e-04, +-2.57114780293308988878e-04, +-2.58453356101321892260e-04, +-2.59657140401130335366e-04, +-2.60727163453730896234e-04, +-2.61664508212373382850e-04, +-2.62470309200077975623e-04, +-2.63145751373508769710e-04, +-2.63692068973905575490e-04, +-2.64110544365773612825e-04, +-2.64402506864084673854e-04, +-2.64569331550674807438e-04, +-2.64612438080565591254e-04, +-2.64533289478916812908e-04, +-2.64333390929326621406e-04, +-2.64014288554176887285e-04, +-2.63577568187730454374e-04, +-2.63024854142685936189e-04, +-2.62357807970878796362e-04, +-2.61578127218826830911e-04, +-2.60687544178804346540e-04, +-2.59687824636131118095e-04, +-2.58580766613355161020e-04, +-2.57368199111998000473e-04, +-2.56051980852535184582e-04, +-2.54633999013272917245e-04, +-2.53116167968770410229e-04, +-2.51500428028467203686e-04, +-2.49788744176148520739e-04, +-2.47983104810888498049e-04, +-2.46085520490098991209e-04, +-2.44098022675305284133e-04, +-2.42022662481256641692e-04, +-2.39861509428983846149e-04, +-2.37616650203396260988e-04, +-2.35290187416002671578e-04, +-2.32884238373345356290e-04, +-2.30400933851706944807e-04, +-2.27842416878660760553e-04, +-2.25210841522004905167e-04, +-2.22508371686640725982e-04, +-2.19737179919906623887e-04, +-2.16899446225907947382e-04, +-2.13997356889345798732e-04, +-2.11033103309352538255e-04, +-2.08008880843819519632e-04, +-2.04926887664708660573e-04, +-2.01789323624807442155e-04, +-1.98598389136399967663e-04, +-1.95356284062287273917e-04, +-1.92065206619610498147e-04, +-1.88727352296894345393e-04, +-1.85344912784731391314e-04, +-1.81920074920499755819e-04, +-1.78455019647530454055e-04, +-1.74951920989073971512e-04, +-1.71412945037461607323e-04, +-1.67840248958807175192e-04, +-1.64235980013589989322e-04, +-1.60602274593463965847e-04, +-1.56941257274607583660e-04, +-1.53255039887926981963e-04, +-1.49545720606400077326e-04, +-1.45815383049865629223e-04, +-1.42066095407511174204e-04, 
+-1.38299909578337953649e-04, +-1.34518860329830815736e-04, +-1.30724964475093245915e-04, +-1.26920220068654472992e-04, +-1.23106605621173758290e-04, +-1.19286079333219134950e-04, +-1.15460578348350841244e-04, +-1.11632018025632952099e-04, +-1.07802291231779830637e-04, +-1.03973267653065606659e-04, +-1.00146793127148565233e-04, +-9.63246889949230804435e-05, +-9.25087514725346873358e-05, +-8.87007510436452450671e-05, +-8.49024318720545515026e-05, +-8.11155112347393549661e-05, +-7.73416789754019609908e-05, +-7.35825969785674924512e-05, +-6.98398986642887023612e-05, +-6.61151885034750767074e-05, +-6.24100415538899748772e-05, +-5.87260030168200428450e-05, +-5.50645878143974986741e-05, +-5.14272801876128782695e-05, +-4.78155333149380714058e-05, +-4.42307689515766361233e-05, +-4.06743770892715146635e-05, +-3.71477156366324749162e-05, +-3.36521101199027279356e-05, +-3.01888534041130525139e-05, +-2.67592054345224943665e-05, +-2.33643929982665948157e-05, +-2.00056095060906951114e-05, +-1.66840147940797201324e-05, +-1.34007349452439661363e-05, +-1.01568621308437851693e-05, +-6.95345447128813701427e-06, +-3.79153591650250810366e-06, +-6.72096145553293655593e-07, + 2.40390951459279597616e-06, + 5.43556015137456058426e-06, + 8.42196931017640123499e-06, + 1.13622850193110927676e-05, + 1.42556897966238257746e-05, + 1.71014006379341248229e-05, + 1.98986689875390831126e-05, + 2.26467806909793172563e-05, + 2.53450559303131682314e-05, + 2.79928491421220740245e-05, + 3.05895489185053302676e-05, + 3.31345778912925430443e-05, + 3.56273925997451632237e-05, + 3.80674833419947510364e-05, + 4.04543740105068599521e-05, + 4.27876219118044463113e-05, + 4.50668175707753267513e-05, + 4.72915845198089502860e-05, + 4.94615790730713200283e-05, + 5.15764900862005805371e-05, + 5.36360387017334339240e-05, + 5.56399780805481871900e-05, + 5.75880931196438774090e-05, + 5.94802001565561985590e-05, + 6.13161466607366493955e-05, + 6.30958109121965409621e-05, + 6.48191016677504891876e-05, + 6.64859578151715343739e-05, + 6.80963480156043240521e-05, + 6.96502703345347587470e-05, + 7.11477518616810702089e-05, + 7.25888483201263885920e-05, + 7.39736436650163245096e-05, + 7.53022496721903708853e-05, + 7.65748055170595645635e-05, + 7.77914773440878826866e-05, + 7.89524578272120806021e-05, + 8.00579657215541123884e-05, + 8.11082454067584148567e-05, + 8.21035664223097817662e-05, + 8.30442229951717527346e-05, + 8.39305335600990616227e-05, + 8.47628402729624154692e-05, + 8.55415085174379832356e-05, + 8.62669264054029689666e-05, + 8.69395042713942022743e-05, + 8.75596741614536786501e-05, + 8.81278893167311284636e-05, + 8.86446236521661512422e-05, + 8.91103712306026604075e-05, + 8.95256457326717546962e-05, + 8.98909799227875892661e-05, + 9.02069251115862640782e-05, + 9.04740506151490363976e-05, + 9.06929432113401867103e-05, + 9.08642065935940279782e-05, + 9.09884608224771455898e-05, + 9.10663417753510066833e-05, + 9.10985005944656069544e-05, + 9.10856031337957275460e-05, + 9.10283294049456589850e-05, + 9.09273730224310103125e-05, + 9.07834406486567112131e-05, + 9.05972514388884682737e-05, + 9.03695364865346147717e-05, + 9.01010382690298824038e-05, + 8.97925100946232214449e-05, + 8.94447155503604388049e-05, + 8.90584279515538464710e-05, + 8.86344297930203114553e-05, + 8.81735122023717546660e-05, + 8.76764743956322256668e-05, + 8.71441231354561004216e-05, + 8.65772721922116491981e-05, + 8.59767418081955635050e-05, + 8.53433581652348287718e-05, + 8.46779528559294970011e-05, + 8.39813623587842351167e-05, + 8.32544275174723505482e-05, + 
8.24979930244696123580e-05, + 8.17129069092911340059e-05, + 8.09000200315580515319e-05, + 8.00601855791177764851e-05, + 7.91942585714318722121e-05, + 7.83030953684469709260e-05, + 7.73875531851490379041e-05, + 7.64484896120066966278e-05, + 7.54867621414943666810e-05, + 7.45032277008856680736e-05, + 7.34987421914997258445e-05, + 7.24741600345781298914e-05, + 7.14303337239639759421e-05, + 7.03681133857495075701e-05, + 6.92883463450485250238e-05, + 6.81918767000556897513e-05, + 6.70795449035298084342e-05, + 6.59521873518545653477e-05, + 6.48106359818019012913e-05, + 6.36557178751403541801e-05, + 6.24882548712000070136e-05, + 6.13090631875229413088e-05, + 6.01189530487050480105e-05, + 5.89187283235398522627e-05, + 5.77091861705600261078e-05, + 5.64911166920768371556e-05, + 5.52653025968012133018e-05, + 5.40325188711331935503e-05, + 5.27935324591918643858e-05, + 5.15491019516612150597e-05, + 5.02999772835162253098e-05, + 4.90468994406823114189e-05, + 4.77906001756943685883e-05, + 4.65318017323872241855e-05, + 4.52712165796745444508e-05, + 4.40095471544385224527e-05, + 4.27474856135762158195e-05, + 4.14857135952114382585e-05, + 4.02249019891050019768e-05, + 3.89657107162695045101e-05, + 3.77087885178019617646e-05, + 3.64547727529321324196e-05, + 3.52042892062907349744e-05, + 3.39579519043854685862e-05, + 3.27163629412759540162e-05, + 3.14801123134230239524e-05, + 3.02497777637023744978e-05, + 2.90259246345387218443e-05, + 2.78091057301432964695e-05, + 2.65998611878025348972e-05, + 2.53987183581919854364e-05, + 2.42061916946511448772e-05, + 2.30227826513813061706e-05, + 2.18489795905047869758e-05, + 2.06852576979222047276e-05, + 1.95320789079097866306e-05, + 1.83898918363816234168e-05, + 1.72591317227478167385e-05, + 1.61402203802856600485e-05, + 1.50335661549492530784e-05, + 1.39395638925266073790e-05, + 1.28585949140593193966e-05, + 1.17910269994264146578e-05, + 1.07372143790027249535e-05, + 9.69749773328629209753e-06, + 8.67220420039572420566e-06, + 7.66164739132314657512e-06, + 6.66612741284622437347e-06, + 5.68593089796976951548e-06, + 4.72133104379410701050e-06, + 3.77258765668374723169e-06, + 2.83994720461857994069e-06, + 1.92364287659795633360e-06, + 1.02389464897677183691e-06, + 1.40909358598783243636e-07, +-7.25119217400925882488e-07, +-1.57401027526573751796e-06, +-2.40559589188401177682e-06, +-3.21972092580435969379e-06, +-4.01624291213239929447e-06, +-4.79503195145223449727e-06, +-5.55597059291959898474e-06, +-6.29895371166915398527e-06, +-7.02388838068995278969e-06, +-7.73069373730662337714e-06, +-8.41930084443043260571e-06, +-9.08965254672049638874e-06, +-9.74170332181420186798e-06, +-1.03754191267770145960e-05, +-1.09907772399306088372e-05, +-1.15877660982095854786e-05, +-1.21663851302071818753e-05, +-1.27266445850638015879e-05, +-1.32685653573596831328e-05, +-1.37921788081646564831e-05, +-1.42975265824070264185e-05, +-1.47846604227169150120e-05, +-1.52536419799077777845e-05, +-1.57045426202454864190e-05, +-1.61374432296745794440e-05, +-1.65524340151509907705e-05, +-1.69496143032443776877e-05, +-1.73290923361651281922e-05, +-1.76909850653765942752e-05, +-1.80354179429455134585e-05, +-1.83625247107888849940e-05, +-1.86724471879742999741e-05, +-1.89653350562250020523e-05, +-1.92413456437866339777e-05, +-1.95006437078064384194e-05, +-1.97434012153796016272e-05, +-1.99697971234102591856e-05, +-2.01800171574394671614e-05, +-2.03742535895865872536e-05, +-2.05527050157544749519e-05, +-2.07155761322383869926e-05, +-2.08630775118898238230e-05, +-2.09954253799722451970e-05, 
+-2.11128413898529763743e-05, +-2.12155523986690494219e-05, +-2.13037902431062623279e-05, +-2.13777915154254429865e-05, +-2.14377973398712400380e-05, +-2.14840531495946086989e-05, +-2.15168084642204138245e-05, +-2.15363166681862903517e-05, +-2.15428347899803886579e-05, +-2.15366232824005027408e-05, +-2.15179458039577262574e-05, +-2.14870690015422825217e-05, +-2.14442622944694083432e-05, +-2.13897976600211963056e-05, +-2.13239494205938423077e-05, +-2.12469940325636042692e-05, +-2.11592098769767039874e-05, +-2.10608770521689021838e-05, +-2.09522771684173563140e-05, +-2.08336931447246940868e-05, +-2.07054090078323760555e-05, +-2.05677096935581607638e-05, +-2.04208808505498906150e-05, +-2.02652086465450858026e-05, +-2.01009795772225878085e-05, +-1.99284802777307897448e-05, +-1.97479973369733553508e-05, +-1.95598171147313057406e-05, +-1.93642255616965921713e-05, +-1.91615080424908975211e-05, +-1.89519491617392180451e-05, +-1.87358325932659067312e-05, +-1.85134409124775290410e-05, +-1.82850554319945745078e-05, +-1.80509560405904254164e-05, +-1.78114210454942761765e-05, +-1.75667270181107023892e-05, +-1.73171486432067083623e-05, +-1.70629585716134836285e-05, +-1.68044272764881711800e-05, +-1.65418229131769521309e-05, +-1.62754111827191523261e-05, +-1.60054551990284544996e-05, +-1.57322153597849753270e-05, +-1.54559492210684566130e-05, +-1.51769113757617390406e-05, +-1.48953533357483810943e-05, +-1.46115234179280960335e-05, +-1.43256666340692667526e-05, +-1.40380245845161517285e-05, +-1.37488353557644599467e-05, +-1.34583334219182035850e-05, +-1.31667495500361276861e-05, +-1.28743107093750131593e-05, +-1.25812399845332226092e-05, +-1.22877564924967183814e-05, +-1.19940753035859587103e-05, +-1.17004073663007361519e-05, +-1.14069594360560160192e-05, +-1.11139340078022052344e-05, +-1.08215292525168970740e-05, +-1.05299389575568482923e-05, +-1.02393524708539662493e-05, +-9.94995464893742348120e-06, +-9.66192580876271611343e-06, +-9.37544168332491989210e-06, +-9.09067338103246418157e-06, +-8.80778734881422739372e-06, +-8.52694533893248795902e-06, +-8.24830437947059051977e-06, +-7.97201674846333152320e-06, +-7.69822995163491404110e-06, +-7.42708670370925303302e-06, +-7.15872491325360909230e-06, +-6.89327767101633304933e-06, +-6.63087324171573705124e-06, +-6.37163505923899323223e-06, +-6.11568172520321447157e-06, +-5.86312701083429459631e-06, +-5.61407986211404469277e-06, +-5.36864440814655013182e-06, +-5.12691997269171846217e-06, +-4.88900108881444233564e-06, +-4.65497751659457443244e-06, +-4.42493426384305196255e-06, +-4.19895160976678087265e-06, +-3.97710513152554464493e-06, +-3.75946573362158312445e-06, +-3.54609968006129461300e-06, +-3.33706862922910230812e-06, +-3.13242967141053148128e-06, +-2.93223536890220278037e-06, +-2.73653379864365903313e-06, +-2.54536859730841838172e-06, +-2.35877900878616331091e-06, +-2.17679993399191934131e-06, +-1.99946198293410626841e-06, +-1.82679152897453787459e-06, +-1.65881076521146377169e-06, +-1.49553776291791911430e-06, +-1.33698653196547020166e-06, +-1.18316708316432299485e-06, +-1.03408549244911473199e-06, +-8.89743966841013678409e-07, +-7.50140912114827378233e-07, +-6.15271002100918799020e-07, +-4.85125249549975873449e-07, +-3.59691078491283933177e-07, +-2.38952398011216803052e-07, +-1.22889677382464548894e-07, + 0.0 /* Need a final zero coefficient */ +} +} ; /* fastest_coeffs */ diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/float_cast.h 
b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/float_cast.h new file mode 100644 index 0000000..b639a3a --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/float_cast.h @@ -0,0 +1,281 @@ +/* +** Copyright (C) 2001-2011 Erik de Castro Lopo +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU Lesser General Public License as published by +** the Free Software Foundation; either version 2.1 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU Lesser General Public License for more details. +** +** You should have received a copy of the GNU Lesser General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +*/ + +/* Version 1.5 */ + +#ifndef FLOAT_CAST_HEADER +#define FLOAT_CAST_HEADER + +/*============================================================================ +** On Intel Pentium processors (especially PIII and probably P4), converting +** from float to int is very slow. To meet the C specs, the code produced by +** most C compilers targeting Pentium needs to change the FPU rounding mode +** before the float to int conversion is performed. +** +** Changing the FPU rounding mode causes the FPU pipeline to be flushed. It +** is this flushing of the pipeline which is so slow. +** +** Fortunately the ISO C99 specifications define the functions lrint, lrintf, +** llrint and llrintf which fix this problem as a side effect. +** +** On Unix-like systems, the configure process should have detected the +** presence of these functions. If they weren't found we have to replace them +** here with a standard C cast. +*/ + +/* +** The C99 prototypes for lrint and lrintf are as follows: +** +** long int lrintf (float x) ; +** long int lrint (double x) ; +*/ + +#include "resample_config.h" + +/* +** The presence of the required functions is detected during the configure +** process and the values HAVE_LRINT and HAVE_LRINTF are set accordingly in +** the config.h file. +*/ + +#define HAVE_LRINT_REPLACEMENT 0 + +#if (HAVE_LRINT && HAVE_LRINTF) + + /* + ** These defines enable functionality introduced with the 1999 ISO C + ** standard. They must be defined before the inclusion of math.h to + ** engage them. If optimisation is enabled, these functions will be + ** inlined. With optimisation switched off, you have to link in the + ** maths library using -lm. + */ + + #define _ISOC9X_SOURCE 1 + #define _ISOC99_SOURCE 1 + + #define __USE_ISOC9X 1 + #define __USE_ISOC99 1 + + #include <math.h> + +#elif (defined (__CYGWIN__)) + + #include <math.h> + + #undef HAVE_LRINT_REPLACEMENT + #define HAVE_LRINT_REPLACEMENT 1 + + #undef lrint + #undef lrintf + + #define lrint double2int + #define lrintf float2int + + /* + ** The native CYGWIN lrint and lrintf functions are buggy: + ** http://sourceware.org/ml/cygwin/2005-06/msg00153.html + ** http://sourceware.org/ml/cygwin/2005-09/msg00047.html + ** and slow. + ** These functions (pulled from the Public Domain MinGW math.h header) + ** replace the native versions.
+ */ + + static inline long double2int (double in) + { long retval ; + + __asm__ __volatile__ + ( "fistpl %0" + : "=m" (retval) + : "t" (in) + : "st" + ) ; + + return retval ; + } /* double2int */ + + static inline long float2int (float in) + { long retval ; + + __asm__ __volatile__ + ( "fistpl %0" + : "=m" (retval) + : "t" (in) + : "st" + ) ; + + return retval ; + } /* float2int */ + +#elif (defined (WIN64) || defined(_WIN64)) + + /* The Win64 section should be placed before the Win32 one, because + ** most likely both WIN32 and WIN64 will be defined in the 64-bit case. + */ + + #include <math.h> + + /* Win64 doesn't seem to have these functions, nor inline assembly. + ** Therefore implement inline versions of these functions here. + */ + #include <xmmintrin.h> + #include <emmintrin.h> + + __inline long int + lrint(double flt) + { + return _mm_cvtsd_si32(_mm_load_sd(&flt)); + } + + __inline long int + lrintf(float flt) + { + return _mm_cvtss_si32(_mm_load_ss(&flt)); + } + +#elif (defined (WIN32) || defined (_WIN32)) + + #undef HAVE_LRINT_REPLACEMENT + #define HAVE_LRINT_REPLACEMENT 1 + + #include <math.h> + + /* + ** Win32 doesn't seem to have these functions. + ** Therefore implement inline versions of these functions here. + */ + + __inline long int + lrint (double flt) + { int intgr ; + + _asm + { fld flt + fistp intgr + } ; + + return intgr ; + } + + __inline long int + lrintf (float flt) + { int intgr ; + + _asm + { fld flt + fistp intgr + } ; + + return intgr ; + } + +#elif (defined (__MWERKS__) && defined (macintosh)) + + /* This MacOS 9 solution was provided by Stephane Letz */ + + #undef HAVE_LRINT_REPLACEMENT + #define HAVE_LRINT_REPLACEMENT 1 + #include <math.h> + + #undef lrint + #undef lrintf + + #define lrint double2int + #define lrintf float2int + + inline int + float2int (register float in) + { long res [2] ; + + asm + { fctiw in, in + stfd in, res + } + return res [1] ; + } /* float2int */ + + inline int + double2int (register double in) + { long res [2] ; + + asm + { fctiw in, in + stfd in, res + } + return res [1] ; + } /* double2int */ + +// #elif (defined (__MACH__) && defined (__APPLE__)) +// +// /* For Apple MacOSX. */ +// +// #undef HAVE_LRINT_REPLACEMENT +// #define HAVE_LRINT_REPLACEMENT 1 +// #include <math.h> +// +// #undef lrint +// #undef lrintf +// +// #define lrint double2int +// #define lrintf float2int +// +// inline static long +// float2int (register float in) +// { int res [2] ; +// +// __asm__ __volatile__ +// ( "fctiw %1, %1\n\t" +// "stfd %1, %0" +// : "=m" (res) /* Output */ +// : "f" (in) /* Input */ +// : "memory" +// ) ; +// +// return res [1] ; +// } /* lrintf */ +// +// inline static long +// double2int (register double in) +// { int res [2] ; +// +// __asm__ __volatile__ +// ( "fctiw %1, %1\n\t" +// "stfd %1, %0" +// : "=m" (res) /* Output */ +// : "f" (in) /* Input */ +// : "memory" +// ) ; +// +// return res [1] ; +// } /* lrint */ + +#else +// #ifndef __sgi +// #warning "Don't have the functions lrint() and lrintf()." +// #warning "Replacing these functions with a standard C cast." +// #endif + + #include <math.h> + + #define lrint(dbl) ((int) (dbl)) + #define lrintf(flt) ((int) (flt)) + +#endif + + +#endif /* FLOAT_CAST_HEADER */ + diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/resample_common.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/resample_common.h new file mode 100644 index 0000000..a8d4e98 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/resample_common.h @@ -0,0 +1,169 @@ +/* +** Copyright (C) 2002-2011 Erik de Castro Lopo +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +*/ + +/* +** This code is part of Secret Rabbit Code aka libsamplerate. A commercial +** use license for this code is available, please see: +** http://www.mega-nerd.com/SRC/procedure.html +*/ + +#ifndef COMMON_H_INCLUDED +#define COMMON_H_INCLUDED + +#ifdef HAVE_STDINT_H +#include <stdint.h> +#elif (SIZEOF_INT == 4) +typedef int int32_t ; +#elif (SIZEOF_LONG == 4) +typedef long int32_t ; +#endif + +#define SRC_MAX_RATIO 256 +#define SRC_MAX_RATIO_STR "256" + +#define SRC_MIN_RATIO_DIFF (1e-20) + +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) + +#define ARRAY_LEN(x) ((int) (sizeof (x) / sizeof ((x) [0]))) +#define OFFSETOF(type,member) ((int) (&((type*) 0)->member)) + +#define MAKE_MAGIC(a,b,c,d,e,f) ((a) + ((b) << 4) + ((c) << 8) + ((d) << 12) + ((e) << 16) + ((f) << 20)) + +/* +** Inspiration : http://sourcefrog.net/weblog/software/languages/C/unused.html +*/ +#ifdef UNUSED +#elif defined (__GNUC__) +# define UNUSED(x) UNUSED_ ## x __attribute__ ((unused)) +#elif defined (__LCLINT__) +# define UNUSED(x) /*@unused@*/ x +#else +# define UNUSED(x) x +#endif + +#ifdef __GNUC__ +# define WARN_UNUSED __attribute__ ((warn_unused_result)) +#else +# define WARN_UNUSED +#endif + + +#include "samplerate.h" + +enum +{ SRC_FALSE = 0, + SRC_TRUE = 1, + + SRC_MODE_PROCESS = 555, + SRC_MODE_CALLBACK = 556 +} ; + +enum +{ SRC_ERR_NO_ERROR = 0, + + SRC_ERR_MALLOC_FAILED, + SRC_ERR_BAD_STATE, + SRC_ERR_BAD_DATA, + SRC_ERR_BAD_DATA_PTR, + SRC_ERR_NO_PRIVATE, + SRC_ERR_BAD_SRC_RATIO, + SRC_ERR_BAD_PROC_PTR, + SRC_ERR_SHIFT_BITS, + SRC_ERR_FILTER_LEN, + SRC_ERR_BAD_CONVERTER, + SRC_ERR_BAD_CHANNEL_COUNT, + SRC_ERR_SINC_BAD_BUFFER_LEN, + SRC_ERR_SIZE_INCOMPATIBILITY, + SRC_ERR_BAD_PRIV_PTR, + SRC_ERR_BAD_SINC_STATE, + SRC_ERR_DATA_OVERLAP, + SRC_ERR_BAD_CALLBACK, + SRC_ERR_BAD_MODE, + SRC_ERR_NULL_CALLBACK, + SRC_ERR_NO_VARIABLE_RATIO, + SRC_ERR_SINC_PREPARE_DATA_BAD_LEN, + + /* This must be the last error number. */ + SRC_ERR_MAX_ERROR +} ; + +typedef struct SRC_PRIVATE_tag +{ double last_ratio, last_position ; + + int error ; + int channels ; + + /* SRC_MODE_PROCESS or SRC_MODE_CALLBACK */ + int mode ; + + /* Pointer to converter specific private data.
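Editorial aside, not part of the patch: float_cast.h above exists because a plain C cast truncates toward zero and, on x87 hardware, forces a rounding-mode switch that flushes the FPU pipeline, while the C99 lrint()/lrintf() round in the current FPU mode with a single instruction. A minimal, self-contained C99 sketch of that behavioural difference (nothing below is taken from the repository; build with e.g. "cc demo.c -lm"):

#include <math.h>	/* lrintf() ; needs -lm on Unix-like systems */
#include <stdio.h>

int main (void)
{	float samples [4] = { 0.4f, 0.5f, 1.5f, -2.5f } ;
	int k ;

	for (k = 0 ; k < 4 ; k++)
		/* The cast truncates toward zero ; lrintf rounds in the current
		** FPU mode (round-to-nearest-even by default), so 0.5 -> 0 and
		** 1.5 -> 2 where the cast gives 0 and 1.
		*/
		printf ("%4g : cast = %2d lrintf = %2ld\n",
			samples [k], (int) samples [k], lrintf (samples [k])) ;

	return 0 ;
} /* main */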
*/ + void *private_data ; + + /* Varispeed process function. */ + int (*vari_process) (struct SRC_PRIVATE_tag *psrc, SRC_DATA *data) ; + + /* Constant speed process function. */ + int (*const_process) (struct SRC_PRIVATE_tag *psrc, SRC_DATA *data) ; + + /* State reset. */ + void (*reset) (struct SRC_PRIVATE_tag *psrc) ; + + /* Data specific to SRC_MODE_CALLBACK. */ + src_callback_t callback_func ; + void *user_callback_data ; + long saved_frames ; + float *saved_data ; +} SRC_PRIVATE ; + +/* In src_sinc.c */ +const char* sinc_get_name (int src_enum) ; +const char* sinc_get_description (int src_enum) ; + +int sinc_set_converter (SRC_PRIVATE *psrc, int src_enum) ; + +/* In src_linear.c */ +const char* linear_get_name (int src_enum) ; +const char* linear_get_description (int src_enum) ; + +int linear_set_converter (SRC_PRIVATE *psrc, int src_enum) ; + +/* In src_zoh.c */ +const char* zoh_get_name (int src_enum) ; +const char* zoh_get_description (int src_enum) ; + +int zoh_set_converter (SRC_PRIVATE *psrc, int src_enum) ; + +/*---------------------------------------------------------- +** Common static inline functions. +*/ + +static inline double +fmod_one (double x) +{ double res ; + + res = x - lrint (x) ; + if (res < 0.0) + return res + 1.0 ; + + return res ; +} /* fmod_one */ + +#endif /* COMMON_H_INCLUDED */ + diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/resample_config.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/resample_config.h new file mode 100644 index 0000000..9a3d2c8 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/resample_config.h @@ -0,0 +1,209 @@ +/* +** Copyright (C) 2002-2011 Erik de Castro Lopo +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +*/ + +/* +** This is the Win32 specific config.h header file. +** +** On Unix (including MacOSX), this header file is automatically generated +** during the configure process while on Win32 this has to be hand edited +** to keep it up to date. +** +** This is also a good file to add Win32 specific things. +*/ + +/* +** MSVC++ assumes that all floating point constants without a trailing +** letter 'f' are double precision. +** +** If this assumption is incorrect and one of these floating point constants +** is assigned to a float variable MSVC++ generates a warning. +** +** Since there are currently about 25000 of these warnings generated in +** src/src_sinc.c this slows down compile times considerably. The +** following #pragma disables the warning. +*/ + +#pragma warning(disable: 4305) + +/*---------------------------------------------------------------------------- +** Normal #defines follow. +*/ + +/* Set to 1 if the compiler is GNU GCC.
*/
+#define COMPILER_IS_GCC 0
+
+/* Target processor clips on negative float to int conversion. */
+#define CPU_CLIPS_NEGATIVE 0
+
+/* Target processor clips on positive float to int conversion. */
+#define CPU_CLIPS_POSITIVE 0
+
+/* Target processor is big endian. */
+#define CPU_IS_BIG_ENDIAN 0
+
+/* Target processor is little endian. */
+#define CPU_IS_LITTLE_ENDIAN 1
+
+/* Set to 1 to enable debugging. */
+#define ENABLE_DEBUG 0
+
+/* Major version of GCC or 3 otherwise. */
+/* #undef GCC_MAJOR_VERSION */
+
+/* Define to 1 if you have the `alarm' function. */
+/* #undef HAVE_ALARM */
+
+/* Define to 1 if you have the `calloc' function. */
+#define HAVE_CALLOC 1
+
+/* Define to 1 if you have the `ceil' function. */
+#define HAVE_CEIL 1
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+/* #undef HAVE_DLFCN_H */
+
+/* Set to 1 if you have libfftw3. */
+/* #undef HAVE_FFTW3 */
+
+/* Define to 1 if you have the `floor' function. */
+#define HAVE_FLOOR 1
+
+/* Define to 1 if you have the `fmod' function. */
+#define HAVE_FMOD 1
+
+/* Define to 1 if you have the `free' function. */
+#define HAVE_FREE 1
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+/* #undef HAVE_INTTYPES_H */
+
+/* Define to 1 if you have the `m' library (-lm). */
+/* #undef HAVE_LIBM */
+
+/* Define if you have C99's lrint function. */
+/* #undef HAVE_LRINT */
+
+/* Define if you have C99's lrintf function. */
+/* #undef HAVE_LRINTF */
+
+/* Define to 1 if you have the `malloc' function. */
+#define HAVE_MALLOC 1
+
+/* Define to 1 if you have the `memcpy' function. */
+#define HAVE_MEMCPY 1
+
+/* Define to 1 if you have the `memmove' function. */
+#define HAVE_MEMMOVE 1
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define if you have signal SIGALRM. */
+/* #undef HAVE_SIGALRM */
+
+/* Define to 1 if you have the `signal' function. */
+/* #undef HAVE_SIGNAL */
+
+/* Set to 1 if you have libsndfile. */
+#define HAVE_SNDFILE 0
+
+/* Define to 1 if you have the <stdint.h> header file. */
+/* #undef HAVE_STDINT_H */
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/times.h> header file. */
+/* #undef HAVE_SYS_TIMES_H */
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+*/
+#define LT_OBJDIR ".libs/"
+
+/* Define to 1 if your C compiler doesn't accept -c and -o together. */
+/* #undef NO_MINUS_C_MINUS_O */
+
+/* Set to 1 if compiling for Win32 */
+#define OS_IS_WIN32 1
+
+/* Name of package */
+#define PACKAGE "libsamplerate"
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "erikd@mega-nerd.com"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "libsamplerate"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "libsamplerate 0.1.8"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "libsamplerate"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL "http://www.mega-nerd.com/libsamplerate/"
+
+/* Define to the version of this package.
*/ +#define PACKAGE_VERSION "0.1.8" + +/* The size of `double', as computed by sizeof. */ +#define SIZEOF_DOUBLE 8 + +/* The size of `float', as computed by sizeof. */ +#define SIZEOF_FLOAT 4 + +/* The size of `int', as computed by sizeof. */ +#define SIZEOF_INT 4 + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 4 + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Version number of package */ +#define VERSION "0.1.8" + +#define HAVE_VIDEO_HASH 1 +#define HAVE_IMAGE_HASH 1 + + +/* Extra Win32 hacks. */ + +/* +** Microsoft's compiler still does not support the 1999 ISO C Standard +** which includes 'inline'. +*/ + +#define inline __inline diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/samplerate.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/samplerate.h new file mode 100644 index 0000000..044f8d7 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc/samplerate.h @@ -0,0 +1,197 @@ +/* +** Copyright (C) 2002-2011 Erik de Castro Lopo +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +*/ + +/* +** This code is part of Secret Rabbit Code aka libsamplerate. A commercial +** use license for this code is available, please see: +** http://www.mega-nerd.com/SRC/procedure.html +*/ + +/* +** API documentation is available here: +** http://www.mega-nerd.com/SRC/api.html +*/ + +#ifndef SAMPLERATE_H +#define SAMPLERATE_H + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + + +/* Opaque data type SRC_STATE. */ +typedef struct SRC_STATE_tag SRC_STATE ; + +/* SRC_DATA is used to pass data to src_simple() and src_process(). */ +typedef struct +{ float *data_in, *data_out ; + + long input_frames, output_frames ; + long input_frames_used, output_frames_gen ; + + int end_of_input ; + + double src_ratio ; +} SRC_DATA ; + +/* SRC_CB_DATA is used with callback based API. */ +typedef struct +{ long frames ; + float *data_in ; +} SRC_CB_DATA ; + +/* +** User supplied callback function type for use with src_callback_new() +** and src_callback_read(). First parameter is the same pointer that was +** passed into src_callback_new(). Second parameter is pointer to a +** pointer. The user supplied callback function must modify *data to +** point to the start of the user supplied float array. The user supplied +** function must return the number of frames that **data points to. +*/ + +typedef long (*src_callback_t) (void *cb_data, float **data) ; + +/* +** Standard initialisation function : return an anonymous pointer to the +** internal state of the converter. Choose a converter from the enums below. +** Error returned in *error. 
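+**
+** A hedged lifecycle sketch (the names and values below are illustrative
+** additions, not upstream documentation):
+**
+**	int err = 0 ;
+**	SRC_STATE *state = src_new (SRC_LINEAR, 2, &err) ;	// 2 == stereo
+**	if (state == NULL)
+**		printf ("src_new failed : %s\n", src_strerror (err)) ;
+**	// ... repeatedly fill an SRC_DATA struct and call
+**	// src_process (state, &data) until end_of_input is reached ...
+**	state = src_delete (state) ;	// always returns NULL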
+*/ + +SRC_STATE* src_new (int converter_type, int channels, int *error) ; + +/* +** Initilisation for callback based API : return an anonymous pointer to the +** internal state of the converter. Choose a converter from the enums below. +** The cb_data pointer can point to any data or be set to NULL. Whatever the +** value, when processing, user supplied function "func" gets called with +** cb_data as first parameter. +*/ + +SRC_STATE* src_callback_new (src_callback_t func, int converter_type, int channels, + int *error, void* cb_data) ; + +/* +** Cleanup all internal allocations. +** Always returns NULL. +*/ + +SRC_STATE* src_delete (SRC_STATE *state) ; + +/* +** Standard processing function. +** Returns non zero on error. +*/ + +int src_process (SRC_STATE *state, SRC_DATA *data) ; + +/* +** Callback based processing function. Read up to frames worth of data from +** the converter int *data and return frames read or -1 on error. +*/ +long src_callback_read (SRC_STATE *state, double src_ratio, long frames, float *data) ; + +/* +** Simple interface for performing a single conversion from input buffer to +** output buffer at a fixed conversion ratio. +** Simple interface does not require initialisation as it can only operate on +** a single buffer worth of audio. +*/ + +int src_simple (SRC_DATA *data, int converter_type, int channels) ; + +/* +** This library contains a number of different sample rate converters, +** numbered 0 through N. +** +** Return a string giving either a name or a more full description of each +** sample rate converter or NULL if no sample rate converter exists for +** the given value. The converters are sequentially numbered from 0 to N. +*/ + +const char *src_get_name (int converter_type) ; +const char *src_get_description (int converter_type) ; +const char *src_get_version (void) ; + +/* +** Set a new SRC ratio. This allows step responses +** in the conversion ratio. +** Returns non zero on error. +*/ + +int src_set_ratio (SRC_STATE *state, double new_ratio) ; + +/* +** Reset the internal SRC state. +** Does not modify the quality settings. +** Does not free any memory allocations. +** Returns non zero on error. +*/ + +int src_reset (SRC_STATE *state) ; + +/* +** Return TRUE if ratio is a valid conversion ratio, FALSE +** otherwise. +*/ + +int src_is_valid_ratio (double ratio) ; + +/* +** Return an error number. +*/ + +int src_error (SRC_STATE *state) ; + +/* +** Convert the error number into a string. +*/ +const char* src_strerror (int error) ; + +/* +** The following enums can be used to set the interpolator type +** using the function src_set_converter(). +*/ + +enum +{ + SRC_SINC_BEST_QUALITY = 0, + SRC_SINC_MEDIUM_QUALITY = 1, + SRC_SINC_FASTEST = 2, + SRC_ZERO_ORDER_HOLD = 3, + SRC_LINEAR = 4, +} ; + +/* +** Extra helper functions for converting from short to float and +** back again. 
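+**
+** A minimal one-shot sketch combining these helpers with src_simple ()
+** (the pcm_in / pcm_out arrays, buffer sizes and rates are illustrative
+** assumptions, not upstream documentation):
+**
+**	float in [4096], out [8192] ;
+**	SRC_DATA d ;
+**	src_short_to_float_array (pcm_in, in, 4096) ;	// 16 bit PCM -> float
+**	d.data_in = in ;	d.data_out = out ;
+**	d.input_frames = 4096 ;	d.output_frames = 8192 ;
+**	d.src_ratio = 48000.0 / 44100.0 ;	// mono, 44.1 kHz -> 48 kHz
+**	if (src_simple (&d, SRC_LINEAR, 1) == 0)
+**		src_float_to_short_array (out, pcm_out, (int) d.output_frames_gen) ;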
+*/ + +void src_short_to_float_array (const short *in, float *out, int len) ; +void src_float_to_short_array (const float *in, short *out, int len) ; + +void src_int_to_float_array (const int *in, float *out, int len) ; +void src_float_to_int_array (const float *in, int *out, int len) ; + + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ + +#endif /* SAMPLERATE_H */ + diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc_common/KTYPED.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc_common/KTYPED.h new file mode 100644 index 0000000..ed3f54b --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc_common/KTYPED.h @@ -0,0 +1,42 @@ +#ifndef KALA_AUDIO_BASE_TYPE_H +#define KALA_AUDIO_BASE_TYPE_H + +#ifdef _MSC_VER +// MSVC build for Windows, and it's (expected to be) able to handle true stereo in real time +#define PSEUDO_MULTICHANNELS 0 +#else +#define PSEUDO_MULTICHANNELS 1 +#endif + +#ifdef LIBKALAAUDIOBASE_DYNAMIC_EXPORTS +#define LIB_KALAAUDIOBASE_API __declspec(dllexport) +#endif + +#ifdef LIBKALAAUDIOBASE_DYNAMIC_IMPORTS +#define LIB_KALAAUDIOBASE_API __declspec(dllimport) +#endif + +#ifndef LIB_KALAAUDIOBASE_API +#define LIB_KALAAUDIOBASE_API +#endif + +#define SONG_SCORE_FRAME_MS 40 +#define SONG_SCORE_CHANNEL_NUM 1 +#define SONG_SCORE_FRAME_SIZE KALA_DEFAULT_SAMPLE_RATE*SONG_SCORE_FRAME_MS/1000*2*SONG_SCORE_CHANNEL_NUM // mono +#define SONG_SCORE_FRAME_SHIF SONG_SCORE_FRAME_SIZE +//#define SONG_SCORE_MAX_SCORE 100 // max score value. +//#define SONG_SCORE_MIN_SCORE 0 // min score value. + + +#ifndef safe_free +#define safe_free(p) { if(p) { free(p); (p)=NULL; } } +#endif + +//typedef struct _tagFLOAT_BUFFER +//{ +// float* buf; // buffer in float +// int len; // float length. +// +//}FltBuf, *PFltBuf; + +#endif diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc_common/MSdcommon.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc_common/MSdcommon.h new file mode 100644 index 0000000..dc3fc6a --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/inc_common/MSdcommon.h @@ -0,0 +1,207 @@ +/************************************************************************/ +/* mix sound lib */ +/* written by ethanzhao, 5.13, 2014 */ +/************************************************************************/ + +#ifndef KALA_MIX_SOUND_FILE_COMMON_H +#define KALA_MIX_SOUND_FILE_COMMON_H + +#include "KTYPED.h" + +/************************************************************************************************/ +/* return value meanings */ +#define err_kala_audio_base_h_null -1 // point is NULL +#define err_kala_audio_base_h_param_invalid -2 // invalid parameter +#define err_kala_audio_base_h_malloc_null -3 // malloc return NULL +#define err_kala_audio_base_h_in_open_file -4 // error when open text or audio file +#define err_kala_audio_base_h_in_no_enough_data -5 // no enough input data +#define err_kala_audio_base_h_in_data_overflow -6 // data overflow +#define err_kala_audio_base_h_in_audio_tpye -7 // audio type error,not 16 bit maybe +#define err_kala_audio_base_h_lib_init -8 // lib init or do error. +#define err_kala_audio_base_h_sentence_file_error -9 // error when init sentence file +#define err_kala_audio_base_h_note_file_error -10 // error when init note file. 
+#define err_kala_audio_base_h_section_info_error -11 // section info error +#define err_kala_audio_base_h_qrc_only_one_sentence -12 // qrc only has one sentence; +#define err_kala_audio_base_h_clipped_happened -13 // clipped happened when process audio data +#define err_kala_audio_base_h_unknown -100 // unknown error + +enum +{ + err_kala_audio_base_h_success = 0, /* 0 Success. **/ + err_kala_audio_base_h_sl_error_to_get_result = 1, /* 1 Success and ,use in function putbuffer. **/ + err_kala_audio_base_h_sl_error_to_get_last_result = 2, /* 2 Success. **/ + err_kala_audio_base_h_ts_vad_ok = 1, /* 1 Success. **/ + err_kala_audio_base_h_ts_stoped = 2, /* 1 Success. **/ + err_kala_audio_base_h_sl_error_param = -29999, + err_kala_audio_base_h_sl_error_init, + err_kala_audio_base_h_sl_error_mfcc, + err_kala_audio_base_h_sl_error_dict_path, + err_kala_audio_base_h_sl_error_input_word, + err_kala_audio_base_h_sl_error_dec_init, + err_kala_audio_base_h_sl_error_dec_sp, + err_kala_audio_base_h_sl_error_word_num, + err_kala_audio_base_h_sl_error_qrc_init, + err_kala_audio_base_h_sl_error_mle_init, + err_kala_audio_base_h_sl_error_pitch_init, + err_kala_audio_base_h_sl_error_qrc_file, + err_kala_audio_base_h_sl_error_note_file, + err_kala_audio_base_h_sl_error_qrc_combine, + err_kala_audio_base_h_sl_error_buffer_length, + err_kala_audio_base_h_sl_error_sent_id, + err_kala_audio_base_h_sl_error_start_session, + err_kala_audio_base_h_sl_error_mde_word, + err_kala_audio_base_h_sl_error_get_pitch, + err_kala_audio_base_h_sl_error_pitch_time, + err_kala_audio_base_h_sl_error_buffer_time, + err_kala_audio_base_h_sl_error_sent_time, + err_kala_audio_base_h_sl_error_not_last, + err_kala_audio_base_h_sl_error_is_last, + err_kala_audio_base_h_sl_error_miss_buffer, + err_kala_audio_base_h_sl_error_no_create, + err_kala_audio_base_h_sl_error_no_word, + err_kala_audio_base_h_ts_error_init = -39999, + err_kala_audio_base_h_ts_error_no_support, + err_kala_audio_base_h_ts_error_param, + err_kala_audio_base_h_ts_error_dec_init, + err_kala_audio_base_h_ts_error_sts_init, + err_kala_audio_base_h_ts_error_pitch, + err_kala_audio_base_h_ts_error_seg_id, + err_kala_audio_base_h_ts_error_no_buffer, + err_kala_audio_base_h_ts_error_out_buffer, + err_kala_audio_base_h_ts_error_noise, + err_kala_audio_base_h_ts_error_word = -38999, + err_kala_audio_base_h_ts_error_vad = -37999, + err_kala_audio_base_h_ts_error_sts_proc = -36999, +}; + +/* err_kala_audio_base_h_low_level_prefix: + desc: Low Level lib (such as webrtc) error occurs. + To get the corresponding low level error, plus err_kala_audio_base_h_low_level_base + + example: + int err = XXX(); + + if (err < err_kala_audio_base_h_low_level_prefix) + { + // Low Level error occurs. 
+        int errLL = err + err_kala_audio_base_h_low_level_base;
+
+    }
+*/
+
+#define err_kala_audio_base_h_low_level_base -10000
+#define ok_mix_sound_h 0  // successful
+/************************************************************************************************/
+
+#include "stdlib.h"
+#include "string.h"
+
+//#define TEST_FOR_DEBUG_LOG 1
+
+/*
+* Common file for this project: some type defines; buffers reuse the original common file.
+*/
+
+#ifndef AudioSample
+#define AudioSample short
+#endif
+
+typedef AudioSample Asample;
+//
+//#ifndef WORD32
+//#define WORD32 int
+//#endif
+//
+//#ifndef UWORD32
+//#define UWORD32 unsigned WORD32
+//#endif
+//
+//#ifndef WORD16
+//#define WORD16 short
+//#endif
+//
+//#ifndef UWORD16
+//#define UWORD16 unsigned WORD16
+//#endif
+//
+//#ifndef HRESULT
+//#define HRESULT WORD32
+//#endif
+//
+//#ifndef HANDLE
+//#define HANDLE void*
+//#endif
+
+//#ifndef max
+//#define max(a,b) (((a) > (b)) ? (a) : (b))
+//#endif
+//
+//#ifndef min
+//#define min(a,b) (((a) < (b)) ? (a) : (b))
+//#endif
+
+#ifndef SAFE_FREE
+#define SAFE_FREE(p) { if(p) { free(p); (p)=NULL; } }
+#endif
+
+#ifndef SAFE_RELEASE
+#define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
+#endif
+
+#ifndef SHORTMAX
+#define SHORTMAX 32767
+#endif
+#ifndef SHORTMIN
+#define SHORTMIN -32768
+#endif
+
+#ifndef INT32_MAX
+#define INT32_MAX (WORD32)0x7fffffffL
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (WORD32)0x80000000L
+#endif
+
+#ifndef Clip_short
+#define Clip_short(x) (short)((x)>SHORTMAX?SHORTMAX:((x)<SHORTMIN?SHORTMIN:(x)))
+#endif
+
+#endif // KALA_MIX_SOUND_FILE_COMMON_H
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/src/CResample2.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/src/CResample2.cpp
new file mode 100644
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/src/CResample2.cpp
+#include <math.h>
+#include "CResample2.h"
+#include "samplerate.h"
+#include "MSdcommon.h"
+
+
+CResample2::CResample2()
+    : m_src_state(NULL)
+{
+
+}
+
+int CResample2::init(int src_sample_rate, int dst_sample_rate, int channel, int max_input_size, int* max_output_size)
+{
+    m_convert = SRC_LINEAR;
+    m_channles = channel;
+    m_gain = 1.0f;
+    m_max = 0.0f;
+    int error = 0;
+    m_src_state = NULL;
+    m_src_ratio = (1.0 * dst_sample_rate) / (1.0 * src_sample_rate);
+    if (src_is_valid_ratio(m_src_ratio) == 0)
+    {
+        return err_kala_audio_base_h_lib_init;
+    }
+    //SRC_STATE *src_state;
+    if ((m_src_state = src_new(m_convert, channel, &error)) == NULL)
+    {
+        return err_kala_audio_base_h_lib_init;
+    }
+    //m_src_state = src_state;
+
+
+    *max_output_size = (int)(1.0 * max_input_size * m_src_ratio) + 128;
+    return 0;
+}
+
+// int CResample2::setType(int convert)
+// {
+//     if (convert < SRC_SINC_BEST_QUALITY)
+//     {
+//         m_convert = SRC_SINC_BEST_QUALITY;
+//     }
+//     else if (convert > SRC_LINEAR)
+//     {
+//         m_convert = SRC_LINEAR;
+//     }
+//     else
+//     {
+//         m_convert = convert;
+//     }
+//
+//     int error = 0;
+//     if (m_src_state != NULL)
+//     {
+//         m_src_state = src_delete((SRC_STATE*)m_src_state);
+//     }
+//     if ((m_src_state = src_new(m_convert, m_channles, &error)) == NULL)
+//     {
+//         return err_kala_audio_base_h_lib_init;
+//     }
+//     return 0;
+// }
+
+int CResample2::process(char* pSrc, int src_size, char* pDst)
+{
+    // input must be a whole number of 16 bit frames
+    // (2 bytes per sample, m_channles samples per frame)
+    if (src_size % (2 * m_channles) != 0)
+    {
+        return err_kala_audio_base_h_param_invalid;
+    }
+    static float input[BUFFER_LEN];
+    static float output[BUFFER_LEN];
+    int output_count_data = 0;
+    int error = 0;
+    SRC_DATA src_data;
+
+    src_data.end_of_input = 0;
+    src_data.src_ratio = m_src_ratio;
+    src_data.output_frames = BUFFER_LEN / m_channles;
+    src_short_to_float_array((short*)pSrc, input, src_size / 2);
+    src_data.data_in = input;
+    src_data.data_out = output;
+    src_data.input_frames = src_size / 2 / m_channles;
+    while (src_data.input_frames != 0)
+    {
+        error = src_process((SRC_STATE*)m_src_state,
&src_data); + if ( 0 != error ) + { + //printf("\nError : %s\n", src_strerror(error)); + return err_kala_audio_base_h_lib_init; + } + m_max = apply_gain(src_data.data_out, + src_data.output_frames_gen, + m_channles, m_max, m_gain); + output_count_data += src_data.output_frames_gen; + src_data.data_in += src_data.input_frames_used * m_channles; + src_data.input_frames -= src_data.input_frames_used; + + + } + src_float_to_short_array(output, (short*)pDst, output_count_data * m_channles); + if (m_max > 1.0f) + { + return err_kala_audio_base_h_clipped_happened; //output clipped + } + return output_count_data * m_channles * 2; //only for 16bit +} + +void CResample2::reset() +{ + src_reset((SRC_STATE*)m_src_state); + m_max = 0.0f; +} + +void CResample2::uninit() +{ + m_src_state = src_delete((SRC_STATE*)m_src_state); +} + +float CResample2::apply_gain(float * data, long frames, int channels, float max, float gain) +{ + long k; + + for (k = 0; k < frames * channels; k++) + { + data[k] *= gain; + + if (fabs(data[k]) > max) + max = fabs(data[k]); + } + + return max; +} diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/src/samplerate.c b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/src/samplerate.c new file mode 100644 index 0000000..138e401 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/src/samplerate.c @@ -0,0 +1,545 @@ +/* +** Copyright (C) 2002-2011 Erik de Castro Lopo +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +*/ + +/* +** This code is part of Secret Rabbit Code aka libsamplerate. 
A commercial
+** use license for this code is available, please see:
+** http://www.mega-nerd.com/SRC/procedure.html
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "resample_config.h"
+
+#include "samplerate.h"
+#include "float_cast.h"
+#include "resample_common.h"
+
+static int psrc_set_converter (SRC_PRIVATE *psrc, int converter_type) ;
+
+
+static inline int
+is_bad_src_ratio (double ratio)
+{	return (ratio < (1.0 / SRC_MAX_RATIO) || ratio > (1.0 * SRC_MAX_RATIO)) ;
+} /* is_bad_src_ratio */
+
+SRC_STATE *
+src_new (int converter_type, int channels, int *error)
+{	SRC_PRIVATE *psrc ;
+
+	if (error)
+		*error = SRC_ERR_NO_ERROR ;
+
+	if (channels < 1)
+	{	if (error)
+			*error = SRC_ERR_BAD_CHANNEL_COUNT ;
+		return NULL ;
+		} ;
+
+	if ((psrc = calloc (1, sizeof (*psrc))) == NULL)
+	{	if (error)
+			*error = SRC_ERR_MALLOC_FAILED ;
+		return NULL ;
+		} ;
+
+	psrc->channels = channels ;
+	psrc->mode = SRC_MODE_PROCESS ;
+
+	if (psrc_set_converter (psrc, converter_type) != SRC_ERR_NO_ERROR)
+	{	if (error)
+			*error = SRC_ERR_BAD_CONVERTER ;
+		free (psrc) ;
+		psrc = NULL ;
+		} ;
+
+	src_reset ((SRC_STATE*) psrc) ;
+
+	return (SRC_STATE*) psrc ;
+} /* src_new */
+
+SRC_STATE*
+src_callback_new (src_callback_t func, int converter_type, int channels, int *error, void* cb_data)
+{	SRC_STATE *src_state ;
+
+	if (func == NULL)
+	{	if (error)
+			*error = SRC_ERR_BAD_CALLBACK ;
+		return NULL ;
+		} ;
+
+	if (error != NULL)
+		*error = 0 ;
+
+	if ((src_state = src_new (converter_type, channels, error)) == NULL)
+		return NULL ;
+
+	src_reset (src_state) ;
+
+	((SRC_PRIVATE*) src_state)->mode = SRC_MODE_CALLBACK ;
+	((SRC_PRIVATE*) src_state)->callback_func = func ;
+	((SRC_PRIVATE*) src_state)->user_callback_data = cb_data ;
+
+	return src_state ;
+} /* src_callback_new */
+
+SRC_STATE *
+src_delete (SRC_STATE *state)
+{	SRC_PRIVATE *psrc ;
+
+	psrc = (SRC_PRIVATE*) state ;
+	if (psrc)
+	{	if (psrc->private_data)
+			free (psrc->private_data) ;
+		memset (psrc, 0, sizeof (SRC_PRIVATE)) ;
+		free (psrc) ;
+		} ;
+
+	return NULL ;
+} /* src_delete */
+
+int
+src_process (SRC_STATE *state, SRC_DATA *data)
+{	SRC_PRIVATE *psrc ;
+	int error ;
+
+	psrc = (SRC_PRIVATE*) state ;
+
+	if (psrc == NULL)
+		return SRC_ERR_BAD_STATE ;
+	if (psrc->vari_process == NULL || psrc->const_process == NULL)
+		return SRC_ERR_BAD_PROC_PTR ;
+
+	if (psrc->mode != SRC_MODE_PROCESS)
+		return SRC_ERR_BAD_MODE ;
+
+	/* Check for valid SRC_DATA first. */
+	if (data == NULL)
+		return SRC_ERR_BAD_DATA ;
+
+	/* And that data_in and data_out are valid. */
+	if (data->data_in == NULL || data->data_out == NULL)
+		return SRC_ERR_BAD_DATA_PTR ;
+
+	/* Check src_ratio is in range.
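+	** (Valid ratios lie in the range [1/SRC_MAX_RATIO, SRC_MAX_RATIO] ;
+	** see is_bad_src_ratio () above.)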
*/ + if (is_bad_src_ratio (data->src_ratio)) + return SRC_ERR_BAD_SRC_RATIO ; + + if (data->input_frames < 0) + data->input_frames = 0 ; + if (data->output_frames < 0) + data->output_frames = 0 ; + + if (data->data_in < data->data_out) + { if (data->data_in + data->input_frames * psrc->channels > data->data_out) + { /*-printf ("\n\ndata_in: %p data_out: %p\n", + (void*) (data->data_in + data->input_frames * psrc->channels), (void*) data->data_out) ;-*/ + return SRC_ERR_DATA_OVERLAP ; + } ; + } + else if (data->data_out + data->output_frames * psrc->channels > data->data_in) + { /*-printf ("\n\ndata_in : %p output frames: %ld data_out: %p\n", (void*) data->data_in, data->output_frames, (void*) data->data_out) ; + + printf ("data_out: %p (%p) data_in: %p\n", (void*) data->data_out, + (void*) (data->data_out + data->input_frames * psrc->channels), (void*) data->data_in) ;-*/ + return SRC_ERR_DATA_OVERLAP ; + } ; + + /* Set the input and output counts to zero. */ + data->input_frames_used = 0 ; + data->output_frames_gen = 0 ; + + /* Special case for when last_ratio has not been set. */ + if (psrc->last_ratio < (1.0 / SRC_MAX_RATIO)) + psrc->last_ratio = data->src_ratio ; + + /* Now process. */ + if (fabs (psrc->last_ratio - data->src_ratio) < 1e-15) + error = psrc->const_process (psrc, data) ; + else + error = psrc->vari_process (psrc, data) ; + + return error ; +} /* src_process */ + +long +src_callback_read (SRC_STATE *state, double src_ratio, long frames, float *data) +{ SRC_PRIVATE *psrc ; + SRC_DATA src_data ; + + long output_frames_gen ; + int error = 0 ; + + if (state == NULL) + return 0 ; + + if (frames <= 0) + return 0 ; + + psrc = (SRC_PRIVATE*) state ; + + if (psrc->mode != SRC_MODE_CALLBACK) + { psrc->error = SRC_ERR_BAD_MODE ; + return 0 ; + } ; + + if (psrc->callback_func == NULL) + { psrc->error = SRC_ERR_NULL_CALLBACK ; + return 0 ; + } ; + + memset (&src_data, 0, sizeof (src_data)) ; + + /* Check src_ratio is in range. */ + if (is_bad_src_ratio (src_ratio)) + { psrc->error = SRC_ERR_BAD_SRC_RATIO ; + return 0 ; + } ; + + /* Switch modes temporarily. */ + src_data.src_ratio = src_ratio ; + src_data.data_out = data ; + src_data.output_frames = frames ; + + src_data.data_in = psrc->saved_data ; + src_data.input_frames = psrc->saved_frames ; + + output_frames_gen = 0 ; + while (output_frames_gen < frames) + { /* Use a dummy array for the case where the callback function + ** returns without setting the ptr. + */ + float dummy [1] ; + + if (src_data.input_frames == 0) + { float *ptr = dummy ; + + src_data.input_frames = psrc->callback_func (psrc->user_callback_data, &ptr) ; + src_data.data_in = ptr ; + + if (src_data.input_frames == 0) + src_data.end_of_input = 1 ; + } ; + + /* + ** Now call process function. However, we need to set the mode + ** to SRC_MODE_PROCESS first and when we return set it back to + ** SRC_MODE_CALLBACK. 
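+		** (src_process () returns SRC_ERR_BAD_MODE for any state whose mode
+		** is not SRC_MODE_PROCESS, hence the temporary switch around the call.)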
+ */ + psrc->mode = SRC_MODE_PROCESS ; + error = src_process (state, &src_data) ; + psrc->mode = SRC_MODE_CALLBACK ; + + if (error != 0) + break ; + + src_data.data_in += src_data.input_frames_used * psrc->channels ; + src_data.input_frames -= src_data.input_frames_used ; + + src_data.data_out += src_data.output_frames_gen * psrc->channels ; + src_data.output_frames -= src_data.output_frames_gen ; + + output_frames_gen += src_data.output_frames_gen ; + + if (src_data.end_of_input == SRC_TRUE && src_data.output_frames_gen == 0) + break ; + } ; + + psrc->saved_data = src_data.data_in ; + psrc->saved_frames = src_data.input_frames ; + + if (error != 0) + { psrc->error = error ; + return 0 ; + } ; + + return output_frames_gen ; +} /* src_callback_read */ + +/*========================================================================== +*/ + +int +src_set_ratio (SRC_STATE *state, double new_ratio) +{ SRC_PRIVATE *psrc ; + + psrc = (SRC_PRIVATE*) state ; + + if (psrc == NULL) + return SRC_ERR_BAD_STATE ; + if (psrc->vari_process == NULL || psrc->const_process == NULL) + return SRC_ERR_BAD_PROC_PTR ; + + if (is_bad_src_ratio (new_ratio)) + return SRC_ERR_BAD_SRC_RATIO ; + + psrc->last_ratio = new_ratio ; + + return SRC_ERR_NO_ERROR ; +} /* src_set_ratio */ + +int +src_reset (SRC_STATE *state) +{ SRC_PRIVATE *psrc ; + + if ((psrc = (SRC_PRIVATE*) state) == NULL) + return SRC_ERR_BAD_STATE ; + + if (psrc->reset != NULL) + psrc->reset (psrc) ; + + psrc->last_position = 0.0 ; + psrc->last_ratio = 0.0 ; + + psrc->saved_data = NULL ; + psrc->saved_frames = 0 ; + + psrc->error = SRC_ERR_NO_ERROR ; + + return SRC_ERR_NO_ERROR ; +} /* src_reset */ + +/*============================================================================== +** Control functions. +*/ + +const char * +src_get_name (int converter_type) +{ const char *desc ; + + //if ((desc = sinc_get_name (converter_type)) != NULL) + // return desc ; + + //if ((desc = zoh_get_name (converter_type)) != NULL) + // return desc ; + + if ((desc = linear_get_name (converter_type)) != NULL) + return desc ; + + return NULL ; +} /* src_get_name */ + +const char * +src_get_description (int converter_type) +{ const char *desc ; + + //if ((desc = sinc_get_description (converter_type)) != NULL) + // return desc ; + + //if ((desc = zoh_get_description (converter_type)) != NULL) + // return desc ; + + if ((desc = linear_get_description (converter_type)) != NULL) + return desc ; + + return NULL ; +} /* src_get_description */ + +const char * +src_get_version (void) +{ return PACKAGE "-" VERSION " (c) 2002-2008 Erik de Castro Lopo" ; +} /* src_get_version */ + +int +src_is_valid_ratio (double ratio) +{ + if (is_bad_src_ratio (ratio)) + return SRC_FALSE ; + + return SRC_TRUE ; +} /* src_is_valid_ratio */ + +/*============================================================================== +** Error reporting functions. +*/ + +int +src_error (SRC_STATE *state) +{ if (state) + return ((SRC_PRIVATE*) state)->error ; + return SRC_ERR_NO_ERROR ; +} /* src_error */ + +const char* +src_strerror (int error) +{ + switch (error) + { case SRC_ERR_NO_ERROR : + return "No error." ; + case SRC_ERR_MALLOC_FAILED : + return "Malloc failed." ; + case SRC_ERR_BAD_STATE : + return "SRC_STATE pointer is NULL." ; + case SRC_ERR_BAD_DATA : + return "SRC_DATA pointer is NULL." ; + case SRC_ERR_BAD_DATA_PTR : + return "SRC_DATA->data_out is NULL." ; + case SRC_ERR_NO_PRIVATE : + return "Internal error. No private data." 
; + + case SRC_ERR_BAD_SRC_RATIO : + return "SRC ratio outside [1/" SRC_MAX_RATIO_STR ", " SRC_MAX_RATIO_STR "] range." ; + + case SRC_ERR_BAD_SINC_STATE : + return "src_process() called without reset after end_of_input." ; + case SRC_ERR_BAD_PROC_PTR : + return "Internal error. No process pointer." ; + case SRC_ERR_SHIFT_BITS : + return "Internal error. SHIFT_BITS too large." ; + case SRC_ERR_FILTER_LEN : + return "Internal error. Filter length too large." ; + case SRC_ERR_BAD_CONVERTER : + return "Bad converter number." ; + case SRC_ERR_BAD_CHANNEL_COUNT : + return "Channel count must be >= 1." ; + case SRC_ERR_SINC_BAD_BUFFER_LEN : + return "Internal error. Bad buffer length. Please report this." ; + case SRC_ERR_SIZE_INCOMPATIBILITY : + return "Internal error. Input data / internal buffer size difference. Please report this." ; + case SRC_ERR_BAD_PRIV_PTR : + return "Internal error. Private pointer is NULL. Please report this." ; + case SRC_ERR_DATA_OVERLAP : + return "Input and output data arrays overlap." ; + case SRC_ERR_BAD_CALLBACK : + return "Supplied callback function pointer is NULL." ; + case SRC_ERR_BAD_MODE : + return "Calling mode differs from initialisation mode (ie process v callback)." ; + case SRC_ERR_NULL_CALLBACK : + return "Callback function pointer is NULL in src_callback_read ()." ; + case SRC_ERR_NO_VARIABLE_RATIO : + return "This converter only allows constant conversion ratios." ; + case SRC_ERR_SINC_PREPARE_DATA_BAD_LEN : + return "Internal error : Bad length in prepare_data ()." ; + + case SRC_ERR_MAX_ERROR : + return "Placeholder. No error defined for this error number." ; + + default : break ; + } + + return NULL ; +} /* src_strerror */ + +/*============================================================================== +** Simple interface for performing a single conversion from input buffer to +** output buffer at a fixed conversion ratio. +*/ + +int +src_simple (SRC_DATA *src_data, int converter, int channels) +{ SRC_STATE *src_state ; + int error ; + + if ((src_state = src_new (converter, channels, &error)) == NULL) + return error ; + + src_data->end_of_input = 1 ; /* Only one buffer worth of input. 
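+	** (src_simple () creates and destroys its own SRC_STATE on every call,
+	** so it cannot resume across consecutive chunks of one stream ; use
+	** src_new () / src_process () for streaming input.)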
*/ + + error = src_process (src_state, src_data) ; + + src_state = src_delete (src_state) ; + + return error ; +} /* src_simple */ + +void +src_short_to_float_array (const short *in, float *out, int len) +{ + while (len) + { len -- ; + out [len] = (float) (in [len] / (1.0 * 0x8000)) ; + } ; + + return ; +} /* src_short_to_float_array */ + +void +src_float_to_short_array (const float *in, short *out, int len) +{ double scaled_value ; + + while (len) + { len -- ; + + scaled_value = in [len] * (8.0 * 0x10000000) ; + if (CPU_CLIPS_POSITIVE == 0 && scaled_value >= (1.0 * 0x7FFFFFFF)) + { out [len] = 32767 ; + continue ; + } ; + if (CPU_CLIPS_NEGATIVE == 0 && scaled_value <= (-8.0 * 0x10000000)) + { out [len] = -32768 ; + continue ; + } ; + + out [len] = (short) (lrint (scaled_value) >> 16) ; + } ; + +} /* src_float_to_short_array */ + +void +src_int_to_float_array (const int *in, float *out, int len) +{ + while (len) + { len -- ; + out [len] = (float) (in [len] / (8.0 * 0x10000000)) ; + } ; + + return ; +} /* src_int_to_float_array */ + +void +src_float_to_int_array (const float *in, int *out, int len) +{ double scaled_value ; + + while (len) + { len -- ; + + scaled_value = in [len] * (8.0 * 0x10000000) ; + if (CPU_CLIPS_POSITIVE == 0 && scaled_value >= (1.0 * 0x7FFFFFFF)) + { out [len] = 0x7fffffff ; + continue ; + } ; + if (CPU_CLIPS_NEGATIVE == 0 && scaled_value <= (-8.0 * 0x10000000)) + { out [len] = -1 - 0x7fffffff ; + continue ; + } ; + + out [len] = lrint (scaled_value) ; + } ; + +} /* src_float_to_int_array */ + +/*============================================================================== +** Private functions. +*/ + +static int +psrc_set_converter (SRC_PRIVATE *psrc, int converter_type) +{ + //if (sinc_set_converter (psrc, converter_type) == SRC_ERR_NO_ERROR) + // return SRC_ERR_NO_ERROR ; + + //if (zoh_set_converter (psrc, converter_type) == SRC_ERR_NO_ERROR) + // return SRC_ERR_NO_ERROR ; + + if (linear_set_converter (psrc, converter_type) == SRC_ERR_NO_ERROR) + return SRC_ERR_NO_ERROR ; + + return SRC_ERR_BAD_CONVERTER ; +} /* psrc_set_converter */ + diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/src/src_linear.c b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/src/src_linear.c new file mode 100644 index 0000000..0ad5264 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/resample2/src/src_linear.c @@ -0,0 +1,219 @@ +/* +** Copyright (C) 2002-2011 Erik de Castro Lopo +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +** GNU General Public License for more details. +** +** You should have received a copy of the GNU General Public License +** along with this program; if not, write to the Free Software +** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. +*/ + +/* +** This code is part of Secret Rabbit Code aka libsamplerate. 
A commercial
+** use license for this code is available, please see:
+** http://www.mega-nerd.com/SRC/procedure.html
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "resample_config.h"
+#include "float_cast.h"
+#include "resample_common.h"
+
+static int linear_vari_process (SRC_PRIVATE *psrc, SRC_DATA *data) ;
+static void linear_reset (SRC_PRIVATE *psrc) ;
+
+/*========================================================================================
+*/
+
+#define LINEAR_MAGIC_MARKER MAKE_MAGIC ('l', 'i', 'n', 'e', 'a', 'r')
+
+#define SRC_DEBUG 0
+
+typedef struct
+{	int linear_magic_marker ;
+	int channels ;
+	int reset ;
+	long in_count, in_used ;
+	long out_count, out_gen ;
+	float last_value [1] ;
+} LINEAR_DATA ;
+
+/*----------------------------------------------------------------------------------------
+*/
+
+static int
+linear_vari_process (SRC_PRIVATE *psrc, SRC_DATA *data)
+{	LINEAR_DATA *priv ;
+	double src_ratio, input_index, rem ;
+	int ch ;
+
+	if (data->input_frames <= 0)
+		return SRC_ERR_NO_ERROR ;
+
+	if (psrc->private_data == NULL)
+		return SRC_ERR_NO_PRIVATE ;
+
+	priv = (LINEAR_DATA*) psrc->private_data ;
+
+	if (priv->reset)
+	{	/* If we have just been reset, set the last_value data. */
+		for (ch = 0 ; ch < priv->channels ; ch++)
+			priv->last_value [ch] = data->data_in [ch] ;
+		priv->reset = 0 ;
+		} ;
+
+	priv->in_count = data->input_frames * priv->channels ;
+	priv->out_count = data->output_frames * priv->channels ;
+	priv->in_used = priv->out_gen = 0 ;
+
+	src_ratio = psrc->last_ratio ;
+	input_index = psrc->last_position ;
+
+	/* Calculate samples before first sample in input array. */
+	while (input_index < 1.0 && priv->out_gen < priv->out_count)
+	{
+		if (priv->in_used + priv->channels * (1.0 + input_index) >= priv->in_count)
+			break ;
+
+		if (priv->out_count > 0 && fabs (psrc->last_ratio - data->src_ratio) > SRC_MIN_RATIO_DIFF)
+			src_ratio = psrc->last_ratio + priv->out_gen * (data->src_ratio - psrc->last_ratio) / priv->out_count ;
+
+		for (ch = 0 ; ch < priv->channels ; ch++)
+		{	data->data_out [priv->out_gen] = (float) (priv->last_value [ch] + input_index *
+							(data->data_in [ch] - priv->last_value [ch])) ;
+			priv->out_gen ++ ;
+			} ;
+
+		/* Figure out the next index. */
+		input_index += 1.0 / src_ratio ;
+		} ;
+
+	rem = fmod_one (input_index) ;
+	priv->in_used += priv->channels * lrint (input_index - rem) ;
+	input_index = rem ;
+
+	/* Main processing loop. */
+	while (priv->out_gen < priv->out_count && priv->in_used + priv->channels * input_index < priv->in_count)
+	{
+		if (priv->out_count > 0 && fabs (psrc->last_ratio - data->src_ratio) > SRC_MIN_RATIO_DIFF)
+			src_ratio = psrc->last_ratio + priv->out_gen * (data->src_ratio - psrc->last_ratio) / priv->out_count ;
+
+		if (SRC_DEBUG && priv->in_used < priv->channels && input_index < 1.0)
+		{	printf ("Whoops!!!! in_used : %ld channels : %d input_index : %f\n", priv->in_used, priv->channels, input_index) ;
+			exit (1) ;
+			} ;
+
+		for (ch = 0 ; ch < priv->channels ; ch++)
+		{	data->data_out [priv->out_gen] = (float) (data->data_in [priv->in_used - priv->channels + ch] + input_index *
+						(data->data_in [priv->in_used + ch] - data->data_in [priv->in_used - priv->channels + ch])) ;
+			priv->out_gen ++ ;
+			} ;
+
+		/* Figure out the next index. */
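+		/* (input_index advances by 1/src_ratio per output frame ; whole
+		** steps consume input frames just below, while the fractional part
+		** is kept as the interpolation position.) */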
+		input_index += 1.0 / src_ratio ;
+		rem = fmod_one (input_index) ;
+
+		priv->in_used += priv->channels * lrint (input_index - rem) ;
+		input_index = rem ;
+		} ;
+
+	if (priv->in_used > priv->in_count)
+	{	input_index += (priv->in_used - priv->in_count) / priv->channels ;
+		priv->in_used = priv->in_count ;
+		} ;
+
+	psrc->last_position = input_index ;
+
+	if (priv->in_used > 0)
+		for (ch = 0 ; ch < priv->channels ; ch++)
+			priv->last_value [ch] = data->data_in [priv->in_used - priv->channels + ch] ;
+
+	/* Save current ratio rather than target ratio. */
+	psrc->last_ratio = src_ratio ;
+
+	data->input_frames_used = priv->in_used / priv->channels ;
+	data->output_frames_gen = priv->out_gen / priv->channels ;
+
+	return SRC_ERR_NO_ERROR ;
+} /* linear_vari_process */
+
+/*------------------------------------------------------------------------------
+*/
+
+const char*
+linear_get_name (int src_enum)
+{
+	if (src_enum == SRC_LINEAR)
+		return "Linear Interpolator" ;
+
+	return NULL ;
+} /* linear_get_name */
+
+const char*
+linear_get_description (int src_enum)
+{
+	if (src_enum == SRC_LINEAR)
+		return "Linear interpolator, very fast, poor quality." ;
+
+	return NULL ;
+} /* linear_get_description */
+
+int
+linear_set_converter (SRC_PRIVATE *psrc, int src_enum)
+{	LINEAR_DATA *priv = NULL ;
+
+	if (src_enum != SRC_LINEAR)
+		return SRC_ERR_BAD_CONVERTER ;
+
+	if (psrc->private_data != NULL)
+	{	free (psrc->private_data) ;
+		psrc->private_data = NULL ;
+		} ;
+
+	if (psrc->private_data == NULL)
+	{	priv = calloc (1, sizeof (*priv) + psrc->channels * sizeof (float)) ;
+		if (priv == NULL)
+			return SRC_ERR_MALLOC_FAILED ;
+		psrc->private_data = priv ;
+		} ;
+
+	priv->linear_magic_marker = LINEAR_MAGIC_MARKER ;
+	priv->channels = psrc->channels ;
+
+	psrc->const_process = linear_vari_process ;
+	psrc->vari_process = linear_vari_process ;
+	psrc->reset = linear_reset ;
+
+	linear_reset (psrc) ;
+
+	return SRC_ERR_NO_ERROR ;
+} /* linear_set_converter */
+
+/*===================================================================================
+*/
+
+static void
+linear_reset (SRC_PRIVATE *psrc)
+{	LINEAR_DATA *priv = NULL ;
+
+	priv = (LINEAR_DATA*) psrc->private_data ;
+	if (priv == NULL)
+		return ;
+
+	priv->channels = psrc->channels ;
+	priv->reset = 1 ;
+	memset (priv->last_value, 0, sizeof (priv->last_value [0]) * priv->channels) ;
+
+	return ;
+} /* linear_reset */
+
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/st_lyric_parser/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/st_lyric_parser/CMakeLists.txt
new file mode 100644
index 0000000..8aa4f76
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/st_lyric_parser/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_ST_LYRIC_PARSER_SRCS)
+add_library(st_lyric_parser ${DIR_ST_LYRIC_PARSER_SRCS})
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/st_lyric_parser/inc/STLyricParser.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/st_lyric_parser/inc/STLyricParser.h
new file mode 100644
index 0000000..df73dfd
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/st_lyric_parser/inc/STLyricParser.h
@@ -0,0 +1,31 @@
+//
+// Created by yangjianli on 2020/4/21.
+//
+
+#ifndef PRE_PROCESS_VOICE_STLYRICPARSER_H
+#define PRE_PROCESS_VOICE_STLYRICPARSER_H
+
+/**
+ * Starmaker lyric parser.
+ * Directly extracts the time range of every lyric line.
+ */
+#include "vector"
+struct ST_LINE
+{
+    int begin_ms;
+    int duration_ms;
+};
+
+class STLyricParser
+{
+public:
+    STLyricParser() {};
+    ~STLyricParser() {};
+
+public:
+    std::vector<ST_LINE> parse_lines(const char* lyric_path);          // takes a lyric file path
+    std::vector<ST_LINE> parse_lines(const char* lyric_content, int len); // takes the lyric file content
+};
+
+
+#endif //PRE_PROCESS_VOICE_STLYRICPARSER_H
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/st_lyric_parser/src/STLyricParser.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/st_lyric_parser/src/STLyricParser.cpp
new file mode 100644
index 0000000..0a515eb
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/st_lyric_parser/src/STLyricParser.cpp
@@ -0,0 +1,49 @@
+//
+// Created by yangjianli on 2020/4/21.
+//
+
+#include "lib_json/inc/json.h"
+#include "STLyricParser.h"
+#include "fstream"
+
+std::vector<ST_LINE> STLyricParser::parse_lines(const char *lyric_path)
+{
+    std::ifstream t;
+    t.open(lyric_path);           // open input file
+    if( !t.is_open() )
+    {
+        t.close();
+        std::vector<ST_LINE> res;
+        return res;
+    }
+    t.seekg(0, std::ios::end);    // go to the end
+    int len = (int)t.tellg();     // report location (this is the length)
+    t.seekg(0, std::ios::beg);    // go back to the beginning
+    char* lyric_content = new char[len]; // allocate a buffer of the right size
+    t.read(lyric_content, len);   // read the whole file into the buffer
+    t.close();
+    std::vector<ST_LINE> lines = parse_lines(lyric_content, len);
+    delete [] lyric_content;      // release the temporary file buffer
+    return lines;
+}
+
+std::vector<ST_LINE> STLyricParser::parse_lines(const char *lyric_content, int len)
+{
+    Json::Reader reader;
+    Json::Value root;
+
+    std::vector<ST_LINE> lines;
+    if(reader.parse(lyric_content,lyric_content+len,root))
+    {
+        Json::Value j_lyric = root["lyric"];
+        for(uint32_t i = 0; i < j_lyric.size(); i++) {
+            Json::Value j_line = j_lyric[i];
+            ST_LINE s_line_time;
+            s_line_time.begin_ms = j_line["line"]["start_time"].asInt();
+            s_line_time.duration_ms = j_line["line"]["duration"].asInt();
+            lines.push_back(s_line_time);
+        }
+    }
+    else {
+        return lines;
+    }
+    return lines;
+}
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/voice-detect/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/voice-detect/CMakeLists.txt
new file mode 100644
index 0000000..d960346
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/voice-detect/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_VOICE_DETECT_SRCS)
+add_library(voice_detect ${DIR_VOICE_DETECT_SRCS})
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/voice-detect/inc/CVoiceDetect.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/voice-detect/inc/CVoiceDetect.h
new file mode 100644
index 0000000..eebd613
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/voice-detect/inc/CVoiceDetect.h
@@ -0,0 +1,66 @@
+//
+// Created by yangjianli on 2020-03-16.
+//
+
+#ifndef VOICE_DETECT_CVOICEDETECT_H
+#define VOICE_DETECT_CVOICEDETECT_H
+
+#include "resample2/inc/CResample2.h"
+#include "dpitch/inc/DPitchHandle.h"
+#include "vector"
+
+#define VD_ERR_CODE_SUCCESS 0
+#define VD_MAX_INPUT_SIZE_MS 0.002  // at most 2 ms of input at a time (value is in seconds)
+#define VD_DETECT_WINDOW 5          // number of frames per detection window [5 ms per frame]
+
+#define VD_DETECT_MIN_FREQUENCY 100 // minimum human-voice frequency (Hz)
+#define VD_DETECT_MAX_FREQUENCY 500 // maximum human-voice frequency (Hz)
+
+/**
+ * Voice detection.
+ * Real-time detection of human voice.
+ * Approach:
+ * 1. Only mono data is processed.
+ * 2. The input data is resampled first.
+ * The flow has two parts: the first feeds input data in, the second reads results out.
+ */
+
+
+struct VOICE_STATE
+{
+    float cur_time;
+    bool is_voice;
+};
+
+class CVoiceDetect
+{
+public:
+    CVoiceDetect();
+    ~CVoiceDetect();
+
+public:
+    int init(int sample_rate);
+    void uninit();
+    // Feed one frame of data per call; returns whether voice is present at this point
+    // (strictly speaking, whether voice was present a short while earlier)
+    int process(short* inbuf, int len, std::vector<VOICE_STATE> &voice_state, bool last);
+    int process(float* inbuf, int len, std::vector<VOICE_STATE> &voice_state, bool last);
+
+private:
+    int process(std::vector<VOICE_STATE> &voice_state, bool last);
+
+private:
+    CResample2* m_resample2;
+    DPitch::CPitcher* m_pitcher;
+private:
+    std::vector<short> m_in_buf;          // input samples
+    std::vector<short> m_in_resample_buf; // samples after resampling
+    std::vector<float> m_pitches;         // extracted pitch values
+    int m_current_ms;                     // current time position (ms)
+    int m_sample_rate;
+    short* m_tp_resample_buf;
+    float* m_tp_pitches;
+};
+
+
+#endif //VOICE_DETECT_CVOICEDETECT_H
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/voice-detect/src/CVoiceDetect.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/voice-detect/src/CVoiceDetect.cpp
new file mode 100644
index 0000000..7335893
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/voice-detect/src/CVoiceDetect.cpp
@@ -0,0 +1,201 @@
+//
+// Created by yangjianli on 2020-03-16.
+//
+
+#include "CVoiceDetect.h"
+
+
+CVoiceDetect::CVoiceDetect()
+{
+    m_resample2 = NULL;
+    m_pitcher = NULL;
+    m_current_ms = 0;
+    m_sample_rate = 0;
+    m_tp_pitches = NULL;
+    m_tp_resample_buf = NULL;
+    uninit();
+}
+
+CVoiceDetect::~CVoiceDetect()
+{
+    uninit();
+}
+
+int CVoiceDetect::init(int sample_rate)
+{
+    uninit();
+    m_sample_rate = sample_rate;
+    m_resample2 = new CResample2();
+    int max_out_len;
+    int ret = m_resample2->init(sample_rate, DP_SAMPLE_RATE, 1, VD_MAX_INPUT_SIZE_MS * sample_rate, &max_out_len);
+    if(ret != VD_ERR_CODE_SUCCESS)
+    {
+        uninit();
+        return ret;
+    }
+    m_pitcher = new DPitch::CPitcher();
+
+    // scratch buffers
+    m_tp_resample_buf = new short[int(VD_MAX_INPUT_SIZE_MS * sample_rate * 2)]; // allocate a little extra
+    m_tp_pitches = new float[100];
+    return VD_ERR_CODE_SUCCESS;
+}
+
+void CVoiceDetect::uninit()
+{
+    if(m_pitcher)
+    {
+        delete m_pitcher;
+        m_pitcher = NULL;
+    }
+
+    if(m_resample2)
+    {
+        delete m_resample2;
+        m_resample2 = NULL;
+    }
+    if(m_tp_resample_buf)
+    {
+        delete[] m_tp_resample_buf;
+        m_tp_resample_buf = NULL;
+    }
+
+    if(m_tp_pitches)
+    {
+        delete[] m_tp_pitches;
+        m_tp_pitches = NULL;
+    }
+
+    m_in_buf.clear();
+    m_in_resample_buf.clear();
+    m_pitches.clear();
+}
+
+int CVoiceDetect::process(short *inbuf, int len, std::vector<VOICE_STATE> &voice_state, bool last)
+{
+    // TODO: optimize performance
+    m_in_buf.insert(m_in_buf.end(), inbuf, inbuf+len);
+    return process(voice_state, last);
+}
+
+int CVoiceDetect::process(float *inbuf, int len, std::vector<VOICE_STATE> &voice_state, bool last)
+{
+    // TODO: optimize performance
+    // assumes float samples in [-1, 1]; scale to 16 bit before buffering
+    for(int i = 0; i < len; i++)
+    {
+        m_in_buf.push_back((short)(inbuf[i] * 32767.0f));
+    }
+    return process(voice_state, last);
+}
+
+int CVoiceDetect::process(std::vector<VOICE_STATE> &voice_state, bool last)
+{
+    // resample
+    int max_size = VD_MAX_INPUT_SIZE_MS * m_sample_rate;
+    int start = 0;
+    while(m_in_buf.size() - start >= max_size)
+    {
+        // take out 2 ms at a time for processing
+        short* pdata = &m_in_buf[start];
+        int out_len = m_resample2->process((char*)pdata, max_size * 2, (char*)m_tp_resample_buf);
+        m_in_resample_buf.insert(m_in_resample_buf.end(), m_tp_resample_buf, m_tp_resample_buf + out_len/2);
+        start += max_size;
+    }
+    if(last)
+    {
+        // process whatever remains (less than 2 ms)
+        short* pdata = &m_in_buf[start];
+        int out_len = m_resample2->process((char*)pdata, (m_in_buf.size() - start) * 2, (char*)m_tp_resample_buf);
+        m_in_resample_buf.insert(m_in_resample_buf.end(), m_tp_resample_buf, m_tp_resample_buf + out_len/2);
+        start = m_in_buf.size();
+    }
+
+    m_in_buf.erase(m_in_buf.begin(), m_in_buf.begin()+start);
+
+    // extract pitch
+    start = 0;
+    while(m_in_resample_buf.size() - start >= DP_NSAMP_SHIFT)
+    {
+        short* pdata = &m_in_resample_buf[start];
+        int frame = 0;
+        m_pitcher->Process(pdata, DP_NSAMP_SHIFT, m_tp_pitches, frame, false);
+        m_pitches.insert(m_pitches.end(), m_tp_pitches, m_tp_pitches + frame);
+        start += DP_NSAMP_SHIFT;
+    }
+    if(last)
+    {
+        short* pdata = &m_in_resample_buf[start];
+        int frame = 0;
+        m_pitcher->Process(pdata, m_in_resample_buf.size() - start, m_tp_pitches, frame, true);
+        m_pitches.insert(m_pitches.end(), m_tp_pitches, m_tp_pitches + frame);
+        start = m_in_resample_buf.size();
+    }
+
+//#ifdef DEBUG
+//    FILE* file = fopen("/tmp/resample1.pcm", "ab");
+//    short* pt_data = &m_in_resample_buf[0];
+//    fwrite(pt_data, start * sizeof(short), 1, file);
+//    fclose(file);
+//#endif
+
+    m_in_resample_buf.erase(m_in_resample_buf.begin(), m_in_resample_buf.begin()+start);
+
+    // judge the results
+    // each VD_DETECT_WINDOW consecutive results form one window
+    // TODO: performance can be optimized
+    start = 0;
+    int ms = DP_NSAMP_SHIFT * 1000 / DP_SAMPLE_RATE;
+    float half = ms / 2.0;
+    while(m_pitches.size() - start >= VD_DETECT_WINDOW)
+    {
+        // scan one whole window; only if every frame qualifies is it voice
+        bool check = true;
+        for(int i = start; i < start + VD_DETECT_WINDOW; i++)
+        {
+            if(m_pitches[i] > VD_DETECT_MAX_FREQUENCY || m_pitches[i] < VD_DETECT_MIN_FREQUENCY)
+            {
+                check = false;
+                break;
+            }
+        }
+        VOICE_STATE vs =
+        {
+            .cur_time = (float)(m_current_ms + half), // corresponds to the middle of the window
+            .is_voice = check
+        };
+        voice_state.push_back(vs);
+
+        // slide the window
+        m_current_ms += ms;
+        start += 1;
+    }
+    if(last)
+    {
+        while(m_pitches.size() - start > 0)
+        {
+            int len = m_pitches.size() - start;
+            // scan the remaining (shorter) window; only if every frame qualifies is it voice
+            bool check = true;
+            for(int i = start; i < start + len; i++)
+            {
+                if(m_pitches[i] > VD_DETECT_MAX_FREQUENCY || m_pitches[i] < VD_DETECT_MIN_FREQUENCY)
+                {
+                    check = false;
+                    break;
+                }
+            }
+            VOICE_STATE vs =
+            {
+                .cur_time = (float)(m_current_ms + half),
+                .is_voice = check
+            };
+            voice_state.push_back(vs);
+
+            // slide the window
+            m_current_ms += ms;
+            start += 1;
+        }
+    }
+    m_pitches.erase(m_pitches.begin(), m_pitches.begin() + start);
+    return VD_ERR_CODE_SUCCESS;
+}
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/CMakeLists.txt b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/CMakeLists.txt
new file mode 100644
index 0000000..3045b00
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_WAVES_SRCS)
+add_library(waves ${DIR_WAVES_SRCS})
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/inc/ExtraMono.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/inc/ExtraMono.h
new file mode 100644
index 0000000..280fab0
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/inc/ExtraMono.h
@@ -0,0 +1,230 @@
+
+#include <stdio.h>
+#include <string.h>
+#include <string>
+
+#define SIZE_LONG 4
+#define SIZE_SHORT 2
+
+#define SIZE_FLAG 4
+#define FMT_TAG 0x0001
+
+#define BITS_PER_BYTE 8
+
+#ifndef AFS_CMPL_MAX_WAV
+#define AFS_CMPL_MAX_WAV 15360000 // up to 16 minutes of audio (960*16000)
+#endif
+
+//+---------------------------------------------------------------------------+
+//+ Read one 32-bit value from the file
+//+---------------------------------------------------------------------------+
+unsigned long fa_read_u32(FILE* fp)
+{
+    unsigned long cx;
+    unsigned char temp[SIZE_LONG];
+
+    fread(temp, sizeof(unsigned char), SIZE_LONG, fp);
+    cx = (unsigned long)temp[0];
+    cx |= (unsigned long)temp[1] << 8;
+    cx |= (unsigned long)temp[2] << 16;
+    cx |= (unsigned long)temp[3] << 24;
+    return cx;
+}
+
+//+---------------------------------------------------------------------------+
+//+ Read one 16-bit value from the file
+//+---------------------------------------------------------------------------+
+unsigned short fa_read_u16(FILE *fp)
+{
+    unsigned short cx;
+    unsigned char temp[SIZE_SHORT];
+
+    fread(temp, sizeof(unsigned char), SIZE_SHORT, fp);
+    cx = temp[0] | (temp[1] * 256);
+    return cx;
+}
+
+int GetWaveHeadLen(const char* pszFile,unsigned short &channels, int &nPos, int& nLength)
+{
+    //+---------------------------------------------------------------------------+
+    //+ Read the WAVE header information
+    //+---------------------------------------------------------------------------+
+    unsigned char temp[SIZE_FLAG];
+    unsigned short bits_per_sample;
+    unsigned long x_size;
+    unsigned long n_skip;
+
+    unsigned short format;
+    //unsigned short channels;
+    unsigned long sample_rate;
+    unsigned short block_align;
+    unsigned long data_size;
+    int nCnt = 0;
+
+    /* read the common header fields */
+    FILE* pWavFile = fopen(pszFile, "rb");
+    if ( pWavFile == NULL )
+    {
+        printf("Input file can not be opened!\n");
+        return -1;
+    }
+
+    fseek(pWavFile, 0, SEEK_END );
+    nLength = ftell(pWavFile);
+    fseek(pWavFile, 0, SEEK_SET );
+
+    // check that the resource tag is "RIFF"
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "RIFF", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Resource flag is not RIFF!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+    nCnt += SIZE_LONG;
+
+    // check that the file tag is "WAVE"
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "WAVE", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "File flag is not WAVE\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    // check that the format tag is "fmt "
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "fmt ", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Format flag is not FMT!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    x_size = fa_read_u32(pWavFile);
+    nCnt += SIZE_LONG;
+
+    // check that the encoding format is 0x0001 (PCM)
+    format = fa_read_u16(pWavFile);
+    nCnt += SIZE_SHORT;
+    if ( format != FMT_TAG )
+    {
+        fprintf(stderr, "Encoding format is not 0x0001!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+
+    // read the channel count and sample rate
+    channels = fa_read_u16(pWavFile);
+    sample_rate = fa_read_u32(pWavFile);
+
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+
+    // read the block align and bits per sample
+    block_align = fa_read_u16(pWavFile);
+    bits_per_sample = fa_read_u16(pWavFile);
+
+    /* skip any extra bytes in the fmt chunk */
+    x_size -= (4*SIZE_SHORT + 2*SIZE_LONG);
+    if ( x_size != 0 )
+    {
+        fseek(pWavFile, x_size, SEEK_CUR);
+    }
+
+    // read the data chunk size
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    while ( memcmp(temp, "data", SIZE_FLAG) != 0 )
+    {
+        n_skip = fa_read_u32(pWavFile);
+        fseek(pWavFile, n_skip, SEEK_CUR);
+
+        fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    }
+
+    data_size = fa_read_u32(pWavFile);
+    fclose(pWavFile);
+
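+    // Note: for a canonical PCM WAV with no extra chunks the header is
+    // 44 bytes, so nLength - data_size == 44 at this point.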
//+---------------------------------------------------------------------------+
+    //+ Return the length of the WAVE header
+    //+---------------------------------------------------------------------------+
+    nPos = nCnt;
+    int nHeadLength = nLength - data_size;
+    return nHeadLength;
+}
+
+bool ExtraMono(const std::string &sInput, const std::string &sOutput)
+{
+    FILE *pFile = fopen(sInput.c_str(), "rb");
+    if ( NULL == pFile )
+    {
+        printf("Fopen Error %s\n", sInput.c_str());
+        return false;
+    }
+
+    FILE *pFile2 = fopen(sOutput.c_str(), "wb");
+    if ( NULL == pFile2 )
+    {
+        printf("Fopen2 Error %s\n", sOutput.c_str());
+        fclose(pFile);
+        return false;
+    }
+
+    short *pBuf = new short[AFS_CMPL_MAX_WAV];
+    int nLen = 0;
+
+    nLen = fread(pBuf, sizeof(short), AFS_CMPL_MAX_WAV, pFile);
+    if ( nLen <= 0 )
+    {
+        perror("Fread Error!");
+        delete [] pBuf;
+        fclose(pFile);
+        fclose(pFile2);
+        return false;
+    }
+
+    unsigned short channels=0;
+    int nPos;
+    int nLength;
+    int nHeadByte = GetWaveHeadLen(sInput.c_str(),channels, nPos, nLength);
+    int nHeadShort = nHeadByte/2;
+
+    if (channels==1)
+    {
+        // already mono: pass the samples through (the header shorts are skipped)
+        fwrite(pBuf + nHeadShort, sizeof(short), nLen - nHeadShort, pFile2);
+        delete [] pBuf;
+        pBuf = NULL;
+    }
+    else
+    {
+        short *pBuf2 = new short[AFS_CMPL_MAX_WAV];
+        memcpy( pBuf2, pBuf, nHeadShort*sizeof(short));
+        // patch the channel-count field of the copied header to 1;
+        // nPos is a byte offset into the header, pBuf2 is indexed in shorts
+        pBuf2[nPos / 2] = 1;
+
+        unsigned char tmp[2];
+        memcpy(tmp, &pBuf2[nPos / 2], 2);
+
+        pBuf2[nPos / 2] = static_cast<short>(tmp[0] | tmp[1]*256);   // little-endian round-trip
+
+        // keep every other sample, i.e. one channel of the interleaved pair
+        short *pWav = pBuf + nHeadShort;
+        nLen -= nHeadShort;
+
+        int halfnlen=nLen/2;
+        for (int i=0;i<halfnlen;i++ )
+        {
+            pBuf2[nHeadShort+i] = *(pWav+i*2);
+        }
+        // only half of the samples remain after the downmix
+        fwrite(pBuf2, sizeof(short), halfnlen+nHeadShort, pFile2);
+
+        delete []pBuf;
+        delete []pBuf2;
+        pBuf = NULL;
+        pBuf2 = NULL;
+    }
+
+
+    fclose(pFile);
+    fclose(pFile2);
+    return true;
+}
diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/inc/WaveFile.h b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/inc/WaveFile.h
new file mode 100644
index 0000000..8b57806
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/inc/WaveFile.h
@@ -0,0 +1,74 @@
+#ifndef WAVE_FILE_H
+#define WAVE_FILE_H
+
+#include <stdio.h>    // assumed from the FILE*/SEEK_SET usage below; original header names are missing
+#include <stdint.h>   // assumed from the uint8_t/uint32_t/int32_t usage below
+
+
+typedef enum SAMPLE_FORMAT
+{
+    SF_U8 = 8,
+    SF_S16 = 16,
+    SF_S24 = 24,
+    SF_S32 = 32,
+    SF_IEEE_FLOAT = 0x100 + 32,
+    SF_IEEE_DOUBLE = 0x100 + 64,
+    SF_MAX,
+} SAMPLE_FORMAT;
+
+/* main processing object **/
+class CWaveFile
+{
+public:
+    /* the constructor takes the file name and whether to write or to read **/
+    CWaveFile(const char* Filename, bool Write);
+    virtual ~CWaveFile();
+
+public:
+    int GetChannels();
+    int GetSampleRate();
+    double GetDuration(); // in second
+    uint32_t GetChannelMask();
+    void SetChannels(int Channels);
+    void SetSampleRate(int SampleRate);
+    void SetSampleFormat(SAMPLE_FORMAT Format);
+    void SetChannelMask(uint32_t Mask);
+    void Stat();
+    void SetupDone();
+    bool ReadFrameAsS16(short* FrameSamples, int Frames = 1);
+    bool ReadFrameAsDouble(double* FrameSamples, int Frames = 1);
+    bool ReadFrameAsfloat(float* FrameSamples, int Frames = 1);
+    void WriteRaw(void* Raw, int Size);
+    void WriteFrame(uint8_t* FrameSamples, int Frames = 1);
+    void WriteFrame(short* FrameSamples, int Frames = 1);
+    void WriteFrame(int32_t* FrameSamples, int Frames = 1);
+    void WriteFrameS24(int32_t* FrameSamples, int Frames = 1);
+    void WriteFrame(double* FrameSamples, int Frames = 1);
+    void WriteFrame(float* FrameSamples, int Frames=1);
+    void Seek(int FramePos, int Where = SEEK_SET);
+    bool GetStatus();
+    SAMPLE_FORMAT GetFormat();
+    int GetTotalFrames();
+    int GetFramesRead();
+
+
+protected:
+    FILE* File;
+    int Channels;          /* channel count **/
+    int SampleRate;        /* sample rate **/
+    SAMPLE_FORMAT Format;  /* sample precision
**/ + int SampleSize; // Measured in Bits + unsigned int FrameStartPos; /* 音频数据的起始位置 **/ + unsigned long TotalFrames; /* 总帧数,如果16bit,则一个short为一帧 **/ + unsigned long FramesRead; + double Duration; /* 时长 **/ + + bool ReadOnly; /* 是度还是写 **/ + + uint32_t ChannelMask; + + bool m_bOK; /* 文件是否已经被打开 **/ +}; + + +#endif \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/src/WaveFile.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/src/WaveFile.cpp new file mode 100644 index 0000000..b880fe2 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/ref/waves/src/WaveFile.cpp @@ -0,0 +1,818 @@ + +#include +#include +#include +#include + +#if WIN32 +#else +#include +#endif + +#include "WaveFile.h" + +#define SPEAKER_FRONT_LEFT 0x1 +#define SPEAKER_FRONT_RIGHT 0x2 +#define SPEAKER_FRONT_CENTER 0x4 +#define SPEAKER_LOW_FREQUENCY 0x8 +#define SPEAKER_BACK_LEFT 0x10 +#define SPEAKER_BACK_RIGHT 0x20 +#define SPEAKER_FRONT_LEFT_OF_CENTER 0x40 +#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80 +#define SPEAKER_BACK_CENTER 0x100 +#define SPEAKER_SIDE_LEFT 0x200 +#define SPEAKER_SIDE_RIGHT 0x400 +#define SPEAKER_TOP_CENTER 0x800 +#define SPEAKER_TOP_FRONT_LEFT 0x1000 +#define SPEAKER_TOP_FRONT_CENTER 0x2000 +#define SPEAKER_TOP_FRONT_RIGHT 0x4000 +#define SPEAKER_TOP_BACK_LEFT 0x8000 +#define SPEAKER_TOP_BACK_CENTER 0x10000 +#define SPEAKER_TOP_BACK_RIGHT 0x20000 +#define SPEAKER_RESERVED 0x80000000 + + +#define SPEAKER_REAR_CENTER_SURROUND SPEAKER_BACK_CENTER + +#define DCA_MONO 0 +#define DCA_CHANNEL 1 +#define DCA_STEREO 2 +#define DCA_STEREO_SUMDIFF 3 +#define DCA_STEREO_TOTAL 4 +#define DCA_3F 5 +#define DCA_2F1R 6 +#define DCA_3F1R 7 +#define DCA_2F2R 8 +#define DCA_3F2R 9 +#define DCA_4F2R 10 + +#define DCA_DOLBY 101 /* FIXME */ + +#define DCA_CHANNEL_MAX DCA_3F2R /* We don't handle anything above that */ +#define DCA_CHANNEL_BITS 6 +#define DCA_CHANNEL_MASK 0x3F + +#define DCA_LFE 0x80 +#define DCA_ADJUST_LEVEL 0x100 + +#define WAVE_FORMAT_PCM 0x0001 +#define WAVE_FORMAT_IEEE_FLOAT 0x0003 +#define WAVE_FORMAT_EXTENSIBLE 0xFFFE + +static uint8_t wav_header[] = { + 'R', 'I', 'F', 'F', 0xfc, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E', + 'f', 'm', 't', ' ', 16, 0, 0, 0, + WAVE_FORMAT_PCM, WAVE_FORMAT_PCM >> 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, + 'd', 'a', 't', 'a', 0xd8, 0xff, 0xff, 0xff +}; + +static uint8_t wavmulti_header[] = { + 'R', 'I', 'F', 'F', 0xf0, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E', + 'f', 'm', 't', ' ', 40, 0, 0, 0, + (uint8_t)(WAVE_FORMAT_EXTENSIBLE & 0xFF), WAVE_FORMAT_EXTENSIBLE >> 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 22, 0, + 0, 0, 0, 0, 0, 0, + WAVE_FORMAT_IEEE_FLOAT, WAVE_FORMAT_IEEE_FLOAT >> 8, + 0, 0, 0, 0, 0x10, 0x00, 0x80, 0, 0, 0xaa, 0, 0x38, 0x9b, 0x71, + 'd', 'a', 't', 'a', 0xb4, 0xff, 0xff, 0xff +}; + +static void store4 (uint8_t * buf, int value) +{ + buf[0] = value; + buf[1] = value >> 8; + buf[2] = value >> 16; + buf[3] = value >> 24; +} + +static void store2 (uint8_t * buf, int value) +{ + buf[0] = value; + buf[1] = value >> 8; +} + + +static uint32_t find_chunk(FILE * file, const uint8_t chunk_id[4]) +{ + uint8_t buffer[8]; + while (1) { + size_t chunksize; + size_t s = fread(buffer, 1, 8, file); + if (s < 8) + return 0; + chunksize = (uint32_t)buffer[4] | ((uint32_t)buffer[5] << 8) | + ((uint32_t)buffer[6] << 16) | ((uint32_t)buffer[7] << 24); + if (!memcmp(buffer, chunk_id, 4)) + return chunksize; + fseek(file, chunksize, SEEK_CUR); + } +} + 
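+// Minimal usage sketch for find_chunk(): the caller is expected to have
+// consumed the 12-byte "RIFF"+size+"WAVE" preamble first; the helper then
+// walks chunk headers, skipping unknown chunks, and leaves the file positioned
+// at the payload of the requested chunk, returning its size (0 on EOF).
+// Disabled illustrative code only, not part of the build:
+#if 0
+FILE* f = fopen("in.wav", "rb");              // hypothetical input file
+fseek(f, 12, SEEK_SET);                       // skip "RIFF" + size + "WAVE"
+static const uint8_t fmt_id[4]  = { 'f', 'm', 't', ' ' };
+static const uint8_t data_id[4] = { 'd', 'a', 't', 'a' };
+uint32_t fmt_bytes = find_chunk(f, fmt_id);   // f now points at the fmt payload
+fseek(f, fmt_bytes, SEEK_CUR);                // step over the payload
+uint32_t data_bytes = find_chunk(f, data_id); // f now points at the sample data
+fclose(f);
+#endif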
+ +CWaveFile::CWaveFile(const char* Filename, bool Write) + : Duration(0), ReadOnly(false), m_bOK(false) +{ + Channels = 0; + + /* 打开文件 **/ + File = fopen(Filename, Write ? "wb":"rb"); + if ( !File ) + return; + + /* 设置写文件初始参数 **/ + if ( Write ) + { + SampleRate = 44100; + Channels = 2; + Format = SF_S16; + SampleSize = 16; + ChannelMask = 0; + m_bOK = true; + return; + } + + ReadOnly = true; + + size_t s; + uint8_t buffer[8]; + uint8_t *fmt = NULL; + uint32_t v; + uint32_t avg_bps; + uint32_t block_align; + unsigned short FormatType; + unsigned short SampleType; + + static const uint8_t riff[4] = { 'R', 'I', 'F', 'F' }; + static const uint8_t wave[4] = { 'W', 'A', 'V', 'E' }; + static const uint8_t fmt_[4] = { 'f', 'm', 't', ' ' }; + static const uint8_t data[4] = { 'd', 'a', 't', 'a' }; + + /* 前四个字节为 riff **/ + s = fread(buffer, 1, 8, File); + if (s < 8) + goto err2; + + if (memcmp(buffer, riff, 4)) + goto err2; + + /* 8~12为wave **/ + /* TODO: check size (in buffer[4..8]) */ + s = fread(buffer, 1, 4, File); + if (s < 4) + goto err2; + + if (memcmp(buffer, wave, 4)) + goto err2; + + s = find_chunk(File, fmt_); + if ( s != 16 && s != 18 && s != 40 ) + goto err2; + + fmt = (uint8_t*)malloc(s); + if (!fmt) + goto err2; + + if (fread(fmt, 1, s, File) != s) + goto err3; + + /* wFormatTag */ + v = (uint32_t)fmt[0] | ((uint32_t)fmt[1] << 8); + if (v != WAVE_FORMAT_PCM && v != WAVE_FORMAT_IEEE_FLOAT && v != WAVE_FORMAT_EXTENSIBLE) + goto err3; + + FormatType = v; + + if (s == 40 && 0xfffe == v) + { + // fmt begins at 0x14 of the wave file + v = *(unsigned short*)&fmt[0x2C - 0x14]; + } + + SampleType = v; + + /* wChannels */ + v = (uint32_t)fmt[2] | ((uint32_t)fmt[3] << 8); + + Channels = v; + + if (v < 1 || v > 32) + goto err3; + + /* dwSamplesPerSec */ + SampleRate = (uint32_t)fmt[4] | ((uint32_t)fmt[5] << 8) | + ((uint32_t)fmt[6] << 16) | ((uint32_t)fmt[7] << 24); + + /* dwAvgBytesPerSec */ + avg_bps = (uint32_t)fmt[8] | ((uint32_t)fmt[9] << 8) | + ((uint32_t)fmt[10] << 16) | ((uint32_t)fmt[11] << 24); + + /* wBlockAlign */ + block_align = (uint32_t)fmt[12] | ((uint32_t)fmt[13] << 8); + + /* wBitsPerSample */ + SampleSize = (uint32_t)fmt[14] | ((uint32_t)fmt[15] << 8); + if (SampleSize != 8 && SampleSize != 16 && SampleSize != 32 && SampleSize != 24 && SampleSize != 64) + goto err3; + + switch (SampleSize) + { + case 8: + Format = SF_U8; + break; + case 16: + Format = SF_S16; + break; + case 24: + Format = SF_S24; + break; + case 32: + { + if (SampleType == WAVE_FORMAT_IEEE_FLOAT) + Format = SF_IEEE_FLOAT; + else + Format = SF_S32; + + } + break; + case 64: + if (SampleType != WAVE_FORMAT_IEEE_FLOAT) + goto err3; + Format = SF_IEEE_DOUBLE; + break; + } + + + // Handle 24-bit samples individually +#if 0 + if (SampleSize == 24 && Channels <= 2) + { + int ba24 = Channels * (SampleSize / 8); // Align to 4x + + ba24 = (ba24 + 3) / 4 * 4; + + if (block_align != ba24) + goto err3; + } + else +#endif + { + if (block_align != Channels * (SampleSize / 8)) + goto err3; + } + + if (avg_bps != block_align * SampleRate) + goto err3; + + v = find_chunk(File, data); + + if (v == 0 || v % block_align != 0) + goto err3; + + TotalFrames = v / block_align; + + FramesRead = 0; + + if (FormatType == WAVE_FORMAT_EXTENSIBLE) + { + ChannelMask = *(unsigned int*)(&fmt[0x14]); + } + else + { + ChannelMask = 0; + } + + FrameStartPos = ftell(File); + + free(fmt); + m_bOK = true; + return; + +err3: + free(fmt); +err2: + fclose(File); + + File = NULL; +} + +bool CWaveFile::GetStatus() +{ + return m_bOK; +} + +SAMPLE_FORMAT 
CWaveFile::GetFormat() +{ + return Format; +} + +int CWaveFile::GetTotalFrames() +{ + return TotalFrames; +} + +int CWaveFile::GetFramesRead() +{ + return FramesRead; +} + +CWaveFile::~CWaveFile() +{ + if (File != NULL) + { + if (!ReadOnly) + { + unsigned int Size = ftell(File) - FrameStartPos;// 44; + + fseek(File, FrameStartPos - 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + + Size += FrameStartPos - 8; + + fseek(File, 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + } + + fclose(File); + } +} + +int CWaveFile::GetSampleRate() +{ + return SampleRate; +} + +void CWaveFile::SetSampleRate(int SampleRate) +{ + this->SampleRate = SampleRate; +} + +void CWaveFile::SetupDone() +{ + unsigned char Header[68]; + + fseek(File, 0, SEEK_SET); + + SampleSize = Format & 0xFF; + + if (ChannelMask) + { + memcpy(Header, wavmulti_header, sizeof(wavmulti_header)); + + if (Format < SF_IEEE_FLOAT) + { + // store2(Header + 20, WAVE_FORMAT_PCM); + store2(Header + 44, WAVE_FORMAT_PCM); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + store2(Header + 38, SampleSize / 8 * 8); + store4(Header + 40, ChannelMask); + + fwrite(Header, sizeof(wavmulti_header), 1, File); + } + else + { + memcpy(Header, wav_header, sizeof(wav_header)); + + if (Format >= SF_IEEE_FLOAT) + { + store2(Header + 20, WAVE_FORMAT_IEEE_FLOAT); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + fwrite(Header, sizeof(wav_header), 1, File); + } + + + FrameStartPos = ftell(File); +} + + +void CWaveFile::Seek(int FramePos, int Where) +{ + // Ignoring Where + + fseek(File, FrameStartPos + FramePos * Channels* (SampleSize / 8), Where); + + FramesRead = FramePos; + +} + +int CWaveFile::GetChannels() +{ + return Channels; +} + +void CWaveFile::SetChannels(int Channels) +{ + this->Channels = Channels; +} + +void CWaveFile::SetSampleFormat(SAMPLE_FORMAT Format) +{ + this->Format = Format; +} + +uint32_t CWaveFile::GetChannelMask() +{ + return ChannelMask; +} + +void CWaveFile::SetChannelMask(uint32_t Mask) +{ + ChannelMask = Mask; +} + +bool CWaveFile::ReadFrameAsS16(short* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) << 8; + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + return Frames == fread(FrameSamples, sizeof(FrameSamples[0])*Channels, Frames, File); + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 8); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + 
FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 16); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + double DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + } + return false; +} + +bool CWaveFile::ReadFrameAsfloat(float* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[Frames * Channels]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +bool CWaveFile::ReadFrameAsDouble(double* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + 
for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample ))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +void CWaveFile::WriteRaw(void* Raw, int Size) +{ + fwrite(Raw, Size, 1, File); +} + + +void CWaveFile::WriteFrame(uint8_t* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrame(short* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrame(int32_t* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrameS24(int32_t* FrameSamples, int Frames) +{ + for (int c = 0; c < Channels; c++) + { + fwrite(&FrameSamples[c], 3, 1, File); + } +} + +void CWaveFile::WriteFrame(double* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrame(float* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + + +double CWaveFile::GetDuration() +{ + return Duration; +} diff --git a/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/src/CPreProcessVoice.cpp b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/src/CPreProcessVoice.cpp new file mode 100644 index 0000000..52e119d --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/pre_process_voice/src/CPreProcessVoice.cpp @@ -0,0 +1,291 @@ +// +// Created by yangjianli on 2020/4/9. 
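+//
+// Loudness normalization used below (see ebur128_whole/apply_gain): the take
+// is measured with libebur128 in integrated mode, then pulled toward the
+// DEFAULT_BASELINE_DB target with gain = 10^((target - loudness) / 20),
+// capped at MAX_GAIN. Worked example with illustrative numbers: a take
+// measured at -24 LUFS against the -14.57 target gives 10^(9.43/20), i.e.
+// about a 2.96x linear gain.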
+// + +#include "CPreProcessVoice.h" +#include "waves/inc/WaveFile.h" +#include "ebur128/inc/ebur128.h" +#include "voice-detect/inc/CVoiceDetect.h" +#include "st_lyric_parser/inc/STLyricParser.h" +#include "string" +#include "math.h" +#define PROC_LEN 44100 +#define DEFAULT_BASELINE_DB (float)-14.57f +#define MAX_GAIN 100 // 最多拉伸100倍 + +CPreProcessVoice::CPreProcessVoice() +{ + m_samle_rate = 0; + m_channel = 0; + m_voice_state.clear(); + m_voice_line.clear(); + m_lines.clear(); + m_lyric_parser = NULL; +} + +CPreProcessVoice::~CPreProcessVoice() +{ + uninit(); +} + +int CPreProcessVoice::init(int sample_rate, int channel) +{ + m_samle_rate = sample_rate; + m_channel = channel; + if(m_lyric_parser) + { + delete m_lyric_parser; + m_lyric_parser = NULL; + } + m_lyric_parser = new STLyricParser(); + return 0; +} + +void CPreProcessVoice::uninit() +{ + m_voice_state.clear(); + m_voice_line.clear(); + m_lines.clear(); + if(m_lyric_parser) + { + delete m_lyric_parser; + m_lyric_parser = NULL; + } +} + +int CPreProcessVoice::process(float *inbuf, int len) +{ + if(m_samle_rate == 0 || m_channel == 0) + { + return -1; + } + double gain_loudness = 0; + double gain = 1.0; + ebur128_whole(inbuf, len, gain_loudness, gain); + gain = gain <= MAX_GAIN ? gain : MAX_GAIN; + apply_gain(inbuf,len, gain); + if(!m_lines.empty()) + { + for(int i=0;i= len) + { + break; + } + if(pos+duration >= len) + { + duration = len - pos; + } + split_voice(inbuf+pos, duration); + // 更新得到的数据 + for(int j=last_size;jparse_lines(json_file); + } + process(inbuf, len); + printf("split2size: {%ld}\n", m_voice_line.size()); + std::string s_dst_dir = dst_dir; + for(int i=0;iSetSampleRate(owave.GetSampleRate()); + wave_write->SetChannels(owave.GetChannels()); + wave_write->SetSampleFormat(SF_IEEE_FLOAT); + wave_write->SetupDone(); + wave_write->WriteFrame(inbuf+st_pos, frames); + +// printf("write %s frames=%d ok\n", dst_file.c_str(), frames); + delete wave_write; + } + delete [] inbuf; + uninit(); + return 0; +} + +int CPreProcessVoice::ebur128_whole(float* inbuf, const int len, double &gated_loudness, double &gain) +{ + + ebur128_state* st = NULL; + st = ebur128_init(m_channel, m_samle_rate, EBUR128_MODE_I); + if(NULL == st) + { + return -1; + } + int nPos = 0; + int nTmpLength = 0; + int nRet; + + while(nPos < len) + { + nTmpLength = PROC_LEN; + if(len - nPos < PROC_LEN) + { + nTmpLength = len - nPos; + } + nRet = ebur128_add_frames_float(st, inbuf+nPos, nTmpLength/m_channel); + if(nRet != 0) + { + return -2; + } + nPos += nTmpLength; + } + + gated_loudness = -1; + ebur128_loudness_global(st, &gated_loudness); + float db = (DEFAULT_BASELINE_DB - gated_loudness) / 20.f; + gain = pow(10, db); + ebur128_destroy(&st); + return 0; +} + +int CPreProcessVoice::apply_gain(float *inbuf, const int len, double gain) +{ + for(int i=0;i lines; + STLyricParser stLyricParser; + lines = stLyricParser.parse_lines(lyric_path); + + for(int i=0;i 20: + mfcc_o.append(n) + return np.array(mfcc_o) + + def load_data(self): + self._pvc = pre_process_voice.PreProcessVoice() + return read_file(self._data_path) + + def processer(self, single_job): + logging.info("start ....") + st = time.time() + single_job, gender = str(single_job).strip().split(',') + src_wav = os.path.join(WORK_DIR, "wavs/{}.wav".format(single_job)) + + # 切分音频 + dst_dir = os.path.join(WORK_DIR, "wavs_all/{}".format(single_job)) + if os.path.exists(dst_dir): + return + os.makedirs(dst_dir) + ret = self._pvc.process(src_wav, dst_dir) + if ret == 0: + wave_list = glob.glob(dst_dir + "/*.wav") 
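+            # Per-recording flow: every lyric-line segment the splitter wrote
+            # into dst_dir gets its own MFCC matrix, cached as <segment>.npy so
+            # interrupted runs can resume. A typical extraction, assuming the
+            # standard librosa API (these parameters are hypothetical, not
+            # taken from this repo), would look like:
+            #     y, sr = librosa.load(wav, sr=16000, mono=True)
+            #     mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=26).transpose()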
+ mfcc_dir = os.path.join(WORK_DIR, "mfcc_all/{}/{}".format(gender, single_job)) + for wav in wave_list: + if not os.path.exists(mfcc_dir): + os.makedirs(mfcc_dir) + (filepath, tempfilename) = os.path.split(wav) + (filename, extension) = os.path.splitext(tempfilename) + dst_mfcc = os.path.join(mfcc_dir, filename) # 使用numpy保存,所以自动添加了.npy扩展 + if os.path.exists(dst_mfcc + ".npy"): + return + logging.info("{} => {}".format(wav, dst_mfcc)) + mfcc = self.get_one_mfcc(wav) + np.save(dst_mfcc, mfcc) + logging.info("process:{} spent_time={}".format(single_job, time.time() - st)) + + +if __name__ == "__main__": + WORK_DIR = sys.argv[1] + data_path = os.path.join(WORK_DIR, "id_gender.txt") + m_gen_mfcc = MGenMfcc(data_path, worker_num=24) + m_gen_mfcc.process() diff --git a/AIMeiSheng/voice_classification/script/dataset/split_test.py b/AIMeiSheng/voice_classification/script/dataset/split_test.py new file mode 100644 index 0000000..802a954 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/split_test.py @@ -0,0 +1,61 @@ +""" +切分出测试集 +""" +import os +import sys +import glob +import shutil + + +def sp2dict(x_list): + tp_dict = {} + for x in x_list: + ret = str(x).split('/') + if ret[-2] not in tp_dict.keys(): + tp_dict[ret[-2]] = [] + tp_dict[ret[-2]].append(x) + return tp_dict + + +def copy_data2_test(work_dir, dst_dir): + split_rate = 0.7 + male_file_list = glob.glob(os.path.join(work_dir, 'male/*/*.npy')) + female_file_list = glob.glob(os.path.join(work_dir, 'female/*/*.npy')) + + male_file_list = male_file_list[int(len(male_file_list) * split_rate):] + female_file_list = female_file_list[int(len(female_file_list) * split_rate):] + + male_file_list = list(sp2dict(male_file_list).values()) + female_file_list = list(sp2dict(female_file_list).values()) + + # if os.path.exists(dst_dir): + os.makedirs(dst_dir) + os.makedirs(os.path.join(dst_dir, "male")) + os.makedirs(os.path.join(dst_dir, "female")) + for male in male_file_list: + id = male[0].split("/")[-2] + src_file = os.path.join(work_dir, "../male/{}.mp4".format(id)) + dst_file = os.path.join(dst_dir, "male/{}.mp4".format(id)) + shutil.copy(src_file, dst_file) + # 拷贝文件到测试文件夹 + # cmd = "ln -s {} {}".format(os.path.join(work_dir, "male/{}".format(id)), + # os.path.join(dst_dir, "male/{}".format(id))) + # print(cmd) + # os.system(cmd) + + for female in female_file_list: + id = female[0].split("/")[-2] + src_file = os.path.join(work_dir, "../female/{}.mp4".format(id)) + dst_file = os.path.join(dst_dir, "female/{}.mp4".format(id)) + shutil.copy(src_file, dst_file) + # 拷贝文件到测试文件夹 + # cmd = "ln -s {} {}".format(os.path.join(work_dir, "female/{}".format(id)), + # os.path.join(dst_dir, "female/{}".format(id))) + # print(cmd) + # os.system(cmd) + + +if __name__ == "__main__": + work_dir = sys.argv[1] + test_dir = sys.argv[2] + copy_data2_test(work_dir, test_dir) diff --git a/AIMeiSheng/voice_classification/script/dataset/transcode.py b/AIMeiSheng/voice_classification/script/dataset/transcode.py new file mode 100644 index 0000000..7e42d13 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/dataset/transcode.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +from common import * +import os +import glob +import sys + + +class TransCode(SimpleMultiProcesser): + + def load_data(self): + res = glob.glob(os.path.join(self._data_path, "*/*mp4")) + st = time.time() + print("read lines:{} spend_time={}".format(len(res), time.time() - st)) + return res + + def processer(self, single_job): + st = time.time() + single_job = str(single_job).strip('\n') + 
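+        # The Popen call below is equivalent to running, per input file:
+        #     ffmpeg -i <src>.mp4 -ar 16000 -ac 1 wavs/<id>.wav
+        # i.e. decode, resample to 16 kHz and downmix to mono, with ffmpeg's
+        # stderr captured in worker.log.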
src_mp4 = single_job
+        single_job = os.path.basename(single_job).split('.')[0]
+        if not os.path.exists(os.path.join(self._data_path, "wavs")):
+            os.makedirs(os.path.join(self._data_path, "wavs"))
+        dst_wav = os.path.join(self._data_path, "wavs/{}.wav".format(single_job))
+        log_path = os.path.join(self._data_path, "worker.log")
+        with open(log_path, 'w+', encoding='utf8') as logf:
+            trans = subprocess.Popen(["ffmpeg", "-i", src_mp4,
+                                      "-ar", "16000", "-ac", "1", dst_wav],
+                                     stderr=logf)
+            trans.communicate()
+
+        logging.info("process:{} spent_time={}".format(single_job, time.time() - st))
+
+
+if __name__ == "__main__":
+    data_path = sys.argv[1]  # e.g. /data/datasets/voice_classification/av_area_sa
+    trans = TransCode(data_path=data_path, worker_num=8)
+    trans.process()
diff --git a/AIMeiSheng/voice_classification/script/download_msg.py b/AIMeiSheng/voice_classification/script/download_msg.py
new file mode 100644
index 0000000..379b6c2
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/download_msg.py
@@ -0,0 +1,84 @@
+"""
+Download the data.
+self._data_dst_config = {
+    "host": "172.16.2.91",
+    "passwd": "gRYppQtdTpP3nFzH",
+    "user": "worker",
+    "db": "av_db",
+    "port": 3306
+}
+Fetch the records from this database and download them into the directory.
+"""
+from common import *
+import multiprocessing as mp
+import sys
+
+BASE_DIR = "/data/datasets/music_voice_dataset_full"
+
+
+class GetDataset:
+    def __init__(self, area, male_num, female_num):
+        self._area = area
+        self._male_num = male_num
+        self._female_num = female_num
+        self._data_dst_config = {
+            "host": "172.16.2.91",
+            "passwd": "gRYppQtdTpP3nFzH",
+            "user": "worker",
+            "db": "av_db",
+            "port": 3306
+        }
+
+    def get_data(self):
+        sql = """
+            select recording_id,img_url
+            from recording_gender
+            where area in ("{}") and gender2 = {} and stat = 1
+            order by create_time desc
+            limit {}
+        """
+
+        # fetch the male recordings, then the female ones
+        male_msg = get_data_by_mysql(sql.format(self._area, 1, self._male_num), self._data_dst_config)
+        female_msg = get_data_by_mysql(sql.format(self._area, 2, self._female_num), self._data_dst_config)
+        return male_msg, female_msg
+
+    def download_mp4(self, work_dir, data):
+        for recording_id, img_url in data:
+            filename = "{}.mp4".format(recording_id)
+            filepath = os.path.join(work_dir, filename)
+            if os.path.exists(filepath):
+                continue
+            cmd = "coscmd -r ap-mumbai -b av-audit-sync-in-1256122840 download dataset/voice_classification_dataset/{} {}".format(
+                img_url, filepath)
+            exec_cmd(cmd)
+
+    def download(self, work_dir, data, process_nums):
+        pool = mp.Pool(processes=process_nums)
+        bth = int(len(data) / process_nums)
+        for idx in range(0, len(data), bth):
+            pool.apply_async(self.download_mp4, args=(work_dir, data[idx:idx + bth]))
+        pool.close()
+        pool.join()
+
+    def process(self):
+        male, female = self.get_data()
+        print("male_num={} female_num={}".format(len(male), len(female)))
+        work_dir = os.path.join(BASE_DIR, self._area)
+        if not os.path.exists(work_dir):
+            os.makedirs(work_dir)
+        m_work_dir = os.path.join(work_dir, "male")
+        if not os.path.exists(m_work_dir):
+            os.makedirs(m_work_dir)
+        f_work_dir = os.path.join(work_dir, "female")
+        if not os.path.exists(f_work_dir):
+            os.makedirs(f_work_dir)
+        self.download(m_work_dir, male, 20)
+        self.download(f_work_dir, female, 20)
+
+
+if __name__ == "__main__":
+    area = sys.argv[1]  # area name, e.g. av_area_in
+    num = int(sys.argv[2])  # number of recordings to fetch per gender
+    gd = GetDataset(area, num, num)
+    gd.process()
diff --git a/AIMeiSheng/voice_classification/script/download_origin_mp4.py b/AIMeiSheng/voice_classification/script/download_origin_mp4.py
new file mode 100644
index
0000000..10f1029 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/download_origin_mp4.py @@ -0,0 +1,88 @@ +""" +获取干声 & 下载 +要求: 根据输入的数量,获取最新的干声 +""" +import sys +from common import * +import multiprocessing as mp + + +class DownloadOriginMaster: + + def __init__(self, num, work_dir, process_num): + """ + 想获取到的干声数量 + :param num: + """ + self._num = int(num) + self._work_dir = work_dir + self._process_num = int(process_num) + if not os.path.exists(self._work_dir): + os.makedirs(self._work_dir) + + def get_data_from_db(self, num, shared_id): + num = num * 2 # 多找一点,以防干声不够 + sql = """ + select id,created_on + from recording + where song_completed = 1 + and media_type in (1, 2, 9, 10) + order by created_on desc + limit {} + """.format(num) + return get_shard_data_by_sql(sql, shared_id=shared_id) + + def check_origin(self, recording_id): + """ + 检查远端文件是否存在 + :param recording_id: + :return: + """ + cmd = "coscmd -r ap-mumbai -b starmaker-1256122840 info production/uploading/recordings/{}/origin_master.mp4" \ + .format(recording_id) + "| grep Content-Length |awk \'{print $2}\'" + res_str = exec_cmd_and_result(cmd) + size = float(res_str) + if size > 0: + return True + return False + + def download_mp4(self, work_dir, data): + for recording_id in data: + # if self.check_origin(recording_id): + dst_file_name = "{}_origin.mp4".format(recording_id) + dst_file = os.path.join(work_dir, dst_file_name) + cmd = "coscmd -r ap-mumbai -b starmaker-1256122840 download -f production/uploading/recordings/{}/origin_master.mp4 {}" \ + .format(recording_id, dst_file) + exec_cmd(cmd) + + def download(self, work_dir, data, process_nums): + pool = mp.Pool(processes=process_nums) + bth = int(len(data) / process_nums) + for idx in range(0, len(data), bth): + pool.apply_async(self.download_mp4, args=(work_dir, data[idx:idx + bth])) + pool.close() + pool.join() + + def process(self): + db_num = 40 + num = self._num // db_num + + all_data = [] + for i in range(0, db_num): + try: + data = self.get_data_from_db(num, i) + for rid, _ in data: + all_data.append(rid) + except Exception as ex: + print(ex) + print("get_data_num = {}".format(len(all_data))) + self.download(self._work_dir, all_data, self._process_num) + + +if __name__ == "__main__": + num = sys.argv[1] # 作品数量 + work_dir = sys.argv[2] # 工作目录 + process_num = sys.argv[3] # 进程数量 + + dom = DownloadOriginMaster(num, work_dir, process_num) + dom.process() diff --git a/AIMeiSheng/voice_classification/script/download_recording.py b/AIMeiSheng/voice_classification/script/download_recording.py new file mode 100644 index 0000000..77b901c --- /dev/null +++ b/AIMeiSheng/voice_classification/script/download_recording.py @@ -0,0 +1,47 @@ +""" +根据输入的作品id下载作品,扩展名为_rec.mp4 +""" +import sys +import glob +from common import * +import multiprocessing as mp + + +class DownloadMaster: + + def __init__(self, work_dir, process_num): + self._work_dir = work_dir + self._process_num = int(process_num) + if not os.path.exists(self._work_dir): + os.makedirs(self._work_dir) + + def get_recording_ids(self): + return glob.glob(os.path.join(self._work_dir, "*/*/*_acc.mp4")) + + def download_mp4(self, data): + for cur_file in data: + recording_id = str(cur_file).split("/")[-1].split("_")[0] + dst_file = str(cur_file).replace("_acc.mp4", "_rec.mp4") + cmd = "coscmd -r ap-mumbai -b starmaker-1256122840 download -f production/uploading/recordings/{}/master.mp4 {}" \ + .format(recording_id, dst_file) + exec_cmd(cmd) + + def download(self, data, process_nums): + pool = 
mp.Pool(processes=process_nums) + bth = int(len(data) / process_nums) + for idx in range(0, len(data), bth): + pool.apply_async(self.download_mp4, args=(data[idx:idx + bth],)) + pool.close() + pool.join() + + def process(self): + recording_files = self.get_recording_ids() + print(len(recording_files)) + self.download(recording_files, self._process_num) + + +if __name__ == "__main__": + work_dir = sys.argv[1] # 工作目录 + process_num = sys.argv[2] # 进程数量 + dom = DownloadMaster(work_dir, process_num) + dom.process() diff --git a/AIMeiSheng/voice_classification/script/export_data.py b/AIMeiSheng/voice_classification/script/export_data.py new file mode 100644 index 0000000..98e03e3 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/export_data.py @@ -0,0 +1,185 @@ +""" +从精品池中选择出数据,写入到音视频数据库中 +选择规则为: +1 数据来源: mysql -uroot -hresearch-db-r1.starmaker.co -p'Qrdl1130' rec库recording表label字段, 包含female和male,且只包含一种 +2 选择最近几天的数据 + +// 目前已经取了2021-10-19 08:00:00[北京时间],向前倒200天的标注数据 +""" +from common import * +from country2av_area import get_area_by_country +import time +import multiprocessing as mp +import shutil + +BASE_DIR = "/tmp/export_data" + + +class ExportData: + + def __init__(self, begin_tm, end_time, prod=True): + self._begin_tm = begin_tm + self._end_time = end_time + # banned_user_map["host"], passwd=banned_user_map["passwd"], user=banned_user_map["user"], + # db=banned_user_map["db"] + + self._data_src_config = { + "host": "research-db-r1.starmaker.co", + "passwd": "Qrdl1130", + "user": "root", + "db": "rec", + "port": 3306 + } + self._data_dst_config = { + "host": "in-test-smusic-api-1", + "passwd": "solo2018", + "user": "root", + "db": "av_db", + "port": 3306 + } + if prod: + self._data_dst_config = { + "host": "172.16.2.91", + "passwd": "gRYppQtdTpP3nFzH", + "user": "worker", + "db": "av_db", + "port": 3306 + } + + def get_data(self): + sql = """ + select r_id, r_locale, sm_labels, r_created_on from recording + where r_created_on > {} and r_created_on < {} + and r_media_type in (1, 2, 9, 10) + and sm_labels != "" + order by r_created_on desc + limit 1000 + """ + + tot_res = [] + res = get_data_by_mysql(sql.format(self._begin_tm, self._end_time), self._data_src_config) + while len(res) > 0: + tot_res.extend(res) + # 获取最小的时间 + min_r_created_on = res[-1][3] + res = get_data_by_mysql(sql.format(self._begin_tm, min_r_created_on), self._data_src_config) + return tot_res + + def write2db(self, msg): + sql = "replace into recording_gender(recording_id,country,area,img_url, gender1,create_time,update_time) values({},\"{}\",\"{}\",\"{}\",{},{},{})". 
\ + format(msg[0], msg[1], msg[2], msg[3], msg[4], int(time.time()), int(time.time())) + write_data_to_mysql(sql, self._data_dst_config) + + def check_gender(self, sm_labels): + """ + 检查性别选项 + gender: -1 未知 0 未标注 1 男 2 女 + :param sm_labels: + :return: + """ + labels = str(sm_labels).split(",") + gender = 0 + if "female" in labels: + gender += 2 + if "male" in labels: + gender += 1 + if gender not in [1, 2]: + return -1 + return gender + + def check_origin(self, recording_id): + """ + 检查远端文件是否存在 + :param recording_id: + :return: + """ + cmd = "coscmd -r ap-mumbai -b starmaker-1256122840 info production/uploading/recordings/{}/origin_master.mp4" \ + .format(recording_id) + "| grep Content-Length |awk \'{print $2}\'" + res_str = exec_cmd_and_result(cmd) + size = float(res_str) + if size > 0: + return True + return False + + def mv_mp4(self, work_dir, recording_id): + ret = False + if os.path.exists(work_dir): + shutil.rmtree(work_dir) + os.makedirs(work_dir) + dst_file_name = "{}_origin.mp4".format(recording_id) + dst_file = os.path.join(work_dir, dst_file_name) + cmd = "coscmd -r ap-mumbai -b starmaker-1256122840 download -f production/uploading/recordings/{}/origin_master.mp4 {}" \ + .format(recording_id, dst_file) + if exec_cmd(cmd) and os.path.exists(dst_file): + # 下载成功再上传 + cmd = "coscmd -r ap-mumbai -b av-audit-sync-in-1256122840 upload {} dataset/voice_classification_dataset/". \ + format(dst_file, dst_file_name) + if exec_cmd(cmd): + # 下载成功检查远端是否成功 + cmd = "coscmd -r ap-mumbai -b av-audit-sync-in-1256122840 info dataset/voice_classification_dataset/{}" \ + .format(dst_file_name) + "| grep Content-Length |awk \'{print $2}\'" + res_str = exec_cmd_and_result(cmd) + size = float(res_str) + if size > 0: + ret = True + shutil.rmtree(work_dir) + + return ret + + def write_one(self, msg): + """ + 1 筛选出非av_area_other的数据 + 2 下载干声并上传到av-audit-sync-in-1256122840/dataset/voice_classification_dataset[使用recording.mp4作为名字] + 3 将结果写入到数据库 + :return: + """ + r_id, r_locale, sm_labels, r_created_on = msg + r_id = str(r_id) + + # 检查性别标注 + gender = self.check_gender(sm_labels) + if gender < 0: + print(gender) + return + # 检查国家 + av_area = get_area_by_country(r_locale) + if av_area == "av_area_other": + print(av_area) + return + + # 下载并上传干声 + work_dir = os.path.join(BASE_DIR, r_id) + ret = self.mv_mp4(work_dir, r_id) + if not ret: + print(ret) + return + + # 写入到数据库 + new_msg = [ + r_id, r_locale, av_area, "{}_origin.mp4".format(r_id), gender + ] + self.write2db(new_msg) + + def write_multi(self, data): + print("write_multi:{}".format(len(data))) + for i in range(len(data)): + msg = data[i] + self.write_one(msg) + + def process(self): + data = self.get_data() + print("data_num={}".format(len(data))) + process_nums = 20 + pool = mp.Pool(processes=process_nums) + bth = int(len(data) / process_nums) + for idx in range(0, len(data), bth): + pool.apply_async(self.write_multi, args=(data[idx:idx + bth],)) + pool.close() + pool.join() + + +if __name__ == "__main__": + st_time = 1634601600 - 200 * 86400 + ed_time = 1634601600 - 100 * 86400 + ed = ExportData(st_time, ed_time) + ed.process() diff --git a/AIMeiSheng/voice_classification/script/get_song_msg.py b/AIMeiSheng/voice_classification/script/get_song_msg.py new file mode 100644 index 0000000..b0e44f7 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/get_song_msg.py @@ -0,0 +1,138 @@ +""" +1 获取作品对应的歌曲 +2 获取歌曲的伴奏 & 歌词 +""" + +from common import * +import urllib.request +import glob +import sys +import multiprocessing as mp + + +def 
download_url_img_to_disk(url, local): + """ + 下载图像到本地 + :param url: + :param local: + :return: + """ + local_path_tmp = local + ".tmp" + try: + urllib.request.urlretrieve(url, local_path_tmp) + os.rename(local_path_tmp, local) + except Exception as inst: + print(inst) + if os.path.exists(local_path_tmp): + os.unlink(local_path_tmp) + + +def get_song_ids_by_recording_ids(rids): + sql = "select s_song_id, r_id from recording where r_id in ({})" + data_src_config = { + "host": "research-db-r1.starmaker.co", + "passwd": "Qrdl1130", + "user": "root", + "db": "rec", + "port": 3306 + } + msgs = get_data_by_mysql(sql.format(",".join(rids)), data_src_config) + return msgs + + +def download_msg(work_dir, song_id, r_id): + """ + 下载伴奏 & 歌词 + :param work_dir: + :param song_id: + :param r_id: + :return: + """ + song_edit = { + "host": "172.16.2.91", + "passwd": "gRYppQtdTpP3nFzH", + "user": "worker", + "db": "starmaker", + "port": 3306 + } + sql = """ + select instrumental_id, song_id, instrumental_url + from instrumental + where instrumental_id = + ( + select instrumental_id + from song + where song_id = {} + ) + """ + acc_msg = get_data_by_mysql(sql.format(song_id), song_edit) + path = acc_msg[0][2] + print(path) + filename = "{}_acc.mp4".format(r_id) + cmd = "coscmd -r ap-mumbai -b songbook-starmaker-private-1256122840 download -f production/instrumental/{} {}".format( + path, os.path.join(work_dir, filename)) + exec_cmd(cmd) + + # 获取歌词 + sql = """ + select content,lyric_id + from lyric_content + where lyric_id = + ( + select lyric_id + from song + where song_id = {} + ) + """ + sql = sql.format(song_id) + lyric_msg = get_data_by_mysql(sql, song_edit) + content = lyric_msg[0][0] + filename = "{}.lyric".format(r_id) + with open(os.path.join(work_dir, filename), "w") as f: + f.write(content) + + +def multi_download(work_dir, data): + for msg in data: + download_msg(work_dir, msg[0], msg[1]) + + +def get_ids(work_dir): + msg = glob.glob(os.path.join(work_dir, "*mp4")) + ids = [] + for f in msg: + if "acc" in f: + continue + id = f.split("/")[-1].split('.')[0] + ids.append(id) + return ids + + +def get_one_area(work_dir, process_nums): + """ + :param work_dir: mp4所在的目录 + :param num: 并发数量 + :return: + """ + ids = get_ids(work_dir) + data = get_song_ids_by_recording_ids(ids) + pool = mp.Pool(processes=process_nums) + bth = int(len(data) / process_nums) + for idx in range(0, len(data), bth): + it = pool.apply_async(multi_download, args=(work_dir, data[idx:idx + bth])) + it.get() + pool.close() + pool.join() + + +def get_dataset(work_dir, num): + dirs = glob.glob(os.path.join(work_dir, "*/*")) + for dir in dirs: + if os.path.isdir(dir): + get_one_area(dir, num) + + +if __name__ == "__main__": + work_dir = sys.argv[1] + num = int(sys.argv[2]) + get_dataset(work_dir, num) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread.py b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread.py new file mode 100644 index 0000000..4522aca --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread.py @@ -0,0 +1,113 @@ +""" +调整判定方式,寻找到最高的阈值 +""" +import sys + + +def load_from_log(filename): + data = [] + with open(filename) as f: + while True: + line = f.readline() + if not line: + break + if "torch " in line and "..." 
in line: + new_line = line.strip().replace("torch", "") + new_line = new_line.replace("....", "") + new_line = new_line.strip().split(",") + data.append({"msg": [int(new_line[0]), int(new_line[1]), int(new_line[2])]}) + elif "file_name=" in line: + # "file_name=/data/datasets/music_voice_dataset_full/features/av_area_sa/male/6192449414441450.feature.npy ret=1" + ret = line.strip().replace("file_name=", "").split(" ") + filename = ret[0] + gender = filename.split("/")[-2] + area = filename.split("/")[-3] + if gender == "male": + gender = 1 + if gender == "female": + gender = 0 + ret = ret[1].replace("ret=", "") + # print(int(ret)) + data[-1]["filename"] = filename + data[-1]["ret"] = int(ret) + data[-1]["gender"] = gender + data[-1]["area"] = area + return data + + +def calc_one(msg): + """ + :param msg: [女性帧数,男性帧数,不确定帧数] + :return: + """ + female = msg[0] + male = msg[1] + not_sure = msg[2] + tot = female + male + not_sure + if female / tot > 0.5: + return 0 + if male / tot > 0.5: + return 1 + return 2 + + +def calc_one_v1(msg): + """ + 人声大于50%,且性别A/性别B大于2,则判定为性别A + :param msg: [女性帧数,男性帧数,不确定帧数] + :return: + """ + female = msg[0] + male = msg[1] + not_sure = msg[2] + tot = female + male + not_sure + if (female + male) / tot > 0.1: + if female > 5 * male: + return 0 + if male > 5 * female: + return 1 + return 2 + + +def process(filename): + data = load_from_log(filename) + # 计算准确率和召回率 + f_f = 0 # 真女判女 + f_m = 0 # 真女判男 + f_o = 0 # 真女判其他 + m_m = 0 # 真男判男 + m_f = 0 # 真男判女 + m_o = 0 # 真男判其他 + for dt in data: + gender = dt["gender"] + msg = dt["msg"] + ret = calc_one_v1(msg) + if gender == 0: + if ret == 0: + f_f += 1 + elif ret == 1: + f_m += 1 + else: + f_o += 1 + if gender == 1: + if ret == 0: + m_f += 1 + elif ret == 1: + m_m += 1 + else: + m_o += 1 + print("{},{},{}".format(f_f, f_m, f_o)) + print("{},{},{}".format(m_m, m_f, m_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + +if __name__ == "__main__": + filename = sys.argv[1] + process(filename) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v1.py b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v1.py new file mode 100644 index 0000000..4019a36 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v1.py @@ -0,0 +1,156 @@ +""" +确定一下如何从段级别判定歌曲级别的逻辑 +""" +import numpy as np +import sys + + +class GetMethod: + + def get_data(self, log): + ret = {} + with open(log) as f: + while True: + line = f.readline() + if not line: + break + line = line.strip() + arr = line.split(",") + filename = arr[0] + label = arr[1] + if filename not in ret.keys(): + ret[filename] = { + "gender": int(label), + "scores": [] + } + ret[filename]["scores"].append([float(arr[3]), float(arr[4]), float(arr[5])]) + return ret + + def stargy_v1(self, msg): + """ + 求和: 哪个分高就是哪个 + :param msg: + :return: + """ + return np.array(msg).sum(axis=0).argmax() + + def stargy_v2(self, msg): + """ + 去除不确定之后: 男性分数大于女性分数 2倍以上则判定男,同理去判定女 + :param msg: + :return: + """ + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][2] > 0.1: + continue + f_score.append(msg[i][0]) + m_score.append(msg[i][1]) + if (len(f_score) + len(m_score)) / len(msg) < 0.5: + return 2 + + f_avg = 0 + if len(f_score) > 0: + f_avg = sum(f_score) / 
len(f_score) + m_avg = 0 + if len(m_score) > 0: + m_avg = sum(m_score) / len(m_score) + + if f_avg > 6 * m_avg: + return 0 + if m_avg > 15 * f_avg: + return 1 + return 2 + + def stargy_v3(self, msg): + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][0] > 0.8: + f_score.append(msg[i][0]) + elif msg[i][1] > 0.8: + m_score.append(msg[i][1]) + + if (len(f_score) + len(m_score)) / len(msg) < 0.5: + return 2 + + f_avg = len(f_score) + m_avg = len(m_score) + if f_avg > 3 * m_avg: + return 0 + if m_avg > 3 * f_avg: + return 1 + return 2 + + def stargy_v4(self, msg): + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][2] > 0.5: # 非人声 + continue + f_score.append(msg[i][0]) + m_score.append(msg[i][1]) + if len(f_score) / len(msg) < 0.3: + return 2, -1 + + f_avg = sum(f_score) / len(f_score) + m_avg = sum(m_score) / len(m_score) + rate = f_avg / (f_avg + m_avg) + if rate > 0.9: + return 0, rate + if rate < 0.1: + return 1, rate + return 2, rate + + def process_one_file(self, msg): + # return self.stargy_v1(msg) + # return self.stargy_v3(msg) + return self.stargy_v4(msg) + + def process(self, log_file): + msgs = self.get_data(log_file) + f_f = 0 # 真女判女 + f_m = 0 # 真女判男 + f_o = 0 # 真女判其他 + m_m = 0 # 真男判男 + m_f = 0 # 真男判女 + m_o = 0 # 真男判其他 + + for k, v in msgs.items(): + gender = v["gender"] + msg = v["scores"] + ret, rate = self.process_one_file(msg) + if gender == 0: + if ret == 0: + f_f += 1 + elif ret == 1: + f_m += 1 + print("f_m|file={}, rate={}".format(k, rate)) + else: + f_o += 1 + if gender == 1: + if ret == 0: + m_f += 1 + print("m_f|file={}, rate={}".format(k, rate)) + elif ret == 1: + m_m += 1 + else: + m_o += 1 + + print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o)) + print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + +if __name__ == "__main__": + log_file = sys.argv[1] + gm = GetMethod() + gm.process(log_file) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v2.py b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v2.py new file mode 100644 index 0000000..80e5206 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v2.py @@ -0,0 +1,126 @@ +""" +确定一下如何从段级别判定歌曲级别的逻辑 +两个模型产生的结果 +""" +import numpy as np +import sys +import os +import torchvision.models.mobilenet +class GetMethod: + + def get_data(self, log): + ret = {} + with open(log) as f: + while True: + line = f.readline() + if not line: + break + line = line.strip() + arr = line.split(",") + filename = arr[0] + label = arr[1] + if filename not in ret.keys(): + ret[filename] = { + "gender": int(label), + "scores": [] + } + ret[filename]["scores"].append([]) + for i in range(3, len(arr)): + ret[filename]["scores"][-1].append(float(arr[i])) + return ret + + def stargy_v1(self, msg): + """ + 1 模型1的人声帧再去分男女 + 2 获取男/女的数值进行判断 + :param msg: + :return: + """ + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][0] > 0.5: # 非人声 + continue + f_score.append(msg[i][2]) + m_score.append(msg[i][3]) + + silence_rate = len(f_score) / len(msg) + if silence_rate < 0.4 or len(f_score) < 10: + return 2, -1, silence_rate + + f_avg = sum(f_score) / len(f_score) + m_avg = sum(m_score) / len(m_score) + rate = f_avg / (f_avg + 
m_avg) + if rate > 0.75: + return 0, rate, silence_rate + if rate < 0.1: + return 1, rate, silence_rate + return 2, rate, silence_rate + + def process_one_file(self, msg): + return self.stargy_v1(msg) + + def process(self, log_file): + msgs = self.get_data(log_file) + f_f = 0 # 真女判女 + f_m = 0 # 真女判男 + f_o = 0 # 真女判其他 + m_m = 0 # 真男判男 + m_f = 0 # 真男判女 + m_o = 0 # 真男判其他 + + err_list = [] + for k, v in msgs.items(): + gender = v["gender"] + msg = v["scores"] + ret, rate, silence_rate = self.process_one_file(msg) + # if gender == 2 and ret != 2: + # print("2_other:{}, {}, rate={}".format(k, ret, rate)) + if gender == 0: + if ret != 0: + err_list.append([k, rate]) + if ret == 0: + f_f += 1 + elif ret == 1: + f_m += 1 + print("f_m:{},rate={},silence={}".format(k, rate, silence_rate)) + else: + f_o += 1 + # print("f_o:{},rate={}".format(k, rate)) + if gender == 1: + if ret != 1: + err_list.append([k, rate]) + if ret == 0: + m_f += 1 + print("m_f:{},rate={},silence={}".format(k, rate, silence_rate)) + elif ret == 1: + m_m += 1 + else: + m_o += 1 + # print("m_o:{},rate={}".format(k, rate)) + + print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o)) + print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + # 打印出结果 + # for ii in range(0, len(err_list)): + # dst_filename = "/".join(err_list[ii][0].split("/")[-3:]) + # cmd = "cp {} /data/datasets/music_voice_dataset_full/feature_no2000_err/{}".format(err_list[ii], + # dst_filename) + # print(err_list[ii]) + # print(cmd) + # os.system(cmd) + + +if __name__ == "__main__": + log_file = sys.argv[1] + gm = GetMethod() + gm.process(log_file) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v3.py b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v3.py new file mode 100644 index 0000000..8a392b8 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v3.py @@ -0,0 +1,156 @@ +""" +确定一下如何从段级别判定歌曲级别的逻辑 +两个模型产生的结果 +""" +import numpy as np +import sys +import os +import torchvision.models.mobilenet + + +class GetMethod: + + def get_data(self, log): + ret = {} + with open(log) as f: + while True: + line = f.readline() + if not line: + break + line = line.strip() + arr = line.split(",") + filename = arr[0] + label = arr[1] + if filename not in ret.keys(): + ret[filename] = { + "gender": int(label), + "scores": [] + } + ret[filename]["scores"].append([]) + for i in range(3, len(arr)): + ret[filename]["scores"][-1].append(float(arr[i])) + return ret + + def stargy_v1(self, msg): + """ + 1 模型1的人声帧再去分男女 + 2 获取男/女的数值进行判断 + :param msg: + :return: + """ + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][2] > 0.5: # 非人声 + continue + f_score.append(msg[i][4]) + m_score.append(msg[i][5]) + + silence_rate = len(f_score) / len(msg) + if silence_rate < 0.4: + return 2, -1, silence_rate + + f_avg = sum(f_score) / len(f_score) + m_avg = sum(m_score) / len(m_score) + rate = f_avg / (f_avg + m_avg) + if rate > 0.91: + return 0, rate, silence_rate + if rate < 0.09: + return 1, rate, silence_rate + return 2, rate, silence_rate + + def stargy_pure_vocal(self, msg): + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][0] > 0.01: # 非人声 + continue + f_score.append(msg[i][4]) + 
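+            # (score-column layout assumed from the strategies in this file:
+            # msg[i][0] is the strict vocal model's non-vocal score and
+            # msg[i][4]/msg[i][5] are the female/male scores; the matching
+            # male-score append follows)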
m_score.append(msg[i][5]) + + silence_rate = len(f_score) / len(msg) + if silence_rate < 0.6: + return 2, -1, silence_rate + + f_avg = sum(f_score) / len(f_score) + m_avg = sum(m_score) / len(m_score) + rate = f_avg / (f_avg + m_avg) + if rate > 0.6: + return 0, rate, silence_rate + if rate < 0.4: + return 1, rate, silence_rate + return 2, rate, silence_rate + + def process_one_file(self, msg): + ret, rate, silence_rate = self.stargy_pure_vocal(msg) + if ret == 2: + ret, rate, silence_rate = self.stargy_v1(msg) + return ret, rate, silence_rate, "v1" + return ret, rate, silence_rate, "v0" + + def process(self, log_file): + msgs = self.get_data(log_file) + f_f = 0 # 真女判女 + f_m = 0 # 真女判男 + f_o = 0 # 真女判其他 + m_m = 0 # 真男判男 + m_f = 0 # 真男判女 + m_o = 0 # 真男判其他 + + err_list = [] + for k, v in msgs.items(): + gender = v["gender"] + msg = v["scores"] + ret, rate, silence_rate, vtp = self.process_one_file(msg) + # if gender == 2 and ret != 0: + # print("2_other:{}, {}, rate={}".format(k, ret, rate)) + if gender == 0: + if ret != 0: + err_list.append([k, rate]) + if ret == 0: + f_f += 1 + elif ret == 1: + f_m += 1 + print("f_m:{},rate={},silence={}|{}".format(k, rate, silence_rate, vtp)) + else: + f_o += 1 + # print("f_o:{},rate={}".format(k, rate)) + if gender == 1: + if ret != 1: + err_list.append([k, rate]) + if ret == 0: + m_f += 1 + print("m_f:{},rate={},silence={}|{}".format(k, rate, silence_rate, vtp)) + elif ret == 1: + m_m += 1 + else: + m_o += 1 + # print("m_o:{},rate={}".format(k, rate)) + + print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o)) + print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + # 打印出结果 + # for ii in range(0, len(err_list)): + # dst_filename = "/".join(err_list[ii][0].split("/")[-3:]) + # cmd = "cp {} /data/datasets/music_voice_dataset_full/feature_no2000_err/{}".format(err_list[ii], + # dst_filename) + # print(err_list[ii]) + # print(cmd) + # os.system(cmd) + + +if __name__ == "__main__": + log_file = sys.argv[1] + gm = GetMethod() + gm.process(log_file) + + diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v4.py b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v4.py new file mode 100644 index 0000000..f1f43ec --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/ana/find_best_thread_v4.py @@ -0,0 +1,157 @@ +""" +确定一下如何从段级别判定歌曲级别的逻辑 +两个模型产生的结果 +""" +import numpy as np +import sys +import os +import torchvision.models.mobilenet + + +class GetMethod: + + def get_data(self, log): + ret = {} + with open(log) as f: + while True: + line = f.readline() + if not line: + break + line = line.strip() + arr = line.split(",") + filename = arr[0] + label = arr[1] + if filename not in ret.keys(): + ret[filename] = { + "gender": int(label), + "scores": [] + } + ret[filename]["scores"].append([]) + for i in range(3, len(arr)): + ret[filename]["scores"][-1].append(float(arr[i])) + return ret + + def stargy_v1(self, msg): + """ + 1 模型1的人声帧再去分男女 + 2 获取男/女的数值进行判断 + :param msg: + :return: + """ + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][0] > 0.5: # 非人声 + continue + f_score.append(msg[i][2]) + m_score.append(msg[i][3]) + + silence_rate = len(f_score) / len(msg) + if silence_rate < 0.4 or len(f_score) < 4: 
+ return 2, -1, silence_rate + + f_avg = sum(f_score) / len(f_score) + m_avg = sum(m_score) / len(m_score) + rate = f_avg / (f_avg + m_avg) + if rate > 0.75: + return 0, rate, silence_rate + if rate < 0.1: + return 1, rate, silence_rate + return 2, rate, silence_rate + + def stargy_pure_vocal(self, msg): + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][0] > 0.5: # 非人声 + continue + f_score.append(msg[i][2]) + m_score.append(msg[i][3]) + + silence_rate = len(f_score) / len(msg) + if silence_rate < 0.4: + return 2, -1, silence_rate + + f_avg = sum(f_score) / len(f_score) + m_avg = sum(m_score) / len(m_score) + rate = f_avg / (f_avg + m_avg) + if rate > 0.65: + return 0, rate, silence_rate + if rate < 0.12: + return 1, rate, silence_rate + return 2, rate, silence_rate + + def process_one_file(self, msg, msg1): + ret, rate, silence_rate = self.stargy_pure_vocal(msg1) + # ret, rate, silence_rate = self.stargy_pure_vocal_v1(msg1) + if ret == 2: + ret, rate, silence_rate = self.stargy_v1(msg) + return ret, rate, silence_rate, "v1" + return ret, rate, silence_rate, "v0" + + def process(self, log_file, log_file_strict): + msgs = self.get_data(log_file) + msgs1 = self.get_data(log_file_strict) + f_f = 0 # 真女判女 + f_m = 0 # 真女判男 + f_o = 0 # 真女判其他 + m_m = 0 # 真男判男 + m_f = 0 # 真男判女 + m_o = 0 # 真男判其他 + + err_list = [] + for k, v in msgs.items(): + gender = v["gender"] + msg = v["scores"] + ret, rate, silence_rate, vtp = self.process_one_file(msg, msgs1[k]["scores"]) + # if gender == 2 and ret != 0: + # print("2_other:{}, {}, rate={}".format(k, ret, rate)) + if gender == 0: + if ret != 0: + err_list.append([k, rate]) + if ret == 0: + f_f += 1 + elif ret == 1: + f_m += 1 + print("f_m:{},rate={},silence={}|{}".format(k, rate, silence_rate, vtp)) + else: + f_o += 1 + # print("f_o:{},rate={}".format(k, rate)) + if gender == 1: + if ret != 1: + err_list.append([k, rate]) + if ret == 0: + m_f += 1 + print("m_f:{},rate={},silence={}|{}".format(k, rate, silence_rate, vtp)) + elif ret == 1: + m_m += 1 + else: + m_o += 1 + # print("m_o:{},rate={}".format(k, rate)) + + print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o)) + print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + # 打印出结果 + # for ii in range(0, len(err_list)): + # dst_filename = "/".join(err_list[ii][0].split("/")[-3:]) + # cmd = "cp {} /data/datasets/music_voice_dataset_full/feature_no2000_err/{}".format(err_list[ii], + # dst_filename) + # print(err_list[ii]) + # print(cmd) + # os.system(cmd) + + +if __name__ == "__main__": + log_file = sys.argv[1] + log_file_strict = sys.argv[2] + gm = GetMethod() + gm.process(log_file, log_file_strict) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/ana/show_scatter.py b/AIMeiSheng/voice_classification/script/music_voice_class/ana/show_scatter.py new file mode 100644 index 0000000..c773d6f --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/ana/show_scatter.py @@ -0,0 +1,110 @@ +""" +展示数据 +""" +import sys +import matplotlib.pyplot as plt + + +def get_data(log): + ret = {} + with open(log) as f: + while True: + line = f.readline() + if not line: + break + line = line.strip() + arr = line.split(",") + filename = arr[0] + label = arr[1] + if filename not in ret.keys(): + ret[filename] = { 
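# The two-model cascade of process_one_file() above, condensed into one sketch.
# The row layout is inferred from the column indices used in stargy_*: index 0 is
# the non-voice score, indices 2 and 3 the female/male scores; the thresholds are
# the literals hard-coded in this file. `decide` is an illustrative name.
def decide(frames, hi, lo, min_voiced=0.4):
    f = [r[2] for r in frames if r[0] <= 0.5]
    m = [r[3] for r in frames if r[0] <= 0.5]
    if not f or len(f) / len(frames) < min_voiced:
        return 2  # too little voiced audio -> "other"
    f_avg, m_avg = sum(f) / len(f), sum(m) / len(m)
    rate = f_avg / (f_avg + m_avg)
    return 0 if rate > hi else (1 if rate < lo else 2)

# strict model first, the looser model only when the strict one abstains:
#   ret = decide(strict_scores, hi=0.65, lo=0.12)
#   if ret == 2:
#       ret = decide(loose_scores, hi=0.75, lo=0.1)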
+ "gender": int(label), + "scores": [] + } + ret[filename]["scores"].append([]) + for i in range(3, len(arr)): + ret[filename]["scores"][-1].append(float(arr[i])) + return ret + + +def stargy_v1(msg): + """ + 1 模型1的人声帧再去分男女 + 2 获取男/女的数值进行判断 + :param msg: + :return: + """ + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][0] > 0.5: # 非人声 + continue + f_score.append(msg[i][2]) + m_score.append(msg[i][3]) + if len(f_score) / len(msg) < 0.3: + return 2, -1 + + f_avg = sum(f_score) / len(f_score) + m_avg = sum(m_score) / len(m_score) + rate = f_avg / (f_avg + m_avg) + if rate > 0.9: + return 0, rate + if rate < 0.1: + return 1, rate + return 2, rate + + +def plot_point(items): + plt.figure() + plt.title("Show Distributed") + + colors = ['red', 'blue', 'green'] + labels = ['female', 'male', 'other'] + for idx, item in enumerate(items): + plt.scatter(item.keys(), item.values(), color=colors[idx], label=labels[idx]) + + plt.legend() + plt.xlabel('male_score') + plt.ylabel('distributed') + # plt.savefig(filename) + plt.show() + + +def process(filename): + """ + 1 载入数据 + 2 获取每首歌的分数 + 3 分男女其他按照分数做统计 + :param filename: + :return: + """ + msgs = get_data(filename) + female = {} + male = {} + other = {} + for k, v in msgs.items(): + gender = v["gender"] + msg = v["scores"] + ret, rate = stargy_v1(msg) + + cur_gender = female + if gender == 1: + cur_gender = male + elif gender == 2: + cur_gender = other + rate = int(rate * 100) + + if rate > 50 and rate < 70 and gender == 1: + print("key={} rate={}".format(k, rate)) + if rate < 50 and rate > 30 and gender == 0: + print("key={} rate={}".format(k, rate)) + + if rate not in cur_gender.keys(): + cur_gender[rate] = 0 + cur_gender[rate] += 1 + plot_point([female, male, other]) + + +if __name__ == "__main__": + file = sys.argv[1] + process(file) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/ana/test.py b/AIMeiSheng/voice_classification/script/music_voice_class/ana/test.py new file mode 100644 index 0000000..7de7d0f --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/ana/test.py @@ -0,0 +1,45 @@ +""" +临时使用 +""" +import sys + + +def load_file(filename): + ret = {} + with open(filename) as f: + while True: + line = f.readline() + line = line.strip() + if not line: + break + line = line.split(" ") + ret[line[1]] = int(line[0]) + return ret + + +def process(err_file, tot_file): + tot_dict = load_file(tot_file) + err_dict = load_file(err_file) + new_err_dict = {} + for k, v in err_dict.items(): + tot_num = tot_dict[k] + new_err_dict[k] = [v, tot_num] + a = sorted(new_err_dict.items(), key=lambda x: x[1][0], reverse=True) + err_tot_num = sum(err_dict.values()) + print("idx|filename|filenum|err_num|err_rate|cum_num|cum_rate") + cur_num = 0 + for i in range(0, len(a)): + filename = a[i][0].replace("/data/datasets/music_voice_dataset_full/feature_no2000/", "") + filename = filename.replace(".feature.npy", "") + cur_num += a[i][1][0] + filenum = a[i][1][1] + err_num = a[i][1][0] + err_rate = round(err_num / filenum, 2) + cum_rate = round(cur_num / err_tot_num, 2) + print("{}|{}|{}|{}|{}|{}|{}".format(i, filename, filenum, err_num, err_rate, cur_num, cum_rate)) + + +if __name__ == "__main__": + err_file = sys.argv[1] + tot_file = sys.argv[2] + process(err_file, tot_file) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/demo/get_vocal.py b/AIMeiSheng/voice_classification/script/music_voice_class/demo/get_vocal.py new file mode 100644 index 0000000..cabca79 --- /dev/null +++ 
b/AIMeiSheng/voice_classification/script/music_voice_class/demo/get_vocal.py @@ -0,0 +1,127 @@ +""" +获取干声中的人声段 +""" +import torch +from torch import nn +import librosa +import numpy as np +import os +import shutil + +FRAME_LEN = 128 +MFCC_LEN = 80 + + +class MusicVoiceV2Model(nn.Module): + def __init__(self): + super(MusicVoiceV2Model, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + nn.Conv2d(24, 32, 3, 2), # (62, 38) + nn.BatchNorm2d(32), + nn.ReLU(), + nn.Conv2d(32, 32, 3, 2), # (30, 18) + nn.BatchNorm2d(32), + nn.ReLU(), + nn.Conv2d(32, 16, 3, 2), # (14, 8) + nn.AvgPool2d((14, 8)), + ] + layer2 = [ + nn.Linear(16, 2), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 16]) + x = self.layer2(x) + return x + + +def get_one_mfcc(file_url): + data, sr = librosa.load(file_url, sr=16000) + mfcc = librosa.feature.mfcc(data, sr, n_fft=512, hop_length=256, n_mfcc=MFCC_LEN) + mfcc = mfcc.transpose() + return mfcc + + +class GetVocal: + + def __init__(self, model_path): + self.device = 'cpu' + model = MusicVoiceV2Model() + params = torch.load(model_path, map_location=self.device) + model.load_state_dict(params) + model.eval() + self.model = model + self.frame_num = FRAME_LEN + self.batch_size = 128 + + def get_batch_data(self, filename): + data = get_one_mfcc(filename) + new_data = [] + for i in range(self.frame_num // 2, len(data) - self.frame_num // 2, 10): + new_data.append(data[i - self.frame_num // 2:i + self.frame_num // 2]) + return new_data + + def predict(self, data): + tot_ret = [] + with torch.no_grad(): + for i in range(0, len(data), self.batch_size): + new_data = data[i: i + self.batch_size] + predicts = self.model(torch.from_numpy(np.array(new_data))) + _, predicts = predicts.max(dim=1) + tot_ret.extend(list(predicts.numpy())) + return tot_ret + + def get_vocal_line(self, msg): + """ + 从第一个1开始计数,数连续的1,直到不为1的地方,此时长度小于128,放弃,大于128,记录下来 + :param msg: + :return: + """ + lines = [] + st = -1 + for i in range(0, len(msg)): + if st == -1 and msg[i] == 1: + st = i + elif st > 0 and msg[i] == 0: + if i - st >= 128: + lines.append([st, i]) + st = -1 + return lines + + def get_tm(self, sec): + mm = int(sec / 60) + sec = sec - mm * 60 + 0.008 + return "00:{}:{}".format(mm, sec) + + def split_voacl(self, filename, lines): + out_dir = str(filename).split(".")[0] + if os.path.exists(out_dir): + shutil.rmtree(out_dir) + os.makedirs(out_dir) + for idx, (st, ed) in enumerate(lines): + st_sec = st * 0.16 + ed_sec = ed * 0.16 + print(st_sec, ed_sec) + cmd = "ffmpeg -i {} -ss {} -to {} {}/{}.wav".format(filename, self.get_tm(st_sec), self.get_tm(ed_sec), + out_dir, idx) + os.system(cmd) + + def process(self, filename): + data = self.get_batch_data(filename) + print("get_batch_data ok...") + ret_msg = self.predict(data) + rr = self.get_vocal_line(ret_msg) + self.split_voacl(filename, rr) + + +if __name__ == "__main__": + gv = GetVocal("model/CNN_epoch_9_0.9902759647735838.pth") + gv.process("resource/12.wav") diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/demo/model/CNN_epoch_9_0.9902759647735838.pth b/AIMeiSheng/voice_classification/script/music_voice_class/demo/model/CNN_epoch_9_0.9902759647735838.pth new file mode 100644 index 0000000..4f2d687 Binary files /dev/null and b/AIMeiSheng/voice_classification/script/music_voice_class/demo/model/CNN_epoch_9_0.9902759647735838.pth 
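# Where the 0.16 s per index in split_voacl() above comes from (derived, not
# stated in the file): the MFCC uses hop_length 256 at sr 16000, i.e.
# 256 / 16000 = 0.016 s per frame, and get_batch_data() strides 10 frames per
# prediction, so one predicted label covers 0.16 s of audio. Illustrative helper:
HOP, SR, STRIDE = 256, 16000, 10

def pred_index_to_seconds(i):
    return i * STRIDE * HOP / SR  # == i * 0.16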
differ diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/format_simple_label2label.py b/AIMeiSheng/voice_classification/script/music_voice_class/format_simple_label2label.py new file mode 100644 index 0000000..f6c307a --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/format_simple_label2label.py @@ -0,0 +1,72 @@ +""" +将标注时使用的文件格式转化为切割时使用的格式 +两件事情: +1 增加性别列 +2 时间列增加00:0 +""" +import sys + + +def load_file(filename): + lines = [] + with open(filename) as f: + while True: + line = f.readline() + if not line: + break + lines.append(line) + return lines + + +def write2file(lines, filename): + with open(filename, "w") as f: + for line in lines: + f.write(line + "\n") + + +def format_file(in_file, gender, out_file, num): + lines = load_file(in_file) + data = [] + for idx, line in enumerate(lines): + if idx > 7: + line = line.strip().split(",") + if len(line) > 0: + data.append(line) + new_data = ["gender,id,type,idx,st,ed,rate"] + cur_id = "" + cur_idx = 0 + cur_device_num = 0 + for dt in data: + if len(dt) < 5: + break + iid = dt[0] + if iid != cur_id: + cur_id = iid + cur_idx = 0 + cur_device_num += 1 + # 检查一下 + """ + 1 类型是1,比率小于0 + 2 类型是2/3, 比率>0 + """ + tp = int(dt[1]) + rate = float(dt[4]) + if tp == 1 and rate < 0 or (tp in (2, 3) and rate > 0): + print("err!->{}".format(dt)) + exit(-1) + + new_line = "{},{},{},{},00:0{},00:0{},{}".format(gender, dt[0], dt[1], cur_idx, dt[2], dt[3], dt[4]) + cur_idx += 1 + if cur_device_num > num: + break + + new_data.append(new_line) + write2file(new_data, out_file) + + +if __name__ == "__main__": + in_file = sys.argv[1] + gender = sys.argv[2] + out_file = sys.argv[3] + num = int(sys.argv[4]) + format_file(in_file, gender, out_file, num) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_acc_dataset.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_acc_dataset.py new file mode 100644 index 0000000..032c3ac --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_acc_dataset.py @@ -0,0 +1,107 @@ +""" +每种情况取32个伴奏,每个伴奏取1分钟 +从30s-1:30s + +此时的work_dir,是每个地区 +例如: av_area_in +""" + +import os +import sys +import glob +import shutil + + +def read_file(filepath, header=True): + lines = [] + with open(filepath) as f: + while True: + line = f.readline() + if not line: + break + if header: + header = False + continue + lines.append(line) + return lines + + +def exec_cmd(cmd): + print(cmd) + ret = os.system(cmd) + if ret != 0: + return False + return True + + +class GenAccDataset: + + def __init__(self, work_dir, out_dir): + self._work_dir = work_dir + self._out_dir = out_dir + self._cache_dir = os.path.join(work_dir, "cache") + if os.path.exists(self._cache_dir): + shutil.rmtree(self._cache_dir) + os.makedirs(self._cache_dir) + + def get_files(self): + female_files = glob.glob(os.path.join(self._work_dir, "female/*acc.mp4")) + male_files = glob.glob(os.path.join(self._work_dir, "male/*acc.mp4")) + return female_files, male_files + + def get_file_by_label(self): + file_path = os.path.join(self._work_dir, "label.txt") + data = read_file(file_path) + male_files = set() + female_files = set() + for line in data: + arr = line.split(",") + gender = arr[0] + rid = arr[1] + if gender == "male": + male_files.add("{}/{}/{}_acc.mp4".format(self._work_dir, gender, rid)) + elif gender == "female": + female_files.add("{}/{}/{}_acc.mp4".format(self._work_dir, gender, rid)) + return male_files, female_files + + def process_one(self, file, gender): + """ + 1 先指定码率为32kbps + 2 
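# A worked example of the rewrite format_file() above performs (values invented).
# An annotation row "584342372,1,0:25.625,0:31.192,0.03" under gender "female"
# becomes
#     female,584342372,1,0,00:00:25.625,00:00:31.192,0.03
# that is: the gender column is prepended, a per-recording running index is
# inserted, and "00:0" is prefixed to both timestamps so that downstream ffmpeg
# calls receive full HH:MM:SS values.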
切分 + 3 转码为wav [16k mono] + :param gender: + :param file: + :return: + """ + filename = str(file).split("/")[-1] + tmp_file = os.path.join(self._cache_dir, filename) + cmd = "ffmpeg -i {} -b:a 32k -ss 00:00:30 -to 00:00:46 {}".format(file, tmp_file) + if not exec_cmd(cmd): + print("exec_cmd:{} err".format(cmd)) + return + + out_dir = os.path.join(self._out_dir, "acc/{}".format(gender)) + if not os.path.exists(out_dir): + os.makedirs(out_dir) + cut_wav_file = os.path.join(out_dir, filename + ".wav") + cmd = "ffmpeg -i {} -ar 16000 -ac 1 {}".format(tmp_file, cut_wav_file) + if not exec_cmd(cmd): + print("exec_cmd:{} err".format(cmd)) + return + + def process(self): + # female, male = self.get_files() + female, male = self.get_file_by_label() + for idx, f in enumerate(female): + self.process_one(f, "female") + for idx, f in enumerate(male): + self.process_one(f, "male") + + +if __name__ == "__main__": + w_dir = sys.argv[1] + o_dir = sys.argv[2] + # w_dir = "/data/datasets/music_voice_dataset_2000/dataset/av_area_in" # 注意一共有四个大区,这是其中一个 + # o_dir = "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in" + gad = GenAccDataset(w_dir, o_dir) + gad.process() diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset.py new file mode 100644 index 0000000..cd45a6d --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset.py @@ -0,0 +1,140 @@ +""" +预处理数据 +1 使用ebur128做拉伸 +2 切分出纯人声->伴奏占比<=0.1,其余的都为非人声,只要大于等于3s长度的音频段 +3 从纯伴奏中切割出一部分纯伴奏 +4 提取特征,做数据集 +文件结构为: +处理之后,最终结果 +dataset: + --av_area_in + --pure + --female + --xxx.wav + --male + --other + --av_area_sa +""" + +import os +import sys + +# EBUR128_BIN = "/Users/yangjianli/linux/opt/soft/bin/standard_audio_no_cut" +EBUR128_BIN = "/opt/soft/bin/standard_audio_no_cut" + + +def read_file(path): + lines = [] + with open(path) as f: + while True: + line = f.readline() + if not line: + break + lines.append(line) + return lines + + +def exec_cmd(cmd): + print(cmd) + ret = os.system(cmd) + if ret != 0: + return False + return True + + +def get_sec(tm): + """ + 00:00:25.625,00:00:31.192 + 将时间解析为s + :param tm: + :return: + """ + out = str(tm).split(":") + out_sec = int(out[0]) * 60 * 60 + int(out[1]) * 60 + float(out[2]) + return out_sec + + +class GenVoiceDataset: + + def __init__(self, work_dir, out_dir): + self._work_dir = work_dir + self._out_dir = out_dir + + # gender,id,type,idx,st,ed,rate + self._msg = self.get_msg() + self._cache_dir = os.path.join(work_dir, ".cache") + + def get_msg(self): + """ + 处理成id=>[ + [gender, rtype, idx, st, ed, rate] + ] + :return: + """ + label = read_file(os.path.join(self._work_dir, "label.txt")) + msg = {} + for i in range(1, len(label)): + gender, rid, rtype, idx, st, ed, rate = label[i].split(",") + if rid not in msg.keys(): + msg[rid] = [] + msg[rid].append([gender, rtype, idx, st, ed, rate]) + return msg + + def process_one(self, rid, msg): + # 先处理整个音频的拉伸 + gender = msg[0][0] + src_path = os.path.join(self._work_dir, os.path.join(gender, rid + ".mp4")) + cache_path = os.path.join(self._cache_dir, rid) + if not os.path.exists(cache_path): + os.makedirs(cache_path) + + # 转码 + wav_path = os.path.join(cache_path, rid + ".wav") + cmd = "ffmpeg -i {} -ar 16000 -ac 1 {}".format(src_path, wav_path) + if not os.path.exists(wav_path): + if not exec_cmd(cmd): + print("exec_cmd:{} err!".format(cmd)) + return + + # 均衡化 + wav_eb_path = os.path.join(cache_path, rid + "_eb.wav") + cmd = "{} {} 
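# Two effective constants in gen_dataset.py differ from its own comments: the
# docstring promises segments of at least 3 s with accompaniment ratio <= 0.1 as
# pure vocal, but the cutting code further down keeps
# get_sec(ed) - get_sec(st) >= 2.1 and rate <= 0.05. Named here for reference
# only; the repo hard-codes the literals.
MIN_SEGMENT_SEC = 2.1   # not 3.0
MAX_ACC_RATIO = 0.05    # not 0.1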
{}".format(EBUR128_BIN, wav_path, wav_eb_path) + if not os.path.exists(wav_eb_path): + if not exec_cmd(cmd): + print("exec_cmd:{} err!".format(cmd)) + return + + # 切割成多个 + for idx in range(0, len(msg)): + gender, rtype, idx, st, ed, rate = msg[idx] + + # rtype=1并且float(rate) <=0.1认为是纯人声,其他认为是非纯人声,只要大于等于3s的音频 + # out_dir/rtype/gender/rid_idx.wav + if get_sec(ed) - get_sec(st) >= 2.1: + if int(rtype) == 1 and float(rate) <= 0.05: + pwd = os.path.join(self._out_dir, + os.path.join("pure", gender)) + if not os.path.exists(pwd): + os.makedirs(pwd) + out_wav = os.path.join(pwd, "{}_{}.wav".format(rid, idx)) + else: + pwd = os.path.join(self._out_dir, + os.path.join("other", gender)) + if not os.path.exists(pwd): + os.makedirs(pwd) + out_wav = os.path.join(pwd, "{}_{}.wav".format(rid, idx)) + cmd = "ffmpeg -i {} -ss {} -to {} {}".format(wav_eb_path, st, ed, out_wav) + exec_cmd(cmd) + + def process(self): + for rid, msg in self._msg.items(): + self.process_one(rid, msg) + + +if __name__ == "__main__": + work_dir = sys.argv[1] + dst_dir = sys.argv[2] + # work_dir = "/data/datasets/music_voice_dataset_2000/dataset/av_area_in" # 注意一共有四个大区,这是其中一个 + # dst_dir = "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in" + gvd = GenVoiceDataset(work_dir, dst_dir) + gvd.process() diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_feature.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_feature.py new file mode 100644 index 0000000..f3fb396 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_feature.py @@ -0,0 +1,103 @@ +""" +提取特征的代码 +输出结构与之前一致 +dataset_feature: + --av_area_in + --pure + --female + --xxx.feature + --male + --other + --av_area_sa + +1 载入数据 +2 提取特征 +""" + +import os +import sys +import glob +import librosa +import numpy as np + + +def get_one_mfcc(file_url): + data, sr = librosa.load(file_url, sr=16000) + if len(data) < 512: + return [] + mfcc = librosa.feature.mfcc(data, sr, n_fft=512, hop_length=256, n_mfcc=80) + mfcc = mfcc.transpose() + return mfcc + + +class CalcVoiceDatasetSamples: + """ + 统计文件夹下的数据的帧数 + 此时直接到female/male层级做统计 + """ + + def __init__(self, work_dir): + self._work_dir = work_dir + + def get_files(self): + return glob.glob(os.path.join(self._work_dir, "*.feature.npy")) + + def get_one_num(self, file): + data = np.load(file) + return data.shape[0] - 127 + + def process(self): + files = self.get_files() + nums = 0 + for file in files: + nums += self.get_one_num(file) + print("work_dir={}|num={}".format(self._work_dir, nums)) + + +def calc_voice_dataset_samples(feature_dir): + """ + 这个到特征的根目录就可以 + :param feature_dir: + :return: + """ + dirs = glob.glob(os.path.join(feature_dir, "*/*/*")) + for dir in dirs: + cvds = CalcVoiceDatasetSamples(dir) + cvds.process() + + +class GetDatasetFeature: + + def __init__(self, work_dir, out_dir): + self._work_dir = work_dir + self._out_dir = out_dir + + def get_files(self): + return glob.glob(os.path.join(self._work_dir, "*/*/*/*.wav")) + + def process_one(self, file): + out_path = str(file).replace(self._work_dir, self._out_dir) + out_path = str(out_path).replace("wav", "feature") + dir = os.path.dirname(out_path) + if not os.path.exists(dir): + os.makedirs(dir) + mfcc = get_one_mfcc(file) + if len(mfcc) > 1: + np.save(out_path, mfcc) + + def process(self): + files = self.get_files() + for file in files: + print("process ... 
{}".format(file)) + self.process_one(file) + + +if __name__ == "__main__": + work_dir = sys.argv[1] + out_dir = sys.argv[2] + # # work_dir = "/data/datasets/music_voice_dataset_2000/split_10_bf" + # # out_dir = "/data/datasets/music_voice_dataset_2000/feature_10_bf" + gdf = GetDatasetFeature(work_dir, out_dir) + gdf.process() + # work_dir = sys.argv[1] + # calc_voice_dataset_samples(work_dir) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_feature_v1.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_feature_v1.py new file mode 100644 index 0000000..b52d61f --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_feature_v1.py @@ -0,0 +1,107 @@ +""" +提取幅度谱特征的代码 +输出结构与之前一致 +dataset_feature: + --av_area_in + --pure + --female + --xxx.feature + --male + --other + --av_area_sa + +1 载入数据 +2 提取特征 +""" + +import os +import sys +import glob +import librosa +import numpy as np + + +def get_one_mfcc(file_url): + data, sr = librosa.load(file_url, sr=16000) + mfcc = librosa.feature.mfcc(data, sr, n_fft=512, hop_length=256, n_mfcc=80) + mfcc = mfcc.transpose() + return mfcc + + +def get_one_amp(file_url): + data, sr = librosa.load(file_url, sr=16000) + stft = np.abs(librosa.stft(data, n_fft=512, hop_length=256)) + stft = librosa.amplitude_to_db(stft) + return stft.transpose() + + +class CalcVoiceDatasetSamples: + """ + 统计文件夹下的数据的帧数 + 此时直接到female/male层级做统计 + """ + + def __init__(self, work_dir): + self._work_dir = work_dir + + def get_files(self): + return glob.glob(os.path.join(self._work_dir, "*.feature.npy")) + + def get_one_num(self, file): + data = np.load(file) + return data.shape[0] - 127 + + def process(self): + files = self.get_files() + nums = 0 + for file in files: + nums += self.get_one_num(file) + print("work_dir={}|num={}".format(self._work_dir, nums)) + + +def calc_voice_dataset_samples(feature_dir): + """ + 这个到特征的根目录就可以 + :param feature_dir: + :return: + """ + dirs = glob.glob(os.path.join(feature_dir, "*/*/*")) + for dir in dirs: + cvds = CalcVoiceDatasetSamples(dir) + cvds.process() + + +class GetDatasetFeature: + + def __init__(self, work_dir, out_dir): + self._work_dir = work_dir + self._out_dir = out_dir + + def get_files(self): + return glob.glob(os.path.join(self._work_dir, "*/acc/*/*.wav")) + + def process_one(self, file): + out_path = str(file).replace(self._work_dir, self._out_dir) + out_path = str(out_path).replace("wav", "feature") + dir = os.path.dirname(out_path) + if not os.path.exists(dir): + os.makedirs(dir) + mfcc = get_one_amp(file) + np.save(out_path, mfcc) + + def process(self): + files = self.get_files() + for file in files: + print("process ... 
{}".format(file)) + self.process_one(file) + + +if __name__ == "__main__": + # work_dir = sys.argv[1] + # out_dir = sys.argv[2] + # work_dir = "/data/datasets/music_voice_dataset_2000/split_10_bf" + # out_dir = "/data/datasets/music_voice_dataset_2000/feature_amp_10_bf" + # gdf = GetDatasetFeature(work_dir, out_dir) + # gdf.process() + work_dir = sys.argv[1] + calc_voice_dataset_samples(work_dir) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_files.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_files.py new file mode 100644 index 0000000..873ee02 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_files.py @@ -0,0 +1,107 @@ +""" +预处理数据 +1 使用ebur128做拉伸 +2 转码为16k单声道的音频 +文件结构为: +处理之后,最终结果 +dataset: + --av_area_in + --female + --xxx.wav + --male + --av_area_sa +""" + +import os +import sys +import glob +import librosa +import numpy as np +import shutil +import multiprocessing as mp + +# EBUR128_BIN = "/Users/yangjianli/linux/opt/soft/bin/standard_audio_no_cut" +EBUR128_BIN = "/opt/soft/bin/standard_audio_no_cut" + + +def get_one_mfcc(file_url): + data, sr = librosa.load(file_url, sr=16000) + mfcc = librosa.feature.mfcc(data, sr, n_fft=512, hop_length=256, n_mfcc=80) + mfcc = mfcc.transpose() + return mfcc + + +def exec_cmd(cmd): + print(cmd) + ret = os.system(cmd) + if ret != 0: + return False + return True + + +class GenVoiceDataset: + + def __init__(self, work_dir, out_dir): + self._work_dir = work_dir + self._out_dir = out_dir + self._cache_dir = os.path.join(work_dir, ".cache") + + self.files = self.get_files() + + def get_files(self): + return glob.glob(os.path.join(self._work_dir, "*.mp4")) + + def process_one(self, in_file, pure_filename, cache_dir, out_feature_file): + print(in_file, pure_filename, cache_dir, out_feature_file) + + wav_path = os.path.join(cache_dir, pure_filename + ".wav") + cmd = "ffmpeg -i {} -ar 16000 -ac 1 {}".format(in_file, wav_path) + if not os.path.exists(wav_path): + if not exec_cmd(cmd): + print("exec_cmd:{} err!".format(cmd)) + return + + # 均衡化 + eq_path = os.path.join(cache_dir, pure_filename + "_eq.wav") + cmd = "{} {} {}".format(EBUR128_BIN, wav_path, eq_path) + if not os.path.exists(eq_path): + if not exec_cmd(cmd): + print("exec_cmd:{} err!".format(cmd)) + return + mfcc = get_one_mfcc(eq_path) + np.save(out_feature_file, mfcc) + print("save_feature_file={}_shape={}".format(out_feature_file, mfcc.shape)) + + os.unlink(wav_path) + os.unlink(eq_path) + + def process_multi(self): + process_nums = 8 + pool = mp.Pool(processes=process_nums) + bth = int(len(self.files) / process_nums) + for idx in range(0, len(self.files), bth): + pool.apply_async(self.process, args=(self.files[idx:idx + bth], )) + pool.close() + pool.join() + + def process(self, data): + for file in data: + tp_dir = os.path.dirname(file) + cache_dir = tp_dir.replace(self._work_dir, self._cache_dir) + if not os.path.exists(cache_dir): + os.makedirs(cache_dir) + pure_name = os.path.basename(file).replace(".mp4", "") + out_dir = tp_dir.replace(self._work_dir, self._out_dir) + if not os.path.exists(out_dir): + os.makedirs(out_dir) + out_filename = os.path.join(out_dir, pure_name + ".feature") + self.process_one(file, pure_name, cache_dir, out_filename) + + +if __name__ == "__main__": + work_dir = sys.argv[1] + dst_dir = sys.argv[2] + # work_dir = "/data/datasets/music_voice_dataset_full/dataset_no2000" # 不包含标注使用的那2000 + # dst_dir = "/data/datasets/music_voice_dataset_full/dataset_no2000" + gvd = 
GenVoiceDataset(work_dir, dst_dir) + gvd.process_multi() diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_files_v1.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_files_v1.py new file mode 100644 index 0000000..6bf798e --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_dataset_files_v1.py @@ -0,0 +1,114 @@ +""" +预处理数据 +1 使用ebur128做拉伸 +2 转码为16k单声道的音频 +文件结构为: +处理之后,最终结果 +dataset: + --av_area_in + --female + --xxx.wav + --male + --av_area_sa +""" + +import os +import sys +import glob +import librosa +import numpy as np +import shutil +import multiprocessing as mp + +# EBUR128_BIN = "/Users/yangjianli/linux/opt/soft/bin/standard_audio_no_cut" +EBUR128_BIN = "/opt/soft/bin/standard_audio_no_cut" + + +def get_one_mfcc(file_url): + data, sr = librosa.load(file_url, sr=16000) + mfcc = librosa.feature.mfcc(data, sr, n_fft=512, hop_length=256, n_mfcc=80) + mfcc = mfcc.transpose() + return mfcc + + +def get_one_amp(file_url): + data, sr = librosa.load(file_url, sr=16000) + stft = np.abs(librosa.stft(data, n_fft=512, hop_length=256)) + stft = librosa.amplitude_to_db(stft) + return stft.transpose() + + +def exec_cmd(cmd): + print(cmd) + ret = os.system(cmd) + if ret != 0: + return False + return True + + +class GenVoiceDataset: + + def __init__(self, work_dir, out_dir): + self._work_dir = work_dir + self._out_dir = out_dir + self._cache_dir = os.path.join(work_dir, ".cache") + + self.files = self.get_files() + + def get_files(self): + return glob.glob(os.path.join(self._work_dir, "*/*/*.mp4")) + + def process_one(self, in_file, pure_filename, cache_dir, out_feature_file): + print(in_file, pure_filename, cache_dir, out_feature_file) + + wav_path = os.path.join(cache_dir, pure_filename + ".wav") + cmd = "ffmpeg -i {} -ar 16000 -ac 1 {}".format(in_file, wav_path) + if not os.path.exists(wav_path): + if not exec_cmd(cmd): + print("exec_cmd:{} err!".format(cmd)) + return + + # 均衡化 + eq_path = os.path.join(cache_dir, pure_filename + "_eq.wav") + cmd = "{} {} {}".format(EBUR128_BIN, wav_path, eq_path) + if not os.path.exists(eq_path): + if not exec_cmd(cmd): + print("exec_cmd:{} err!".format(cmd)) + return + mfcc = get_one_amp(eq_path) + np.save(out_feature_file, mfcc) + print("save_feature_file={}_shape={}".format(out_feature_file, mfcc.shape)) + + os.unlink(wav_path) + os.unlink(eq_path) + + def process_multi(self): + process_nums = 8 + pool = mp.Pool(processes=process_nums) + bth = int(len(self.files) / process_nums) + for idx in range(0, len(self.files), bth): + pool.apply_async(self.process, args=(self.files[idx:idx + bth],)) + pool.close() + pool.join() + + def process(self, data): + for file in data: + tp_dir = os.path.dirname(file) + cache_dir = tp_dir.replace(self._work_dir, self._cache_dir) + if not os.path.exists(cache_dir): + os.makedirs(cache_dir) + pure_name = os.path.basename(file).replace(".mp4", "") + out_dir = tp_dir.replace(self._work_dir, self._out_dir) + if not os.path.exists(out_dir): + os.makedirs(out_dir) + out_filename = os.path.join(out_dir, pure_name + ".feature") + self.process_one(file, pure_name, cache_dir, out_filename) + + +if __name__ == "__main__": + work_dir = sys.argv[1] + dst_dir = sys.argv[2] + # work_dir = "/data/datasets/music_voice_dataset_full/dataset_no2000" # 不包含标注使用的那2000 + # dst_dir = "/data/datasets/music_voice_dataset_full/dataset_no2000" + gvd = GenVoiceDataset(work_dir, dst_dir) + gvd.process_multi() diff --git 
a/AIMeiSheng/voice_classification/script/music_voice_class/gen_err_dataset.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_err_dataset.py new file mode 100644 index 0000000..41277a9 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_err_dataset.py @@ -0,0 +1,48 @@ +""" +根据样本和已经预测好的结果构建数据集 +""" +import os +import glob + + +class GenDataSetMap: + + def __init__(self, work_dir): + self._ma_label_full = os.path.join(work_dir, "ma_label_32_full.txt") + self._ma_label = os.path.join(work_dir, "ma_label_32.txt") + self.files = glob.glob(os.path.join(work_dir, "*/*/*npy")) + + def load_file(self): + lines = [] + with open(self._ma_label_full) as f: + while True: + line = f.readline() + if not line: + break + lines.append(line) + return lines + + def get_ids(self): + ids = [] + for file in self.files: + id = file.split("/")[-1] + ids.append(id) + return ids + + def process(self): + lines = self.load_file() + ids = self.get_ids() + dst_line = [] + for line in lines: + filename = line.split(",")[0].split("/")[-1] + if filename in ids: + dst_line.append(line) + # 写入文件 + with open(self._ma_label, "w") as f: + for line in dst_line: + f.write(line) + + +if __name__ == "__main__": + gds = GenDataSetMap("/data/datasets/music_voice_dataset_full/feature_no2000_err") + gds.process() diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_file_dict.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_file_dict.py new file mode 100644 index 0000000..3690451 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_file_dict.py @@ -0,0 +1,56 @@ +""" +一次性 +生成一个字典,代表了特征文件的人声占比情况. -1 代表无人声 0代表无伴奏 +""" +import sys +import os + + +def load(file, header=True): + lines = [] + with open(file) as f: + while True: + line = f.readline() + if header: + header = False + continue + if not line: + break + lines.append(line) + return lines + + +def write2file(filename, data): + with open(filename, "w") as f: + for line in data: + f.write(line + "\n") + + +def process(area, in_file, out_file): + msg = load(in_file) + new_ret_in = [] + new_ret_no = [] + for line in msg: + line = line.strip().split(",") + gender = line[0] + id = line[1] + tp = "other" + if int(line[2]) == 1 and float(line[-1]) <= 0.05: + tp = "pure" + idx = line[3] + new_line_format = "/data/datasets/music_voice_dataset_2000/feature_rec_005/{area}/{tp}/{gender}/{id}_{idx}.feature.npy" + new_line = new_line_format.format(area=area, tp=tp, gender=gender, id=id, idx=idx) + if os.path.exists(new_line): + new_line += ",{}".format(line[-1]) + new_ret_in.append(new_line) + else: + new_ret_no.append(new_line) + print("in:{} no={}".format(len(new_ret_in), len(new_ret_no))) + write2file(out_file, new_ret_in) + + +if __name__ == "__main__": + in_file = sys.argv[1] + area = sys.argv[2] + out_file = sys.argv[3] + process(area, in_file, out_file) diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_pure_acc_gender_dataset.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_pure_acc_gender_dataset.py new file mode 100644 index 0000000..2bda43f --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_pure_acc_gender_dataset.py @@ -0,0 +1,49 @@ +""" +将生成的伴奏的文件写入到索引文件 +""" +import os +import sys +import glob +import time +import torch +import numpy as np +import torch.nn as nn + +MFCC_LEN = 80 +FRAME_LEN = 128 + +os.environ["LRU_CACHE_CAPACITY"] = "1" + + +class GenAccGenderDataset: + def __init__(self, work_dir): + """ + 
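# The ma_label_* index files produced by gen_pure_acc_gender_dataset.py here and
# by gen_pure_gender_dataset.py below share one line format, inferred from their
# writer loops:
#     <feature .npy path>,<window end frame j>,<label>
# where j walks range(FRAME_LEN, len(data), 32), i.e. the end of a 128-frame
# window sampled every 32 frames, and label is 0 = female, 1 = male,
# 2 = accompaniment / rejected. Illustrative helper:
def window_ends(n_frames, win=128, step=32):
    return list(range(win, n_frames, step))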
work_dir传入gen_data_files生成的特征文件目录[下一级就是地区] + :param work_dir: + """ + self.work_dir = work_dir + self.label_txt = os.path.join(work_dir, "ma_label_acc_32.txt") + + def process(self): + """ + 将数据载入到内存 + :return: + """ + feature_list = [] + acc_files = glob.glob(os.path.join(self.work_dir, "*/*/*_acc.feature.npy")) + step = 32 + for i in range(len(acc_files)): + data = np.load(acc_files[i]) + for j in range(FRAME_LEN, len(data), step): + feature_list.append([acc_files[i], j, 2]) + print("get_dataset ok...") + with open(self.label_txt, "w") as f: + for dt in feature_list: + line = "{},{},{}".format(dt[0], dt[1], dt[2]) + f.write(line + "\n") + + +if __name__ == "__main__": + work_dir = "/data/datasets/music_voice_dataset_full/feature_no2000_v1_train" + gpd = GenAccGenderDataset(work_dir) + gpd.process() diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_pure_gender_dataset.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_pure_gender_dataset.py new file mode 100644 index 0000000..7269457 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_pure_gender_dataset.py @@ -0,0 +1,263 @@ +""" +利用模型来辅助分类人声和伴奏 +""" +import os +import sys +import glob +import time +import torch +import numpy as np +import torch.nn as nn + +MFCC_LEN = 80 +FRAME_LEN = 128 + +os.environ["LRU_CACHE_CAPACITY"] = "1" + + +class MusicVoiceV3Model(nn.Module): + def __init__(self): + super(MusicVoiceV3Model, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + nn.Conv2d(24, 32, 3, 2), # (62, 38)--> v3_2新增 + nn.BatchNorm2d(32), + nn.ReLU(), + nn.Conv2d(32, 32, 3, 2), # (62, 38) + nn.BatchNorm2d(32), + nn.ReLU(), + # nn.Conv2d(32, 32, 3, 2), # (30, 18) + # nn.BatchNorm2d(32), + # nn.ReLU(), + nn.Conv2d(32, 16, 3, 2), # (14, 8) + nn.BatchNorm2d(16), + nn.ReLU(), + nn.Conv2d(16, 16, 3, 2), # (6, 3) + # nn.AvgPool2d((14, 8)), + ] + layer2 = [ + nn.Linear(16 * 6 * 3, 2), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 16 * 6 * 3]) + x = self.layer2(x) + return x + + +class MusicVoiceV4Model(nn.Module): + def __init__(self): + super(MusicVoiceV4Model, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (62, 38) -> layers2 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (60, 36) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (58, 34) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (56, 32) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (27, 15) -> layer5 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 32, 3), # (25, 13) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (12, 6) -> layers8 + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3), # (10, 4) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (4, 1) -> layers10 + ] + layer2 = [ + nn.Linear(32 * 4 * 1, 2), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 32 * 4 * 1]) + x = self.layer2(x) + return x + + +class MusicVoiceV5Model(nn.Module): + def __init__(self): + super(MusicVoiceV5Model, self).__init__() + + def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, 
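# Quick shape sanity check for the two CNNs fully defined above (a sketch
# assuming the class definitions in this file and a working torch install; batch
# of 2 so BatchNorm is happy in train mode):
import torch

x = torch.randn(2, 128, 80)            # forward() reshapes to (B, 1, 128, 80)
assert MusicVoiceV3Model()(x).shape == (2, 2)
assert MusicVoiceV4Model()(x).shape == (2, 2)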
oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + conv_bn(1, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + nn.AvgPool2d((4, 3)), + ) + self.fc = nn.Linear(1024, 2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.model(x) + x = x.view(-1, 1024) + x = self.fc(x) + return x + + +class GenPureGenderDataset: + def __init__(self, work_dir, model_path): + """ + work_dir传入gen_data_files生成的特征文件目录[下一级就是地区] + :param work_dir: + """ + self.work_dir = work_dir + print("cur_work_dir={}".format(work_dir)) + self.label_txt = os.path.join(work_dir, "ma_label_32_strict_v1.txt") + # 载入模型 + self.device = 'cuda' + model = MusicVoiceV5Model() + params = torch.load(model_path) + model.load_state_dict(state_dict=params) + model.eval() + model.to(self.device) + self.model = model + self.get_dataset() + + def get_dataset(self): + """ + 将数据载入到内存 + :return: + """ + st = time.time() + female_files = glob.glob(os.path.join(self.work_dir, "*/female/*.feature.npy")) + male_files = glob.glob(os.path.join(self.work_dir, "*/male/*.feature.npy")) + self._dataset = {} + self._feature_list = [] + step = 32 + for i in range(len(female_files)): + if "acc" in female_files[i]: + continue + data = np.load(female_files[i]) + self._dataset[female_files[i]] = data + for j in range(FRAME_LEN, len(data), step): + self._feature_list.append([female_files[i], j, 0]) + + for i in range(len(male_files)): + if "acc" in male_files[i]: + continue + data = np.load(male_files[i]) + self._dataset[male_files[i]] = data + for j in range(FRAME_LEN, len(data), step): + self._feature_list.append([male_files[i], j, 1]) + print("get_dataset ok... sp={}".format(time.time() - st)) + + def process(self): + """ + 更新一下self._feature_list中label的标签 + :return: + """ + tm = time.time() + data_gen_tm = 0 + data_to_numpy = 0 + predict_tm = 0 + calc_tm = 0 + with torch.no_grad(): + print("feature_list 长度为:{}".format(len(self._feature_list))) + batch_size = 256 + for i in range(0, len(self._feature_list), batch_size): + st = time.time() + data = [] + for j in range(len(self._feature_list[i:i + batch_size])): + dt = self._feature_list[i + j] # 文件地址, 帧号, label + mem = self._dataset[dt[0]] + data.append(mem[dt[1] - FRAME_LEN:dt[1]]) + data_gen_tm += time.time() - st + + st = time.time() + data = torch.from_numpy(np.array(data)).to(self.device) + data_to_numpy += time.time() - st + + st = time.time() + predicts = self.model(data) + predict_tm += time.time() - st + + st = time.time() + predicts_score = torch.nn.functional.softmax(predicts, dim=1) + predicts_score = predicts_score.to("cpu").numpy() + # 准一点: 要求1[人声]的分数>0.8再搞进来,否则不参与训练 + for ii in range(0, len(predicts_score)): + if predicts_score[ii][1] < 0.5: + self._feature_list[i + ii][2] = 2 + calc_tm += time.time() - st + + if i % (batch_size * 10) == 0: + print("process ... 
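# The in-code comment above promises "keep only frames whose voice score > 0.8",
# but the condition actually applied is predicts_score[ii][1] < 0.5 -> label 2,
# i.e. an ordinary 0.5 probability cut-off. Condensed (illustrative name):
import torch

def rejected(logits, thr=0.5):
    # True where the softmax "voice" probability falls below thr; those windows
    # get label 2 and drop out of gender training.
    return torch.nn.functional.softmax(logits, dim=1)[:, 1] < thr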
{}/{} tm={}".format(i, len(self._feature_list), time.time() - tm)) + print("{},{},{},{}".format(data_gen_tm, data_to_numpy, predict_tm, calc_tm)) + + with open(self.label_txt, "w") as f: + for dt in self._feature_list: + line = "{},{},{}".format(dt[0], dt[1], dt[2]) + f.write(line + "\n") + + +if __name__ == "__main__": + work_dir = "/data/datasets/music_voice_dataset_full/feature_no2000" + # model_path = "/data/datasets/music_voice_dataset_2000/models/voice_v5_prod/CNN_epoch_3_0.9919894003121993.pth" + model_strict_path = "/data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod_rec/CNN_epoch_1_0.8822981570144947.pth" + # model_path = "/data/datasets/music_voice_dataset_2000/voice_10_bf_models_prod_v4/CNN_epoch_2_0.9910721598820542.pth" + gpd = GenPureGenderDataset(work_dir, model_strict_path) + gpd.process() diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/gen_rec_dataset.py b/AIMeiSheng/voice_classification/script/music_voice_class/gen_rec_dataset.py new file mode 100644 index 0000000..d486686 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/gen_rec_dataset.py @@ -0,0 +1,140 @@ +""" +预处理数据 +1 使用ebur128做拉伸 +2 切分出纯人声->伴奏占比<=0.1,其余的都为非人声,只要大于等于3s长度的音频段 +3 从纯伴奏中切割出一部分纯伴奏 +4 提取特征,做数据集 +文件结构为: +处理之后,最终结果 +dataset: + --av_area_in + --pure + --female + --xxx.wav + --male + --other + --av_area_sa +""" + +import os +import sys + +# EBUR128_BIN = "/Users/yangjianli/linux/opt/soft/bin/standard_audio_no_cut" +EBUR128_BIN = "/opt/soft/bin/standard_audio_no_cut" + + +def read_file(path): + lines = [] + with open(path) as f: + while True: + line = f.readline() + if not line: + break + lines.append(line) + return lines + + +def exec_cmd(cmd): + print(cmd) + ret = os.system(cmd) + if ret != 0: + return False + return True + + +def get_sec(tm): + """ + 00:00:25.625,00:00:31.192 + 将时间解析为s + :param tm: + :return: + """ + out = str(tm).split(":") + out_sec = int(out[0]) * 60 * 60 + int(out[1]) * 60 + float(out[2]) + return out_sec + + +class GenVoiceDataset: + + def __init__(self, work_dir, out_dir): + self._work_dir = work_dir + self._out_dir = out_dir + + # gender,id,type,idx,st,ed,rate + self._msg = self.get_msg() + self._cache_dir = os.path.join(work_dir, ".cache") + + def get_msg(self): + """ + 处理成id=>[ + [gender, rtype, idx, st, ed, rate] + ] + :return: + """ + label = read_file(os.path.join(self._work_dir, "label.txt")) + msg = {} + for i in range(1, len(label)): + gender, rid, rtype, idx, st, ed, rate = label[i].split(",") + if rid not in msg.keys(): + msg[rid] = [] + msg[rid].append([gender, rtype, idx, st, ed, rate]) + return msg + + def process_one(self, rid, msg): + # 先处理整个音频的拉伸 + gender = msg[0][0] + src_path = os.path.join(self._work_dir, os.path.join(gender, rid + "_rec.mp4")) + cache_path = os.path.join(self._cache_dir, rid) + if not os.path.exists(cache_path): + os.makedirs(cache_path) + + # 转码 + wav_path = os.path.join(cache_path, rid + "_rec.wav") + cmd = "ffmpeg -i {} -ar 16000 -ac 1 {}".format(src_path, wav_path) + if not os.path.exists(wav_path): + if not exec_cmd(cmd): + print("exec_cmd:{} err!".format(cmd)) + return + + # 均衡化 + wav_eb_path = os.path.join(cache_path, rid + "_eb_rec.wav") + cmd = "{} {} {}".format(EBUR128_BIN, wav_path, wav_eb_path) + if not os.path.exists(wav_eb_path): + if not exec_cmd(cmd): + print("exec_cmd:{} err!".format(cmd)) + return + + # 切割成多个 + for idx in range(0, len(msg)): + gender, rtype, idx, st, ed, rate = msg[idx] + + # rtype=1并且float(rate) <=0.1认为是纯人声,其他认为是非纯人声,只要大于等于3s的音频 + # 
out_dir/rtype/gender/rid_idx.wav + if get_sec(ed) - get_sec(st) >= 2.1: + if int(rtype) == 1 and float(rate) <= 0.05: + pwd = os.path.join(self._out_dir, + os.path.join("pure_rec", gender)) + if not os.path.exists(pwd): + os.makedirs(pwd) + out_wav = os.path.join(pwd, "{}_{}_rec.wav".format(rid, idx)) + # else: + # pwd = os.path.join(self._out_dir, + # os.path.join("other", gender)) + # if not os.path.exists(pwd): + # os.makedirs(pwd) + # out_wav = os.path.join(pwd, "{}_{}_rec.wav".format(rid, idx)) + cmd = "ffmpeg -i {} -ss {} -to {} {}".format(wav_eb_path, st, ed, out_wav) + exec_cmd(cmd) + + def process(self): + for rid, msg in self._msg.items(): + self.process_one(rid, msg) + + +if __name__ == "__main__": + work_dir = sys.argv[1] + dst_dir = sys.argv[2] + # work_dir = "/data/datasets/music_voice_dataset_2000/dataset/av_area_in" # 注意一共有四个大区,这是其中一个 + # dst_dir = "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in" + gvd = GenVoiceDataset(work_dir, dst_dir) + gvd.process() diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/readme.txt b/AIMeiSheng/voice_classification/script/music_voice_class/readme.txt new file mode 100644 index 0000000..075e581 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/readme.txt @@ -0,0 +1,20 @@ +代码操作顺序 +1 gen_dataset.py // 将原始音频切分出纯人声和其他 +2 gen_acc_dataset.py // 将伴奏切一部分出来做其他 +3 gen_dataset_feature.py // 提取特征 + + +// 这个与上面无关,用于测试 +1 gen_dataset_files.py // 对整个mp4文件拉伸后,直接提取特征 + +label.txt格式: + 性别, recording_id, type, 序号, 开始时间, 结束时间, 伴奏占比[完全是伴奏,则是-1] + 例子:female,584342372,2,0,00:00:00,00:00:25.625,-1 + +注意: +// type的具体类型 +// 1 干声 添加伴奏比例 +// 2 纯音乐 +// 3 噪音/静音 + +序号的作用是,一个recording会切出多个段,代表的段的序号 diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/split_by_idx.py b/AIMeiSheng/voice_classification/script/music_voice_class/split_by_idx.py new file mode 100644 index 0000000..b34ac8a --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/split_by_idx.py @@ -0,0 +1,140 @@ +""" +通过索引文件切割文件,构建数据集 + +文件结构: +// recording_id, +// 类型: +// 1 音乐带人声,带有比例值 +// 2 纯音乐 +// 3 噪音/静音 +// 开始时间(ms) +// 结束时间(ms) +id,type,st,ed + +数据集结构: +data: + ---female + ---1 + ---rid_1.wav + ---2 + ---rid_2.wav + ---3 + ---4 + ---male + ---1 + ---2 + ---3 + ---4 +""" + +import os +import sys + + +def read_file(file_path): + lines = [] + with open(file_path) as f: + while True: + line = f.readline() + line = line.strip() + if not line: + break + lines.append(line) + return lines + + +def exec_cmd(cmd): + print(cmd) + ret = os.system(cmd) + if ret != 0: + return False + return True + + +def parse_txt(lines): + msg = [] + for idx, line in enumerate(lines): + if idx == 0: + continue + line = line.split(",") + msg.append(line) + return msg + + +def get_msg(): + pth = "/Users/yangjianli/starMaker/av_tools/av_cv_research/voice_classification/script/music_voice_class/resource/music_voice/av_area_sa/label_test.txt" + lines = read_file(pth) + gender = lines[4] + rid = lines[5] + st_tm = lines[6] + default_rate = lines[7] + msg = [] + for idx, line in enumerate(lines): + if idx <= 7: + continue + line = line.split(",") + msg.append(line) + return msg, rid, st_tm, gender, default_rate + + +def format2txt(): + msgs, r_id, st_tm, gender, default_rate = get_msg() + for idx, line in enumerate(msgs): + # female,281475001556604,4,18,00:05:44.398,00:05:59.983 + label = line[0] + ed_time = line[1] + rate = -1 + if int(label) == 1: + rate = default_rate + if len(line) >= 3: + rate = line[2] + new_line = 
"{},{},{},{},00:0{},00:0{},{}".format(gender, r_id, label, idx, st_tm, ed_time, rate) + st_tm = ed_time + print(new_line) + + +class SplitByIdx: + + def __init__(self, file_path, data_src, data_dst): + lines = read_file(file_path) + self.msg = parse_txt(lines) + self._data_src = data_src + self._data_dst = data_dst + + def process(self): + for line in self.msg: + print(line) + gender = line[0] + r_id = line[1] + tp = line[2] + idx = line[3] + st_tm = line[4] + ed_tm = line[5] + + # 检查并创建文件夹 + dst_dir = os.path.join(self._data_dst, os.path.join(gender, tp)) + if not os.path.exists(dst_dir): + os.makedirs(dst_dir) + + cmd = "ffmpeg -i {data_src}/{gender}/{r_id}.mp4 -acodec copy -ss {st_tm} -to {ed_tm} {data_dst}/{gender}/{tp}/{r_id}_{idx}.mp4". \ + format(data_src=self._data_src, gender=gender, r_id=r_id, st_tm=st_tm, ed_tm=ed_tm, + data_dst=self._data_dst, tp=tp, idx=idx) + # print(cmd) + exec_cmd(cmd) + + +def split(): + file_path = sys.argv[1] + data_src = sys.argv[2] + data_dst = sys.argv[3] + + Sbi = SplitByIdx(file_path, data_src, data_dst) + Sbi.process() + + +if __name__ == "__main__": + # format2txt() + if len(sys.argv) > 3: + split() + else: + format2txt() \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/split_gen_dataset.py b/AIMeiSheng/voice_classification/script/music_voice_class/split_gen_dataset.py new file mode 100644 index 0000000..01f37bf --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/split_gen_dataset.py @@ -0,0 +1,79 @@ +""" +暂未完成,正在试验中 +思路: +1 根据歌词切割伴奏和干声文件 +2 打包到一个新的文件地址 +""" +import glob +import os +import json +import librosa +import soundfile +import shutil +import sys + + +class GenDataset: + # work_dir xxx/area/gender/*mp4 + def __init__(self, work_dir, target_dir): + self._work_dir = work_dir + self._target_dir = target_dir + + def get_ids(self): + mp4_files = glob.glob(os.path.join(self._work_dir, "*/*/*mp4")) + ids = [] + for file in mp4_files: + area = file.split("/")[-3] + gender = file.split("/")[-2] + id = file.split("/")[-1].split(".")[0] + ids.append("{}/{}/{}".format(area, gender, id)) + return ids + + def load_lyric(self, id): + lyric_name = os.path.join(self._work_dir, "{}.lyric".format(id)) + lines = [] + with open(lyric_name, "r") as f: + while True: + line = f.readline() + if not line: + break + lines.append(line) + lines = "".join(lines) + return json.loads(lines) + + def cut_one(self, id): + lyric = self.load_lyric(id) + id_dir = os.path.join(self._target_dir, id) + if os.path.exists(id_dir): + shutil.rmtree(id_dir) + os.makedirs(id_dir) + + wav, sr = librosa.load(os.path.join(self._work_dir, "{}.mp4".format(id)), sr=16000) + acc, sr = librosa.load(os.path.join(self._work_dir, "{}_acc.mp4".format(id)), sr=16000) + for idx, line in enumerate(lyric["lyric"]): + st_tm = float(line["line"]["start_time"]) + duration = float(line["line"]["duration"]) + + print("cut_one:{} {},{}".format(idx, st_tm, duration)) + # 写入人声 + new_wav = wav[int(st_tm * sr / 1000): int((st_tm + duration) * sr / 1000)] + out_file = os.path.join(self._target_dir, "{}.wav".format(idx)) + soundfile.write(out_file, new_wav, sr) + + # 写入伴奏 + new_wav = acc[int(st_tm * sr / 1000): int((st_tm + duration) * sr / 1000)] + out_file = os.path.join(self._target_dir, "{}_acc.wav".format(idx)) + soundfile.write(out_file, new_wav, sr) + + def process(self): + ids = self.get_ids() + for id in ids: + self.cut_one(id) + break + + +if __name__ == "__main__": + work_dir = sys.argv[1] + target_dir = sys.argv[2] + gd = 
GenDataset(work_dir, target_dir) + gd.process() \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/CMakeLists.txt b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/CMakeLists.txt new file mode 100644 index 0000000..ad07eae --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 2.8) +project(standard_audio_no_cut) +set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) + +include_directories(ref) +add_subdirectory(ref) + + +include_directories(./) +include_directories(inc) + +file(GLOB_RECURSE SRC_SRC_DIR src/*cpp) +add_executable(standard_audio_no_cut main.cpp ${SRC_SRC_DIR}) + +target_link_libraries(standard_audio_no_cut + ${LIBRARY_OUTPUT_PATH}/libwaves.a + ${LIBRARY_OUTPUT_PATH}/libalimiter.a + ${LIBRARY_OUTPUT_PATH}/libebur128.a + ) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/main.cpp b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/main.cpp new file mode 100644 index 0000000..cad7417 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/main.cpp @@ -0,0 +1,132 @@ +// +// Created by yangjianli on 2020-03-04. +// + +/** + * 目的: 将输入音频拉伸到标准响度 + * 操作方案: + * 1 载入音频 + * 2 获取音频响度 + * 3 拉伸音频 & 做压限 + * 4 输出到文件 + */ + +#include +#include +#include +#include "waves/inc/WaveFile.h" +#include "ebur128/inc/ebur128.h" +#include "alimter/inc/alimiter.h" + + +#define PROC_LEN 1024 +#define DEFAULT_BASELINE_DB (float)-14.57f + +int short2float(short *pInBuf, int nLen, float *pOutBuf) +{ + for (int i = 0; i < nLen; i++) + { + pOutBuf[i] = pInBuf[i] * 1.0 / 32768; + } + return 0; +} + +int float2short(float *pInBuf, int nLen, short *pOutBuf) +{ + for (int i = 0; i < nLen; i++) + { + pOutBuf[i] = int(pInBuf[i] * 32768); + } + return 0; +} + + +int ebur128_whole(int nChannel, int nSampleRate, short *pData, int nLength, double &gated_loudness) +{ + ebur128_state *st = NULL; + st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I); + if (NULL == st) + { + return -1; + } + int nPos = 0; + int nTmpLength = 0; + int nRet; + while (nPos < nLength) + { + nTmpLength = PROC_LEN; + if (nLength - nPos < PROC_LEN) + { + nTmpLength = nLength - nPos; + } + nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel); + if (nRet != 0) + { + return -2; + } + nPos += nTmpLength; + } + + gated_loudness = -1; + ebur128_loudness_global(st, &gated_loudness); + ebur128_destroy(&st); + return 0; +} + +void apply_gain(float *pdata, int len, double gain) +{ + for (int i = 0; i < len; i++) + { + pdata[i] *= gain; + } +} + +int main(int argc, char *argv[]) +{ + if (argc < 3) + { + printf("param err! 
example: ./exe vocal.wav out.wav\n"); + return -1; + } + std::string input = argv[1]; + std::string output = argv[2]; + CWaveFile *owavefile = new CWaveFile(input.c_str(), false); + int buf_len = owavefile->GetTotalFrames() * owavefile->GetChannels(); + short *input_buf = new short[buf_len]; + owavefile->ReadFrameAsS16(input_buf, owavefile->GetTotalFrames()); + double gain_loudness = -1; + ebur128_whole(owavefile->GetChannels(), owavefile->GetSampleRate(), input_buf, buf_len, gain_loudness); + float db = (DEFAULT_BASELINE_DB - gain_loudness) / 20.f; + float gain = pow(10, db); + + // 音量本身太小,不做拉伸 + if (gain > 80) { + printf("need gain = %f\n", gain); + gain = 1; + } + + // 拉伸 + float *input_buf_f = new float[buf_len]; + short2float(input_buf, buf_len, input_buf_f); + apply_gain(input_buf_f, buf_len, gain); + + // 压限 + SUPERSOUND::Alimiter alimiter; + alimiter.SetParam(owavefile->GetSampleRate(), owavefile->GetChannels()); + alimiter.Filter(input_buf_f, input_buf_f, buf_len); + float2short(input_buf_f, buf_len, input_buf); + + // 输出 + CWaveFile *out_file = new CWaveFile(output.c_str(), true); + out_file->SetChannels(owavefile->GetChannels()); + out_file->SetSampleFormat(SF_S16); + out_file->SetSampleRate(owavefile->GetSampleRate()); + out_file->SetupDone(); + out_file->WriteFrame(input_buf, owavefile->GetTotalFrames()); + + delete owavefile; + delete out_file; + delete[] input_buf; + delete[] input_buf_f; + return 0; +} \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/CMakeLists.txt b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/CMakeLists.txt new file mode 100644 index 0000000..f7336c8 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/CMakeLists.txt @@ -0,0 +1,3 @@ +add_subdirectory(waves) +add_subdirectory(ebur128) +add_subdirectory(alimter) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/alimter/CMakeLists.txt b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/alimter/CMakeLists.txt new file mode 100644 index 0000000..9748c4d --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/alimter/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(inc) +AUX_SOURCE_DIRECTORY(src DIR_ALIMTER_SRCS) +add_library(alimiter ${DIR_ALIMTER_SRCS}) \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/alimter/inc/alimiter.h b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/alimter/inc/alimiter.h new file mode 100644 index 0000000..8022d39 --- /dev/null +++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/alimter/inc/alimiter.h @@ -0,0 +1,99 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". 
+
+#ifndef __ALIMITER_H__
+#define __ALIMITER_H__
+
+#include <stdint.h>
+#define ERROR_SUPERSOUND_SUCCESS 0
+#define ERROR_SUPERSOUND_PARAM -1
+#define ERROR_SUPERSOUND_MEMORY -2
+typedef struct AudioLimiterContext
+{
+    float limit;
+    float attack;
+    float release;
+    float att;
+    float level_in;
+    float level_out;
+    int32_t auto_release;
+    int32_t auto_level;
+    float asc;
+    int32_t asc_c;
+    int32_t asc_pos;
+    float asc_coeff;
+
+    float *buffer;
+    int32_t buffer_size;
+    int32_t buffer_max_size;
+    int32_t pos;
+    int32_t *nextpos;
+    float *nextdelta;
+
+    float delta;
+    int32_t nextiter;
+    int32_t nextlen;
+    int32_t asc_changed;
+}AudioLimiterContext;
+
+namespace SUPERSOUND
+{
+
+
+class Alimiter
+{
+public:
+    Alimiter();
+    ~Alimiter();
+
+public:
+    void Flush();
+    int32_t GetLatecy();
+    int32_t SetParam(int32_t fs, int32_t channels);
+    void Filter(float * input, float * output, int32_t num);
+
+private:
+    void Uninit();
+    int32_t config_input();
+    float get_rdelta(AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc);
+
+private:
+    AudioLimiterContext m_alimiterCtx;
+    int m_nChannels;
+    int m_nFs;
+};
+
+
+}
+
+#endif /* __ALIMITER_H__ */
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/alimter/src/alimiter.cpp b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/alimter/src/alimiter.cpp
new file mode 100644
index 0000000..abbd622
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/alimter/src/alimiter.cpp
@@ -0,0 +1,306 @@
+
+#include "alimiter.h"
+#include <string.h>
+#include <math.h>
+#include <stdlib.h>
+#include <new>
+
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ?
(a) : (b)) +#define MIDDLE(x, y, z) ((x)<(y)?((y)<(z)?(y):(x)<(z)?(z):(x)):((y)>(z)?(y):(x)>(z)?(z):(x))) +#define SAFE_DELETE_PTR(ptr) \ +{ \ + if(ptr) \ + { \ + delete [] ptr; \ + ptr = NULL; \ + } \ +} + +namespace SUPERSOUND +{ + + +Alimiter::Alimiter() +{ + memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx)); + + m_nChannels = 0; + m_nFs = 0; + + Flush(); +} + +Alimiter::~Alimiter() +{ + Uninit(); +} + +void Alimiter::Flush() +{ + float * buffer = m_alimiterCtx.buffer; + float * nextdelta = m_alimiterCtx.nextdelta; + int32_t * nextpos = m_alimiterCtx.nextpos; + int32_t buffer_max_size = m_alimiterCtx.buffer_max_size; + int32_t buffer_size = m_alimiterCtx.buffer_size; + + if(buffer) + memset(buffer, 0, sizeof(float) * buffer_max_size); + if(nextdelta) + memset(nextdelta, 0, sizeof(float) * buffer_max_size); + if(nextpos) + memset(nextpos, -1, sizeof(float) * buffer_max_size); + + memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx)); + + m_alimiterCtx.level_in = 1; + m_alimiterCtx.level_out = 32000 / 32768.0; + m_alimiterCtx.limit = 1; + m_alimiterCtx.attack = 5; + m_alimiterCtx.release = 50; + m_alimiterCtx.auto_release = 0; + m_alimiterCtx.asc_coeff = 0.5; + m_alimiterCtx.auto_level = 1; + + m_alimiterCtx.attack /= 1000; + m_alimiterCtx.release /= 1000; + m_alimiterCtx.att = 1; + m_alimiterCtx.asc_pos = -1; + m_alimiterCtx.asc_coeff = pow(0.5f, m_alimiterCtx.asc_coeff - 0.5f) * 2 * -1; + + m_alimiterCtx.buffer = buffer; + m_alimiterCtx.nextdelta = nextdelta; + m_alimiterCtx.nextpos = nextpos; + m_alimiterCtx.buffer_max_size = buffer_max_size; + m_alimiterCtx.buffer_size = buffer_size; +} + +int32_t Alimiter::GetLatecy() +{ + return m_alimiterCtx.buffer_size / m_nChannels; +} + +int32_t Alimiter::SetParam( int32_t fs, int32_t channels ) +{ + if((fs == m_nFs) && (channels == m_nChannels)) + return ERROR_SUPERSOUND_SUCCESS; + + m_nChannels = channels; + m_nFs = fs; + + return config_input(); +} + +void Alimiter::Filter( float * input, float * output, int32_t num ) +{ + num = num / m_nChannels; + int channels = m_nChannels; + int buffer_size = m_alimiterCtx.buffer_size; + float * buffer = m_alimiterCtx.buffer; + float release = m_alimiterCtx.release; + float limit = m_alimiterCtx.limit; + float * nextdelta = m_alimiterCtx.nextdelta; + float level = m_alimiterCtx.auto_level ? 1 / limit : 1; + float level_out = m_alimiterCtx.level_out; + float level_in = m_alimiterCtx.level_in; + int *nextpos = m_alimiterCtx.nextpos; + + float * buf; + float * dst; + float * src; + int n, c, i; + AudioLimiterContext * s = &m_alimiterCtx; + + dst = output; + src = input; + + for (n = 0; n < num; n++) { + float peak = 0; + + for (c = 0; c < channels; c++) { + float sample = src[c] * level_in; + + buffer[s->pos + c] = sample; + peak = MAX(peak, fabs(sample)); + } + + if (s->auto_release && peak > limit) { + s->asc += peak; + s->asc_c++; + } + + if (peak > limit) { + float patt = MIN(limit / peak, 1); + float rdelta = get_rdelta(s, release, m_nFs, + peak, limit, patt, 0); + float delta = (limit / peak - s->att) / buffer_size * channels; + int found = 0; + + if (delta < s->delta) { + s->delta = delta; + nextpos[0] = s->pos; + nextpos[1] = -1; + nextdelta[0] = rdelta; + s->nextlen = 1; + s->nextiter= 0; + } else { + for (i = s->nextiter; i < s->nextiter + s->nextlen; i++) { + int j = i % buffer_size; + float ppeak, pdelta; + + ppeak = fabs(buffer[nextpos[j]]) > fabs(buffer[nextpos[j] + 1]) ? 
+ fabs(buffer[nextpos[j]]) : fabs(buffer[nextpos[j] + 1]); + pdelta = (limit / peak - limit / ppeak) / (((buffer_size - nextpos[j] + s->pos) % buffer_size) / channels); + if (pdelta < nextdelta[j]) { + nextdelta[j] = pdelta; + found = 1; + break; + } + } + if (found) { + s->nextlen = i - s->nextiter + 1; + nextpos[(s->nextiter + s->nextlen) % buffer_size] = s->pos; + nextdelta[(s->nextiter + s->nextlen) % buffer_size] = rdelta; + nextpos[(s->nextiter + s->nextlen + 1) % buffer_size] = -1; + s->nextlen++; + } + } + } + + buf = &s->buffer[(s->pos + channels) % buffer_size]; + peak = 0; + for (c = 0; c < channels; c++) { + float sample = buf[c]; + + peak = MAX(peak, fabs(sample)); + } + + if (s->pos == s->asc_pos && !s->asc_changed) + s->asc_pos = -1; + + if (s->auto_release && s->asc_pos == -1 && peak > limit) { + s->asc -= peak; + s->asc_c--; + } + + s->att += s->delta; + + for (c = 0; c < channels; c++) + dst[c] = buf[c] * s->att; + + if ((s->pos + channels) % buffer_size == nextpos[s->nextiter]) { + if (s->auto_release) { + s->delta = get_rdelta(s, release, m_nFs, + peak, limit, s->att, 1); + if (s->nextlen > 1) { + int pnextpos = nextpos[(s->nextiter + 1) % buffer_size]; + float ppeak = fabs(buffer[pnextpos]) > fabs(buffer[pnextpos + 1]) ? + fabs(buffer[pnextpos]) : + fabs(buffer[pnextpos + 1]); + float pdelta = (limit / ppeak - s->att) / + (((buffer_size + pnextpos - + ((s->pos + channels) % buffer_size)) % + buffer_size) / channels); + if (pdelta < s->delta) + s->delta = pdelta; + } + } else { + s->delta = nextdelta[s->nextiter]; + s->att = limit / peak; + } + + s->nextlen -= 1; + nextpos[s->nextiter] = -1; + s->nextiter = (s->nextiter + 1) % buffer_size; + } + + if (s->att > 1.) { + s->att = 1.; + s->delta = 0.; + s->nextiter = 0; + s->nextlen = 0; + nextpos[0] = -1; + } + + if (s->att <= 0.) { + s->att = 0.000001f; + s->delta = (1 - s->att) / (m_nFs * release); + } + + if (s->att != 1 && (1 - s->att) < 0.000001f) + s->att = 1; + + if (s->delta != 0 && fabs(s->delta) < 0.000001f) + s->delta = 0; + + for (c = 0; c < channels; c++) + dst[c] = MIDDLE(dst[c], -limit, limit) * level * level_out; + + s->pos = (s->pos + channels) % buffer_size; + src += channels; + dst += channels; + } +} + +void Alimiter::Uninit() +{ + SAFE_DELETE_PTR(m_alimiterCtx.buffer); + SAFE_DELETE_PTR(m_alimiterCtx.nextdelta); + SAFE_DELETE_PTR(m_alimiterCtx.nextpos); +} + +int32_t Alimiter::config_input() +{ + int obuffer_size = int(m_nFs * m_nChannels * 100 / 1000. 
+ m_nChannels);
+    if(obuffer_size < m_nChannels)
+        return ERROR_SUPERSOUND_PARAM;
+
+    if(obuffer_size > m_alimiterCtx.buffer_max_size)
+    {
+        SAFE_DELETE_PTR(m_alimiterCtx.buffer);
+        m_alimiterCtx.buffer = new(std::nothrow) float[obuffer_size];
+        if(m_alimiterCtx.buffer == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        memset(m_alimiterCtx.buffer, 0, sizeof(float) * obuffer_size);
+
+        SAFE_DELETE_PTR(m_alimiterCtx.nextdelta);
+        m_alimiterCtx.nextdelta = new(std::nothrow) float[obuffer_size];
+        if(m_alimiterCtx.nextdelta == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        memset(m_alimiterCtx.nextdelta, 0, sizeof(float) * obuffer_size);
+
+        SAFE_DELETE_PTR(m_alimiterCtx.nextpos);
+        m_alimiterCtx.nextpos = new(std::nothrow) int32_t[obuffer_size];
+        if(m_alimiterCtx.nextpos == NULL)
+            return ERROR_SUPERSOUND_MEMORY;
+        memset(m_alimiterCtx.nextpos, -1, obuffer_size*sizeof(int32_t));
+
+        m_alimiterCtx.buffer_max_size = obuffer_size;
+    }
+
+    m_alimiterCtx.buffer_size = int(m_nFs * m_alimiterCtx.attack * m_nChannels);
+    m_alimiterCtx.buffer_size -= m_alimiterCtx.buffer_size % m_nChannels;
+
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+float Alimiter::get_rdelta( AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc )
+{
+    float rdelta = (1 - patt) / (sample_rate * release);
+
+    if (asc && s->auto_release && s->asc_c > 0) {
+        float a_att = limit / (s->asc_coeff * s->asc) * (float)s->asc_c;
+
+        if (a_att > patt) {
+            float delta = MAX((a_att - patt) / (sample_rate * release), rdelta / 10);
+
+            if (delta < rdelta)
+                rdelta = delta;
+        }
+    }
+
+    return rdelta;
+}
+
+
+}
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/ebur128/CMakeLists.txt b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/ebur128/CMakeLists.txt
new file mode 100644
index 0000000..18a5a86
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/ebur128/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_EBUR128_SRCS)
+add_library(ebur128 ${DIR_EBUR128_SRCS})
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/ebur128/inc/ebur128.h b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/ebur128/inc/ebur128.h
new file mode 100644
index 0000000..faa66c6
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/ebur128/inc/ebur128.h
@@ -0,0 +1,435 @@
+/* See COPYING file for copyright and license details. */
+
+#ifndef EBUR128_H_
+#define EBUR128_H_
+
+/** \file ebur128.h
+ * \brief libebur128 - a library for loudness measurement according to
+ * the EBU R128 standard.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define EBUR128_VERSION_MAJOR 1
+#define EBUR128_VERSION_MINOR 2
+#define EBUR128_VERSION_PATCH 4
+
+#include <stddef.h> /* for size_t */
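+/* Minimal usage sketch for integrated loudness, mirroring ebur128_whole()
+ * in main.cpp above (error handling omitted; `pcm` is interleaved and
+ * `frames` counts frames, not samples):
+ *
+ *   ebur128_state* st = ebur128_init(channels, samplerate, EBUR128_MODE_I);
+ *   ebur128_add_frames_short(st, pcm, frames);
+ *   double lufs;
+ *   ebur128_loudness_global(st, &lufs);
+ *   ebur128_destroy(&st);
+ */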
+
+/** \enum channel
+ * Use these values when setting the channel map with ebur128_set_channel().
+ * See definitions in ITU R-REC-BS 1770-4
+ */
+enum channel {
+  EBUR128_UNUSED = 0,     /**< unused channel (for example LFE channel) */
+  EBUR128_LEFT = 1,
+  EBUR128_Mp030 = 1,      /**< itu M+030 */
+  EBUR128_RIGHT = 2,
+  EBUR128_Mm030 = 2,      /**< itu M-030 */
+  EBUR128_CENTER = 3,
+  EBUR128_Mp000 = 3,      /**< itu M+000 */
+  EBUR128_LEFT_SURROUND = 4,
+  EBUR128_Mp110 = 4,      /**< itu M+110 */
+  EBUR128_RIGHT_SURROUND = 5,
+  EBUR128_Mm110 = 5,      /**< itu M-110 */
+  EBUR128_DUAL_MONO,      /**< a channel that is counted twice */
+  EBUR128_MpSC,           /**< itu M+SC */
+  EBUR128_MmSC,           /**< itu M-SC */
+  EBUR128_Mp060,          /**< itu M+060 */
+  EBUR128_Mm060,          /**< itu M-060 */
+  EBUR128_Mp090,          /**< itu M+090 */
+  EBUR128_Mm090,          /**< itu M-090 */
+  EBUR128_Mp135,          /**< itu M+135 */
+  EBUR128_Mm135,          /**< itu M-135 */
+  EBUR128_Mp180,          /**< itu M+180 */
+  EBUR128_Up000,          /**< itu U+000 */
+  EBUR128_Up030,          /**< itu U+030 */
+  EBUR128_Um030,          /**< itu U-030 */
+  EBUR128_Up045,          /**< itu U+045 */
+  EBUR128_Um045,          /**< itu U-045 */
+  EBUR128_Up090,          /**< itu U+090 */
+  EBUR128_Um090,          /**< itu U-090 */
+  EBUR128_Up110,          /**< itu U+110 */
+  EBUR128_Um110,          /**< itu U-110 */
+  EBUR128_Up135,          /**< itu U+135 */
+  EBUR128_Um135,          /**< itu U-135 */
+  EBUR128_Up180,          /**< itu U+180 */
+  EBUR128_Tp000,          /**< itu T+000 */
+  EBUR128_Bp000,          /**< itu B+000 */
+  EBUR128_Bp045,          /**< itu B+045 */
+  EBUR128_Bm045           /**< itu B-045 */
+};
+
+/** \enum error
+ * Error return values.
+ */
+enum error {
+  EBUR128_SUCCESS = 0,
+  EBUR128_ERROR_NOMEM,
+  EBUR128_ERROR_INVALID_MODE,
+  EBUR128_ERROR_INVALID_CHANNEL_INDEX,
+  EBUR128_ERROR_NO_CHANGE
+};
+
+/** \enum mode
+ * Use these values in ebur128_init (or'ed). Try to use the lowest possible
+ * modes that suit your needs, as performance will be better.
+ */
+enum mode {
+  /** can call ebur128_loudness_momentary */
+  EBUR128_MODE_M = (1 << 0),
+  /** can call ebur128_loudness_shortterm */
+  EBUR128_MODE_S = (1 << 1) | EBUR128_MODE_M,
+  /** can call ebur128_loudness_global_* and ebur128_relative_threshold */
+  EBUR128_MODE_I = (1 << 2) | EBUR128_MODE_M,
+  /** can call ebur128_loudness_range */
+  EBUR128_MODE_LRA = (1 << 3) | EBUR128_MODE_S,
+  /** can call ebur128_sample_peak */
+  EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | EBUR128_MODE_M,
+  /** can call ebur128_true_peak */
+  EBUR128_MODE_TRUE_PEAK = (1 << 5) | EBUR128_MODE_M
+                                    | EBUR128_MODE_SAMPLE_PEAK,
+  /** uses histogram algorithm to calculate loudness */
+  EBUR128_MODE_HISTOGRAM = (1 << 6)
+};
+
+/** forward declaration of ebur128_state_internal */
+struct ebur128_state_internal;
+
+/** \brief Contains information about the state of a loudness measurement.
+ *
+ * You should not need to modify this struct directly.
+ */
+typedef struct {
+  int mode;                           /**< The current mode. */
+  unsigned int channels;              /**< The number of channels. */
+  unsigned long samplerate;           /**< The sample rate. */
+  struct ebur128_state_internal* d;   /**< Internal state. */
+} ebur128_state;
+
+/** \brief Get library version number. Do not pass null pointers here.
+ *
+ * @param major major version number of library
+ * @param minor minor version number of library
+ * @param patch patch version number of library
+ */
+void ebur128_get_version(int* major, int* minor, int* patch);
+
+/** \brief Initialize library state.
+ *
+ * @param channels the number of channels.
+ * @param samplerate the sample rate.
+ * @param mode see the mode enum for possible values.
+ * @return an initialized library state, or NULL on error.
+ */ +ebur128_state* ebur128_init(unsigned int channels, + unsigned long samplerate, + int mode); + +/** \brief Destroy library state. + * + * @param st pointer to a library state. + */ +void ebur128_destroy(ebur128_state** st); + +/** \brief Set channel type. + * + * The default is: + * - 0 -> EBUR128_LEFT + * - 1 -> EBUR128_RIGHT + * - 2 -> EBUR128_CENTER + * - 3 -> EBUR128_UNUSED + * - 4 -> EBUR128_LEFT_SURROUND + * - 5 -> EBUR128_RIGHT_SURROUND + * + * @param st library state. + * @param channel_number zero based channel index. + * @param value channel type from the "channel" enum. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_set_channel(ebur128_state* st, + unsigned int channel_number, + int value); + +/** \brief Change library parameters. + * + * Note that the channel map will be reset when setting a different number of + * channels. The current unfinished block will be lost. + * + * @param st library state. + * @param channels new number of channels. + * @param samplerate new sample rate. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be + * invalid and must be destroyed. + * - EBUR128_ERROR_NO_CHANGE if channels and sample rate were not changed. + */ +int ebur128_change_parameters(ebur128_state* st, + unsigned int channels, + unsigned long samplerate); + +/** \brief Set the maximum window duration. + * + * Set the maximum duration that will be used for ebur128_window_loudness(). + * Note that this destroys the current content of the audio buffer. + * + * @param st library state. + * @param window duration of the window in ms. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be + * invalid and must be destroyed. + * - EBUR128_ERROR_NO_CHANGE if window duration not changed. + */ +int ebur128_set_max_window(ebur128_state* st, unsigned long window); + +/** \brief Set the maximum history. + * + * Set the maximum history that will be stored for loudness integration. + * More history provides more accurate results, but requires more resources. + * + * Applies to ebur128_loudness_range() and ebur128_loudness_global() when + * EBUR128_MODE_HISTOGRAM is not set. + * + * Default is ULONG_MAX (at least ~50 days). + * Minimum is 3000ms for EBUR128_MODE_LRA and 400ms for EBUR128_MODE_M. + * + * @param st library state. + * @param history duration of history in ms. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NO_CHANGE if history not changed. + */ +int ebur128_set_max_history(ebur128_state* st, unsigned long history); + +/** \brief Add frames to be processed. + * + * @param st library state. + * @param src array of source frames. Channels must be interleaved. + * @param frames number of frames. Not number of samples! + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. + */ +int ebur128_add_frames_short(ebur128_state* st, + const short* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_int(ebur128_state* st, + const int* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_float(ebur128_state* st, + const float* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_double(ebur128_state* st, + const double* src, + size_t frames); + +/** \brief Get global integrated loudness in LUFS. 
+ * + * @param st library state. + * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set. + */ +int ebur128_loudness_global(ebur128_state* st, double* out); +/** \brief Get global integrated loudness in LUFS across multiple instances. + * + * @param sts array of library states. + * @param size length of sts + * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set. + */ +int ebur128_loudness_global_multiple(ebur128_state** sts, + size_t size, + double* out); + +/** \brief Get momentary loudness (last 400ms) in LUFS. + * + * @param st library state. + * @param out momentary loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + */ +int ebur128_loudness_momentary(ebur128_state* st, double* out); +/** \brief Get short-term loudness (last 3s) in LUFS. + * + * @param st library state. + * @param out short-term loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_S" has not been set. + */ +int ebur128_loudness_shortterm(ebur128_state* st, double* out); + +/** \brief Get loudness of the specified window in LUFS. + * + * window must not be larger than the current window set in st. + * The current window can be changed by calling ebur128_set_max_window(). + * + * @param st library state. + * @param window window in ms to calculate loudness. + * @param out loudness in LUFS. -HUGE_VAL if result is negative infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if window larger than current window in st. + */ +int ebur128_loudness_window(ebur128_state* st, + unsigned long window, + double* out); + +/** \brief Get loudness range (LRA) of programme in LU. + * + * Calculates loudness range according to EBU 3342. + * + * @param st library state. + * @param out loudness range (LRA) in LU. Will not be changed in case of + * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be + * returned in this case. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM in case of memory allocation error. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set. + */ +int ebur128_loudness_range(ebur128_state* st, double* out); +/** \brief Get loudness range (LRA) in LU across multiple instances. + * + * Calculates loudness range according to EBU 3342. + * + * @param sts array of library states. + * @param size length of sts + * @param out loudness range (LRA) in LU. Will not be changed in case of + * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be + * returned in this case. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM in case of memory allocation error. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set. + */ +int ebur128_loudness_range_multiple(ebur128_state** sts, + size_t size, + double* out); + +/** \brief Get maximum sample peak from all frames that have been processed. 
+ * + * The equation to convert to dBFS is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum sample peak in float format (1.0 is 0 dBFS) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum sample peak from the last call to add_frames(). + * + * The equation to convert to dBFS is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum sample peak in float format (1.0 is 0 dBFS) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_prev_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum true peak from all frames that have been processed. + * + * Uses an implementation defined algorithm to calculate the true peak. Do not + * try to compare resulting values across different versions of the library, + * as the algorithm may change. + * + * The current implementation uses a custom polyphase FIR interpolator to + * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for + * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz. + * + * The equation to convert to dBTP is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum true peak in float format (1.0 is 0 dBTP) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum true peak from the last call to add_frames(). + * + * Uses an implementation defined algorithm to calculate the true peak. Do not + * try to compare resulting values across different versions of the library, + * as the algorithm may change. + * + * The current implementation uses a custom polyphase FIR interpolator to + * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for + * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz. + * + * The equation to convert to dBTP is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum true peak in float format (1.0 is 0 dBTP) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_prev_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get relative threshold in LUFS. + * + * @param st library state + * @param out relative threshold in LUFS. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not + * been set. 
+ */
+int ebur128_relative_threshold(ebur128_state* st, double* out);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* EBUR128_H_ */
diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/ebur128/src/ebur128.c b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/ebur128/src/ebur128.c
new file mode 100644
index 0000000..6c10f1e
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/ebur128/src/ebur128.c
@@ -0,0 +1,1333 @@
+/* See COPYING file for copyright and license details. */
+
+#include "ebur128.h"
+
+#include <float.h>
+#include <limits.h>
+#include <math.h> /* You may have to define _USE_MATH_DEFINES if you use MSVC */
+#include <stdio.h>
+#include <stdlib.h>
+
+/* This can be replaced by any BSD-like queue implementation. */
+#include <sys/queue.h>
+
+#define CHECK_ERROR(condition, errorcode, goto_point) \
+  if ((condition)) { \
+    errcode = (errorcode); \
+    goto goto_point; \
+  }
+
+STAILQ_HEAD(ebur128_double_queue, ebur128_dq_entry);
+struct ebur128_dq_entry {
+  double z;
+  STAILQ_ENTRY(ebur128_dq_entry) entries;
+};
+
+#define ALMOST_ZERO 0.000001
+
+typedef struct {          /* Data structure for polyphase FIR interpolator */
+  unsigned int factor;    /* Interpolation factor of the interpolator */
+  unsigned int taps;      /* Taps (prefer odd to increase zero coeffs) */
+  unsigned int channels;  /* Number of channels */
+  unsigned int delay;     /* Size of delay buffer */
+  struct {
+    unsigned int count;   /* Number of coefficients in this subfilter */
+    unsigned int* index;  /* Delay index of corresponding filter coeff */
+    double* coeff;        /* List of subfilter coefficients */
+  }* filter;              /* List of subfilters (one for each factor) */
+  float** z;              /* List of delay buffers (one for each channel) */
+  unsigned int zi;        /* Current delay buffer index */
+} interpolator;
+
+struct ebur128_state_internal {
+  /** Filtered audio data (used as ring buffer). */
+  double* audio_data;
+  /** Size of audio_data array. */
+  size_t audio_data_frames;
+  /** Current index for audio_data. */
+  size_t audio_data_index;
+  /** How many frames are needed for a gating block. Will correspond to 400ms
+   * of audio at initialization, and 100ms after the first block (75% overlap
+   * as specified in the 2011 revision of BS1770). */
+  unsigned long needed_frames;
+  /** The channel map. Has as many elements as there are channels. */
+  int* channel_map;
+  /** How many samples fit in 100ms (rounded). */
+  unsigned long samples_in_100ms;
+  /** BS.1770 filter coefficients (nominator). */
+  double b[5];
+  /** BS.1770 filter coefficients (denominator). */
+  double a[5];
+  /** BS.1770 filter state. */
+  double v[5][5];
+  /** Linked list of block energies. */
+  struct ebur128_double_queue block_list;
+  unsigned long block_list_max;
+  unsigned long block_list_size;
+  /** Linked list of 3s-block energies, used to calculate LRA. */
+  struct ebur128_double_queue short_term_block_list;
+  unsigned long st_block_list_max;
+  unsigned long st_block_list_size;
+  int use_histogram;
+  unsigned long *block_energy_histogram;
+  unsigned long *short_term_block_energy_histogram;
+  /** Keeps track of when a new short term block is needed.
*/ + size_t short_term_frame_counter; + /** Maximum sample peak, one per channel */ + double* sample_peak; + double* prev_sample_peak; + /** Maximum true peak, one per channel */ + double* true_peak; + double* prev_true_peak; + interpolator* interp; + float* resampler_buffer_input; + size_t resampler_buffer_input_frames; + float* resampler_buffer_output; + size_t resampler_buffer_output_frames; + /** The maximum window duration in ms. */ + unsigned long window; + unsigned long history; +}; + +static double relative_gate = -10.0; + +/* Those will be calculated when initializing the library */ +static double relative_gate_factor; +static double minus_twenty_decibels; +static double histogram_energies[1000]; +static double histogram_energy_boundaries[1001]; + +static interpolator* interp_create(unsigned int taps, unsigned int factor, unsigned int channels) { + interpolator* interp = calloc(1, sizeof(interpolator)); + unsigned int j = 0; + + interp->taps = taps; + interp->factor = factor; + interp->channels = channels; + interp->delay = (interp->taps + interp->factor - 1) / interp->factor; + + /* Initialize the filter memory + * One subfilter per interpolation factor. */ + interp->filter = calloc(interp->factor, sizeof(*interp->filter)); + for (j = 0; j < interp->factor; j++) { + interp->filter[j].index = calloc(interp->delay, sizeof(unsigned int)); + interp->filter[j].coeff = calloc(interp->delay, sizeof(double)); + } + /* One delay buffer per channel. */ + interp->z = calloc(interp->channels, sizeof(float*)); + for (j = 0; j < interp->channels; j++) { + interp->z[j] = calloc( interp->delay, sizeof(float) ); + } + + /* Calculate the filter coefficients */ + for (j = 0; j < interp->taps; j++) { + /* Calculate sinc */ + double m = (double)j - (double)(interp->taps - 1) / 2.0; + double c = 1.0; + if (fabs(m) > ALMOST_ZERO) { + c = sin(m * M_PI / interp->factor) / (m * M_PI / interp->factor); + } + /* Apply Hanning window */ + c *= 0.5 * (1 - cos(2 * M_PI * j / (interp->taps - 1))); + + if (fabs(c) > ALMOST_ZERO) { /* Ignore any zero coeffs. 
*/ + /* Put the coefficient into the correct subfilter */ + unsigned int f = j % interp->factor; + unsigned int t = interp->filter[f].count++; + interp->filter[f].coeff[t] = c; + interp->filter[f].index[t] = j / interp->factor; + } + } + return interp; +} + +static void interp_destroy(interpolator* interp) { + unsigned int j = 0; + if (!interp) { + return; + } + for (j = 0; j < interp->factor; j++) { + free(interp->filter[j].index); + free(interp->filter[j].coeff); + } + free(interp->filter); + for (j = 0; j < interp->channels; j++) { + free(interp->z[j]); + } + free(interp->z); + free(interp); +} + +static size_t interp_process(interpolator* interp, size_t frames, float* in, float* out) { + size_t frame = 0; + unsigned int chan = 0; + unsigned int f = 0; + unsigned int t = 0; + unsigned int out_stride = interp->channels * interp->factor; + float* outp = 0; + double acc = 0; + double c = 0; + + for (frame = 0; frame < frames; frame++) { + for (chan = 0; chan < interp->channels; chan++) { + /* Add sample to delay buffer */ + interp->z[chan][interp->zi] = *in++; + /* Apply coefficients */ + outp = out + chan; + for (f = 0; f < interp->factor; f++) { + acc = 0.0; + for (t = 0; t < interp->filter[f].count; t++) { + int i = (int)interp->zi - (int)interp->filter[f].index[t]; + if (i < 0) { + i += interp->delay; + } + c = interp->filter[f].coeff[t]; + acc += interp->z[chan][i] * c; + } + *outp = (float)acc; + outp += interp->channels; + } + } + out += out_stride; + interp->zi++; + if (interp->zi == interp->delay) { + interp->zi = 0; + } + } + + return frames * interp->factor; +} + +static void ebur128_init_filter(ebur128_state* st) { + int i, j; + + double f0 = 1681.974450955533; + double G = 3.999843853973347; + double Q = 0.7071752369554196; + + double K = tan(M_PI * f0 / (double) st->samplerate); + double Vh = pow(10.0, G / 20.0); + double Vb = pow(Vh, 0.4996667741545416); + + double pb[3] = {0.0, 0.0, 0.0}; + double pa[3] = {1.0, 0.0, 0.0}; + double rb[3] = {1.0, -2.0, 1.0}; + double ra[3] = {1.0, 0.0, 0.0}; + + double a0 = 1.0 + K / Q + K * K ; + pb[0] = (Vh + Vb * K / Q + K * K) / a0; + pb[1] = 2.0 * (K * K - Vh) / a0; + pb[2] = (Vh - Vb * K / Q + K * K) / a0; + pa[1] = 2.0 * (K * K - 1.0) / a0; + pa[2] = (1.0 - K / Q + K * K) / a0; + + /* fprintf(stderr, "%.14f %.14f %.14f %.14f %.14f\n", + b1[0], b1[1], b1[2], a1[1], a1[2]); */ + + f0 = 38.13547087602444; + Q = 0.5003270373238773; + K = tan(M_PI * f0 / (double) st->samplerate); + + ra[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K); + ra[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K); + + /* fprintf(stderr, "%.14f %.14f\n", a2[1], a2[2]); */ + + st->d->b[0] = pb[0] * rb[0]; + st->d->b[1] = pb[0] * rb[1] + pb[1] * rb[0]; + st->d->b[2] = pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0]; + st->d->b[3] = pb[1] * rb[2] + pb[2] * rb[1]; + st->d->b[4] = pb[2] * rb[2]; + + st->d->a[0] = pa[0] * ra[0]; + st->d->a[1] = pa[0] * ra[1] + pa[1] * ra[0]; + st->d->a[2] = pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0]; + st->d->a[3] = pa[1] * ra[2] + pa[2] * ra[1]; + st->d->a[4] = pa[2] * ra[2]; + + for (i = 0; i < 5; ++i) { + for (j = 0; j < 5; ++j) { + st->d->v[i][j] = 0.0; + } + } +} + +static int ebur128_init_channel_map(ebur128_state* st) { + size_t i; + st->d->channel_map = (int*) malloc(st->channels * sizeof(int)); + if (!st->d->channel_map) { + return EBUR128_ERROR_NOMEM; + } + if (st->channels == 4) { + st->d->channel_map[0] = EBUR128_LEFT; + st->d->channel_map[1] = EBUR128_RIGHT; + st->d->channel_map[2] = EBUR128_LEFT_SURROUND; + 
st->d->channel_map[3] = EBUR128_RIGHT_SURROUND; + } else if (st->channels == 5) { + st->d->channel_map[0] = EBUR128_LEFT; + st->d->channel_map[1] = EBUR128_RIGHT; + st->d->channel_map[2] = EBUR128_CENTER; + st->d->channel_map[3] = EBUR128_LEFT_SURROUND; + st->d->channel_map[4] = EBUR128_RIGHT_SURROUND; + } else { + for (i = 0; i < st->channels; ++i) { + switch (i) { + case 0: st->d->channel_map[i] = EBUR128_LEFT; break; + case 1: st->d->channel_map[i] = EBUR128_RIGHT; break; + case 2: st->d->channel_map[i] = EBUR128_CENTER; break; + case 3: st->d->channel_map[i] = EBUR128_UNUSED; break; + case 4: st->d->channel_map[i] = EBUR128_LEFT_SURROUND; break; + case 5: st->d->channel_map[i] = EBUR128_RIGHT_SURROUND; break; + default: st->d->channel_map[i] = EBUR128_UNUSED; break; + } + } + } + return EBUR128_SUCCESS; +} + +static int ebur128_init_resampler(ebur128_state* st) { + int errcode = EBUR128_SUCCESS; + + if (st->samplerate < 96000) { + st->d->interp = interp_create(49, 4, st->channels); + CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit) + } else if (st->samplerate < 192000) { + st->d->interp = interp_create(49, 2, st->channels); + CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit) + } else { + st->d->resampler_buffer_input = NULL; + st->d->resampler_buffer_output = NULL; + st->d->interp = NULL; + goto exit; + } + + st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4; + st->d->resampler_buffer_input = malloc(st->d->resampler_buffer_input_frames * + st->channels * + sizeof(float)); + CHECK_ERROR(!st->d->resampler_buffer_input, EBUR128_ERROR_NOMEM, free_interp) + + st->d->resampler_buffer_output_frames = + st->d->resampler_buffer_input_frames * + st->d->interp->factor; + st->d->resampler_buffer_output = malloc + (st->d->resampler_buffer_output_frames * + st->channels * + sizeof(float)); + CHECK_ERROR(!st->d->resampler_buffer_output, EBUR128_ERROR_NOMEM, free_input) + + return errcode; + +free_interp: + interp_destroy(st->d->interp); + st->d->interp = NULL; +free_input: + free(st->d->resampler_buffer_input); + st->d->resampler_buffer_input = NULL; +exit: + return errcode; +} + +static void ebur128_destroy_resampler(ebur128_state* st) { + free(st->d->resampler_buffer_input); + st->d->resampler_buffer_input = NULL; + free(st->d->resampler_buffer_output); + st->d->resampler_buffer_output = NULL; + interp_destroy(st->d->interp); + st->d->interp = NULL; +} + +void ebur128_get_version(int* major, int* minor, int* patch) { + *major = EBUR128_VERSION_MAJOR; + *minor = EBUR128_VERSION_MINOR; + *patch = EBUR128_VERSION_PATCH; +} + +ebur128_state* ebur128_init(unsigned int channels, + unsigned long samplerate, + int mode) { + int result; + int errcode; + ebur128_state* st; + unsigned int i; + size_t j; + + if (channels == 0 || samplerate < 5) { + return NULL; + } + + st = (ebur128_state*) malloc(sizeof(ebur128_state)); + CHECK_ERROR(!st, 0, exit) + st->d = (struct ebur128_state_internal*) + malloc(sizeof(struct ebur128_state_internal)); + CHECK_ERROR(!st->d, 0, free_state) + st->channels = channels; + errcode = ebur128_init_channel_map(st); + CHECK_ERROR(errcode, 0, free_internal) + + st->d->sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map) + st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_sample_peak, 0, free_sample_peak) + st->d->true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->true_peak, 0, free_prev_sample_peak) + 
st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_true_peak, 0, free_true_peak) + for (i = 0; i < channels; ++i) { + st->d->sample_peak[i] = 0.0; + st->d->prev_sample_peak[i] = 0.0; + st->d->true_peak[i] = 0.0; + st->d->prev_true_peak[i] = 0.0; + } + + st->d->use_histogram = mode & EBUR128_MODE_HISTOGRAM ? 1 : 0; + st->d->history = ULONG_MAX; + st->samplerate = samplerate; + st->d->samples_in_100ms = (st->samplerate + 5) / 10; + st->mode = mode; + if ((mode & EBUR128_MODE_S) == EBUR128_MODE_S) { + st->d->window = 3000; + } else if ((mode & EBUR128_MODE_M) == EBUR128_MODE_M) { + st->d->window = 400; + } else { + goto free_prev_true_peak; + } + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, 0, free_true_peak) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + ebur128_init_filter(st); + + if (st->d->use_histogram) { + st->d->block_energy_histogram = malloc(1000 * sizeof(unsigned long)); + CHECK_ERROR(!st->d->block_energy_histogram, 0, free_audio_data) + for (i = 0; i < 1000; ++i) { + st->d->block_energy_histogram[i] = 0; + } + } else { + st->d->block_energy_histogram = NULL; + } + if (st->d->use_histogram) { + st->d->short_term_block_energy_histogram = malloc(1000 * sizeof(unsigned long)); + CHECK_ERROR(!st->d->short_term_block_energy_histogram, 0, free_block_energy_histogram) + for (i = 0; i < 1000; ++i) { + st->d->short_term_block_energy_histogram[i] = 0; + } + } else { + st->d->short_term_block_energy_histogram = NULL; + } + STAILQ_INIT(&st->d->block_list); + st->d->block_list_size = 0; + st->d->block_list_max = st->d->history / 100; + STAILQ_INIT(&st->d->short_term_block_list); + st->d->st_block_list_size = 0; + st->d->st_block_list_max = st->d->history / 3000; + st->d->short_term_frame_counter = 0; + + result = ebur128_init_resampler(st); + CHECK_ERROR(result, 0, free_short_term_block_energy_histogram) + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + + /* initialize static constants */ + relative_gate_factor = pow(10.0, relative_gate / 10.0); + minus_twenty_decibels = pow(10.0, -20.0 / 10.0); + histogram_energy_boundaries[0] = pow(10.0, (-70.0 + 0.691) / 10.0); + if (st->d->use_histogram) { + for (i = 0; i < 1000; ++i) { + histogram_energies[i] = pow(10.0, ((double) i / 10.0 - 69.95 + 0.691) / 10.0); + } + for (i = 1; i < 1001; ++i) { + histogram_energy_boundaries[i] = pow(10.0, ((double) i / 10.0 - 70.0 + 0.691) / 10.0); + } + } + + return st; + +free_short_term_block_energy_histogram: + free(st->d->short_term_block_energy_histogram); +free_block_energy_histogram: + free(st->d->block_energy_histogram); +free_audio_data: + free(st->d->audio_data); +free_prev_true_peak: + free(st->d->prev_true_peak); +free_true_peak: + free(st->d->true_peak); +free_prev_sample_peak: + free(st->d->prev_sample_peak); +free_sample_peak: + free(st->d->sample_peak); +free_channel_map: + free(st->d->channel_map); +free_internal: + free(st->d); +free_state: + free(st); +exit: + 
return NULL; +} + +void ebur128_destroy(ebur128_state** st) { + struct ebur128_dq_entry* entry; + free((*st)->d->block_energy_histogram); + free((*st)->d->short_term_block_energy_histogram); + free((*st)->d->audio_data); + free((*st)->d->channel_map); + free((*st)->d->sample_peak); + free((*st)->d->prev_sample_peak); + free((*st)->d->true_peak); + free((*st)->d->prev_true_peak); + while (!STAILQ_EMPTY(&(*st)->d->block_list)) { + entry = STAILQ_FIRST(&(*st)->d->block_list); + STAILQ_REMOVE_HEAD(&(*st)->d->block_list, entries); + free(entry); + } + while (!STAILQ_EMPTY(&(*st)->d->short_term_block_list)) { + entry = STAILQ_FIRST(&(*st)->d->short_term_block_list); + STAILQ_REMOVE_HEAD(&(*st)->d->short_term_block_list, entries); + free(entry); + } + ebur128_destroy_resampler(*st); + free((*st)->d); + free(*st); + *st = NULL; +} + +static void ebur128_check_true_peak(ebur128_state* st, size_t frames) { + size_t c, i, frames_out; + + frames_out = interp_process(st->d->interp, frames, + st->d->resampler_buffer_input, + st->d->resampler_buffer_output); + + for (i = 0; i < frames_out; ++i) { + for (c = 0; c < st->channels; ++c) { + float val = st->d->resampler_buffer_output[i * st->channels + c]; + + if (val > st->d->prev_true_peak[c]) { + st->d->prev_true_peak[c] = val; + } else if (-val > st->d->prev_true_peak[c]) { + st->d->prev_true_peak[c] = -val; + } + } + } +} + +#ifdef __SSE2_MATH__ +#include +#define TURN_ON_FTZ \ + unsigned int mxcsr = _mm_getcsr(); \ + _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON); +#define TURN_OFF_FTZ _mm_setcsr(mxcsr); +#define FLUSH_MANUALLY +#else +#warning "manual FTZ is being used, please enable SSE2 (-msse2 -mfpmath=sse)" +#define TURN_ON_FTZ +#define TURN_OFF_FTZ +#define FLUSH_MANUALLY \ + st->d->v[ci][4] = fabs(st->d->v[ci][4]) < DBL_MIN ? 0.0 : st->d->v[ci][4]; \ + st->d->v[ci][3] = fabs(st->d->v[ci][3]) < DBL_MIN ? 0.0 : st->d->v[ci][3]; \ + st->d->v[ci][2] = fabs(st->d->v[ci][2]) < DBL_MIN ? 0.0 : st->d->v[ci][2]; \ + st->d->v[ci][1] = fabs(st->d->v[ci][1]) < DBL_MIN ? 0.0 : st->d->v[ci][1]; +#endif + +#define EBUR128_FILTER(type, min_scale, max_scale) \ +static void ebur128_filter_##type(ebur128_state* st, const type* src, \ + size_t frames) { \ + static double scaling_factor = \ + -((double) (min_scale)) > (double) (max_scale) ? 
\ + -((double) (min_scale)) : (double) (max_scale); \ + double* audio_data = st->d->audio_data + st->d->audio_data_index; \ + size_t i, c; \ + \ + TURN_ON_FTZ \ + \ + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK) { \ + for (c = 0; c < st->channels; ++c) { \ + double max = 0.0; \ + for (i = 0; i < frames; ++i) { \ + if (src[i * st->channels + c] > max) { \ + max = src[i * st->channels + c]; \ + } else if (-src[i * st->channels + c] > max) { \ + max = -1.0 * src[i * st->channels + c]; \ + } \ + } \ + max /= scaling_factor; \ + if (max > st->d->prev_sample_peak[c]) st->d->prev_sample_peak[c] = max; \ + } \ + } \ + if ((st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK && \ + st->d->interp) { \ + for (c = 0; c < st->channels; ++c) { \ + for (i = 0; i < frames; ++i) { \ + st->d->resampler_buffer_input[i * st->channels + c] = \ + (float) (src[i * st->channels + c] / scaling_factor); \ + } \ + } \ + ebur128_check_true_peak(st, frames); \ + } \ + for (c = 0; c < st->channels; ++c) { \ + int ci = st->d->channel_map[c] - 1; \ + if (ci < 0) continue; \ + else if (ci == EBUR128_DUAL_MONO - 1) ci = 0; /*dual mono */ \ + for (i = 0; i < frames; ++i) { \ + st->d->v[ci][0] = (double) (src[i * st->channels + c] / scaling_factor) \ + - st->d->a[1] * st->d->v[ci][1] \ + - st->d->a[2] * st->d->v[ci][2] \ + - st->d->a[3] * st->d->v[ci][3] \ + - st->d->a[4] * st->d->v[ci][4]; \ + audio_data[i * st->channels + c] = \ + st->d->b[0] * st->d->v[ci][0] \ + + st->d->b[1] * st->d->v[ci][1] \ + + st->d->b[2] * st->d->v[ci][2] \ + + st->d->b[3] * st->d->v[ci][3] \ + + st->d->b[4] * st->d->v[ci][4]; \ + st->d->v[ci][4] = st->d->v[ci][3]; \ + st->d->v[ci][3] = st->d->v[ci][2]; \ + st->d->v[ci][2] = st->d->v[ci][1]; \ + st->d->v[ci][1] = st->d->v[ci][0]; \ + } \ + FLUSH_MANUALLY \ + } \ + TURN_OFF_FTZ \ +} +EBUR128_FILTER(short, SHRT_MIN, SHRT_MAX) +EBUR128_FILTER(int, INT_MIN, INT_MAX) +EBUR128_FILTER(float, -1.0f, 1.0f) +EBUR128_FILTER(double, -1.0, 1.0) + +static double ebur128_energy_to_loudness(double energy) { + return 10 * (log(energy) / log(10.0)) - 0.691; +} + +static size_t find_histogram_index(double energy) { + size_t index_min = 0; + size_t index_max = 1000; + size_t index_mid; + + do { + index_mid = (index_min + index_max) / 2; + if (energy >= histogram_energy_boundaries[index_mid]) { + index_min = index_mid; + } else { + index_max = index_mid; + } + } while (index_max - index_min != 1); + + return index_min; +} + +static int ebur128_calc_gating_block(ebur128_state* st, size_t frames_per_block, + double* optional_output) { + size_t i, c; + double sum = 0.0; + double channel_sum; + for (c = 0; c < st->channels; ++c) { + if (st->d->channel_map[c] == EBUR128_UNUSED) { + continue; + } + channel_sum = 0.0; + if (st->d->audio_data_index < frames_per_block * st->channels) { + for (i = 0; i < st->d->audio_data_index / st->channels; ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + for (i = st->d->audio_data_frames - + (frames_per_block - + st->d->audio_data_index / st->channels); + i < st->d->audio_data_frames; ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + } else { + for (i = st->d->audio_data_index / st->channels - frames_per_block; + i < st->d->audio_data_index / st->channels; + ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + } + if (st->d->channel_map[c] == EBUR128_Mp110 || + 
st->d->channel_map[c] == EBUR128_Mm110 || + st->d->channel_map[c] == EBUR128_Mp060 || + st->d->channel_map[c] == EBUR128_Mm060 || + st->d->channel_map[c] == EBUR128_Mp090 || + st->d->channel_map[c] == EBUR128_Mm090) { + channel_sum *= 1.41; + } else if (st->d->channel_map[c] == EBUR128_DUAL_MONO) { + channel_sum *= 2.0; + } + sum += channel_sum; + } + sum /= (double) frames_per_block; + if (optional_output) { + *optional_output = sum; + return EBUR128_SUCCESS; + } else if (sum >= histogram_energy_boundaries[0]) { + if (st->d->use_histogram) { + ++st->d->block_energy_histogram[find_histogram_index(sum)]; + } else { + struct ebur128_dq_entry* block; + if (st->d->block_list_size == st->d->block_list_max) { + block = STAILQ_FIRST(&st->d->block_list); + STAILQ_REMOVE_HEAD(&st->d->block_list, entries); + } else { + block = (struct ebur128_dq_entry*) malloc(sizeof(struct ebur128_dq_entry)); + if (!block) { + return EBUR128_ERROR_NOMEM; + } + st->d->block_list_size++; + } + block->z = sum; + STAILQ_INSERT_TAIL(&st->d->block_list, block, entries); + } + return EBUR128_SUCCESS; + } else { + return EBUR128_SUCCESS; + } +} + +int ebur128_set_channel(ebur128_state* st, + unsigned int channel_number, + int value) { + if (channel_number >= st->channels) { + return 1; + } + if (value == EBUR128_DUAL_MONO && + (st->channels != 1 || channel_number != 0)) { + fprintf(stderr, "EBUR128_DUAL_MONO only works with mono files!\n"); + return 1; + } + st->d->channel_map[channel_number] = value; + return 0; +} + +int ebur128_change_parameters(ebur128_state* st, + unsigned int channels, + unsigned long samplerate) { + int errcode = EBUR128_SUCCESS; + size_t j; + + if (channels == 0 || samplerate < 5) { + return EBUR128_ERROR_NOMEM; + } + + if (channels == st->channels && + samplerate == st->samplerate) { + return EBUR128_ERROR_NO_CHANGE; + } + + free(st->d->audio_data); + st->d->audio_data = NULL; + + if (channels != st->channels) { + unsigned int i; + + free(st->d->channel_map); st->d->channel_map = NULL; + free(st->d->sample_peak); st->d->sample_peak = NULL; + free(st->d->prev_sample_peak); st->d->prev_sample_peak = NULL; + free(st->d->true_peak); st->d->true_peak = NULL; + free(st->d->prev_true_peak); st->d->prev_true_peak = NULL; + st->channels = channels; + + errcode = ebur128_init_channel_map(st); + CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit) + + st->d->sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->sample_peak, EBUR128_ERROR_NOMEM, exit) + st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_sample_peak, EBUR128_ERROR_NOMEM, exit) + st->d->true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->true_peak, EBUR128_ERROR_NOMEM, exit) + st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_true_peak, EBUR128_ERROR_NOMEM, exit) + for (i = 0; i < channels; ++i) { + st->d->sample_peak[i] = 0.0; + st->d->prev_sample_peak[i] = 0.0; + st->d->true_peak[i] = 0.0; + st->d->prev_true_peak[i] = 0.0; + } + } + if (samplerate != st->samplerate) { + st->samplerate = samplerate; + st->d->samples_in_100ms = (st->samplerate + 5) / 10; + ebur128_init_filter(st); + } + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + 
st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + ebur128_destroy_resampler(st); + errcode = ebur128_init_resampler(st); + CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit) + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + /* reset short term frame counter */ + st->d->short_term_frame_counter = 0; + +exit: + return errcode; +} + +int ebur128_set_max_window(ebur128_state* st, unsigned long window) +{ + int errcode = EBUR128_SUCCESS; + size_t j; + + if ((st->mode & EBUR128_MODE_S) == EBUR128_MODE_S && window < 3000) { + window = 3000; + } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && window < 400) { + window = 400; + } + if (window == st->d->window) { + return EBUR128_ERROR_NO_CHANGE; + } + + st->d->window = window; + free(st->d->audio_data); + st->d->audio_data = NULL; + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + /* reset short term frame counter */ + st->d->short_term_frame_counter = 0; + +exit: + return errcode; +} + +int ebur128_set_max_history(ebur128_state* st, unsigned long history) +{ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA && history < 3000) { + history = 3000; + } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && history < 400) { + history = 400; + } + if (history == st->d->history) { + return EBUR128_ERROR_NO_CHANGE; + } + st->d->history = history; + st->d->block_list_max = st->d->history / 100; + st->d->st_block_list_max = st->d->history / 3000; + while (st->d->block_list_size > st->d->block_list_max) { + struct ebur128_dq_entry* block = STAILQ_FIRST(&st->d->block_list); + STAILQ_REMOVE_HEAD(&st->d->block_list, entries); + free(block); + st->d->block_list_size--; + } + while (st->d->st_block_list_size > st->d->st_block_list_max) { + struct ebur128_dq_entry* block = STAILQ_FIRST(&st->d->short_term_block_list); + STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); + free(block); + st->d->st_block_list_size--; + } + return EBUR128_SUCCESS; +} + +static int ebur128_energy_shortterm(ebur128_state* st, double* out); +#define EBUR128_ADD_FRAMES(type) \ +int ebur128_add_frames_##type(ebur128_state* st, \ + const type* src, size_t frames) { \ + size_t src_index = 0; \ + unsigned int c = 0; \ + for (c = 0; c < st->channels; c++) { \ + st->d->prev_sample_peak[c] = 0.0; \ + st->d->prev_true_peak[c] = 0.0; \ + } \ + while (frames > 0) { \ + if (frames >= st->d->needed_frames) { \ + ebur128_filter_##type(st, src + src_index, st->d->needed_frames); \ + src_index += st->d->needed_frames * st->channels; 
\ + frames -= st->d->needed_frames; \ + st->d->audio_data_index += st->d->needed_frames * st->channels; \ + /* calculate the new gating block */ \ + if ((st->mode & EBUR128_MODE_I) == EBUR128_MODE_I) { \ + if (ebur128_calc_gating_block(st, st->d->samples_in_100ms * 4, NULL)) {\ + return EBUR128_ERROR_NOMEM; \ + } \ + } \ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \ + st->d->short_term_frame_counter += st->d->needed_frames; \ + if (st->d->short_term_frame_counter == st->d->samples_in_100ms * 30) { \ + struct ebur128_dq_entry* block; \ + double st_energy; \ + if (ebur128_energy_shortterm(st, &st_energy) == EBUR128_SUCCESS && \ + st_energy >= histogram_energy_boundaries[0]) { \ + if (st->d->use_histogram) { \ + ++st->d->short_term_block_energy_histogram[ \ + find_histogram_index(st_energy)];\ + } else { \ + if (st->d->st_block_list_size == st->d->st_block_list_max) { \ + block = STAILQ_FIRST(&st->d->short_term_block_list); \ + STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); \ + } else { \ + block = (struct ebur128_dq_entry*) \ + malloc(sizeof(struct ebur128_dq_entry)); \ + if (!block) return EBUR128_ERROR_NOMEM; \ + st->d->st_block_list_size++; \ + } \ + block->z = st_energy; \ + STAILQ_INSERT_TAIL(&st->d->short_term_block_list, \ + block, entries); \ + } \ + } \ + st->d->short_term_frame_counter = st->d->samples_in_100ms * 20; \ + } \ + } \ + /* 100ms are needed for all blocks besides the first one */ \ + st->d->needed_frames = st->d->samples_in_100ms; \ + /* reset audio_data_index when buffer full */ \ + if (st->d->audio_data_index == st->d->audio_data_frames * st->channels) {\ + st->d->audio_data_index = 0; \ + } \ + } else { \ + ebur128_filter_##type(st, src + src_index, frames); \ + st->d->audio_data_index += frames * st->channels; \ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \ + st->d->short_term_frame_counter += frames; \ + } \ + st->d->needed_frames -= frames; \ + frames = 0; \ + } \ + } \ + for (c = 0; c < st->channels; c++) { \ + if (st->d->prev_sample_peak[c] > st->d->sample_peak[c]) { \ + st->d->sample_peak[c] = st->d->prev_sample_peak[c]; \ + } \ + if (st->d->prev_true_peak[c] > st->d->true_peak[c]) { \ + st->d->true_peak[c] = st->d->prev_true_peak[c]; \ + } \ + } \ + return EBUR128_SUCCESS; \ +} +EBUR128_ADD_FRAMES(short) +EBUR128_ADD_FRAMES(int) +EBUR128_ADD_FRAMES(float) +EBUR128_ADD_FRAMES(double) + +static int ebur128_calc_relative_threshold(ebur128_state* st, + size_t* above_thresh_counter, + double* relative_threshold) { + struct ebur128_dq_entry* it; + size_t i; + + if (st->d->use_histogram) { + for (i = 0; i < 1000; ++i) { + *relative_threshold += st->d->block_energy_histogram[i] * + histogram_energies[i]; + *above_thresh_counter += st->d->block_energy_histogram[i]; + } + } else { + STAILQ_FOREACH(it, &st->d->block_list, entries) { + ++*above_thresh_counter; + *relative_threshold += it->z; + } + } + + return EBUR128_SUCCESS; +} + +static int ebur128_gated_loudness(ebur128_state** sts, size_t size, + double* out) { + struct ebur128_dq_entry* it; + double gated_loudness = 0.0; + double relative_threshold = 0.0; + size_t above_thresh_counter = 0; + size_t i, j, start_index; + + for (i = 0; i < size; i++) { + if (sts[i] && (sts[i]->mode & EBUR128_MODE_I) != EBUR128_MODE_I) { + return EBUR128_ERROR_INVALID_MODE; + } + } + + for (i = 0; i < size; i++) { + if (!sts[i]) { + continue; + } + ebur128_calc_relative_threshold(sts[i], &above_thresh_counter, &relative_threshold); + } + if (!above_thresh_counter) { + *out = -HUGE_VAL; + return 
EBUR128_SUCCESS; + } + + relative_threshold /= (double)above_thresh_counter; + relative_threshold *= relative_gate_factor; + + above_thresh_counter = 0; + if (relative_threshold < histogram_energy_boundaries[0]) { + start_index = 0; + } else { + start_index = find_histogram_index(relative_threshold); + if (relative_threshold > histogram_energies[start_index]) { + ++start_index; + } + } + for (i = 0; i < size; i++) { + if (!sts[i]) { + continue; + } + if (sts[i]->d->use_histogram) { + for (j = start_index; j < 1000; ++j) { + gated_loudness += sts[i]->d->block_energy_histogram[j] * + histogram_energies[j]; + above_thresh_counter += sts[i]->d->block_energy_histogram[j]; + } + } else { + STAILQ_FOREACH(it, &sts[i]->d->block_list, entries) { + if (it->z >= relative_threshold) { + ++above_thresh_counter; + gated_loudness += it->z; + } + } + } + } + if (!above_thresh_counter) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + gated_loudness /= (double) above_thresh_counter; + *out = ebur128_energy_to_loudness(gated_loudness); + return EBUR128_SUCCESS; +} + +int ebur128_relative_threshold(ebur128_state* st, double* out) { + double relative_threshold = 0.0; + size_t above_thresh_counter = 0; + + if ((st->mode & EBUR128_MODE_I) != EBUR128_MODE_I) { + return EBUR128_ERROR_INVALID_MODE; + } + + ebur128_calc_relative_threshold(st, &above_thresh_counter, &relative_threshold); + + if (!above_thresh_counter) { + *out = -70.0; + return EBUR128_SUCCESS; + } + + relative_threshold /= (double)above_thresh_counter; + relative_threshold *= relative_gate_factor; + + *out = ebur128_energy_to_loudness(relative_threshold); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_global(ebur128_state* st, double* out) { + return ebur128_gated_loudness(&st, 1, out); +} + +int ebur128_loudness_global_multiple(ebur128_state** sts, size_t size, + double* out) { + return ebur128_gated_loudness(sts, size, out); +} + +static int ebur128_energy_in_interval(ebur128_state* st, + size_t interval_frames, + double* out) { + if (interval_frames > st->d->audio_data_frames) { + return EBUR128_ERROR_INVALID_MODE; + } + ebur128_calc_gating_block(st, interval_frames, out); + return EBUR128_SUCCESS; +} + +static int ebur128_energy_shortterm(ebur128_state* st, double* out) { + return ebur128_energy_in_interval(st, st->d->samples_in_100ms * 30, out); +} + +int ebur128_loudness_momentary(ebur128_state* st, double* out) { + double energy; + int error = ebur128_energy_in_interval(st, st->d->samples_in_100ms * 4, + &energy); + if (error) { + return error; + } else if (energy <= 0.0) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + *out = ebur128_energy_to_loudness(energy); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_shortterm(ebur128_state* st, double* out) { + double energy; + int error = ebur128_energy_shortterm(st, &energy); + if (error) { + return error; + } else if (energy <= 0.0) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + *out = ebur128_energy_to_loudness(energy); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_window(ebur128_state* st, + unsigned long window, + double* out) { + double energy; + size_t interval_frames = st->samplerate * window / 1000; + int error = ebur128_energy_in_interval(st, interval_frames, &energy); + if (error) { + return error; + } else if (energy <= 0.0) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + *out = ebur128_energy_to_loudness(energy); + return EBUR128_SUCCESS; +} + +static int ebur128_double_cmp(const void *p1, const void *p2) { + const double* d1 = (const double*) 
p1; + const double* d2 = (const double*) p2; + return (*d1 > *d2) - (*d1 < *d2); +} + +/* EBU - TECH 3342 */ +int ebur128_loudness_range_multiple(ebur128_state** sts, size_t size, + double* out) { + size_t i, j; + struct ebur128_dq_entry* it; + double* stl_vector; + size_t stl_size; + double* stl_relgated; + size_t stl_relgated_size; + double stl_power, stl_integrated; + /* High and low percentile energy */ + double h_en, l_en; + int use_histogram = 0; + + for (i = 0; i < size; ++i) { + if (sts[i]) { + if ((sts[i]->mode & EBUR128_MODE_LRA) != EBUR128_MODE_LRA) { + return EBUR128_ERROR_INVALID_MODE; + } + if (i == 0 && sts[i]->mode & EBUR128_MODE_HISTOGRAM) { + use_histogram = 1; + } else if (use_histogram != !!(sts[i]->mode & EBUR128_MODE_HISTOGRAM)) { + return EBUR128_ERROR_INVALID_MODE; + } + } + } + + if (use_histogram) { + unsigned long hist[1000] = { 0 }; + size_t percentile_low, percentile_high; + size_t index; + + stl_size = 0; + stl_power = 0.0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + for (j = 0; j < 1000; ++j) { + hist[j] += sts[i]->d->short_term_block_energy_histogram[j]; + stl_size += sts[i]->d->short_term_block_energy_histogram[j]; + stl_power += sts[i]->d->short_term_block_energy_histogram[j] + * histogram_energies[j]; + } + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + + stl_power /= stl_size; + stl_integrated = minus_twenty_decibels * stl_power; + + if (stl_integrated < histogram_energy_boundaries[0]) { + index = 0; + } else { + index = find_histogram_index(stl_integrated); + if (stl_integrated > histogram_energies[index]) { + ++index; + } + } + stl_size = 0; + for (j = index; j < 1000; ++j) { + stl_size += hist[j]; + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + + percentile_low = (size_t) ((stl_size - 1) * 0.1 + 0.5); + percentile_high = (size_t) ((stl_size - 1) * 0.95 + 0.5); + + stl_size = 0; + j = index; + while (stl_size <= percentile_low) { + stl_size += hist[j++]; + } + l_en = histogram_energies[j - 1]; + while (stl_size <= percentile_high) { + stl_size += hist[j++]; + } + h_en = histogram_energies[j - 1]; + *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en); + return EBUR128_SUCCESS; + + } else { + stl_size = 0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) { + ++stl_size; + } + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + stl_vector = (double*) malloc(stl_size * sizeof(double)); + if (!stl_vector) { + return EBUR128_ERROR_NOMEM; + } + + j = 0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) { + stl_vector[j] = it->z; + ++j; + } + } + qsort(stl_vector, stl_size, sizeof(double), ebur128_double_cmp); + stl_power = 0.0; + for (i = 0; i < stl_size; ++i) { + stl_power += stl_vector[i]; + } + stl_power /= (double) stl_size; + stl_integrated = minus_twenty_decibels * stl_power; + + stl_relgated = stl_vector; + stl_relgated_size = stl_size; + while (stl_relgated_size > 0 && *stl_relgated < stl_integrated) { + ++stl_relgated; + --stl_relgated_size; + } + + if (stl_relgated_size) { + h_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.95 + 0.5)]; + l_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.1 + 0.5)]; + free(stl_vector); + *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en); + return EBUR128_SUCCESS; + } else { + free(stl_vector); + *out = 0.0; + return 
EBUR128_SUCCESS; + } + } +} + +int ebur128_loudness_range(ebur128_state* st, double* out) { + return ebur128_loudness_range_multiple(&st, 1, out); +} + +int ebur128_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) != EBUR128_MODE_SAMPLE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_prev_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) != EBUR128_MODE_SAMPLE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->prev_sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->true_peak[channel_number] > st->d->sample_peak[channel_number] + ? st->d->true_peak[channel_number] + : st->d->sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_prev_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->prev_true_peak[channel_number] + > st->d->prev_sample_peak[channel_number] + ? 
st->d->prev_true_peak[channel_number]
+             : st->d->prev_sample_peak[channel_number];
+  return EBUR128_SUCCESS;
+}
\ No newline at end of file
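Editor's note on ebur128_gated_loudness above: integration is gated twice, per ITU-R BS.1770 / EBU R128. A first pass averages the energy of all gating blocks above the absolute gate at -70 LUFS (histogram_energy_boundaries[0]); the relative threshold is then placed 10 LU below that average (relative_gate_factor), and only blocks at or above it contribute to the integrated loudness. A minimal Python sketch of the same arithmetic, assuming the input is a list of 400 ms gating-block energies as produced by ebur128_calc_gating_block:

    import math

    def energy_to_loudness(e):
        # same mapping as ebur128_energy_to_loudness(): L = -0.691 + 10*log10(E)
        return -0.691 + 10.0 * math.log10(e)

    def integrated_loudness(block_energies, abs_gate_lufs=-70.0):
        # pass 1: drop blocks below the absolute gate
        gated = [e for e in block_energies if energy_to_loudness(e) >= abs_gate_lufs]
        if not gated:
            return float("-inf")
        # relative threshold: mean energy of the surviving blocks, 10 LU down
        rel_thresh = (sum(gated) / len(gated)) * 10.0 ** (-10.0 / 10.0)
        # pass 2: average only the blocks at or above the relative threshold
        kept = [e for e in gated if e >= rel_thresh]
        if not kept:
            return float("-inf")
        return energy_to_loudness(sum(kept) / len(kept))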
diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/CMakeLists.txt b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/CMakeLists.txt
new file mode 100644
index 0000000..3045b00
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_WAVES_SRCS)
+add_library(waves ${DIR_WAVES_SRCS})
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/inc/ExtraMono.h b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/inc/ExtraMono.h
new file mode 100644
index 0000000..280fab0
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/inc/ExtraMono.h
@@ -0,0 +1,230 @@
+
+#include <stdio.h>
+#include <string.h>
+#include <string>
+
+#define SIZE_LONG 4
+#define SIZE_SHORT 2
+
+#define SIZE_FLAG 4
+#define FMT_TAG 0x0001
+
+#define BITS_PER_BYTE 8
+
+#ifndef AFS_CMPL_MAX_WAV
+#define AFS_CMPL_MAX_WAV 15360000 // up to 16 minutes (960 * 16000)
+#endif
+
+//+---------------------------------------------------------------------------+
+//+ Read one 32-bit little-endian value from the file
+//+---------------------------------------------------------------------------+
+unsigned long fa_read_u32(FILE* fp)
+{
+    unsigned long cx;
+    unsigned char temp[SIZE_LONG];
+
+    fread(temp, sizeof(unsigned char), SIZE_LONG, fp);
+    cx = (unsigned long)temp[0];
+    cx |= (unsigned long)temp[1] << 8;
+    cx |= (unsigned long)temp[2] << 16;
+    cx |= (unsigned long)temp[3] << 24;
+    return cx;
+}
+
+//+---------------------------------------------------------------------------+
+//+ Read one 16-bit little-endian value from the file
+//+---------------------------------------------------------------------------+
+unsigned short fa_read_u16(FILE *fp)
+{
+    unsigned short cx;
+    unsigned char temp[SIZE_SHORT];
+
+    fread(temp, sizeof(unsigned char), SIZE_SHORT, fp);
+    cx = temp[0] | (temp[1] * 256);
+    return cx;
+}
+
+int GetWaveHeadLen(const char* pszFile, unsigned short &channels, int &nPos, int& nLength)
+{
+    //+---------------------------------------------------------------------------+
+    //+ Parse the WAVE header
+    //+---------------------------------------------------------------------------+
+    unsigned char temp[SIZE_FLAG];
+    unsigned short bits_per_sample;
+    unsigned long x_size;
+    unsigned long n_skip;
+
+    unsigned short format;
+    //unsigned short channels;
+    unsigned long sample_rate;
+    unsigned short block_align;
+    unsigned long data_size;
+    int nCnt = 0;
+
+    /* read the common fields */
+    FILE* pWavFile = fopen(pszFile, "rb");
+    if ( pWavFile == NULL )
+    {
+        printf("Input file can not be opened!\n");
+        return -1;
+    }
+
+    fseek(pWavFile, 0, SEEK_END );
+    nLength = ftell(pWavFile);
+    fseek(pWavFile, 0, SEEK_SET );
+
+    // the resource tag must be "RIFF"
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "RIFF", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Resource flag is not RIFF!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+    nCnt += SIZE_LONG;
+
+    // the file tag must be "WAVE"
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "WAVE", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "File flag is not WAVE\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    // the format tag must be "fmt "
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    if ( memcmp(temp, "fmt ", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Format flag is not FMT!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    x_size = fa_read_u32(pWavFile);
+    nCnt += SIZE_LONG;
+
+    // the encoding must be PCM (0x0001)
+    format = fa_read_u16(pWavFile);
+    nCnt += SIZE_SHORT;
+    if ( format != FMT_TAG )
+    {
+        fprintf(stderr, "Encoding format is not 0x0001!\n");
+        fclose(pWavFile);
+
+        return -1;
+    }
+
+    // read channel count and sample rate
+    channels = fa_read_u16(pWavFile);
+    sample_rate = fa_read_u32(pWavFile);
+
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+
+    // read block align and bits per sample
+    block_align = fa_read_u16(pWavFile);
+    bits_per_sample = fa_read_u16(pWavFile);
+
+    /* skip any extra bytes in the fmt chunk */
+    x_size -= (4*SIZE_SHORT + 2*SIZE_LONG);
+    if ( x_size != 0 )
+    {
+        fseek(pWavFile, x_size, SEEK_CUR);
+    }
+
+    // walk the remaining chunks until "data" and read its size
+    fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    while ( memcmp(temp, "data", SIZE_FLAG) != 0 )
+    {
+        n_skip = fa_read_u32(pWavFile);
+        fseek(pWavFile, n_skip, SEEK_CUR);
+
+        fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile);
+    }
+
+    data_size = fa_read_u32(pWavFile);
+    fclose(pWavFile);
+
+    //+---------------------------------------------------------------------------+
+    //+ Return the length of the WAVE header
+    //+---------------------------------------------------------------------------+
+    nPos = nCnt;
+    int nHeadLength = nLength - data_size;
+    return nHeadLength;
+}
+
+bool ExtraMono(const std::string &sInput, const std::string &sOutput)
+{
+    FILE *pFile = fopen(sInput.c_str(), "rb");
+    if ( NULL == pFile )
+    {
+        printf("Fopen Error %s", sInput.c_str());
+        return false;
+    }
+
+    FILE *pFile2 = fopen(sOutput.c_str(), "wb");
+    if ( NULL == pFile2 )
+    {
+        printf("Fopen2 Error %s", sOutput.c_str());
+        fclose(pFile);
+        return false;
+    }
+
+    short *pBuf = new short[AFS_CMPL_MAX_WAV];
+    int nLen = 0;
+
+    nLen = fread(pBuf, sizeof(short), AFS_CMPL_MAX_WAV, pFile);
+    if ( nLen <= 0 )
+    {
+        perror("Fread Error!");
+        delete []pBuf;
+        fclose(pFile);
+        fclose(pFile2);
+        return false;
+    }
+
+    unsigned short channels = 0;
+    int nPos;
+    int nLength;
+    int nHeadByte = GetWaveHeadLen(sInput.c_str(), channels, nPos, nLength);
+    int nHeadShort = nHeadByte/2;
+
+    if (channels == 1)
+    {
+        fwrite(pBuf + nHeadShort, sizeof(short), nLen - nHeadShort, pFile2);
+    }
+    else
+    {
+        short *pBuf2 = new short[AFS_CMPL_MAX_WAV];
+        memcpy( pBuf2, pBuf, nHeadShort*sizeof(short));
+        pBuf2[nPos] = 1;
+
+        unsigned char tmp[2];
+        memcpy(tmp, &pBuf2[nPos], 2);
+
+        pBuf2[nPos] = static_cast<short>(tmp[0] | tmp[1]*256);
+
+        short *pWav = pBuf + nHeadShort;
+        nLen -= nHeadShort;
+
+        // keep one channel of every interleaved sample pair
+        int halfnlen = nLen/2;
+        for (int i = 0; i < halfnlen; i++ )
+        {
+            pBuf2[nHeadShort+i] = *(pWav+i*2);
+        }
+        // write the header plus the de-interleaved mono samples
+        fwrite(pBuf2, sizeof(short), halfnlen+nHeadShort, pFile2);
+
+        delete []pBuf2;
+        pBuf2 = NULL;
+    }
+
+    // release the read buffer on both paths
+    delete []pBuf;
+    pBuf = NULL;
+
+    fclose(pFile);
+    fclose(pFile2);
+    return true;
+}
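Editor's note on GetWaveHeadLen above: the chunk loop assumes every payload is exactly n_skip bytes, but RIFF pads odd-sized chunks to a word boundary, so a file with an odd-length LIST or fact chunk would desynchronise the scan. A small Python cross-check, a sketch only (the odd-size rounding is the one behaviour the C code lacks):

    import struct

    def wav_data_offset(path):
        # walk RIFF chunks; return (offset of PCM data, data size in bytes)
        with open(path, "rb") as f:
            riff, _riff_size, wave = struct.unpack("<4sI4s", f.read(12))
            if riff != b"RIFF" or wave != b"WAVE":
                raise ValueError("not a RIFF/WAVE file")
            while True:
                hdr = f.read(8)
                if len(hdr) < 8:
                    raise ValueError("no data chunk found")
                chunk_id, chunk_size = struct.unpack("<4sI", hdr)
                if chunk_id == b"data":
                    return f.tell(), chunk_size
                f.seek(chunk_size + (chunk_size & 1), 1)  # chunks are word-aligned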
diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/inc/WaveFile.h b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/inc/WaveFile.h
new file mode 100644
index 0000000..8b57806
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/inc/WaveFile.h
@@ -0,0 +1,74 @@
+#ifndef WAVE_FILE_H
+#define WAVE_FILE_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+
+typedef enum SAMPLE_FORMAT
+{
+    SF_U8 = 8,
+    SF_S16 = 16,
+    SF_S24 = 24,
+    SF_S32 = 32,
+    SF_IEEE_FLOAT = 0x100 + 32,
+    SF_IEEE_DOUBLE = 0x100 + 64,
+    SF_MAX,
+} SAMPLE_FORMAT;
+
+/* Main WAV reader/writer class **/
+class CWaveFile
+{
+public:
+    /* The constructor takes the file name and whether to read or write **/
+    CWaveFile(const char* Filename, bool Write);
+    virtual ~CWaveFile();
+
+public:
+    int GetChannels();
+    int GetSampleRate();
+    double GetDuration(); // in second
+    uint32_t GetChannelMask();
+    void SetChannels(int Channels);
+    void SetSampleRate(int SampleRate);
+    void SetSampleFormat(SAMPLE_FORMAT Format);
+    void SetChannelMask(uint32_t Mask);
+    void Stat();
+    void SetupDone();
+    bool ReadFrameAsS16(short* FrameSamples, int Frames = 1);
+    bool ReadFrameAsDouble(double* FrameSamples, int Frames = 1);
+    bool ReadFrameAsfloat(float* FrameSamples, int Frames = 1);
+    void WriteRaw(void* Raw, int Size);
+    void WriteFrame(uint8_t* FrameSamples, int Frames = 1);
+    void WriteFrame(short* FrameSamples, int Frames = 1);
+    void WriteFrame(int32_t* FrameSamples, int Frames = 1);
+    void WriteFrameS24(int32_t* FrameSamples, int Frames = 1);
+    void WriteFrame(double* FrameSamples, int Frames = 1);
+    void WriteFrame(float* FrameSamples, int Frames=1);
+    void Seek(int FramePos, int Where = SEEK_SET);
+    bool GetStatus();
+    SAMPLE_FORMAT GetFormat();
+    int GetTotalFrames();
+    int GetFramesRead();
+
+
+protected:
+    FILE* File;
+    int Channels;                /* channel count **/
+    int SampleRate;              /* sample rate **/
+    SAMPLE_FORMAT Format;        /* sample format **/
+    int SampleSize;              // Measured in Bits
+    unsigned int FrameStartPos;  /* start offset of the audio data **/
+    unsigned long TotalFrames;   /* total frames; for 16-bit data one short is one frame **/
+    unsigned long FramesRead;
+    double Duration;             /* duration in seconds **/
+
+    bool ReadOnly;               /* read mode or write mode **/
+
+    uint32_t ChannelMask;
+
+    bool m_bOK;                  /* whether the file was opened successfully **/
+};
+
+
+#endif
\ No newline at end of file
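Editor's note on the writer path declared above: CWaveFile writes a placeholder header first (SetupDone in WaveFile.cpp below), streams samples, and patches the two RIFF size fields on close in the destructor. A minimal sketch of the same 44-byte 16-bit PCM layout, built in one go when the frame count is known up front (all field names here are illustrative):

    import struct

    def pcm16_wav_header(num_frames, channels=2, rate=44100):
        # canonical 44-byte RIFF header for 16-bit PCM
        data_bytes = num_frames * channels * 2
        return (b"RIFF" + struct.pack("<I", 36 + data_bytes) + b"WAVE"
                + b"fmt " + struct.pack("<IHHIIHH",
                                        16,                   # fmt chunk size
                                        1,                    # WAVE_FORMAT_PCM
                                        channels, rate,
                                        rate * channels * 2,  # byte rate
                                        channels * 2,         # block align
                                        16)                   # bits per sample
                + b"data" + struct.pack("<I", data_bytes))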
diff --git a/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/src/WaveFile.cpp b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/src/WaveFile.cpp
new file mode 100644
index 0000000..1a47272
--- /dev/null
+++ b/AIMeiSheng/voice_classification/script/music_voice_class/standard_audio_no_cut/ref/waves/src/WaveFile.cpp
@@ -0,0 +1,818 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#if WIN32
+#else
+#include <unistd.h>
+#endif
+
+#include "WaveFile.h"
+
+#define SPEAKER_FRONT_LEFT 0x1
+#define SPEAKER_FRONT_RIGHT 0x2
+#define SPEAKER_FRONT_CENTER 0x4
+#define SPEAKER_LOW_FREQUENCY 0x8
+#define SPEAKER_BACK_LEFT 0x10
+#define SPEAKER_BACK_RIGHT 0x20
+#define SPEAKER_FRONT_LEFT_OF_CENTER 0x40
+#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80
+#define SPEAKER_BACK_CENTER 0x100
+#define SPEAKER_SIDE_LEFT 0x200
+#define SPEAKER_SIDE_RIGHT 0x400
+#define SPEAKER_TOP_CENTER 0x800
+#define SPEAKER_TOP_FRONT_LEFT 0x1000
+#define SPEAKER_TOP_FRONT_CENTER 0x2000
+#define SPEAKER_TOP_FRONT_RIGHT 0x4000
+#define SPEAKER_TOP_BACK_LEFT 0x8000
+#define SPEAKER_TOP_BACK_CENTER 0x10000
+#define SPEAKER_TOP_BACK_RIGHT 0x20000
+#define SPEAKER_RESERVED 0x80000000
+
+
+#define SPEAKER_REAR_CENTER_SURROUND SPEAKER_BACK_CENTER
+
+#define DCA_MONO 0
+#define DCA_CHANNEL 1
+#define DCA_STEREO 2
+#define DCA_STEREO_SUMDIFF 3
+#define DCA_STEREO_TOTAL 4
+#define DCA_3F 5
+#define DCA_2F1R 6
+#define DCA_3F1R 7
+#define DCA_2F2R 8
+#define DCA_3F2R 9
+#define DCA_4F2R 10
+
+#define DCA_DOLBY 101 /* FIXME */
+
+#define DCA_CHANNEL_MAX DCA_3F2R /* We don't handle anything above that */
+#define DCA_CHANNEL_BITS 6
+#define DCA_CHANNEL_MASK 0x3F
+
+#define DCA_LFE 0x80
+#define DCA_ADJUST_LEVEL 0x100
+
+#define WAVE_FORMAT_PCM 0x0001
+#define WAVE_FORMAT_IEEE_FLOAT 0x0003
+#define WAVE_FORMAT_EXTENSIBLE 0xFFFE
+
+static uint8_t wav_header[] = {
+    'R', 'I', 'F', 'F', 0xfc, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+    'f', 'm', 't', ' ', 16, 0, 0, 0,
+    WAVE_FORMAT_PCM, WAVE_FORMAT_PCM >> 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0,
+    'd', 'a', 't', 'a', 0xd8, 0xff, 0xff, 0xff
+};
+
+static uint8_t wavmulti_header[] = {
+    'R', 'I', 'F', 'F', 0xf0, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+    'f', 'm', 't', ' ', 40, 0, 0, 0,
+    (uint8_t)(WAVE_FORMAT_EXTENSIBLE & 0xFF), WAVE_FORMAT_EXTENSIBLE >> 8,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 22, 0,
+    0, 0, 0, 0, 0, 0,
+    WAVE_FORMAT_IEEE_FLOAT, WAVE_FORMAT_IEEE_FLOAT >> 8,
+    0, 0, 0, 0, 0x10, 0x00, 0x80, 0, 0, 0xaa, 0, 0x38, 0x9b, 0x71,
+    'd', 'a', 't', 'a', 0xb4, 0xff, 0xff, 0xff
+};
+
+static void store4 (uint8_t * buf, int value)
+{
+    buf[0] = value;
+    buf[1] = value >> 8;
+    buf[2] = value >> 16;
+    buf[3] = value >> 24;
+}
+
+static void store2 (uint8_t * buf, int value)
+{
+    buf[0] = value;
+    buf[1] = value >> 8;
+}
+
+
+static uint32_t find_chunk(FILE * file, const uint8_t chunk_id[4])
+{
+    uint8_t buffer[8];
+    while (1) {
+        size_t chunksize;
+        size_t s = fread(buffer, 1, 8, file);
+        if (s < 8)
+            return 0;
+        chunksize = (uint32_t)buffer[4] | ((uint32_t)buffer[5] << 8) |
+            ((uint32_t)buffer[6] << 16) | ((uint32_t)buffer[7] << 24);
+        if (!memcmp(buffer, chunk_id, 4))
+            return chunksize;
+        fseek(file, chunksize, SEEK_CUR);
+    }
+}
+
+
+CWaveFile::CWaveFile(const char* Filename, bool Write)
+    : Duration(0), ReadOnly(false), m_bOK(false)
+{
+    Channels = 0;
+
+    /* open the file **/
+    File = fopen(Filename, Write ? "wb":"rb");
+    if ( !File )
+        return;
+
+    /* defaults for the write path **/
+    if ( Write )
+    {
+        SampleRate = 44100;
+        Channels = 2;
+        Format = SF_S16;
+        SampleSize = 16;
+        ChannelMask = 0;
+        m_bOK = true;
+        return;
+    }
+
+    ReadOnly = true;
+
+    size_t s;
+    uint8_t buffer[8];
+    uint8_t *fmt = NULL;
+    uint32_t v;
+    uint32_t avg_bps;
+    uint32_t block_align;
+    unsigned short FormatType;
+    unsigned short SampleType;
+
+    static const uint8_t riff[4] = { 'R', 'I', 'F', 'F' };
+    static const uint8_t wave[4] = { 'W', 'A', 'V', 'E' };
+    static const uint8_t fmt_[4] = { 'f', 'm', 't', ' ' };
+    static const uint8_t data[4] = { 'd', 'a', 't', 'a' };
+
+    /* the first four bytes must be RIFF **/
+    s = fread(buffer, 1, 8, File);
+    if (s < 8)
+        goto err2;
+
+    if (memcmp(buffer, riff, 4))
+        goto err2;
+
+    /* bytes 8..12 must be WAVE **/
+    /* TODO: check size (in buffer[4..8]) */
+    s = fread(buffer, 1, 4, File);
+    if (s < 4)
+        goto err2;
+
+    if (memcmp(buffer, wave, 4))
+        goto err2;
+
+    s = find_chunk(File, fmt_);
+    if ( s != 16 && s != 18 && s != 40 )
+        goto err2;
+
+    fmt = (uint8_t*)malloc(s);
+    if (!fmt)
+        goto err2;
+
+    if (fread(fmt, 1, s, File) != s)
+        goto err3;
+
+    /* wFormatTag */
+    v = (uint32_t)fmt[0] | ((uint32_t)fmt[1] << 8);
+    if (v != WAVE_FORMAT_PCM && v != WAVE_FORMAT_IEEE_FLOAT && v != WAVE_FORMAT_EXTENSIBLE)
+        goto err3;
+
+    FormatType = v;
+
+    if (s == 40 && 0xfffe == v)
+    {
+        // fmt begins at 0x14 of the wave file
+        v = *(unsigned short*)&fmt[0x2C - 0x14];
+    }
+
+    SampleType = v;
+
+    /* wChannels */
+    v = (uint32_t)fmt[2] | ((uint32_t)fmt[3] << 8);
+
+    Channels = v;
+
+    if (v < 1 || v > 32)
+        goto err3;
+
+    /* dwSamplesPerSec */
+    SampleRate = (uint32_t)fmt[4] | ((uint32_t)fmt[5] << 8) |
+        ((uint32_t)fmt[6] << 16) | ((uint32_t)fmt[7] << 24);
+
+    /* dwAvgBytesPerSec */
+    avg_bps = (uint32_t)fmt[8] | ((uint32_t)fmt[9] << 8) |
+        ((uint32_t)fmt[10] << 16) | ((uint32_t)fmt[11] << 24);
+
+    /* wBlockAlign */
+    block_align = (uint32_t)fmt[12] | ((uint32_t)fmt[13] << 8);
+
+    /* wBitsPerSample */
+    SampleSize = (uint32_t)fmt[14] | ((uint32_t)fmt[15] << 8);
+    if (SampleSize != 8 && SampleSize != 16 && SampleSize != 32 && SampleSize
!= 24 && SampleSize != 64) + goto err3; + + switch (SampleSize) + { + case 8: + Format = SF_U8; + break; + case 16: + Format = SF_S16; + break; + case 24: + Format = SF_S24; + break; + case 32: + { + if (SampleType == WAVE_FORMAT_IEEE_FLOAT) + Format = SF_IEEE_FLOAT; + else + Format = SF_S32; + + } + break; + case 64: + if (SampleType != WAVE_FORMAT_IEEE_FLOAT) + goto err3; + Format = SF_IEEE_DOUBLE; + break; + } + + + // Handle 24-bit samples individually +#if 0 + if (SampleSize == 24 && Channels <= 2) + { + int ba24 = Channels * (SampleSize / 8); // Align to 4x + + ba24 = (ba24 + 3) / 4 * 4; + + if (block_align != ba24) + goto err3; + } + else +#endif + { + if (block_align != Channels * (SampleSize / 8)) + goto err3; + } + + if (avg_bps != block_align * SampleRate) + goto err3; + + v = find_chunk(File, data); + + if (v == 0 || v % block_align != 0) + goto err3; + + TotalFrames = v / block_align; + + FramesRead = 0; + + if (FormatType == WAVE_FORMAT_EXTENSIBLE) + { + ChannelMask = *(unsigned int*)(&fmt[0x14]); + } + else + { + ChannelMask = 0; + } + + FrameStartPos = ftell(File); + + free(fmt); + m_bOK = true; + return; + +err3: + free(fmt); +err2: + fclose(File); + + File = NULL; +} + +bool CWaveFile::GetStatus() +{ + return m_bOK; +} + +SAMPLE_FORMAT CWaveFile::GetFormat() +{ + return Format; +} + +int CWaveFile::GetTotalFrames() +{ + return TotalFrames; +} + +int CWaveFile::GetFramesRead() +{ + return FramesRead; +} + +CWaveFile::~CWaveFile() +{ + if (File != NULL) + { + if (!ReadOnly) + { + unsigned int Size = ftell(File) - FrameStartPos;// 44; + + fseek(File, FrameStartPos - 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + + Size += FrameStartPos - 8; + + fseek(File, 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + } + + fclose(File); + } +} + +int CWaveFile::GetSampleRate() +{ + return SampleRate; +} + +void CWaveFile::SetSampleRate(int SampleRate) +{ + this->SampleRate = SampleRate; +} + +void CWaveFile::SetupDone() +{ + unsigned char Header[68]; + + fseek(File, 0, SEEK_SET); + + SampleSize = Format & 0xFF; + + if (ChannelMask) + { + memcpy(Header, wavmulti_header, sizeof(wavmulti_header)); + + if (Format < SF_IEEE_FLOAT) + { + // store2(Header + 20, WAVE_FORMAT_PCM); + store2(Header + 44, WAVE_FORMAT_PCM); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + store2(Header + 38, SampleSize / 8 * 8); + store4(Header + 40, ChannelMask); + + fwrite(Header, sizeof(wavmulti_header), 1, File); + } + else + { + memcpy(Header, wav_header, sizeof(wav_header)); + + if (Format >= SF_IEEE_FLOAT) + { + store2(Header + 20, WAVE_FORMAT_IEEE_FLOAT); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + fwrite(Header, sizeof(wav_header), 1, File); + } + + + FrameStartPos = ftell(File); +} + + +void CWaveFile::Seek(int FramePos, int Where) +{ + // Ignoring Where + + fseek(File, FrameStartPos + FramePos * Channels* (SampleSize / 8), Where); + + FramesRead = FramePos; + +} + +int CWaveFile::GetChannels() +{ + return Channels; +} + +void CWaveFile::SetChannels(int Channels) +{ + this->Channels = Channels; +} + +void CWaveFile::SetSampleFormat(SAMPLE_FORMAT Format) +{ + this->Format = Format; +} + +uint32_t CWaveFile::GetChannelMask() +{ + return 
ChannelMask; +} + +void CWaveFile::SetChannelMask(uint32_t Mask) +{ + ChannelMask = Mask; +} + +bool CWaveFile::ReadFrameAsS16(short* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) << 8; + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + return Frames == fread(FrameSamples, sizeof(FrameSamples[0])*Channels, Frames, File); + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 8); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 16); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + double DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + } + return false; +} + +bool CWaveFile::ReadFrameAsfloat(float* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return 
false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +bool CWaveFile::ReadFrameAsDouble(double* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample ))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +void CWaveFile::WriteRaw(void* Raw, int Size) +{ + fwrite(Raw, Size, 1, File); +} + + +void CWaveFile::WriteFrame(uint8_t* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, 
File);
+}
+
+void CWaveFile::WriteFrame(short* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+void CWaveFile::WriteFrame(int32_t* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+void CWaveFile::WriteFrameS24(int32_t* FrameSamples, int Frames)
+{
+    for (int c = 0; c < Channels; c++)
+    {
+        fwrite(&FrameSamples[c], 3, 1, File);
+    }
+}
+
+void CWaveFile::WriteFrame(double* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+void CWaveFile::WriteFrame(float* FrameSamples, int Frames)
+{
+    fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+
+double CWaveFile::GetDuration()
+{
+    return Duration;
+}
diff --git a/AIMeiSheng/voice_classification/train/CNN_v1.py b/AIMeiSheng/voice_classification/train/CNN_v1.py
new file mode 100644
index 0000000..320b21a
--- /dev/null
+++ b/AIMeiSheng/voice_classification/train/CNN_v1.py
@@ -0,0 +1,458 @@
+import torch.nn as nn
+import torch
+import torch.nn.functional as functional
+from tqdm import tqdm
+import os, sys
+from torch.optim.lr_scheduler import MultiStepLR
+import time
+from torch.utils.data import DataLoader
+import torch.utils.data as data
+import glob
+import numpy as np
+from torch.optim.rmsprop import RMSprop
+from torch.optim.adam import Adam
+import librosa
+from common import *
+
+FRAME_NUM = 31  # number of frames taken per sample from each utterance, one frame = 128 ms
+MFCC_LEN = 26
+SAMPEL_NUM = 1000  # number of samples drawn per utterance
+TRAIN_NUM = -1  # number of utterances used for training
+CURRENT_STAT = "train"
+
+PREDICT = False
+DEBUG = False
+
+def weights_init(m):
+    if isinstance(m, nn.Conv2d):
+        nn.init.kaiming_normal_(m.weight, mode='fan_out')
+        if m.bias is not None:
+            nn.init.zeros_(m.bias)
+    elif isinstance(m, nn.BatchNorm2d):
+        nn.init.ones_(m.weight)
+        nn.init.zeros_(m.bias)
+    elif isinstance(m, nn.Linear):
+        nn.init.normal_(m.weight, 0, 0.01)
+        if m.bias is not None:
+            nn.init.zeros_(m.bias)
+
+
+class GenderDNN(nn.Module):
+    """
+    10 frames
+    1 --->
+    2 --->      ---> 3 * 2 -> 2
+    3 --->
+    """
+    def __init__(self, input_dims, phase='train'):
+        super(GenderDNN, self).__init__()
+        layer1 = [
+            nn.Conv2d(1, 8, 3),
+            nn.BatchNorm2d(8),
+            nn.ReLU(),
+            nn.Conv2d(8, 16, 3, 2),
+            nn.BatchNorm2d(16),
+            nn.ReLU(),
+            nn.Conv2d(16, 32, 3, 2),
+            nn.BatchNorm2d(32),
+            nn.ReLU(),
+            # nn.AvgPool2d((6, 5)),
+        ]
+
+        self.layer2 = [
+            nn.Linear(32 * 6 * 5, 128),
+            nn.Linear(128, 32),
+            nn.Linear(32, 2),
+        ]
+        # self.layer2 = nn.Linear(32, 2)
+
+        # if phase == "test":
+        #     layer1.append(nn.Softmax(dim=-1))
+
+        self.layer1 = nn.Sequential(*layer1)
+        self.layer2 = nn.Sequential(*self.layer2)
+        # self.apply(weights_init)
+
+    def forward(self, x):
+        x = x.view([-1, 1, FRAME_NUM, MFCC_LEN])
+        x = self.layer1(x)
+        x = x.view([-1, 32 * 6 * 5])
+        x = self.layer2(x)
+        return x
+        # return self.layer1(x)
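Editor's note: the hard-coded flatten size 32 * 6 * 5 in forward() is exactly what the three convolutions produce from a 31-frame by 26-coefficient input (one kernel-3 layer, then two stride-2 layers). A quick shape check, assuming the same layer hyper-parameters as above:

    import torch
    import torch.nn as nn

    stack = nn.Sequential(
        nn.Conv2d(1, 8, 3),       # 31x26 -> 29x24
        nn.Conv2d(8, 16, 3, 2),   # 29x24 -> 14x11
        nn.Conv2d(16, 32, 3, 2),  # 14x11 -> 6x5
    )
    print(stack(torch.zeros(1, 1, 31, 26)).shape)  # torch.Size([1, 32, 6, 5])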
+
+
+class GenderDataset(data.Dataset):
+    def __init__(self, root, predict=False):
+        self.root = root
+        self._predict = predict
+        self.files = []
+        self.features, self.labels, self.files = self._get_dbs()
+        print("load data: feature_size={} label_size={} files={}".format(len(self.features), len(self.labels), len(self.files)))
+
+    def get_one_mfcc(self, file_url):
+        if file_url.split(".")[-1] == "npy":
+            mfcc = np.load(file_url)
+            if mfcc.shape[1] != 26:
+                mfcc = mfcc.transpose()
+        else:
+            data, sr = librosa.load(file_url, sr=16000)
+            mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=26)
+            mfcc = mfcc.transpose()
+        mfcc_o = []
+        for n in mfcc:
+            # if np.count_nonzero(n) > 20:
+            mfcc_o.append(n)
+        return np.array(mfcc_o)
+
+    def _load_mfcc(self, file_list):
+        tp_file_list = []
+        mfcc = []
+        print(file_list[0][0], len(file_list))
+
+        for idx, files in enumerate(file_list):
+            if DEBUG and idx > 64:
+                break
+            # each speaker may have several segments
+            new_item = []
+            for file in files:
+                item = self.get_one_mfcc(file)
+                if item.shape[0] < FRAME_NUM:
+                    continue
+                padding = FRAME_NUM // 2
+                for i in range(padding, item.shape[0]-padding):
+                    it = item[i-padding:i+padding+1]
+                    new_item.append(it)
+            if len(new_item) > 0:
+                new_item = np.array(new_item).reshape([-1, FRAME_NUM, MFCC_LEN])
+                mfcc.append(new_item)
+                tp_file_list.append(files[0].split("/")[-2])
+            # if len(mfcc) > 10:
+            #     return mfcc, tp_file_list
+            if len(mfcc) % 500 == 0:
+                print("current:{}/{}".format(len(mfcc), len(file_list)))
+        return mfcc, tp_file_list
+
+    def sp2dict(self, x_list):
+        tp_dict = {}
+        for x in x_list:
+            ret = str(x).split('/')
+            if ret[-2] not in tp_dict.keys():
+                tp_dict[ret[-2]] = []
+            tp_dict[ret[-2]].append(x)
+        return tp_dict
+
+    def _get_dbs(self):
+        male_file_list = glob.glob(os.path.join(self.root, 'male/*/', '*.npy'))
+        male_file_list = list(self.sp2dict(male_file_list).values())
+        # if 'train' == CURRENT_STAT:
+        #     male_file_list = male_file_list
+        print("file_list_len = {} ".format(len(male_file_list)))
+        male_mfcc, mfile_list = self._load_mfcc(male_file_list)
+        print("real_male_file_list = {}".format(len(male_mfcc)))
+
+        female_file_list = glob.glob(os.path.join(self.root, 'female/*/', '*.npy'))
+        female_file_list = list(self.sp2dict(female_file_list).values())
+        # if 'train' == CURRENT_STAT:
+        #     female_file_list = female_file_list
+        print("female_file_list_len {}".format(len(female_file_list)))
+        female_mfcc, fem_file_list = self._load_mfcc(female_file_list)
+        print("real_female_file_list_len {}".format(len(female_mfcc)))
+
+        labels = [0] * len(male_mfcc) + [1] * len(female_mfcc)
+        male_mfcc.extend(female_mfcc)
+        mfile_list.extend(fem_file_list)
+        return np.array(male_mfcc), np.array(labels), np.array(mfile_list)
+
+    def __len__(self):
+        return len(self.labels)
+
+    def __getitem__(self, idx):
+        feature = self.features[idx]
+        if not PREDICT:
+            new_idx = np.random.randint(0, len(feature))
+            feature = feature[new_idx]
+        label = self.labels[idx]
+        return feature, label
+
+
+def get_dataloader(root):
+    batch_size = 64
+    thread_num = 24
+
+    global CURRENT_STAT
+    CURRENT_STAT = 'mfcc_train'
+    train_root = os.path.join(root, 'mfcc_train')
+    trainset = GenderDataset(train_root)
+    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=thread_num)
+
+    CURRENT_STAT = 'mfcc_test'
+    val_root = os.path.join(root, 'mfcc_test')
+    valset = GenderDataset(val_root)
+    valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=thread_num)
+
+    return trainloader, valloader
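Editor's note: _get_dbs returns np.array(male_mfcc) over per-speaker window stacks whose lengths differ, i.e. a ragged array. NumPy 1.24 and later refuses to build such arrays implicitly; on a current stack the conversion needs dtype=object, a minimal illustration:

    import numpy as np

    # per-speaker window stacks with different counts (ragged)
    feats = [np.zeros((120, 31, 26)), np.zeros((80, 31, 26))]
    feats = np.array(feats, dtype=object)  # plain np.array(feats) raises on NumPy >= 1.24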
+
+
+def train_one_epoch(model, device, loader, optimizer, criterion):
+    model.train()
+
+    total_num = 0
+    total_loss = 0
+    correct = 0
+
+    for mfcces, labels in tqdm(loader):
+        batch_size = mfcces.size(0)
+        mfcces = mfcces.to(device)
+        labels = labels.to(device)
+
+        predicts = model(mfcces)
+
+        optimizer.zero_grad()
+        loss = criterion(predicts, labels)
+        loss.backward()
+        optimizer.step()
+
+        total_num += batch_size
+        total_loss += loss.item() * batch_size
+
+        _, predicts = predicts.max(dim=1)
+        correct += predicts.eq(labels).sum().item()
+
+    if total_num != 0:
+        total_loss = total_loss / total_num
+        correct = correct / total_num
+
+    return total_loss, correct
+
+
+def val_one_epoch(model, device, loader, criterion):
+    model.eval()
+
+    total_num = 0
+    total_loss = 0
+    correct = 0
+
+    with torch.no_grad():
+        for mfcces, labels in loader:
+            batch_size = mfcces.size(0)
+            mfcces = mfcces.to(device)
+            labels = labels.to(device)
+            predicts = model(mfcces)
+
+            loss = criterion(predicts, labels)
+
+            total_num += batch_size
+            total_loss += loss.item() * batch_size
+
+            _, predicts = predicts.max(dim=1)
+            correct += predicts.eq(labels).sum().item()
+
+    if total_num != 0:
+        total_loss = total_loss / total_num
+        correct = correct / total_num
+
+    return total_loss, correct
+
+
+def train(model, device, model_path, set_dir):
+    # training hyper-parameters
+    max_epoch = 500
+    lr = 0.01
+    momentum = 0
+    weight_decay = 0
+    # LR schedule parameters
+    milestones = [300, 400, 500, 650, 750]
+    gamma = 0.1
+    # checkpoint directory
+    save_directory = model_path
+    if not os.path.exists(save_directory):
+        os.makedirs(save_directory)
+
+    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
+    # optimizer = RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
+    # optimizer = Adam(model.parameters(), lr=lr)
+    criterion = nn.CrossEntropyLoss()
+    scheduler = MultiStepLR(optimizer, milestones, gamma)
+
+    # dataset location
+    data_dir = set_dir
+    train_loader, val_loader = get_dataloader(data_dir)
+
+    max_acc = 0
+
+    for i in range(max_epoch):
+        start = time.time()
+        t_loss, t_acc = train_one_epoch(model, device, train_loader, optimizer, criterion)
+        v_loss, v_acc = val_one_epoch(model, device, val_loader, criterion)
+        end = time.time()
+
+        scheduler.step()
+
+        msg = 't_loss:%f\tt_acc:%.2f' % (t_loss, t_acc * 100)
+        msg += '\tv_loss:%f\tv_acc:%.2f' % (v_loss, v_acc * 100)
+        msg += '\ttime:%f\tepoch:%d' % (end - start, i)
+        print(msg)
+
+        params = model.state_dict()
+        save_path = os.path.join(save_directory, 'DNN_epoch_' + str(i) + '_' + str(v_acc) + '.pth')
+        torch.save(params, save_path)
+
+        max_acc = max(max_acc, v_acc)
+
+    print('best acc:', max_acc)
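Editor's note on the schedule above: with max_epoch = 500 the 650 and 750 entries in milestones can never fire, and current PyTorch deprecates passing an epoch index to scheduler.step(), so step() is called once per epoch with no arguments. The equivalent minimal pattern:

    import torch
    from torch.optim.lr_scheduler import MultiStepLR

    opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.01)
    sched = MultiStepLR(opt, milestones=[300, 400, 500], gamma=0.1)
    for epoch in range(500):
        # ... one epoch of training and validation ...
        sched.step()  # once per epoch, after the optimizer steps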
+
+
+def predict_frames(model_path, set_dir):
+    """
+    Prediction code (frame level).
+    """
+    global SAMPEL_NUM
+    SAMPEL_NUM = 2000
+
+    global PREDICT
+    PREDICT = True
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # device = 'cpu'
+    model = GenderDNN(MFCC_LEN, phase='test')
+    model.load_state_dict(torch.load(model_path))
+    model.to(device)
+    model.eval()
+
+    predict_set = GenderDataset(set_dir, predict=True)
+    predict_loader = DataLoader(predict_set, batch_size=1, shuffle=False, num_workers=0)
+
+    male_male = 0
+    male_female = 0
+    female_male = 0
+    female_female = 0
+
+    with torch.no_grad():
+        for mfcces, labels in tqdm(predict_loader):
+            mfcces = mfcces.reshape([-1, FRAME_NUM * MFCC_LEN])
+            mfcces = mfcces.to(device)
+            # labels = labels.to(device)
+            predicts = model(mfcces)
+            _, predicts = predicts.max(dim=1)
+            pre_gender = predicts.sum().item()
+
+            # tally the confusion counts
+            if labels == 1:
+                female_female += pre_gender
+                female_male += mfcces.shape[0] - pre_gender
+            elif labels != 1:
+                male_female += pre_gender
+                male_male += mfcces.shape[0] - pre_gender
+    print(" predict: male, female")
+    print("actual:male: {} | {}".format(male_male, male_female))
+    print("actual:female: {} | {}".format(female_male, female_female))
+    print("male: acc={} recall={}".format(male_male / (male_male+female_male), male_male/(male_male+male_female)))
+    print("female: acc={} recall={}".format(female_female/(male_female+female_female), female_female/(female_female+female_male)))
+    print("total: acc={}".format((male_male+female_female)/(male_female+female_female+female_male+male_male)))
+
+
+def predict(model_path, set_dir, err_dir):
+    """
+    Prediction code (recording level).
+    """
+    global SAMPEL_NUM
+    SAMPEL_NUM = 2000
+
+    global PREDICT
+    PREDICT = True
+
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # device = 'cpu'
+    model = GenderDNN(MFCC_LEN, phase='test')
+    model.load_state_dict(torch.load(model_path))
+    model.to(device)
+    model.eval()
+    predict_set = GenderDataset(set_dir, predict=True)
+    predict_loader = DataLoader(predict_set, batch_size=1, shuffle=False, num_workers=0)
+
+    male_male = 0
+    male_female = 0
+    female_male = 0
+    female_female = 0
+    err_recording_male = []
+    err_recording_female = []
+    files = predict_set.files
+
+
+    gender_id_frame_rate = {
+        0: {},
+        1: {}
+    }
+    with torch.no_grad():
+        idx = 0
+        for mfcces, labels in tqdm(predict_loader):
+            mfcces = mfcces.reshape([-1, FRAME_NUM * MFCC_LEN])
+            mfcces = mfcces.to(device)
+            # labels = labels.to(device)
+            predicts = model(mfcces)
+            # female_score = torch.sum(predicts[:, 1])
+            # male_score = torch.sum(predicts[:, 0])
+            # pre_gender = int(female_score > male_score)
+
+            # # alternative scoring scheme
+            # new_predicts = predicts[:, 1] - predicts[:, 0]
+            # res.append(new_predicts.cpu().detach().numpy())
+            # threshold = 0.9
+            # female_num = torch.sum(new_predicts >= threshold)
+            # male_num = torch.sum(new_predicts <= -threshold)
+            #
+            # pre_gender = int(female_num > male_num)
+            id = str(files[idx])
+            labels = labels.item()
+            gender_id_frame_rate[labels][id] = predicts.cpu().detach().numpy()
+
+            _, predicts = predicts.max(dim=1)
+            pre_gender = predicts.sum().item() >= 0.5 * mfcces.shape[0]
+
+            # tally the confusion counts
+            if labels == 1 and pre_gender == 1:
+                female_female += 1
+            elif labels != 1 and pre_gender == 1:
+                male_female += 1
+                err_recording_male.append(files[idx])
+            elif labels == 1 and pre_gender != 1:
+                female_male += 1
+                err_recording_female.append(files[idx])
+
+            else:
+                male_male += 1
+            idx += 1
+    print(" predict: male, female")
+    print("actual:male: {} | {}".format(male_male, male_female))
+    print("actual:female: {} | {}".format(female_male, female_female))
+    print("male: acc={} recall={}".format(male_male / (male_male+female_male), male_male/(male_male+male_female)))
+    print("female: acc={} recall={}".format(female_female/(male_female+female_female), female_female/(female_female+female_male)))
+    print("total: acc={}".format((male_male+female_female)/(male_female+female_female+female_male+male_male)))
+
+    # np.save("dnn_gender_id_frame_rate", np.array(gender_id_frame_rate))
+    # write_file(os.path.join(err_dir, "male_err_list.txt"), err_recording_male)
+    # write_file(os.path.join(err_dir, "female_err_list.txt"), err_recording_female)
+
+
+def main(model_path, set_dir):
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+    model = GenderDNN(MFCC_LEN, phase='train')
+    model.to(device)
+
+    train(model, device, model_path, set_dir)
+
+
+if __name__ == '__main__':
+    run_mode = sys.argv[1]
+    model_path = sys.argv[2]
+    set_dir = sys.argv[3]
+    err_dir = sys.argv[4] if len(sys.argv) >= 5 else "."
+    if run_mode == "predict":
+        predict(model_path, set_dir, err_dir)
+        # predict_frames(model_path, set_dir)
+    else:
+        run_mode = "train"
+    if "train" == run_mode:
+        main(model_path, set_dir)
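Editor's note on the metrics printed by predict and predict_frames above: the per-gender "acc" is really precision (column-wise over the confusion counts), "recall" is row-wise, and only the "total" line is accuracy. With hypothetical counts for illustration:

    # hypothetical confusion counts: rows = actual, columns = predicted
    mm, mf = 940, 60    # actual male:   predicted male / predicted female
    fm, ff = 45, 955    # actual female: predicted male / predicted female

    male_precision = mm / (mm + fm)           # what the script prints as male "acc"
    male_recall = mm / (mm + mf)
    total_accuracy = (mm + ff) / (mm + mf + fm + ff)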
diff --git a/AIMeiSheng/voice_classification/train/DNN_v2.py b/AIMeiSheng/voice_classification/train/DNN_v2.py
new file mode 100644
index 0000000..438c6c1
--- /dev/null
+++ b/AIMeiSheng/voice_classification/train/DNN_v2.py
@@ -0,0 +1,447 @@
+import torch.nn as nn
+import torch
+from tqdm import tqdm
+import os, sys
+from torch.optim.lr_scheduler import MultiStepLR
+import time
+from torch.utils.data import DataLoader
+import torch.utils.data as data
+import glob
+import numpy as np
+from torch.optim.rmsprop import RMSprop
+from torch.optim.adam import Adam
+import librosa
+from common import *
+
+FRAME_NUM = 11  # number of frames taken per sample from each utterance
+MFCC_LEN = 26
+SAMPEL_NUM = 1000  # number of samples drawn per utterance
+TRAIN_NUM = -1  # number of utterances used for training
+CURRENT_STAT = "train"
+
+PREDICT = False
+
+
+def weights_init(m):
+    if isinstance(m, nn.Conv2d):
+        nn.init.kaiming_normal_(m.weight, mode='fan_out')
+        if m.bias is not None:
+            nn.init.zeros_(m.bias)
+    elif isinstance(m, nn.BatchNorm2d):
+        nn.init.ones_(m.weight)
+        nn.init.zeros_(m.bias)
+    elif isinstance(m, nn.Linear):
+        nn.init.normal_(m.weight, 0, 0.01)
+        if m.bias is not None:
+            nn.init.zeros_(m.bias)
+
+
+class GenderDNN(nn.Module):
+    """
+    10 frames
+    1 --->
+    2 --->      ---> 3 * 2 -> 2
+    3 --->
+    """
+    def __init__(self, input_dims, phase='train'):
+        super(GenderDNN, self).__init__()
+        layer1 = [
+            nn.Linear(input_dims, 1024),
+            nn.BatchNorm1d(1024),
+            nn.Sigmoid(),  # nonlinearity between the linear layers
+            nn.Linear(1024, 512, bias=True),
+            nn.Dropout(0.5),
+            nn.BatchNorm1d(512),
+            nn.Sigmoid(),
+            nn.Linear(512, 2, bias=True)
+        ]
+        layer2 = [
+            nn.Linear(FRAME_NUM*2, 2, bias=True),
+        ]
+
+        if phase == 'test':
+            layer2.append(nn.Softmax(dim=-1))
+
+        self.layer1 = nn.Sequential(*layer1)
+        self.layer2 = nn.Sequential(*layer2)
+        self.apply(weights_init)
+
+    def forward(self, x):
+        x = torch.reshape(x, [-1, MFCC_LEN])
+        y = self.layer1(x)
+        y = torch.reshape(y, [-1, FRAME_NUM * 2])
+        return self.layer2(y)
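Editor's note on forward() above: layer1 scores every MFCC frame independently (the batch is flattened to rows of 26 coefficients), and layer2 is a learned linear vote over the FRAME_NUM per-frame logits. A shape walk-through with stand-in linear layers (the real layer1 is the MLP above):

    import torch
    import torch.nn as nn

    frame_scorer = nn.Linear(26, 2)   # stand-in for the layer1 MLP
    mixer = nn.Linear(11 * 2, 2)      # layer2: mixes 11 per-frame scores
    x = torch.zeros(4, 11, 26)        # batch of 4 windows, FRAME_NUM = 11
    y = frame_scorer(x.reshape(-1, 26)).reshape(-1, 11 * 2)
    print(mixer(y).shape)             # torch.Size([4, 2])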
+
+
+class GenderDataset(data.Dataset):
+    def __init__(self, root, predict=False):
+        self.root = root
+        self._predict = predict
+        self.files = []
+        self.features, self.labels, self.files = self._get_dbs()
+        print("load data: feature_size={} label_size={} files={}".format(len(self.features), len(self.labels), len(self.files)))
+
+    def get_one_mfcc(self, file_url):
+        if file_url.split(".")[-1] == "npy":
+            mfcc = np.load(file_url)
+            if mfcc.shape[1] != 26:
+                mfcc = mfcc.transpose()
+        else:
+            data, sr = librosa.load(file_url, sr=16000)
+            mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=26)
+            mfcc = mfcc.transpose()
+        mfcc_o = []
+        for n in mfcc:
+            # if np.count_nonzero(n) > 20:
+            mfcc_o.append(n)
+        return np.array(mfcc_o)
+
+    def _load_mfcc(self, file_list):
+        tp_file_list = []
+        mfcc = []
+        print(file_list[0][0], len(file_list))
+        for files in file_list:
+            # each speaker may have several segments
+            new_item = []
+            for file in files:
+                item = self.get_one_mfcc(file)
+                padding = FRAME_NUM // 2
+                for i in range(padding, item.shape[0]-padding):
+                    it = item[i-padding:i+padding+1]
+                    new_item.append(it)
+            if len(new_item) > 0:
+                new_item = np.array(new_item).reshape([-1, FRAME_NUM, MFCC_LEN])
+                mfcc.append(new_item)
+                tp_file_list.append(files[0].split("/")[-2])
+            # if len(mfcc) > 10:
+            #     return mfcc, tp_file_list
+            if len(mfcc) % 500 == 0:
+                print("current:{}/{}".format(len(mfcc), len(file_list)))
+        return mfcc, tp_file_list
+
+    def sp2dict(self, x_list):
+        tp_dict = {}
+        for x in x_list:
+            ret = str(x).split('/')
+            if ret[-2] not in tp_dict.keys():
+                tp_dict[ret[-2]] = []
+            tp_dict[ret[-2]].append(x)
+        return tp_dict
+
+    def _get_dbs(self):
+        male_file_list = glob.glob(os.path.join(self.root, 'gender0/*/', '*.npy'))
+        male_file_list = list(self.sp2dict(male_file_list).values())
+        if 'train' == CURRENT_STAT:
+            male_file_list = male_file_list
+        print("file_list_len = {} ".format(len(male_file_list)))
+        male_mfcc, mfile_list = self._load_mfcc(male_file_list)
+        print("real_male_file_list = {}".format(len(male_mfcc)))
+
+        female_file_list = glob.glob(os.path.join(self.root, 'gender1/*/', '*.npy'))
+        female_file_list = list(self.sp2dict(female_file_list).values())
+        if 'train' == CURRENT_STAT:
+            female_file_list = female_file_list
+        print("female_file_list_len {}".format(len(female_file_list)))
+        female_mfcc, fem_file_list = self._load_mfcc(female_file_list)
+        print("real_female_file_list_len {}".format(len(female_mfcc)))
+
+        labels = [0] * len(male_mfcc) + [1] * len(female_mfcc)
+        male_mfcc.extend(female_mfcc)
+        mfile_list.extend(fem_file_list)
+        return np.array(male_mfcc), np.array(labels), np.array(mfile_list)
+
+    def __len__(self):
+        return len(self.labels)
+
+    def __getitem__(self, idx):
+        feature = self.features[idx]
+        if not PREDICT:
+            new_idx = np.random.randint(0, len(feature))
+            feature = feature[new_idx]
+        label = self.labels[idx]
+        return feature, label
+
+
+def get_dataloader(root):
+    batch_size = 128
+    thread_num = 24
+
+    global CURRENT_STAT
+    CURRENT_STAT = 'train'
+    train_root = os.path.join(root, 'train')
+    trainset = GenderDataset(train_root)
+    trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=thread_num)
+
+    CURRENT_STAT = 'val'
+    val_root = os.path.join(root, 'val')
+    valset = GenderDataset(val_root)
+    valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=thread_num)
+
+    return trainloader, valloader
+
+
+def train_one_epoch(model, device, loader, optimizer, criterion):
+    model.train()
+
+    total_num = 0
+    total_loss = 0
+    correct = 0
+
+    for mfcces, labels in tqdm(loader):
+        batch_size = mfcces.size(0)
+        mfcces = mfcces.to(device)
+        labels = labels.to(device)
+
+        predicts = model(mfcces)
+
+        optimizer.zero_grad()
+        loss = criterion(predicts, labels)
+        loss.backward()
+        optimizer.step()
+
+        total_num += batch_size
+        total_loss += loss.item() * batch_size
+
+        _, predicts = predicts.max(dim=1)
+        correct += predicts.eq(labels).sum().item()
+
+    if total_num != 0:
+        total_loss = total_loss / total_num
+        correct = correct / total_num
+
+    return total_loss, correct
+
+
+def val_one_epoch(model, device, loader, criterion):
+    model.eval()
+
+    total_num = 0
+    total_loss = 0
+    correct = 0
+
+    with torch.no_grad():
+        for mfcces, labels in loader:
+            batch_size = mfcces.size(0)
+            mfcces = mfcces.to(device)
+            labels = labels.to(device)
+            predicts = model(mfcces)
+
+            loss = criterion(predicts, labels)
+
+            total_num += batch_size
+            total_loss += loss.item() * batch_size
+
+            _, predicts = predicts.max(dim=1)
+            correct += predicts.eq(labels).sum().item()
+
+    if total_num != 0:
+        total_loss = total_loss / total_num
+        correct = correct / total_num
+
+    return total_loss, correct
+
+
+def train(model, device):
+    # training hyper-parameters
+    max_epoch = 500
+    lr = 0.001
+    momentum = 0
+    weight_decay = 0
+    # LR schedule parameters
+    milestones = [100, 200, 300, 350, 450]
+    gamma = 0.1
+    # checkpoint directory
+    save_directory = os.path.join(os.getcwd(), "torch_dnn_sgd_models_10_1")
+    # save_directory = os.path.join(os.getcwd(), "build/torch_dnn_models_frame1")
+    if not os.path.exists(save_directory):
+        os.makedirs(save_directory)
+
+    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
+    # optimizer = RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)
+    optimizer = Adam(model.parameters(), lr=lr)
+    criterion = nn.CrossEntropyLoss()
+    scheduler = MultiStepLR(optimizer, milestones, gamma)
+
+    # dataset location
+    data_dir = os.path.join(os.getcwd(), "/data1/jianli.yang/st_voice_classification/corpus/dataset/mfcc")
+    # data_dir = os.path.join(os.getcwd(), "build/mini_data_local")
+    train_loader, val_loader = get_dataloader(data_dir)
+
+    max_acc = 0
+
+    for i in range(max_epoch):
+        start = time.time()
+        t_loss, t_acc = train_one_epoch(model, device, train_loader, optimizer, criterion)
+        v_loss, v_acc = val_one_epoch(model, device, val_loader, criterion)
+        end = time.time()
+
+        scheduler.step()
+
+        msg = 't_loss:%f\tt_acc:%.2f' % (t_loss, t_acc * 100)
+        msg += '\tv_loss:%f\tv_acc:%.2f' % (v_loss, v_acc * 100)
+        msg += '\ttime:%f\tepoch:%d' % (end - start, i)
+        print(msg)
+
+        params = model.state_dict()
+        save_path = os.path.join(save_directory, 'DNN_epoch_' + str(i) + '_' + str(v_acc) + '.pth')
+        torch.save(params, save_path)
+
+        max_acc = max(max_acc, v_acc)
+
+    print('best acc:', max_acc)
+
+
+def predict_frames(model_path, set_dir):
+    """
+    Prediction code (frame level).
+    """
+    global SAMPEL_NUM
+    SAMPEL_NUM = 2000
+
+    global PREDICT
+    PREDICT = True
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    # device = 'cpu'
+    model = GenderDNN(MFCC_LEN, phase='test')
+    model.load_state_dict(torch.load(model_path))
+    model.to(device)
+    model.eval()
+
+    predict_set = GenderDataset(set_dir, predict=True)
+    predict_loader = DataLoader(predict_set, batch_size=1, shuffle=False, num_workers=0)
+
+    male_male = 0
+    male_female = 0
+    female_male = 0
+    female_female = 0
+
+    with torch.no_grad():
+        for mfcces, labels in tqdm(predict_loader):
+            mfcces = mfcces.reshape([-1, FRAME_NUM * MFCC_LEN])
+            mfcces = mfcces.to(device)
+            # labels = labels.to(device)
+            predicts = model(mfcces)
+            _, predicts = predicts.max(dim=1)
+            pre_gender = predicts.sum().item()
+
+            # tally the confusion counts
+            if labels == 1:
+                female_female += pre_gender
+                female_male += mfcces.shape[0] - pre_gender
+            elif labels != 1:
+                male_female += pre_gender
+                male_male += mfcces.shape[0] - pre_gender
+    print(" predict: male, female")
+    print("actual:male: {} | {}".format(male_male, male_female))
+    print("actual:female: {} | {}".format(female_male, female_female))
+    print("male: acc={} recall={}".format(male_male / (male_male+female_male), male_male/(male_male+male_female)))
+    print("female: acc={} recall={}".format(female_female/(male_female+female_female), female_female/(female_female+female_male)))
+    print("total: acc={}".format((male_male+female_female)/(male_female+female_female+female_male+male_male)))
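Editor's note: both evaluation paths aggregate frame decisions by majority vote; predict() below labels a recording female when at least half of its windows vote female (predicts.sum().item() >= 0.5 * mfcces.shape[0]). The same rule in isolation, with the threshold exposed as an assumed parameter:

    import numpy as np

    def clip_gender(frame_logits, threshold=0.5):
        # frame_logits: (n_frames, 2) network outputs; 0 = male, 1 = female
        frame_pred = frame_logits.argmax(axis=1)
        return int(frame_pred.mean() >= threshold)

    print(clip_gender(np.array([[0.9, 0.1], [0.2, 0.8], [0.1, 0.9]])))  # 1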
+
+
+def main():
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+    model = GenderDNN(MFCC_LEN, phase='train')
+    model.to(device)
+
+    train(model, device)
+
+
+if __name__ == '__main__':
+    run_mode = "train"
+    if len(sys.argv) >= 5:
+        run_mode = sys.argv[1]
+        model_path = sys.argv[2]
+        set_dir = sys.argv[3]
+        err_dir = sys.argv[4]
+        if run_mode == "predict":
+            predict(model_path, set_dir, err_dir)
+            # predict_frames(model_path, set_dir)
+        else:
+            run_mode = "train"
+    if "train" == run_mode:
+        main()
diff --git a/AIMeiSheng/voice_classification/train/analy/find_threshold.py b/AIMeiSheng/voice_classification/train/analy/find_threshold.py
new file mode 100644
index 0000000..693b254
--- /dev/null
+++ b/AIMeiSheng/voice_classification/train/analy/find_threshold.py
@@ -0,0 +1,77 @@
+"""
+Plot the score distribution of correct predictions and of wrong predictions.
+"""
+import matplotlib.pyplot as plt
+import sys
+
+def read_file(filename):
+    lines = []
+    with open(filename) as f:
+        while True:
+            line = f.readline()
+            if not line:
+                break
+            lines.append(line)
+    return lines
+
+
+def gen_dis(lines):
+    """
+    Build four distributions: male correct | male wrong | female correct | female wrong.
+    :param lines:
+    :return:
+    """
+    male_true = {}
+    male_false = {}
+    female_true = {}
+    female_false = {}
+    for line in lines:
+        # e.g. 584534337,1,male,0.999 -> id, 1 correct / -1 wrong, gender, male score
+        arr = str(line).split(",")
+        flag = int(arr[1]) == 1
+        male = arr[2] == "male"
+        score = float(arr[3])
+
+        tmp = male_true
+        if flag and male:
+            tmp = male_true
+        elif flag and not male:
+            tmp = female_true
+        elif not flag and male:
+            tmp = male_false
+        elif not flag and not male:
+            tmp = female_false
+        if score not in tmp.keys():
+            tmp[score] = 0
+        tmp[score] += 1
+    return [male_true, male_false, female_true, female_false]
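+
+
+# Illustrative sketch (editorial addition, not part of the original file):
+# once the four distributions are plotted, a decision threshold on the male
+# score can be chosen by sweeping candidates and keeping the most accurate.
+# Assumes the same line format as gen_dis: id,flag,gender,male_score.
+def sweep_threshold(lines, candidates=(0.5, 0.6, 0.7, 0.8, 0.9)):
+    if not lines:
+        return None, 0.0
+    best_th, best_acc = None, -1.0
+    for th in candidates:
+        correct = 0
+        for line in lines:
+            arr = str(line).split(",")
+            is_male = arr[2] == "male"
+            score = float(arr[3])
+            # predict male when the male score reaches the threshold
+            correct += int((score >= th) == is_male)
+        acc = correct / len(lines)
+        if acc > best_acc:
+            best_th, best_acc = th, acc
+    return best_th, best_acc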
+
+
+def plot_point(items):
+    plt.figure()
+    plt.title("Score Distribution")
+
+    colors = ['red', 'blue', 'green', 'black']
+    labels = ['male_true', 'male_false', 'female_true', 'female_false']
+    for idx, item in enumerate(items):
+        # if idx in (0, 2):
+        #     continue
+        plt.scatter(list(item.keys()), list(item.values()), color=colors[idx], label=labels[idx])
+
+    plt.legend()
+    plt.xlabel('male_score')
+    plt.ylabel('count')
+    # plt.savefig(filename)
+    plt.show()
+
+
+def process(filename):
+    data = read_file(filename)
+    dis = gen_dis(data)
+    plot_point(dis)
+
+
+if __name__ == "__main__":
+    # filename = sys.argv[1]
+    filename = "resource/sa_all.txt"
+    process(filename)
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/train/common.py b/AIMeiSheng/voice_classification/train/common.py
new file mode 100644
index 0000000..e04f865
--- /dev/null
+++ b/AIMeiSheng/voice_classification/train/common.py
@@ -0,0 +1,221 @@
+# -*- coding: utf-8 -*-
+import logging
+import os
+import pymysql
+from datetime import datetime, timedelta
+import subprocess
+import multiprocessing as mp
+import time
+
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+
+
+def write_file(filename, data):
+    with open(filename, 'w') as f:
+        for dt in data:
+            dt = str(dt).strip('\n')
+            f.write(str(dt) + "\n")
+
+
+def read_file(filename):
+    res = []
+    with open(filename, 'r') as f:
+        while True:
+            line = f.readline()
+            if not line:
+                break
+            res.append(line.strip())
+    return res
+
+
+def n_days_ago(n_time, days):
+    """
+    :param n_time: date string, e.g. 20180719
+    :param days: number of days to go back
+    :return: the date `days` days before n_time, as a string
+    """
+    now_time = datetime.strptime(n_time, '%Y%m%d')
+    delta = timedelta(days=days)
+    n_days = now_time - delta
+    return n_days.strftime("%Y%m%d")
+
+
+def connect_db(host="research-db-r1.starmaker.co", port=3306, user="root", passwd="Qrdl1130", db="rec"):
+    logging.info("connect mysql host={} port={} user={} passwd={} db={}".format(host, port, user, passwd, db))
+    return pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db)
+
+
+def get_data_by_sql(sql):
+    db = connect_db()
+    db_cursor = db.cursor()
+    if len(sql) < 100:
+        logging.info("execute = {}".format(sql))
+    else:
+        logging.info("execute = {}...".format(sql[:100]))
+
+    db_cursor.execute(sql)
+    res = db_cursor.fetchall()
+    db_cursor.close()
+    db.close()
+    logging.info("res size={}".format(len(res)))
+    return res
+
+
+def get_recording_msg_batch(filename=None):
+    """
+    Fetch recording metadata in batches.
+    Only recording_id / user_id (plus sm_labels) are needed.
+    :return:
+    """
+    rid_uid_label = []
+    max_item = 100000
+    ssql = "select r_id, r_user_id,sm_labels from recording where r_id > {} and sm_labels like \"%male%\" order by r_id asc limit {}"
+    current_id = 0
+    while True:
+        res = get_data_by_sql(ssql.format(current_id, max_item))
+        if len(res) == 0:
+            break
+        current_id = res[-1][0]
+        rid_uid_label.extend(res)
+        logging.info("------current_size size={}".format(len(rid_uid_label)))
+    # write to file
+    if filename:
+        res_str = list(map(lambda x: ",".join(map(str, x)), rid_uid_label))
+        write_file(filename, res_str)
+    return rid_uid_label
+
+
+def parse_label(label):
+    label = str(label).lower()
+    gender = "female"
+    idx = label.find(gender)
+    if idx >= 0:
+        label = label.replace("female", "")
+        idx2 = label.find("male")
+
+        # discard labels that contain both male and female
+        if idx2 >= 0:
+            return ""
+        return gender
+
+    # otherwise check whether it is male
+    gender = "male"
+    idx = label.find(gender)
+    if idx >= 0:
+        return gender
+    return ""
+
+
+def parse_labels(rid_uid_label, filename=None):
+    res = []
+    for rid, uid, label in rid_uid_label:
+        gender = parse_label(label)
+        if "" != gender:
+            res.append((rid, uid, gender))
+
+    if filename:
+        res_str = list(map(lambda x: ",".join(map(str, x)), res))
+        write_file(filename, res_str)
+    return res
+
+
+def parse_line(x):
+    ss = str(x).strip().split(',')
+    return ss[0], ss[1], ",".join(ss[2:])
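+
+
+def parse_label_examples():
+    # Illustrative sketch (editorial addition, not part of the original file):
+    # expected behaviour of parse_label on representative sm_labels values.
+    assert parse_label("Male,Pop") == "male"
+    assert parse_label("FEMALE ballad") == "female"
+    assert parse_label("male & female duet") == ""  # mixed labels are dropped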
+
+
+def get_recording_cache(filename=None):
+    """
+    Load the data from the cache file when it exists; otherwise fetch it from the database.
+    :param filename:
+    :return:
+    """
+    if filename:
+        res = read_file(filename)
+        res = list(map(parse_line, res))
+        return res
+    return get_recording_msg_batch(filename)
+
+
+def func_run_time(func):
+    def wrapper(*args, **kw):
+        local_time = time.time()
+        res = func(*args, **kw)
+        logging.info('current Function [%s] run time is %.2f' % (func.__name__, time.time() - local_time))
+        return res
+    return wrapper
+
+
+def download_mp4(dir, recording_id):
+    """
+    1. Download the dry-vocal recording file.
+    2. Rename it once the download has finished.
+    """
+    file_path = os.path.join(dir, recording_id)
+    filename_download = file_path + ".download"
+    filename = file_path + ".mp4"
+
+    if os.path.exists(filename_download):
+        os.unlink(filename_download)
+
+    cmd = "coscmd -b starmaker-1256122840 download production/uploading/recordings/{}/origin_master.mp4 {}"\
+        .format(recording_id, filename_download)
+    # logging.info("now:{}".format(cmd))
+    ret = os.system(cmd)
+    if not ret:
+        cmd = "mv {} {}".format(filename_download, filename)
+        os.system(cmd)
+        return True
+    return False
+
+
+class SimpleMultiProcesser:
+    """
+    Multi-process helper class.
+    Purpose: a single producer process feeds jobs to multiple consumer
+    processes; results are collected via get_res_data().
+    """
+    def __init__(self, data_path, worker_num=1, timeout=10):
+        self._worker_num = worker_num
+        self._res = []
+        self._timeout = timeout
+        self._data_path = data_path
+
+    @func_run_time
+    def load_data(self):
+        """
+        Data-loading hook; must return a list of jobs.
+        :return:
+        """
+        return []
+
+    @func_run_time
+    def processer(self, single_job):
+        """
+        Handle a single job from the job list.
+        :param single_job:
+        :return:
+        """
+        pass
+
+    def task_error_callback(self, msg):
+        logging.error(msg)
+
+    @func_run_time
+    def process(self):
+        tp_queue = self.load_data()
+        logging.info("process -- queue_size={} worker_num={} timeout={}".format(len(tp_queue), self._worker_num, self._timeout))
+        res = []
+        pool = mp.Pool(processes=self._worker_num)
+        while len(tp_queue) > 0:
+            job = tp_queue.pop(0)
+            ret = pool.apply_async(self.processer, args=(job, ), error_callback=self.task_error_callback)
+            res.append(ret)
+        pool.close()
+        pool.join()
+
+        for i in res:
+            self._res.append(i.get(timeout=self._timeout))
+
+    def get_res_data(self):
+        return self._res
\ No newline at end of file
diff --git a/AIMeiSheng/voice_classification/train/mfcc_voice_classification_v1.py b/AIMeiSheng/voice_classification/train/mfcc_voice_classification_v1.py
new file mode 100644
index 0000000..9d379ef
--- /dev/null
+++ b/AIMeiSheng/voice_classification/train/mfcc_voice_classification_v1.py
@@ -0,0 +1,744 @@
+import argparse
+import glob
+import os
+import time
+import librosa
+import numpy as np
+from sklearn.mixture import GaussianMixture
+from sklearn.neural_network import MLPClassifier
+import joblib
+from mload_v1 import *
+import random
+from copy import deepcopy
+
+np.seterr(divide='ignore', invalid='ignore')
+
+
+class GenderMfcc(object):
+    def __init__(self):
+        self.t_set_dir = ""
+        self.s_dir = ""
+        self._split_rate = 0.7  # fraction of the data used for training
+
+    @staticmethod
+    def progress(percent, width=50):
+        if percent >= 100:
+            percent = 100
+
+        show_str = ('[%%-%ds]' % width) % (int(width * percent / 100) * "#")  # nested string formatting builds the bar
+        print('\r%s %d%%' % (show_str, percent), end='')
+
+    def save_mfcc(self, file_list):
+        r_cnt = 0
+        a_cnt = len(file_list)
+        for file in file_list:
+            mfcc = self.get_one_mfcc(file)
+            np.save(file + '.mfcc', mfcc)
+            r_cnt += 1
+            self.progress(r_cnt / a_cnt * 100)
+
+    def testset2mfccs(self, set_dir):
+        male_file_list = glob.glob((os.path.join(set_dir, "gender1/*.mp3")))
+        female_file_list = glob.glob((os.path.join(set_dir, "gender2/*.mp3")))
+
+        male_npyfile_list = 
glob.glob((os.path.join(set_dir, "gender1/*.mfcc.npy"))) + female_npyfile_list = glob.glob((os.path.join(set_dir, "gender2/*.mfcc.npy"))) + + if len(male_file_list) - len(male_npyfile_list) > 5: + print("\nmale_file_list len = ", len(male_file_list), "\n") + self.save_mfcc(male_file_list) + + if len(female_file_list) - len(female_npyfile_list) > 5: + print("\nfemale_file_list len = ", len(female_file_list), "\n") + self.save_mfcc(female_file_list) + + def load_mfcc(self, file_list): + st = time.time() + mfcc = np.array([]) + for idx, file in enumerate(file_list): + # data = np.load(file) + if len(file_list) < 220: + d = self.get_one_mfcc(file)[100:900] + else: + d = self.get_one_mfcc(file)[100:700] + mfcc = np.append(mfcc, d) + if idx % 100 == 0: + print("load {} spend time {}".format(idx, time.time() - st)) + mfcc = mfcc.reshape([-1, 26]) + print("load {} spend time {}".format(idx, time.time() - st)) + return mfcc + + def mload_mfcc(self, file_list, reshape=True): + st = time.time() + md_data_loader = MDataLoader(data_path=file_list, worker_num=24) + md_data_loader.process() + + # 筛选合法数据 + # 使用map代替for循环拼接,速度也有成倍提升 + mfcc = md_data_loader.get_res_data() + print(len(mfcc)) + idx = np.array(list(map(lambda x: x.shape[0] > 0, mfcc))) + mfcc = list(np.array(mfcc)[idx]) + if reshape: + mfcc = np.concatenate(mfcc) + # mfcc = mfcc.reshape([-1, 26]) + print("mload spend time {}".format(time.time() - st)) + return np.array(mfcc) + + def sp2dict(self, x_list): + tp_dict = {} + for x in x_list: + ret = str(x).split('/') + if ret[-2] not in tp_dict.keys(): + tp_dict[ret[-2]] = [] + tp_dict[ret[-2]].append(x) + return tp_dict + + def load_all_file(self): + base_dir = "/data/datasets/voice_classification" + areas = ["av_area_in/mfcc_all", "av_area_sa/mfcc_all"] + + train_male_file = [] + test_male_file = [] + train_female_file = [] + test_female_file = [] + for area in areas: + male_file_list = glob.glob(os.path.join(os.path.join(base_dir, area), 'male/*/*.npy')) + train_male_file_list = male_file_list[:int(len(male_file_list) * self._split_rate)] + test_male_file_list = male_file_list[int(len(male_file_list) * self._split_rate):] + female_file_list = glob.glob(os.path.join(os.path.join(base_dir, area), 'female/*/*.npy')) + train_female_file_list = female_file_list[:int(len(female_file_list) * self._split_rate)] + test_female_file_list = female_file_list[int(len(female_file_list) * self._split_rate):] + + # 放入 + train_male_file.extend(train_male_file_list) + test_male_file.extend(test_male_file_list) + train_female_file.extend(train_female_file_list) + test_female_file.extend(test_female_file_list) + return train_male_file, test_male_file, train_female_file, test_female_file + + def load_one_set(self): + male_file_list = glob.glob(os.path.join(self.t_set_dir, 'male/*/*.npy')) + train_male_file_list = male_file_list[:int(len(male_file_list) * self._split_rate)] + test_male_file_list = male_file_list[int(len(male_file_list) * self._split_rate):] + female_file_list = glob.glob(os.path.join(self.t_set_dir, 'female/*/*.npy')) + train_female_file_list = female_file_list[:int(len(female_file_list) * self._split_rate)] + test_female_file_list = female_file_list[int(len(female_file_list) * self._split_rate):] + return train_male_file_list, test_male_file_list, train_female_file_list, test_female_file_list + + def mfcc_gmm(self): + + train_male_file_list, test_male_file_list, train_female_file_list, test_female_file_list = self.load_all_file() + # 做整理 + train_male_file_list = 
list(self.sp2dict(train_male_file_list).values())
+        train_female_file_list = list(self.sp2dict(train_female_file_list).values())
+        test_male_file_list = list(self.sp2dict(test_male_file_list).values())
+        test_female_file_list = list(self.sp2dict(test_female_file_list).values())
+
+        print(train_female_file_list[0])
+        print("all size")
+        print("train_male = {}".format(len(train_male_file_list)))
+        print("test_male = {}".format(len(test_male_file_list)))
+        print("train_female = {}".format(len(train_female_file_list)))
+        print("test_female = {}".format(len(test_female_file_list)))
+        print("-----------------------")
+
+        # multi-process loading speeds this part up by roughly 7-10x
+        # train_male_mfcc0 = self.load_mfcc(train_male_file_list)
+        train_male_mfcc = self.mload_mfcc(train_male_file_list)
+        # train_female_mfcc0 = self.load_mfcc(train_female_file_list)
+        train_female_mfcc = self.mload_mfcc(train_female_file_list)
+
+        print("train_male mfcc = {}".format(train_male_mfcc.shape))
+        print("train_female mfcc = {}".format(train_female_mfcc.shape))
+
+        num_m = 32
+        num_fe = 18
+
+        st = time.time()
+        male_gmm = GaussianMixture(n_components=num_m, covariance_type='full', random_state=28)
+        male_gmm.fit(train_male_mfcc)
+        print("male fit sp={}".format(time.time() - st))
+
+        joblib.dump(male_gmm, os.path.join(self.s_dir, "mfcc_male_gmm_%s_ns.m" % num_m))
+
+        st = time.time()
+        female_gmm = GaussianMixture(n_components=num_fe, covariance_type='full', random_state=28)
+        female_gmm.fit(train_female_mfcc)
+        print("female fit sp={}".format(time.time() - st))
+
+        joblib.dump(female_gmm, os.path.join(self.s_dir, "mfcc_female_gmm_%s_ns.m" % num_fe))
+        self.batch_test_predict(male_gmm, female_gmm, test_male_file_list, test_female_file_list)
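+
+    @staticmethod
+    def gmm_decision_sketch(male_gmm, female_gmm, mfcc):
+        # Illustrative sketch (editorial addition, not part of the original
+        # file): the decision rule used by batch_test_predict, isolated.
+        # Each GMM assigns a log-likelihood to every MFCC frame; summing over
+        # frames and comparing picks the better-fitting gender model.
+        male_ll = np.sum(male_gmm.score_samples(mfcc))
+        female_ll = np.sum(female_gmm.score_samples(mfcc))
+        return "male" if male_ll > female_ll else "female"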
+
+    def mfcc_dnn(self):
+        male_file_list = glob.glob(os.path.join(self.t_set_dir, 'gender0/*.npy'))
+        train_male_file_list = male_file_list[:int(len(male_file_list) * self._split_rate)]
+        test_male_file_list = male_file_list[int(len(male_file_list) * self._split_rate):]
+        female_file_list = glob.glob(os.path.join(self.t_set_dir, 'gender1/*.npy'))
+        train_female_file_list = female_file_list[:int(len(female_file_list) * self._split_rate)]
+        test_female_file_list = female_file_list[int(len(female_file_list) * self._split_rate):]
+
+        print("all size")
+        print("train_male = {}".format(len(train_male_file_list)))
+        print("test_male = {}".format(len(test_male_file_list)))
+        print("train_female = {}".format(len(train_female_file_list)))
+        print("test_female = {}".format(len(test_female_file_list)))
+        print("-----------------------")
+
+        train_male_mfcc = self.mload_mfcc(train_male_file_list)
+        train_female_mfcc = self.mload_mfcc(train_female_file_list)
+
+        print("train_male mfcc = {}".format(train_male_mfcc.shape))
+        print("train_female mfcc = {}".format(train_female_mfcc.shape))
+
+        train_data = np.concatenate([train_male_mfcc, train_female_mfcc], axis=0)
+        train_label = np.array([0] * train_male_mfcc.shape[0] + [1] * train_female_mfcc.shape[0])
+        # shuffle
+        idx = list(range(0, len(train_label)))
+        random.shuffle(idx)
+        train_data = train_data[idx]
+        train_label = train_label[idx]
+        del train_male_mfcc
+        del train_female_mfcc
+
+        # print 20 labels to eyeball how well the data is shuffled
+        print(train_label[:20])
+
+        # train the DNN
+        mlp = MLPClassifier(solver='adam',
+                            activation='logistic',
+                            alpha=1e-3,
+                            hidden_layer_sizes=(1024, 1024),
+                            random_state=1,
+                            verbose=True,
+                            early_stopping=True,
+                            max_iter=5
+                            )
+        mlp.fit(train_data, train_label)
+
+        # save the model
+        joblib.dump(mlp, os.path.join(self.s_dir, "mfcc_dnn_%d.m" % mlp.n_iter_))
+
+        # evaluate on the held-out split
+        del train_data
+        del train_label
+        self.batch_test_dnn(mlp, test_male_file_list, test_female_file_list)
+
+    @staticmethod
+    def get_one_mfcc(file_url):
+        if file_url.split(".")[-1] == "npy":
+            mfcc = np.load(file_url)
+            if mfcc.shape[1] != 26:
+                mfcc = mfcc.transpose()
+        else:
+            data, sr = librosa.load(file_url, sr=16000)
+            mfcc = librosa.feature.mfcc(data, sr, n_mfcc=26)
+            mfcc = mfcc.transpose()
+        mfcc_o = []
+        for n in mfcc:
+            if np.count_nonzero(n) > 20:
+                mfcc_o.append(n)
+        return np.array(mfcc_o)
+
+    def predict_one(self, file_url):
+        male_gmm = joblib.load(os.path.join(self.s_dir, "mfcc_male_gmm_32_ns.m"))
+        female_gmm = joblib.load(os.path.join(self.s_dir, "mfcc_female_gmm_18_ns.m"))
+        mean_pitch = self.get_one_mfcc(file_url)
+        print(mean_pitch.shape)
+        print(male_gmm.score_samples(mean_pitch))
+        return np.sum(male_gmm.score_samples(mean_pitch)), np.sum(female_gmm.score_samples(mean_pitch))
+
+    def predict_one_dnn(self, file_url, iter):
+        dnn_model = joblib.load(os.path.join(self.s_dir, "mfcc_dnn_{}.m".format(iter)))
+        mean_pitch = self.get_one_mfcc(file_url)
+        res = dnn_model.predict(mean_pitch)
+        print(np.sum(res), mean_pitch.shape[0])
+        return np.sum(res) >= 0.5 * mean_pitch.shape[0]  # True means female
+
+    def batch_test_predict_frame(self, male_gmm, female_gmm, male_file_list, female_file_list):
+        """
+        Per-frame evaluation: every MFCC frame is classified on its own.
+        :param male_gmm:
+        :param female_gmm:
+        :param male_file_list:
+        :param female_file_list:
+        :return:
+        """
+        ana_err = []
+        ana_cor = []
+        test_male_mfcc = self.mload_mfcc(male_file_list, True)
+        test_female_mfcc = self.mload_mfcc(female_file_list, True)
+        st = time.time()
+        acc = 0
+
+        start = 0
+        step = 256
+        male_prob_list = []
+        while True:
+            if start >= len(test_male_mfcc):
+                break
+            if start + step > len(test_male_mfcc):
+                step = len(test_male_mfcc) - start
+            data = test_male_mfcc[start:start + step]
+            male_test_male_lable = male_gmm.score_samples(data)
+            male_test_female_lable = female_gmm.score_samples(data)
+            for tp_male, tp_female in zip(male_test_male_lable, male_test_female_lable):
+                tp_male = np.exp(tp_male)
+                tp_female = np.exp(tp_female)
+                tp_male_prob = tp_male / (tp_female + tp_male)
+                tp_female_prob = tp_female / (tp_female + tp_male)
+                male_prob_list.append(tp_male_prob)  # record the probability
+                if tp_female_prob >= tp_male_prob:
+                    ana_err.append(tp_female_prob - tp_male_prob)
+                else:
+                    acc += 1
+                    ana_cor.append(tp_male_prob - tp_female_prob)
+            # acc += np.sum(male_test_male_lable > male_test_female_lable)
+            start += step
+        print(acc / test_male_mfcc.shape[0])
+        print("sp time = {}".format(time.time() - st))
+
+        male_male = acc
+        male_female = test_male_mfcc.shape[0] - acc
+
+        st = time.time()
+        acc = 0
+        start = 0
+        step = 256
+        female_prob_list = []
+        while True:
+            if start >= len(test_female_mfcc):
+                break
+            if start + step > len(test_female_mfcc):
+                step = len(test_female_mfcc) - start
+            data = test_female_mfcc[start:start + step]
+            female_test_male_lable = male_gmm.score_samples(data)
+            female_test_female_lable = female_gmm.score_samples(data)
+            for tp_male, tp_female in zip(female_test_male_lable, female_test_female_lable):
+                tp_male = np.exp(tp_male)
+                tp_female = np.exp(tp_female)
+                tp_male_prob = tp_male / (tp_female + tp_male)
+                tp_female_prob = tp_female / (tp_female + tp_male)
+                female_prob_list.append(tp_female_prob)  # record the probability
+                if tp_female_prob <= tp_male_prob:
+                    ana_err.append(tp_male_prob - tp_female_prob)
+                else:
+                    acc += 1
+                    ana_cor.append(tp_female_prob - tp_male_prob)
+            # acc += np.sum(female_test_male_lable < female_test_female_lable)
+            start += step
+        print(acc / test_female_mfcc.shape[0])
+ print("sp time = {}".format(time.time() - st)) + + female_female = acc + female_male = test_female_mfcc.shape[0] - acc + + print(" predict: male, female") + print("actual:male: {} | {}".format(male_male, male_female)) + print("actual:female: {} | {}".format(female_male, female_female)) + print("male: acc={} recall={}".format(male_male / (male_male + female_male), + male_male / (male_male + male_female))) + print("female: acc={} recall={}".format(female_female / (male_female + female_female), + female_female / (female_female + female_male))) + print("total: acc={}".format( + (male_male + female_female) / (male_female + female_female + male_male + female_male))) + + np.save("ana_err", np.array(ana_err)) + np.save("ana_cor", np.array(ana_cor)) + np.save("male_prob_list.txt", np.array(male_prob_list)) + np.save("female_prob_list.txt", np.array(female_prob_list)) + + def batch_test_predict_v1(self, male_gmm, female_gmm, male_file_list, female_file_list): + """ + 新的计算方式 + 绝对值>=0.9才算有效 + :param male_gmm: + :param female_gmm: + :param male_file_list: + :param female_file_list: + :return: + """ + + ma = deepcopy(male_file_list) + fe = deepcopy(female_file_list) + test_male_mfcc = self.mload_mfcc(male_file_list, False) + test_female_mfcc = self.mload_mfcc(female_file_list, False) + + print("file_list = {} test male mfcc={}".format(len(ma), test_male_mfcc.shape)) + print("file_list= {} test female mfcc={}".format(len(fe), test_female_mfcc.shape)) + + err_recording_male = [] + male_data = [] + st = time.time() + acc = 0 + idx = 0 + id_gender = [] + for data in test_male_mfcc: + male_test_male_lable = male_gmm.score_samples(data) + male_test_female_lable = female_gmm.score_samples(data) + + # 做softmax处理 + male_test_male_label = np.exp(male_test_male_lable) + male_test_female_lable = np.exp(male_test_female_lable) + tot = male_test_male_label + male_test_female_lable + male_test_male_lable = male_test_male_label / tot + male_test_female_lable = male_test_female_lable / tot + + male_data.append([male_test_male_lable, male_test_female_lable]) + + # 计算正确数量 + male_male = np.sum(male_test_male_lable - male_test_female_lable >= 0.8) + male_female = np.sum(male_test_female_lable - male_test_male_lable >= 0.8) + new_lb = int(male_female > male_male) + if male_male > male_female: + acc += 1 + else: + err_recording_male.append(ma[idx][0].split("/")[-2]) + id_gender.append("{},{}".format(ma[idx][0].split("/")[-2], new_lb)) + idx += 1 + + print(acc / test_male_mfcc.shape[0]) + print("sp time = {}".format(time.time() - st)) + + male_male = acc + male_female = test_male_mfcc.shape[0] - acc + + st = time.time() + acc = 0 + idx = 0 + err_recording_female = [] + female_data = [] + for data in test_female_mfcc: + female_test_male_lable = male_gmm.score_samples(data) + female_test_female_lable = female_gmm.score_samples(data) + + # 做softmax处理 + female_test_male_lable = np.exp(female_test_male_lable) + female_test_female_lable = np.exp(female_test_female_lable) + tot = female_test_male_lable + female_test_female_lable + + female_test_male_lable = female_test_male_lable / tot + female_test_female_lable = female_test_female_lable / tot + + female_data.append([female_test_male_lable, female_test_female_lable]) + + female_male = np.sum(female_test_male_lable - female_test_female_lable >= 0.8) + female_female = np.sum(female_test_female_lable - female_test_male_lable >= 0.8) + new_lb = int(female_female > female_male) + if female_female > female_male: + acc += 1 + else: + err_recording_female.append(fe[idx][0].split("/")[-2]) 
+ id_gender.append("{},{}".format(fe[idx][0].split("/")[-2], new_lb)) + idx += 1 + + print(acc / test_female_mfcc.shape[0]) + print("sp time = {}".format(time.time() - st)) + + female_female = acc + female_male = test_female_mfcc.shape[0] - acc + + print(" predict: male, female") + print("actual:male: {} | {}".format(male_male, male_female)) + print("actual:female: {} | {}".format(female_male, female_female)) + print("male: acc={} recall={}".format(male_male / (male_male + female_male), + male_male / (male_male + male_female))) + print("female: acc={} recall={}".format(female_female / (male_female + female_female), + female_female / (female_female + female_male))) + print("total: acc={}".format( + (male_male + female_female) / (male_female + female_female + male_male + female_male))) + + np.save("male_data", np.array(male_data)) + np.save("female_data", np.array(female_data)) + + # write_file("../../corpus/stage_one_plus/relabel_data/need_relabeled_data/male_err_list.txt", err_recording_male) + # write_file("../../corpus/stage_one_plus/relabel_data/need_relabeled_data/female_err_list.txt", err_recording_female) + write_file("../../corpus/stage_one_plus/relabel_data/merge_om/gmm_label.txt", id_gender) + + def batch_test_predict(self, male_gmm, female_gmm, male_file_list, female_file_list): + + ma = deepcopy(male_file_list) + fe = deepcopy(female_file_list) + err_ma = [] + err_fe = [] + + test_male_mfcc = self.mload_mfcc(male_file_list, False) + test_female_mfcc = self.mload_mfcc(female_file_list, False) + + print("test male mfcc={}".format(test_male_mfcc.shape)) + print("test female mfcc={}".format(test_female_mfcc.shape)) + + st = time.time() + acc = 0 + for idx, data in enumerate(test_male_mfcc): + male_test_male_lable = male_gmm.score_samples(data) + male_test_female_lable = female_gmm.score_samples(data) + if np.sum(male_test_male_lable) > np.sum(male_test_female_lable): + acc += 1 + else: + err_ma.append(ma[idx][0].split("/")[-2]) + # 换一种计算方式 + # if np.sum(male_test_male_lable > male_test_female_lable) > 0.5 * data.shape[0]: + # acc += 1 + + print(acc / test_male_mfcc.shape[0]) + print("sp time = {}".format(time.time() - st)) + + male_male = acc + male_female = test_male_mfcc.shape[0] - acc + + st = time.time() + acc = 0 + for idx, data in enumerate(test_female_mfcc): + female_test_male_lable = male_gmm.score_samples(data) + female_test_female_lable = female_gmm.score_samples(data) + if np.sum(female_test_male_lable) < np.sum(female_test_female_lable): + acc += 1 + else: + err_fe.append(fe[idx][0].split("/")[-2]) + # if np.sum(female_test_female_lable < female_test_male_lable) > 0.5 * data.shape[0]: + # acc += 1 + + print(acc / test_female_mfcc.shape[0]) + print("sp time = {}".format(time.time() - st)) + + female_female = acc + female_male = test_female_mfcc.shape[0] - acc + + print(" predict: male, female") + print("actual:male: {} | {}".format(male_male, male_female)) + print("actual:female: {} | {}".format(female_male, female_female)) + print("male: acc={} recall={}".format(male_male / (male_male + female_male), + male_male / (male_male + male_female))) + print("female: acc={} recall={}".format(female_female / (male_female + female_female), + female_female / (female_female + female_male))) + print("total: acc={}".format( + (male_male + female_female) / (male_female + female_female + male_male + female_male))) + write_file("/tmp/male_err_list.txt", err_ma) + write_file("/tmp/female_err_list.txt", err_fe) + + def batch_test_predict_v2(self, male_gmm, female_gmm, male_file_list, 
female_file_list): + """ + 计算softmax,得到男生分数 + :param male_gmm: + :param female_gmm: + :param male_file_list: + :param female_file_list: + :return: + """ + ma = deepcopy(male_file_list) + fe = deepcopy(female_file_list) + err_ma = [] + err_fe = [] + + score_list = [] + test_male_mfcc = self.mload_mfcc(male_file_list, False) + test_female_mfcc = self.mload_mfcc(female_file_list, False) + + print("test male mfcc={}".format(test_male_mfcc.shape)) + print("test female mfcc={}".format(test_female_mfcc.shape)) + + st = time.time() + acc = 0 + for idx, data in enumerate(test_male_mfcc): + flag = 1 + male_test_male_lable = male_gmm.score_samples(data) + male_test_female_lable = female_gmm.score_samples(data) + if np.sum(male_test_male_lable) > np.sum(male_test_female_lable): + acc += 1 + else: + err_ma.append(ma[idx][0].split("/")[-2]) + flag = -1 + # 做softmax + male_mean_label = np.mean(male_test_male_lable) + female_mean_label = np.mean(male_test_female_lable) + rate = np.exp(male_mean_label) / (np.exp(male_mean_label) + np.exp(female_mean_label)) + score_list.append("{},{},{},{}".format(ma[idx][0].split("/")[-2], flag, "male", round(rate, 3))) + # 换一种计算方式 + # if np.sum(male_test_male_lable > male_test_female_lable) > 0.5 * data.shape[0]: + # acc += 1 + + print(acc / test_male_mfcc.shape[0]) + print("sp time = {}".format(time.time() - st)) + + male_male = acc + male_female = test_male_mfcc.shape[0] - acc + + st = time.time() + acc = 0 + for idx, data in enumerate(test_female_mfcc): + flag = 1 + female_test_male_lable = male_gmm.score_samples(data) + female_test_female_lable = female_gmm.score_samples(data) + if np.sum(female_test_male_lable) < np.sum(female_test_female_lable): + acc += 1 + else: + err_fe.append(fe[idx][0].split("/")[-2]) + flag = -1 + + # 做softmax + male_mean_label = np.mean(female_test_male_lable) + female_mean_label = np.mean(female_test_female_lable) + rate = np.exp(male_mean_label) / (np.exp(male_mean_label) + np.exp(female_mean_label)) + score_list.append("{},{},{},{}".format(fe[idx][0].split("/")[-2], flag, "female", round(rate, 3))) + # if np.sum(female_test_female_lable < female_test_male_lable) > 0.5 * data.shape[0]: + # acc += 1 + + print(acc / test_female_mfcc.shape[0]) + print("sp time = {}".format(time.time() - st)) + + female_female = acc + female_male = test_female_mfcc.shape[0] - acc + + print(" predict: male, female") + print("actual:male: {} | {}".format(male_male, male_female)) + print("actual:female: {} | {}".format(female_male, female_female)) + print("male: acc={} recall={}".format(male_male / (male_male + female_male), + male_male / (male_male + male_female))) + print("female: acc={} recall={}".format(female_female / (male_female + female_female), + female_female / (female_female + female_male))) + print("total: acc={}".format( + (male_male + female_female) / (male_female + female_female + male_male + female_male))) + + write_file("/tmp/male_err_list.txt", err_ma) + write_file("/tmp/female_err_list.txt", err_fe) + write_file("/tmp/score_label.txt", score_list) + + def batch_test_dnn(self, dnn_model, male_file_list, female_file_list): + test_male_mfcc = self.mload_mfcc(male_file_list, False) + test_female_mfcc = self.mload_mfcc(female_file_list, False) + + st = time.time() + acc = 0 + # for data in test_male_mfcc: + # male_test_male_lable = dnn_model.predict(data) + # if np.sum(male_test_male_lable) < 0.5 * data.shape[0]: + # acc += 1 + m_pre = MDNNPredictor(list(test_male_mfcc), 24) + m_pre.set_model(dnn_model) + m_pre.process() + res = m_pre.get_res_data() 
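+        # Comment added for clarity (editorial, not in the original diff):
+        # each MDNNPredictor result is a per-recording majority vote over
+        # frames (True = predicted female), so `rr == 0` below counts a
+        # correctly classified male recording.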
+        for rr in res:
+            if rr == 0:
+                acc += 1
+        print(acc / test_male_mfcc.shape[0])
+        print("sp time = {}".format(time.time() - st))
+
+        male_male = acc
+        male_female = test_male_mfcc.shape[0] - acc
+
+        st = time.time()
+        acc = 0
+        # for data in test_female_mfcc:
+        #     female_test_male_lable = dnn_model.predict(data)
+        #     if np.sum(female_test_male_lable) >= 0.5 * data.shape[0]:
+        #         acc += 1
+        m_pre = MDNNPredictor(data_path=list(test_female_mfcc), worker_num=24, timeout=60)
+        m_pre.set_model(dnn_model)
+        m_pre.process()
+        res = m_pre.get_res_data()
+        for rr in res:
+            if rr == 1:
+                acc += 1
+        print(acc / test_female_mfcc.shape[0])
+        print("sp time = {}".format(time.time() - st))
+
+        female_female = acc
+        female_male = test_female_mfcc.shape[0] - acc
+        print(" predict: male, female")
+        print("actual:male: {} | {}".format(male_male, male_female))
+        print("actual:female: {} | {}".format(female_male, female_female))
+        print("male: acc={} recall={}".format(male_male / (male_male + female_male),
+                                              male_male / (male_male + male_female)))
+        print("female: acc={} recall={}".format(female_female / (male_female + female_female),
+                                                female_female / (female_female + female_male)))
+        print("total: acc={}".format(
+            (male_male + female_female) / (male_female + female_female + female_male + male_male)))
+
+    def predict_batch(self):
+        male_gmm = joblib.load(os.path.join(self.s_dir, "mfcc_male_gmm_32_ns.m"))
+        female_gmm = joblib.load(os.path.join(self.s_dir, "mfcc_female_gmm_18_ns.m"))
+        male_file_list = glob.glob(os.path.join(self.t_set_dir, 'male/*/*.npy'))
+        female_file_list = glob.glob(os.path.join(self.t_set_dir, 'female/*/*.npy'))
+
+        # # 30% test split
+        # male_file_list = male_file_list[int(len(male_file_list) * self._split_rate):]
+        # female_file_list = female_file_list[int(len(female_file_list) * self._split_rate):]
+
+        male_file_list = list(self.sp2dict(male_file_list).values())
+        female_file_list = list(self.sp2dict(female_file_list).values())
+        self.batch_test_predict(male_gmm, female_gmm, male_file_list, female_file_list)
+        # self.batch_test_predict_v2(male_gmm, female_gmm, male_file_list, female_file_list)
+        # self.batch_test_predict_v1(male_gmm, female_gmm, male_file_list, female_file_list)
+        # self.batch_test_predict_frame(male_gmm, female_gmm, male_file_list, female_file_list)
+
+    def predict_batch_dnn(self, iter):
+        dnn_model = joblib.load(os.path.join(self.s_dir, "mfcc_dnn_{}.m".format(iter)))
+        male_file_list = glob.glob(os.path.join(self.t_set_dir, 'gender0/*.npy'))
+        female_file_list = glob.glob(os.path.join(self.t_set_dir, 'gender1/*.npy'))
+
+        print("total", len(male_file_list), len(female_file_list))
+        male_file_list = male_file_list[int(len(male_file_list) * self._split_rate):]
+        female_file_list = female_file_list[int(len(female_file_list) * self._split_rate):]
+        print("{}".format(self._split_rate), len(male_file_list), len(female_file_list))
+
+        self.batch_test_dnn(dnn_model, male_file_list, female_file_list)
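+
+
+# Illustrative usage (editorial addition, not part of the original file);
+# the invocation path is an assumption, the flags match the parser below:
+#   python mfcc_voice_classification_v1.py -r train -td /tmp/audios -sd ./models -a gmm
+#   python mfcc_voice_classification_v1.py -r predict -td /tmp/audios -sd ./models -a dnn -i 5
+#   python mfcc_voice_classification_v1.py -r predict -u /path/to/one.mp3 -sd ./models -a gmm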
ap.add_argument("-a", "--algorithm", type=str, default='gmm', choices=['gmm', 'dnn'], + help="the algorithm of train or test", ) + ap.add_argument("-i", "--iter", type=str, default='5', help="the iter of dnn") + + args = vars(ap.parse_args()) + + run_mode = args["run_mode"] + t_set_dir = args["t_set_dir"] + s_dir = args["save_dir"] + debug = args["debug"] + l_url = args["local_url"] + algorithm = args['algorithm'] + iter = args['iter'] + + gm = GenderMfcc() + gm.t_set_dir = t_set_dir + gm.s_dir = s_dir + + if run_mode == "train": + if algorithm == "dnn": + gm.mfcc_dnn() + else: + gm.mfcc_gmm() + elif run_mode == "predict": + if not l_url: + if algorithm == "dnn": + gm.predict_batch_dnn(iter) + else: + gm.predict_batch() + else: + if algorithm == "dnn": + gender = gm.predict_one_dnn(l_url, iter) + if gender: + gender = "female" + else: + gender = "male" + print("{}".format(gender)) + else: + m_c, f_c = gm.predict_one(l_url) + if debug: + print("mfcc m_c", m_c, np.exp(m_c)) + print("mfcc f_c", f_c, np.exp(f_c)) + if m_c > f_c: + print("male") + else: + print("female") diff --git a/AIMeiSheng/voice_classification/train/mload_v1.py b/AIMeiSheng/voice_classification/train/mload_v1.py new file mode 100644 index 0000000..f6785a5 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/mload_v1.py @@ -0,0 +1,57 @@ +# -*- coding: utf-8 -*- +from common import * +import numpy as np +import time + + +class MDataLoader(SimpleMultiProcesser): + """ + 多进程载入 + """ + + def load_data(self): + return self._data_path + + def processer(self, single_job): + mfccs = [] + for file_url in single_job: + if file_url.split(".")[-1] == "npy": + mfcc = np.load(file_url) + if mfcc.shape[1] != 26: + mfcc = mfcc.transpose() + + # TODO 测试时需要将这个位置去掉 + # if mfcc.shape[0] >= 2000: + # mfcc = mfcc[:2000] + mfccs.append(mfcc) + # print("before concatenate = {}".format(len(mfccs))) + mfccs = np.concatenate(mfccs) + # return mfccs[100:1100] + return mfccs + + # 长度大约2000的占据数据集的99% + # if mfcc.shape[0] >= 2000: + # mfcc = mfcc[0:2000] # 取12.8s长度做测试 + # return mfcc + # return np.array([]) + + +class MDNNPredictor(SimpleMultiProcesser): + """ + 多进程预测 + """ + + def load_data(self): + return self._data_path + + def set_model(self, model): + self._model = model + + def processer(self, single_job): + male_test_male_lable = self._model.predict(single_job) + return np.sum(male_test_male_lable) >= 0.5 * single_job.shape[0] + +# if __name__ == "__main__": +# md_data_loader = MDataLoader(data_path=[12,33], worker_num=4) +# md_data_loader.process() +# print(md_data_loader.get_res_data()) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/__init__.py b/AIMeiSheng/voice_classification/train/music_voice_class/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/mobilenet_v2_custom.py b/AIMeiSheng/voice_classification/train/music_voice_class/mobilenet_v2_custom.py new file mode 100644 index 0000000..57b1227 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/mobilenet_v2_custom.py @@ -0,0 +1,142 @@ +""" +直接从代码库中拷贝出的代码 +目的: mobilenet_v2只允许输入图片的通道数为3,不满足要求,因此拷贝出来做修改 +""" + +from torch import nn + + +def _make_divisible(v, divisor, min_value=None): + """ + This function is taken from the original tf repo. 
+ It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + :param v: + :param divisor: + :param min_value: + :return: + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * v: + new_v += divisor + return new_v + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + padding = (kernel_size - 1) // 2 + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2Custom(nn.Module): + def __init__(self, num_classes=2, in_channel=1, width_mult=1.0, inverted_residual_setting=None, round_nearest=8): + """ + MobileNet V2 main class + + Args: + num_classes (int): Number of classes + width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount + inverted_residual_setting: Network structure + round_nearest (int): Round the number of channels in each layer to be a multiple of this number + Set to 1 to turn off rounding + """ + super(MobileNetV2Custom, self).__init__() + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + + if inverted_residual_setting is None: + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # only check the first element, assuming user knows t,c,n,s are required + if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: + raise ValueError("inverted_residual_setting should be non-empty " + "or a 4-element list, got {}".format(inverted_residual_setting)) + + # building first layer + input_channel = _make_divisible(input_channel * width_mult, round_nearest) + self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest) + # 修改的地方,原来in_channel=3 + features = [ConvBNReLU(in_channel, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = _make_divisible(c * width_mult, round_nearest) + for i in range(n): + stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + # building last several layers + features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) + # make it nn.Sequential + self.features = nn.Sequential(*features) + + # building classifier + self.classifier = nn.Sequential( + 
nn.Dropout(0.2), + nn.Linear(self.last_channel, num_classes), + ) + + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def forward(self, x): + x = self.features(x) + x = x.mean([2, 3]) + x = self.classifier(x) + return x diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/mobilenet_v3_custom.py b/AIMeiSheng/voice_classification/train/music_voice_class/mobilenet_v3_custom.py new file mode 100644 index 0000000..b81605e --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/mobilenet_v3_custom.py @@ -0,0 +1,266 @@ +import warnings +import torch +from torch import nn, Tensor +from typing import Any, Callable, List, Optional, Sequence + +from functools import partial +from v3_ops_misc import ConvNormActivation, SqueezeExcitation as SElayer +from v3_ops_misc import _make_divisible + + +class SqueezeExcitationV(SElayer): + """DEPRECATED + """ + + def __init__(self, input_channels: int, squeeze_factor: int = 4): + squeeze_channels = _make_divisible(input_channels // squeeze_factor, 8) + super().__init__(input_channels, squeeze_channels, scale_activation=nn.Hardsigmoid) + self.relu = self.activation + delattr(self, 'activation') + warnings.warn( + "This SqueezeExcitation class is deprecated and will be removed in future versions. " + "Use torchvision.ops.misc.SqueezeExcitation instead.", FutureWarning) + + +class InvertedResidualConfig: + # Stores information listed at Tables 1 and 2 of the MobileNetV3 paper + def __init__(self, input_channels: int, kernel: int, expanded_channels: int, out_channels: int, use_se: bool, + activation: str, stride: int, dilation: int, width_mult: float): + self.input_channels = self.adjust_channels(input_channels, width_mult) + self.kernel = kernel + self.expanded_channels = self.adjust_channels(expanded_channels, width_mult) + self.out_channels = self.adjust_channels(out_channels, width_mult) + self.use_se = use_se + self.use_hs = activation == "HS" + self.stride = stride + self.dilation = dilation + + @staticmethod + def adjust_channels(channels: int, width_mult: float): + return _make_divisible(channels * width_mult, 8) + + +class InvertedResidual(nn.Module): + # Implemented as described at section 5 of MobileNetV3 paper + def __init__(self, cnf: InvertedResidualConfig, norm_layer: Callable[..., nn.Module], + se_layer: Callable[..., nn.Module] = partial(SElayer, + scale_activation=nn.Hardsigmoid)): + super().__init__() + if not (1 <= cnf.stride <= 2): + raise ValueError('illegal stride value') + + self.use_res_connect = cnf.stride == 1 and cnf.input_channels == cnf.out_channels + + layers: List[nn.Module] = [] + activation_layer = nn.Hardswish if cnf.use_hs else nn.ReLU + + # expand + if cnf.expanded_channels != cnf.input_channels: + layers.append(ConvNormActivation(cnf.input_channels, cnf.expanded_channels, kernel_size=1, + norm_layer=norm_layer, activation_layer=activation_layer)) + + # depthwise + stride = 1 if cnf.dilation > 1 else cnf.stride + layers.append(ConvNormActivation(cnf.expanded_channels, cnf.expanded_channels, kernel_size=cnf.kernel, + stride=stride, dilation=cnf.dilation, groups=cnf.expanded_channels, + norm_layer=norm_layer, activation_layer=activation_layer)) + if cnf.use_se: + 
squeeze_channels = _make_divisible(cnf.expanded_channels // 4, 8) + layers.append(se_layer(cnf.expanded_channels, squeeze_channels)) + + # project + layers.append(ConvNormActivation(cnf.expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer, + activation_layer=None)) + + self.block = nn.Sequential(*layers) + self.out_channels = cnf.out_channels + self._is_cn = cnf.stride > 1 + + def forward(self, input: Tensor) -> Tensor: + result = self.block(input) + if self.use_res_connect: + result += input + return result + + +class MobileNetV3Custom(nn.Module): + + def __init__( + self, + inverted_residual_setting: List[InvertedResidualConfig], + last_channel: int, + num_classes: int = 2, + block: Optional[Callable[..., nn.Module]] = None, + norm_layer: Optional[Callable[..., nn.Module]] = None, + **kwargs: Any + ) -> None: + """ + MobileNet V3 main class + + Args: + inverted_residual_setting (List[InvertedResidualConfig]): Network structure + last_channel (int): The number of channels on the penultimate layer + num_classes (int): Number of classes + block (Optional[Callable[..., nn.Module]]): Module specifying inverted residual building block for mobilenet + norm_layer (Optional[Callable[..., nn.Module]]): Module specifying the normalization layer to use + """ + super().__init__() + + if not inverted_residual_setting: + raise ValueError("The inverted_residual_setting should not be empty") + elif not (isinstance(inverted_residual_setting, Sequence) and + all([isinstance(s, InvertedResidualConfig) for s in inverted_residual_setting])): + raise TypeError("The inverted_residual_setting should be List[InvertedResidualConfig]") + + if block is None: + block = InvertedResidual + + if norm_layer is None: + norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.01) + + layers: List[nn.Module] = [] + + # building first layer + firstconv_output_channels = inverted_residual_setting[0].input_channels + # jianli_change 3 - > 1 + layers.append(ConvNormActivation(1, firstconv_output_channels, kernel_size=3, stride=2, norm_layer=norm_layer, + activation_layer=nn.Hardswish)) + + # building inverted residual blocks + for cnf in inverted_residual_setting: + layers.append(block(cnf, norm_layer)) + + # building last several layers + lastconv_input_channels = inverted_residual_setting[-1].out_channels + lastconv_output_channels = 6 * lastconv_input_channels + layers.append(ConvNormActivation(lastconv_input_channels, lastconv_output_channels, kernel_size=1, + norm_layer=norm_layer, activation_layer=nn.Hardswish)) + + self.features = nn.Sequential(*layers) + self.avgpool = nn.AdaptiveAvgPool2d(1) + self.classifier = nn.Sequential( + nn.Linear(lastconv_output_channels, last_channel), + nn.Hardswish(inplace=True), + nn.Dropout(p=0.2, inplace=True), + nn.Linear(last_channel, num_classes), + ) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def _forward_impl(self, x: Tensor) -> Tensor: + x = self.features(x) + + x = self.avgpool(x) + x = torch.flatten(x, 1) + + x = self.classifier(x) + + return x + + def forward(self, x: Tensor) -> Tensor: + return self._forward_impl(x) + + +# 临时使用,后面确定后可以放到数据集中做 +class MobileNetV3CustomTmp(MobileNetV3Custom): + + def forward(self, x: Tensor) -> Tensor: + x = 
x.view(-1, 1, 128, 80) + return super(MobileNetV3CustomTmp, self).forward(x) + + +def _mobilenet_v3_conf(arch: str, width_mult: float = 1.0, reduced_tail: bool = False, dilated: bool = False, + **kwargs: Any): + reduce_divider = 2 if reduced_tail else 1 + dilation = 2 if dilated else 1 + + bneck_conf = partial(InvertedResidualConfig, width_mult=width_mult) + adjust_channels = partial(InvertedResidualConfig.adjust_channels, width_mult=width_mult) + + if arch == "mobilenet_v3_large": + inverted_residual_setting = [ + bneck_conf(16, 3, 16, 16, False, "RE", 1, 1), + bneck_conf(16, 3, 64, 24, False, "RE", 2, 1), # C1 + bneck_conf(24, 3, 72, 24, False, "RE", 1, 1), + bneck_conf(24, 5, 72, 40, True, "RE", 2, 1), # C2 + bneck_conf(40, 5, 120, 40, True, "RE", 1, 1), + bneck_conf(40, 5, 120, 40, True, "RE", 1, 1), + bneck_conf(40, 3, 240, 80, False, "HS", 2, 1), # C3 + bneck_conf(80, 3, 200, 80, False, "HS", 1, 1), + bneck_conf(80, 3, 184, 80, False, "HS", 1, 1), + bneck_conf(80, 3, 184, 80, False, "HS", 1, 1), + bneck_conf(80, 3, 480, 112, True, "HS", 1, 1), + bneck_conf(112, 3, 672, 112, True, "HS", 1, 1), + bneck_conf(112, 5, 672, 160 // reduce_divider, True, "HS", 2, dilation), # C4 + bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation), + bneck_conf(160 // reduce_divider, 5, 960 // reduce_divider, 160 // reduce_divider, True, "HS", 1, dilation), + ] + last_channel = adjust_channels(1280 // reduce_divider) # C5 + elif arch == "mobilenet_v3_small": + inverted_residual_setting = [ + bneck_conf(16, 3, 16, 16, True, "RE", 2, 1), # C1 + bneck_conf(16, 3, 72, 24, False, "RE", 2, 1), # C2 + bneck_conf(24, 3, 88, 24, False, "RE", 1, 1), + bneck_conf(24, 5, 96, 40, True, "HS", 2, 1), # C3 + bneck_conf(40, 5, 240, 40, True, "HS", 1, 1), + bneck_conf(40, 5, 240, 40, True, "HS", 1, 1), + bneck_conf(40, 5, 120, 48, True, "HS", 1, 1), + bneck_conf(48, 5, 144, 48, True, "HS", 1, 1), + bneck_conf(48, 5, 288, 96 // reduce_divider, True, "HS", 2, dilation), # C4 + bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation), + bneck_conf(96 // reduce_divider, 5, 576 // reduce_divider, 96 // reduce_divider, True, "HS", 1, dilation), + ] + last_channel = adjust_channels(1024 // reduce_divider) # C5 + else: + raise ValueError("Unsupported model type {}".format(arch)) + return inverted_residual_setting, last_channel + + +def _mobilenet_v3_model( + arch: str, + inverted_residual_setting: List[InvertedResidualConfig], + last_channel: int, + pretrained: bool, + progress: bool, + **kwargs: Any +): + model = MobileNetV3CustomTmp(inverted_residual_setting, last_channel, **kwargs) + return model + + +def mobilenet_v3_large(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV3CustomTmp: + """ + Constructs a large MobileNetV3 architecture from + `"Searching for MobileNetV3" `_. + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + arch = "mobilenet_v3_large" + inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, **kwargs) + return _mobilenet_v3_model(arch, inverted_residual_setting, last_channel, pretrained, progress, **kwargs) + + +def mobilenet_v3_small(pretrained: bool = False, progress: bool = True, **kwargs: Any) -> MobileNetV3CustomTmp: + """ + Constructs a small MobileNetV3 architecture from + `"Searching for MobileNetV3" `_. 
+ + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + progress (bool): If True, displays a progress bar of the download to stderr + """ + arch = "mobilenet_v3_small" + inverted_residual_setting, last_channel = _mobilenet_v3_conf(arch, **kwargs) + return _mobilenet_v3_model(arch, inverted_residual_setting, last_channel, pretrained, progress, **kwargs) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class.py new file mode 100644 index 0000000..b0c6b75 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class.py @@ -0,0 +1,553 @@ +import torch.nn as nn +import torch +import torch.nn.functional as functional +from tqdm import tqdm +import os +import sys +from torch.optim.lr_scheduler import MultiStepLR +import time +from torch.utils.data import DataLoader +import torch.utils.data as data +import glob +import numpy as np +from torch.optim.rmsprop import RMSprop +from torch.optim.adam import Adam +import librosa + +""" +模型 +""" + +MFCC_LEN = 80 +FRAME_LEN = 128 +RATIO = 0.8 +STEP = 1 # 每隔STEP帧产生一组数据 +TEST_RATE = 1 # 少量数据集测试时可以设置该值,1.0代表全部数据参与 + +from music_gender_models import * + +""" + 数据集 +""" + + +class MusicVoiceDataset(data.Dataset): + """ + other->2 male->1 female->0 + """ + + def __init__(self, root, is_train=True): + """ + :param root: 特征的目录 + """ + self.root = root + self._dataset = [] + self._files = [] + self._is_train = is_train + self._features, self._labels = self.get_db() + print("load data train={} feature_size={} label_size={}".format(is_train, self._features.shape, + self._labels.shape)) + + def __len__(self): + return len(self._labels) + + def __getitem__(self, idx): + feature = self.get_one_data(idx) + label = self._labels[idx] + return feature, label + + def get_item(self, idx): + return self._features[idx], self._labels[idx] + + def get_files(self): + male_files = glob.glob(os.path.join(self.root, "*/pure/male/*.feature.npy")) + female_files = glob.glob(os.path.join(self.root, "*/pure/female/*.feature.npy")) + other_files = glob.glob(os.path.join(self.root, "*/other/*/*.feature.npy")) + acc_files = glob.glob(os.path.join(self.root, "*/acc/*/*.feature.npy")) + other_files.extend(acc_files) + return male_files, female_files, other_files + + def get_random_data(self, features, labels, rate): + idx = list(range(0, len(labels))) + np.random.shuffle(idx) + features = np.array(features)[idx] + labels = np.array(labels)[idx] + num = int(len(idx) * rate) + return list(features[:num]), list(labels[:num]) + + def shuffle_train_test_by_song(self, st_idx, features, labels): + """ + 按照歌曲进行切分 + :param st_idx: + :param features: + :param labels: + :return: + """ + print("shuffle_train_test_by_song....") + song_ids = [] + for i in range(st_idx, len(self._files)): + file = self._files[i] + song_id = str(file).split("/")[-1].split("_")[0] + if song_id not in song_ids: + song_ids.append(song_id) + idx = list(range(len(song_ids))) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + new_song_ids = np.array(song_ids)[np.array(idx)] + train_features = [] + train_label = [] + test_features = [] + test_label = [] + + train_area_files = {} + test_area_files = {} + for i, feature in enumerate(features): + area = str(self._files[feature[0]]).split("/")[-4] + song_id = str(self._files[feature[0]]).split("/")[-1].split("_")[0] + if song_id in new_song_ids[train_idx:]: + 
test_features.append(feature) + test_label.append(labels[i]) + + if area not in test_area_files.keys(): + test_area_files[area] = set() + test_area_files[area].add(song_id) + else: + train_features.append(feature) + train_label.append(labels[i]) + if area not in train_area_files.keys(): + train_area_files[area] = set() + train_area_files[area].add(song_id) + + print(">>>>>>>>>>>test_dict>>>>>>>>>>>>") + for k, v in test_area_files.items(): + print("{}:{}".format(k, len(v))) + + print(">>>>>>>>>>>train_dict>>>>>>>>>>>>") + for k, v in train_area_files.items(): + print("{}:{}".format(k, len(v))) + + return train_features, train_label, test_features, test_label + + def shuffle_train_test(self, st_idx, features, labels): + """ + 对于歌曲整体shuffle,然后取20%做测试集合 + self._dataset 存储的是所有的数据,我们只需要将部分数据进行处理即可 + :param st_idx + :param features: + :param labels: + :return: + """ + print("shuffle_train_test ....") + idx = list(range(len(self._dataset) - st_idx)) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + train_features = [] + train_label = [] + test_features = [] + test_label = [] + for i, feature in enumerate(features): + cur_idx = feature[0] - st_idx + assert cur_idx >= 0 + if cur_idx in idx[train_idx:]: + test_features.append(feature) + test_label.append(labels[i]) + else: + train_features.append(feature) + train_label.append(labels[i]) + return train_features, train_label, test_features, test_label + + def gen_data(self, files, label): + """ + :return: + """ + features = [] + labels = [] + cur_idx = len(self._dataset) + for idx, file in enumerate(files): + dt = np.load(file) + file_idx = len(self._dataset) + self._dataset.append(dt) + self._files.append(file) + for i in range(FRAME_LEN, len(dt), STEP): # 间隔为STEP + features.append([file_idx, i]) # 存储文件位置, 帧位置 + labels.append(label) + + # 对于歌曲乱序之后,按照歌曲段分隔训练数据集和测试数据集 + # train_x, train_y, test_x, test_y = self.shuffle_train_test(cur_idx, features, labels) + # 对于歌曲乱序之后,按照歌曲分隔训练数据集和测试数据集 + train_x, train_y, test_x, test_y = self.shuffle_train_test_by_song(cur_idx, features, labels) + np.random.seed(10) + train_x, train_y = self.get_random_data(train_x, train_y, TEST_RATE) + np.random.seed(64) + test_x, test_y = self.get_random_data(test_x, test_y, TEST_RATE) + return train_x, train_y, test_x, test_y + + def get_db(self): + male_files, female_files, other_files = self.get_files() + self._dataset = [] + self._files = [] + + female_train_x, female_train_y, female_test_x, female_test_y = self.gen_data(female_files, 0) # 女声是0 + male_train_x, male_train_y, male_test_x, male_test_y = self.gen_data(male_files, 1) # 男声是1 + other_train_x, other_train_y, other_test_x, other_test_y = self.gen_data(other_files, 2) + + # 构造为1:1 + # 不确定->暂时试一下 + # min_train_len = min(min(len(female_train_y), len(male_train_y)), len(other_train_y)) + # min_test_len = min(min(len(female_test_y), len(male_test_y)), len(other_test_y)) + # + # female_train_x = female_train_x[:min_train_len] + # female_train_y = female_train_y[:min_train_len] + # male_train_x = male_train_x[:min_train_len] + # male_train_y = male_train_y[:min_train_len] + # other_train_x = other_train_x[:min_train_len] + # other_train_y = other_train_y[:min_train_len] + # + # female_test_x = female_test_x[:min_test_len] + # female_test_y = female_test_y[:min_test_len] + # male_test_x = male_test_x[:min_test_len] + # male_test_y = male_test_y[:min_test_len] + # other_test_x = other_test_x[:min_test_len] + # other_test_y = other_test_y[:min_test_len] + + # 合并数据 + train_x = female_train_x + 
male_train_x + other_train_x + train_y = female_train_y + male_train_y + other_train_y + test_x = female_test_x + male_test_x + other_test_x + test_y = female_test_y + male_test_y + other_test_y + if self._is_train: + return np.array(train_x), np.array(train_y) + return np.array(test_x), np.array(test_y) + + def get_one_data(self, idx): + file_idx, frame_idx = self._features[idx] + feature = self._dataset[file_idx][frame_idx - FRAME_LEN: frame_idx] + return feature + + def get_dataset(self): + return self._files + + +class MusicVoiceDatasetV1(data.Dataset): + """ + other->2 male->1 female->0 + """ + + def __init__(self, root, is_train=True): + """ + :param root: 特征的目录 + """ + self.root = root + self.ma_label = os.path.join(self.root, "ma_label_32.txt") + print("load label file ..{}".format(self.ma_label)) + self._dataset = [] + self._dataset_dt = [] + self._files = [] + self._is_train = is_train + self._features, self._labels = self.get_db() + print("load data train={} feature_size={} label_size={}".format(is_train, self._features.shape, + self._labels.shape)) + + def __len__(self): + return len(self._labels) + + def __getitem__(self, idx): + feature = self.get_one_data(idx) + label = self._labels[idx] + return feature, label + + def load_file(self): + features = [] + labels = [] + self._dataset = [] + data_dict = {} + with open(self.ma_label, "r") as f: + while True: + line = f.readline() + if not line: + break + arr = line.split(",") + fname = arr[0] + if fname not in data_dict.keys(): + data_dict[fname] = len(self._dataset) + self._dataset.append(fname) + self._dataset_dt.append([]) + features.append([]) + labels.append([]) + idx = data_dict[fname] + # 控制一下STEP + if len(features[idx]) > 1: + cur_idx = features[idx][-1][1] + if int(arr[1]) - cur_idx < 32: # 同一个文件,帧移为32,该值可变 + continue + + features[idx].append([data_dict[fname], int(arr[1])]) + labels[idx].append(int(arr[2])) + return features, labels + + def get_item(self, idx): + return self._features[idx], self._labels[idx] + + def get_random_data(self, features, labels, rate): + idx = list(range(0, len(labels))) + np.random.shuffle(idx) + features = np.array(features)[idx] + labels = np.array(labels)[idx] + num = int(len(idx) * rate) + return list(features[:num]), list(labels[:num]) + + def shuffle_train_test(self, features, labels, is_train): + """ + 对于歌曲整体shuffle,然后取20%做测试集合 + :param features: + :param labels: + :return: + """ + idx = list(range(len(labels))) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + + train_features = [] + train_label = [] + test_features = [] + test_label = [] + if is_train: + for ii in idx[:train_idx]: + train_features.extend(features[ii]) + train_label.extend(labels[ii]) + return train_features, train_label + for ii in idx[train_idx:]: + test_features.extend(features[ii]) + test_label.extend(labels[ii]) + return test_features, test_label + + def keep_rate(self, x, y): + """ + 保证比例 + :param x: + :param y: + :return: + """ + # 获取每个分类最少 + td = {} + for i in range(0, len(y)): + if y[i] not in td.keys(): + td[y[i]] = 0 + td[y[i]] += 1 + mmin = -1 + for k, v in td.items(): + if mmin == -1 or v < mmin: + mmin = v + print("keep rate....{}".format(td)) + td = {} + new_feature = [] + new_y = [] + for i in range(0, len(x)): + if y[i] not in td.keys(): + td[y[i]] = 0 + td[y[i]] += 1 + + if td[y[i]] > mmin: + continue + new_feature.append(x[i]) + new_y.append(y[i]) + return new_feature, new_y + + def get_db(self): + tm = time.time() + features, labels = self.load_file() + print("load 
file ok--> file_num={} | sp={}".format(len(self._dataset), time.time() - tm)) + + tm = time.time() + x, y = self.shuffle_train_test(features, labels, self._is_train) + print("shuffle_train_test ok-->{},{} | sp={}".format(len(x), len(y), time.time() - tm)) + + tm = time.time() + if self._is_train: + np.random.seed(10) + else: + np.random.seed(64) + + x, y = self.get_random_data(x, y, TEST_RATE) + print("get_random_data ok-->{},{}|sp={}".format(len(x), len(y), time.time() - tm)) + x, y = self.keep_rate(x, y) + print("keep_rate ok-->{},{}|sp={}".format(len(x), len(y), time.time() - tm)) + return np.array(x), np.array(y) + + def get_one_data(self, idx): + file_idx, frame_idx = self._features[idx] + if len(self._dataset_dt[file_idx]) == 0: + self._dataset_dt[file_idx] = np.load(self._dataset[file_idx]) + feature = self._dataset_dt[file_idx][frame_idx - FRAME_LEN: frame_idx] + return feature + + def get_dataset(self): + return self._files + + +""" +处理逻辑 +""" + + +def get_dataloader(root): + batch_size = 256 + thread_num = 1 + + # trainset = MusicVoiceDataset(root) + trainset = MusicVoiceDatasetV1(root) + trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=thread_num) + + # valset = MusicVoiceDataset(root, False) + valset = MusicVoiceDatasetV1(root, False) + valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=thread_num) + + return trainloader, valloader + + +def train_one_epoch(model, device, loader, optimizer, criterion): + model.train() + + total_num = 0 + total_loss = 0 + correct = 0 + + for mfcces, labels in tqdm(loader): + batch_size = mfcces.size(0) + mfcces = mfcces.to(device) + labels = labels.to(device) + + predicts = model(mfcces) + + optimizer.zero_grad() + loss = criterion(predicts, labels) + loss.backward() + optimizer.step() + + total_num += batch_size + total_loss += loss.item() * batch_size + + _, predicts = predicts.max(dim=1) + correct += predicts.eq(labels).sum().item() + + if total_num != 0: + total_loss = total_loss / total_num + correct = correct / total_num + + return total_loss, correct + + +def val_one_epoch(model, device, loader, criterion): + model.eval() + + total_num = 0 + total_loss = 0 + correct = 0 + + # 展示多个数据 + correct_dict = { + 0: {0: 0, 1: 0, 2: 0}, # 标签是0,预期出是0,1,2 + 1: {0: 0, 1: 0, 2: 0}, + 2: {0: 0, 1: 0, 2: 0}, + } + + with torch.no_grad(): + for mfcces, labels in loader: + batch_size = mfcces.size(0) + mfcces = mfcces.to(device) + labels = labels.to(device) + predicts = model(mfcces) + + loss = criterion(predicts, labels) + + total_num += batch_size + total_loss += loss.item() * batch_size + + _, predicts = predicts.max(dim=1) + correct += predicts.eq(labels).sum().item() + + # 检查数据 + for ii in range(0, len(labels)): + kk = int(labels[ii]) + vv = predicts[ii].item() + correct_dict[kk][vv] += 1 + + if total_num != 0: + total_loss = total_loss / total_num + correct = correct / total_num + print("----------------------------->") + print(correct_dict) + return total_loss, correct + + +def train(model, device, model_path, set_dir): + # 训练配置参数 + max_epoch = 200 + lr = 1e-2 + momentum = 0 + weight_decay = 0 + # 学习率调整参数 + milestones = [10, 30, 50, 80, 100] + gamma = 0.1 + # 模型保存路径 + save_directory = model_path + if not os.path.exists(save_directory): + os.makedirs(save_directory) + + optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) + # optimizer = RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) + # optimizer = 
Adam(model.parameters(), lr=lr) + criterion = nn.CrossEntropyLoss() + scheduler = MultiStepLR(optimizer, milestones, gamma) + + # 文件地址 + data_dir = set_dir + train_loader, val_loader = get_dataloader(data_dir) + + max_acc = 0 + + for i in range(max_epoch): + start = time.time() + t_loss, t_acc = train_one_epoch(model, device, train_loader, optimizer, criterion) + v_loss, v_acc = val_one_epoch(model, device, val_loader, criterion) + end = time.time() + + scheduler.step(i) + + msg = 't_loss:%f\tt_acc:%.2f' % (t_loss, t_acc * 100) + msg += '\tv_loss:%f\tv_acc:%.2f' % (v_loss, v_acc * 100) + msg += '\ttime:%f\tepoch:%d' % (end - start, i) + print(msg) + + params = model.state_dict() + save_path = os.path.join(save_directory, 'CNN_epoch_' + str(i) + '_' + str(v_acc) + '.pth') + torch.save(params, save_path) + + max_acc = max(max_acc, v_acc) + + print('最大acc为:', max_acc) + + +def main(model_path, set_dir): + device = 'cuda' if torch.cuda.is_available() else 'cpu' + model = get_models("v5")() + model.to(device) + + train(model, device, model_path, set_dir) + + +# def get_num(): +# from torchstat import stat +# model = MusicVoiceV1Model() +# stat(model, (1, 128, 80)) + + +if __name__ == '__main__': + # get_num() + model_path = sys.argv[1] + set_dir = sys.argv[2] + + main(model_path, set_dir) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_gmm.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_gmm.py new file mode 100644 index 0000000..38be4f5 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_gmm.py @@ -0,0 +1,264 @@ +""" +使用GMM的方式进行训练 +""" +import torch.nn as nn +import torch +import torch.nn.functional as functional +from tqdm import tqdm +import os +import sys +from torch.optim.lr_scheduler import MultiStepLR +import time +from torch.utils.data import DataLoader +import torch.utils.data as data +import glob +import numpy as np +from torch.optim.rmsprop import RMSprop +from torch.optim.adam import Adam +import librosa +from sklearn.mixture import GaussianMixture +import joblib + +MFCC_LEN = 80 +FRAME_LEN = 128 +RATIO = 0.8 +STEP = 1 # 每隔STEP帧产生一组数据 +TEST_RATE = 0.1 # 少量数据集测试时可以设置该值,1.0代表全部数据参与 + + +class MusicVoiceDatasetV1(data.Dataset): + """ + other->2 male->1 female->0 + """ + + def __init__(self, root, is_train=True): + """ + :param root: 特征的目录 + """ + self.root = root + self.ma_label = os.path.join(self.root, "ma_label_32.txt") + self._dataset = [] + self._dataset_dt = [] + self._cur_dataset_idxs = [] # 保证内部数据量不会太多 + self._files = [] + self._is_train = is_train + self._features, self._labels = self.get_db() + print("load data train={} feature_size={} label_size={}".format(is_train, self._features.shape, + self._labels.shape)) + + def __len__(self): + return len(self._labels) + + def __getitem__(self, idx): + feature = self.get_one_data(idx) + label = self._labels[idx] + return feature, label + + def load_file(self): + print("load file={}".format(self.ma_label)) + features = [] + labels = [] + self._dataset = [] + data_dict = {} + with open(self.ma_label, "r") as f: + while True: + line = f.readline() + if not line: + break + arr = line.split(",") + fname = arr[0] + if fname not in data_dict.keys(): + data_dict[fname] = len(self._dataset) + self._dataset.append(fname) + self._dataset_dt.append([]) + features.append([]) + labels.append([]) + idx = data_dict[fname] + + # 控制一下STEP + if len(features[idx]) > 1: + cur_idx = features[idx][-1][1] + if int(arr[1]) - cur_idx < 32: # 
同一个文件,帧移为64,该值可变 + continue + + # 不要其他 + if int(arr[2]) == 2: + continue + + features[idx].append([data_dict[fname], int(arr[1])]) + labels[idx].append(int(arr[2])) + return features, labels + + def get_item(self, idx): + return self._features[idx], self._labels[idx] + + def get_random_data(self, features, labels, rate): + idx = list(range(0, len(labels))) + np.random.shuffle(idx) + features = np.array(features)[idx] + labels = np.array(labels)[idx] + num = int(len(idx) * rate) + return list(features[:num]), list(labels[:num]) + + def shuffle_train_test(self, features, labels, is_train): + """ + 对于歌曲整体shuffle,然后取20%做测试集合 + :param features: + :param labels: + :return: + """ + idx = list(range(len(labels))) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + + train_features = [] + train_label = [] + test_features = [] + test_label = [] + if is_train: + for ii in idx[:train_idx]: + train_features.extend(features[ii]) + train_label.extend(labels[ii]) + return train_features, train_label + for ii in idx[train_idx:]: + test_features.extend(features[ii]) + test_label.extend(labels[ii]) + return test_features, test_label + + def keep_rate(self, x, y): + """ + 保证比例 + :param x: + :param y: + :return: + """ + # 获取每个分类最少 + td = {} + for i in range(0, len(y)): + if y[i] not in td.keys(): + td[y[i]] = 0 + td[y[i]] += 1 + mmin = -1 + for k, v in td.items(): + if mmin == -1 or v < mmin: + mmin = v + print("keep rate....{}".format(td)) + td = {} + new_feature = [] + new_y = [] + for i in range(0, len(x)): + if y[i] not in td.keys(): + td[y[i]] = 0 + td[y[i]] += 1 + + if td[y[i]] > mmin: + continue + new_feature.append(x[i]) + new_y.append(y[i]) + return new_feature, new_y + + def get_db(self): + tm = time.time() + features, labels = self.load_file() + print("load file ok--> file_num={} | sp={}".format(len(self._dataset), time.time() - tm)) + + tm = time.time() + x, y = self.shuffle_train_test(features, labels, self._is_train) + print("shuffle_train_test ok-->{},{} | sp={}".format(len(x), len(y), time.time() - tm)) + + tm = time.time() + if self._is_train: + np.random.seed(10) + else: + np.random.seed(64) + + x, y = self.get_random_data(x, y, TEST_RATE) + print("get_random_data ok-->{},{}|sp={}".format(len(x), len(y), time.time() - tm)) + x, y = self.keep_rate(x, y) + print("keep_rate ok-->{},{}|sp={}".format(len(x), len(y), time.time() - tm)) + return np.array(x), np.array(y) + + def get_one_data(self, idx): + file_idx, frame_idx = self._features[idx] + if len(self._dataset_dt[file_idx]) == 0: + self._dataset_dt[file_idx] = np.load(self._dataset[file_idx]) + self._cur_dataset_idxs.append(file_idx) + + if len(self._cur_dataset_idxs) >= 400: + f_idx = self._cur_dataset_idxs.pop(0) + self._dataset_dt[f_idx] = [] + + feature = self._dataset_dt[file_idx][frame_idx - FRAME_LEN: frame_idx - FRAME_LEN + 1] + return feature + + def get_dataset(self): + return self._files + + +def train(root, model_path): + num_m = 32 + num_fe = 18 + + batch_size = 10000 + thread_num = 1 + + st = time.time() + male_gmm = GaussianMixture(n_components=num_m, covariance_type='full', random_state=28, warm_start=True) + female_gmm = GaussianMixture(n_components=num_fe, covariance_type='full', random_state=28, warm_start=True) + + # 训练 + data_set = MusicVoiceDatasetV1(root, True) + data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=True, num_workers=thread_num) + for mfcces, labels in tqdm(data_loader): + + # 获取训练数据 + female = [] + male = [] + for idx, label in enumerate(labels): + if 
int(label) == 0: + female.append(np.array(mfcces[idx]).flatten()) + else: + male.append(np.array(mfcces[idx]).flatten()) + male = np.array(male) + female = np.array(female) + # print("male_shape={} female_shape={}".format(male.shape, female.shape)) + # 训练 + male_gmm.fit(male) + female_gmm.fit(female) + + # 保存模型 + joblib.dump(male_gmm, os.path.join(model_path, "mfcc_male_gmm_%s_ns.m" % num_m)) + joblib.dump(female_gmm, os.path.join(model_path, "mfcc_female_gmm_%s_ns.m" % num_fe)) + break + print("train .. spend_time={}".format(time.time() - st)) + del data_set + del data_loader + + # 预测 + ret_dict = { + 0: {0: 0, 1: 0}, # 0 female 1 male + 1: {0: 0, 1: 0}, + } + data_set = MusicVoiceDatasetV1(root, False) + data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=True, num_workers=thread_num) + for mfcces, labels in tqdm(data_loader): + + # 获取数据 + data = [] + for idx, label in enumerate(labels): + data.append(np.array(mfcces[idx]).flatten()) + male_score = male_gmm.score_samples(data) + female_score = female_gmm.score_samples(data) + for idx, label in enumerate(labels): + kk = int(label) + val = int(male_score[idx] > female_score[idx]) + ret_dict[kk][val] += 1 + + print("test_ret={}".format(ret_dict)) + + +if __name__ == "__main__": + work_dir = sys.argv[1] + model_path = sys.argv[2] + train(work_dir, model_path) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_simple.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_simple.py new file mode 100644 index 0000000..cdd01e4 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_simple.py @@ -0,0 +1,603 @@ +import torch.nn as nn +import torch +import torch.nn.functional as functional +from tqdm import tqdm +import os +import sys +from torch.optim.lr_scheduler import MultiStepLR +import time +from torch.utils.data import DataLoader +import torch.utils.data as data +import glob +import numpy as np +from torch.optim.rmsprop import RMSprop +from torch.optim.adam import Adam +import librosa + +""" +模型 +""" + +MFCC_LEN = 80 +FRAME_LEN = 128 +RATIO = 0.8 +STEP = 1 # 每隔STEP帧产生一组数据 +TEST_RATE = 0.1 # 少量数据集测试时可以设置该值,1.0代表全部数据参与 + +from music_gender_models_simple import * +import torch.nn.functional as F + +""" + 数据集 +""" + + +class MusicVoiceDataset(data.Dataset): + """ + other->2 male->1 female->0 + """ + + def __init__(self, root, is_train=True): + """ + :param root: 特征的目录 + """ + self.root = root + self._dataset = [] + self._files = [] + self._is_train = is_train + self._features, self._labels = self.get_db() + print("load data train={} feature_size={} label_size={}".format(is_train, self._features.shape, + self._labels.shape)) + + def __len__(self): + return len(self._labels) + + def __getitem__(self, idx): + feature = self.get_one_data(idx) + label = self._labels[idx] + return feature, label + + def get_item(self, idx): + return self._features[idx], self._labels[idx] + + def get_files(self): + male_files = glob.glob(os.path.join(self.root, "*/pure/male/*.feature.npy")) + female_files = glob.glob(os.path.join(self.root, "*/pure/female/*.feature.npy")) + return male_files, female_files + + def get_random_data(self, features, labels, rate): + idx = list(range(0, len(labels))) + np.random.shuffle(idx) + features = np.array(features)[idx] + labels = np.array(labels)[idx] + num = int(len(idx) * rate) + return list(features[:num]), list(labels[:num]) + + def shuffle_train_test_by_song(self, st_idx, features, labels): + """ + 按照歌曲进行切分 + 
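+        (Song-level split: every frame of a song lands on exactly one side,
+        so validation accuracy measures generalization to unseen songs.)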
:param st_idx: + :param features: + :param labels: + :return: + """ + print("shuffle_train_test_by_song....") + song_ids = [] + for i in range(st_idx, len(self._files)): + file = self._files[i] + song_id = str(file).split("/")[-1].split("_")[0] + if song_id not in song_ids: + song_ids.append(song_id) + idx = list(range(len(song_ids))) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + new_song_ids = np.array(song_ids)[np.array(idx)] + train_features = [] + train_label = [] + test_features = [] + test_label = [] + + train_area_files = {} + test_area_files = {} + for i, feature in enumerate(features): + area = str(self._files[feature[0]]).split("/")[-4] + song_id = str(self._files[feature[0]]).split("/")[-1].split("_")[0] + if song_id in new_song_ids[train_idx:]: + test_features.append(feature) + test_label.append(labels[i]) + + if area not in test_area_files.keys(): + test_area_files[area] = set() + test_area_files[area].add(song_id) + else: + train_features.append(feature) + train_label.append(labels[i]) + if area not in train_area_files.keys(): + train_area_files[area] = set() + train_area_files[area].add(song_id) + + print(">>>>>>>>>>>test_dict>>>>>>>>>>>>") + for k, v in test_area_files.items(): + print("{}:{}".format(k, len(v))) + + print(">>>>>>>>>>>train_dict>>>>>>>>>>>>") + for k, v in train_area_files.items(): + print("{}:{}".format(k, len(v))) + + return train_features, train_label, test_features, test_label + + def shuffle_train_test(self, st_idx, features, labels): + """ + 对于歌曲整体shuffle,然后取20%做测试集合 + self._dataset 存储的是所有的数据,我们只需要将部分数据进行处理即可 + :param st_idx + :param features: + :param labels: + :return: + """ + print("shuffle_train_test ....") + idx = list(range(len(self._dataset) - st_idx)) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + train_features = [] + train_label = [] + test_features = [] + test_label = [] + for i, feature in enumerate(features): + cur_idx = feature[0] - st_idx + assert cur_idx >= 0 + if cur_idx in idx[train_idx:]: + test_features.append(feature) + test_label.append(labels[i]) + else: + train_features.append(feature) + train_label.append(labels[i]) + return train_features, train_label, test_features, test_label + + def gen_data(self, files, label): + """ + :return: + """ + features = [] + labels = [] + cur_idx = len(self._dataset) + for idx, file in enumerate(files): + dt = np.load(file) + file_idx = len(self._dataset) + self._dataset.append(dt) + self._files.append(file) + for i in range(FRAME_LEN, len(dt), STEP): # 间隔为STEP + features.append([file_idx, i]) # 存储文件位置, 帧位置 + labels.append(label) + + # 对于歌曲乱序之后,按照歌曲段分隔训练数据集和测试数据集 + # train_x, train_y, test_x, test_y = self.shuffle_train_test(cur_idx, features, labels) + # 对于歌曲乱序之后,按照歌曲分隔训练数据集和测试数据集 + train_x, train_y, test_x, test_y = self.shuffle_train_test_by_song(cur_idx, features, labels) + np.random.seed(10) + train_x, train_y = self.get_random_data(train_x, train_y, TEST_RATE) + np.random.seed(64) + test_x, test_y = self.get_random_data(test_x, test_y, TEST_RATE) + return train_x, train_y, test_x, test_y + + def get_db(self): + male_files, female_files = self.get_files() + self._dataset = [] + self._files = [] + + female_train_x, female_train_y, female_test_x, female_test_y = self.gen_data(female_files, 0) # 女声是0 + male_train_x, male_train_y, male_test_x, male_test_y = self.gen_data(male_files, 1) # 男声是1 + + # 合并数据 + train_x = female_train_x + male_train_x + train_y = female_train_y + male_train_y + test_x = female_test_x + male_test_x 
+ test_y = female_test_y + male_test_y + if self._is_train: + return np.array(train_x), np.array(train_y) + return np.array(test_x), np.array(test_y) + + def get_one_data(self, idx): + file_idx, frame_idx = self._features[idx] + feature = self._dataset[file_idx][frame_idx - FRAME_LEN: frame_idx] + return feature + + def get_dataset(self): + return self._files + + +class MusicVoiceDatasetV1(data.Dataset): + """ + other->2 male->1 female->0 + """ + + def __init__(self, root, is_train=True): + """ + :param root: 特征的目录 + """ + self.root = root + self.ma_label = os.path.join(self.root, "ma_label_32_strict_v1.txt") + # self.ma_label = os.path.join(self.root, "ma_label_32.txt") + self._dataset = [] + self._dataset_dt = [] + self._cur_dataset_idxs = [] # 保证内部数据量不会太多 + self._files = [] + self._is_train = is_train + self._features, self._labels = self.get_db() + print("load data train={} feature_size={} label_size={}".format(is_train, self._features.shape, + self._labels.shape)) + + def __len__(self): + return len(self._labels) + + def __getitem__(self, idx): + feature = self.get_one_data(idx) + label = self._labels[idx] + return feature, label + + def load_file(self): + print("load file={}".format(self.ma_label)) + features = [] + labels = [] + self._dataset = [] + data_dict = {} + with open(self.ma_label, "r") as f: + while True: + line = f.readline() + if not line: + break + arr = line.split(",") + fname = arr[0] + if fname not in data_dict.keys(): + self._files.append(fname) + data_dict[fname] = len(self._dataset) + self._dataset.append(fname) + self._dataset_dt.append([]) + features.append([]) + labels.append([]) + idx = data_dict[fname] + + # 控制一下STEP + if len(features[idx]) > 1: + cur_idx = features[idx][-1][1] + if int(arr[1]) - cur_idx < 32: # 同一个文件,帧移为32,该值可变 + continue + + # 不要其他 + if int(arr[2]) == 2: + continue + + features[idx].append([data_dict[fname], int(arr[1])]) + labels[idx].append(int(arr[2])) + return features, labels + + def get_item(self, idx): + return self._features[idx], self._labels[idx] + + def get_random_data(self, features, labels, rate): + idx = list(range(0, len(labels))) + np.random.shuffle(idx) + features = np.array(features)[idx] + labels = np.array(labels)[idx] + num = int(len(idx) * rate) + return list(features[:num]), list(labels[:num]) + + def shuffle_train_test(self, features, labels, is_train): + """ + 对于歌曲整体shuffle,然后取20%做测试集合 + :param features: + :param labels: + :return: + """ + idx = list(range(len(labels))) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + + train_features = [] + train_label = [] + test_features = [] + test_label = [] + if is_train: + for ii in idx[:train_idx]: + train_features.extend(features[ii]) + train_label.extend(labels[ii]) + return train_features, train_label + for ii in idx[train_idx:]: + test_features.extend(features[ii]) + test_label.extend(labels[ii]) + return test_features, test_label + + def keep_rate(self, x, y): + """ + 保证比例 + :param x: + :param y: + :return: + """ + # 获取每个分类最少 + td = {} + for i in range(0, len(y)): + if y[i] not in td.keys(): + td[y[i]] = 0 + td[y[i]] += 1 + mmin = -1 + for k, v in td.items(): + if mmin == -1 or v < mmin: + mmin = v + print("keep rate....{}".format(td)) + td = {} + new_feature = [] + new_y = [] + for i in range(0, len(x)): + if y[i] not in td.keys(): + td[y[i]] = 0 + td[y[i]] += 1 + + if td[y[i]] > mmin: + continue + new_feature.append(x[i]) + new_y.append(y[i]) + return new_feature, new_y + + def get_db(self): + tm = time.time() + features, labels = 
self.load_file() + print("load file ok--> file_num={} | sp={}".format(len(self._dataset), time.time() - tm)) + + tm = time.time() + x, y = self.shuffle_train_test(features, labels, self._is_train) + print("shuffle_train_test ok-->{},{} | sp={}".format(len(x), len(y), time.time() - tm)) + + tm = time.time() + if self._is_train: + np.random.seed(10) + else: + np.random.seed(64) + + x, y = self.get_random_data(x, y, TEST_RATE) + print("get_random_data ok-->{},{}|sp={}".format(len(x), len(y), time.time() - tm)) + x, y = self.keep_rate(x, y) + print("keep_rate ok-->{},{}|sp={}".format(len(x), len(y), time.time() - tm)) + return np.array(x), np.array(y) + + def get_one_data(self, idx): + file_idx, frame_idx = self._features[idx] + if len(self._dataset_dt[file_idx]) == 0: + self._dataset_dt[file_idx] = np.load(self._dataset[file_idx]) + self._cur_dataset_idxs.append(file_idx) + + if len(self._cur_dataset_idxs) >= 10000: + f_idx = self._cur_dataset_idxs.pop(0) + self._dataset_dt[f_idx] = [] + + feature = self._dataset_dt[file_idx][frame_idx - FRAME_LEN: frame_idx] + return feature + + def get_dataset(self): + return self._files + + +""" +损失函数 +""" + +""" +参考: https://ranmaosong.github.io/2019/07/20/cv-imbalance-between-easy-and-hard-examples +3.3 +""" + + +class FocalLossCustom(nn.Module): + def __init__(self, gamma=2.0, reduction=True): + super().__init__() + self.gamma = gamma + self.reduction = reduction + + def forward(self, pred, target): + # pred给yi=1时候的概率 + # 交叉熵为 sum(-y_i * log(p_i)) + # 当y_i = 0 时, 则用的是-log(1 - p_i) p_i增loss增 + # 当y_i = 1 时,则用的时-log(p_i) p_i减 loss减 + # BCE要求概率是一个值p_i代表正例发生的概率即可 + pred = F.log_softmax(pred, dim=1) + loss = F.nll_loss(pred, target, reduction="none") + pred = pred[:, 1] + # print(pred) + # loss = torch.nn.BCELoss(reduction="none")(pred.float(), target.float()) + # focal_loss + # 对容易分的情况消减loss + # p_i代表正例发生概率 + # 对于正例 乘以 (1 - p_i)^gamma + # 对于负例 乘以 p_i^gamma + # loss *= (target * torch.pow((1 - pred), self.gamma) + (1 - target) * torch.pow(pred, self.gamma)) + if self.reduction: + loss = torch.mean(loss) + return loss + + +def ohem_loss(criterion, pred, target, keep_num): + loss = criterion(pred, target) + loss_sorted, idx = torch.sort(loss, descending=True) + loss_keep = loss_sorted[:keep_num] + return loss_keep.sum() / keep_num + + +""" +处理逻辑 +""" + + +def get_dataloader(root): + batch_size = 256 + thread_num = 1 + + # trainset = MusicVoiceDataset(root) + trainset = MusicVoiceDatasetV1(root) + trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=thread_num) + + # valset = MusicVoiceDataset(root, False) + valset = MusicVoiceDatasetV1(root, False) + valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=thread_num) + + return trainloader, valloader + + +def train_one_epoch(model, device, loader, optimizer, criterion, use_topk=0): + model.train() + + total_num = 0 + total_loss = 0 + correct = 0 + + for mfcces, labels in tqdm(loader): + batch_size = mfcces.size(0) + mfcces = mfcces.to(device) + labels = labels.to(device) + + predicts = model(mfcces) + + optimizer.zero_grad() + # loss = criterion(predicts, labels) + if use_topk > 0: + loss = ohem_loss(criterion, predicts, labels, use_topk) + else: + loss = criterion(predicts, labels) + loss.backward() + optimizer.step() + + total_num += batch_size + total_loss += loss.item() * batch_size + + _, predicts = predicts.max(dim=1) + correct += predicts.eq(labels).sum().item() + + if total_num != 0: + total_loss = total_loss / total_num + correct = correct / 
total_num + + return total_loss, correct + + +def val_one_epoch(model, device, loader, criterion, use_topk): + model.eval() + + total_num = 0 + total_loss = 0 + correct = 0 + + # 展示多个数据 + correct_dict = { + 0: {0: 0, 1: 0, 2: 0}, # 标签是0,预期出是0,1,2 + 1: {0: 0, 1: 0, 2: 0}, + 2: {0: 0, 1: 0, 2: 0}, + } + + with torch.no_grad(): + for mfcces, labels in loader: + batch_size = mfcces.size(0) + mfcces = mfcces.to(device) + labels = labels.to(device) + predicts = model(mfcces) + + if use_topk > 0: + loss = ohem_loss(criterion, predicts, labels, use_topk) + else: + loss = criterion(predicts, labels) + + total_num += batch_size + total_loss += loss.item() * batch_size + + _, predicts = predicts.max(dim=1) + correct += predicts.eq(labels).sum().item() + + # 检查数据 + for ii in range(0, len(labels)): + kk = int(labels[ii]) + vv = predicts[ii].item() + correct_dict[kk][vv] += 1 + + if total_num != 0: + total_loss = total_loss / total_num + correct = correct / total_num + print("----------------------------->") + print(correct_dict) + return total_loss, correct + + +def train(model, device, model_path, set_dir): + # 训练配置参数 + max_epoch = 200 + lr = 1e-2 + momentum = 0.9 + weight_decay = 0 + # 学习率调整参数 + milestones = [10, 30, 50, 80, 100] + gamma = 0.1 + # 模型保存路径 + save_directory = model_path + if not os.path.exists(save_directory): + os.makedirs(save_directory) + + # optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) + # optimizer = RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) + optimizer = Adam(model.parameters(), lr=lr) + criterion = nn.CrossEntropyLoss() + # criterion = FocalLossCustom() + scheduler = MultiStepLR(optimizer, milestones, gamma) + + # 文件地址 + data_dir = set_dir + train_loader, val_loader = get_dataloader(data_dir) + + max_acc = 0 + + for i in range(max_epoch): + start = time.time() + use_topk = 0 + # if i >= 5: + # use_topk = 128 + # criterion = nn.CrossEntropyLoss(reduction='none') + # print("use topk={}".format(use_topk)) + t_loss, t_acc = train_one_epoch(model, device, train_loader, optimizer, criterion, use_topk) + v_loss, v_acc = val_one_epoch(model, device, val_loader, criterion, use_topk) + end = time.time() + + scheduler.step(i) + + msg = 't_loss:%f\tt_acc:%.2f' % (t_loss, t_acc * 100) + msg += '\tv_loss:%f\tv_acc:%.2f' % (v_loss, v_acc * 100) + msg += '\ttime:%f\tepoch:%d' % (end - start, i) + print(msg) + + params = model.state_dict() + save_path = os.path.join(save_directory, 'CNN_epoch_' + str(i) + '_' + str(v_acc) + '.pth') + torch.save(params, save_path) + + max_acc = max(max_acc, v_acc) + + print('最大acc为:', max_acc) + + +def main(model_path, set_dir): + device = 'cuda' if torch.cuda.is_available() else 'cpu' + model = get_models("v6")() + model.to(device) + + train(model, device, model_path, set_dir) + + +# def get_num(): +# from torchstat import stat +# model = MusicVoiceV1Model() +# stat(model, (1, 128, 80)) + + +if __name__ == '__main__': + # get_num() + model_path = sys.argv[1] + set_dir = sys.argv[2] + + main(model_path, set_dir) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val.py new file mode 100644 index 0000000..1a7959a --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val.py @@ -0,0 +1,157 @@ +""" +验证music_gender分类情况 + +1 判断一首歌曲的是否是男女的标准,某个分类占比达到50%以上,否则不认定 +2 判断长度为2s的段,段移为1s-> 暂定,可以修改 +""" +import os +import sys 
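+
+# Song-level decision rule (implemented in process_one below): each FRAME_LEN
+# window is classified independently, and the song is called female/male only
+# when that class wins more than half of all windows. Example: 60 female,
+# 30 male, 25 other -> female (60/115 > 0.5); 40/40/35 -> uncertain (2).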
+import glob +import numpy as np +import psutil +import time +import torch.nn.functional + +os.environ["LRU_CACHE_CAPACITY"] = "1" + +FRAME_LEN = 128 +MFCC_LEN = 80 + +from music_gender_models import * + + +def get_current_memory_gb(): + # 获取当前进程内存占用。 + pid = os.getpid() + p = psutil.Process(pid) + info = p.memory_full_info() + print("cur memory=:{} M".format(info.uss / 1024 / 1024)) + + +class PredictModel: + """ + 测试一下模型的效果,将错误的直接输出出来 + """ + + def __init__(self, model_path, features_dir): + self.device = 'cuda' + model = get_models("v5")() + params = torch.load(model_path) + model.load_state_dict(state_dict=params) + model.eval() + self.model = model + self.frame_num = FRAME_LEN + self.batch_size = 128 + self.features_dir = features_dir + + self._female_files = glob.glob(os.path.join(feature_dir, "*/female/*.feature.npy")) # 女_0 + self._male_files = glob.glob(os.path.join(features_dir, "*/male/*.feature.npy")) # 男_1 + self._other_files = glob.glob(os.path.join(features_dir, "*/other/*.feature.npy")) # 其他2 + + def process_one(self, file, gender): + + # 构建数据 + mfccs = np.load(file) + data = [] + for i in range(FRAME_LEN, len(mfccs), 128): # 间隔稍微宽一点,减少计算量 + data.append(mfccs[i - FRAME_LEN:i]) + data = torch.from_numpy(np.array(data)) + print("load data ok.... shape={}".format(data.shape)) + + # 预测 + female_num = 0 + male_num = 0 + other_num = 0 + + female_sm = [] + male_sm = [] + # filename, gender, idx, female_score, male_score, other_score + ret_msg = [] + with torch.no_grad(): + batch_size = 256 + for i in range(0, len(data), batch_size): + predicts = self.model(data[i:i + batch_size]) + predicts_score = torch.nn.functional.softmax(predicts, dim=1) + _, predicts = predicts.max(dim=1) + print("predict ok...") + # 统计结果 + for j in range(len(predicts)): + ret_msg.append( + "{},{},{},{},{},{}".format(file, gender, i + j, predicts_score[j][0], predicts_score[j][1], + predicts_score[j][2])) + male_sm.append(predicts_score[j][1]) + female_sm.append(predicts_score[j][0]) + if predicts[j] == 0: + female_num += 1 + if predicts[j] == 1: + male_num += 1 + if predicts[j] == 2: + other_num += 1 + print("calc ok...") + print("{},{}".format(sum(female_sm) / len(female_sm), sum(male_sm) / len(male_sm))) + print("torch {},{},{}....".format(female_num, male_num, other_num)) + + # 占比超过一半,则判定为男/女,否则不确定 + tot = female_num + male_num + other_num + if female_num / tot > 0.5: + return 0, ret_msg + if male_num / tot > 0.5: + return 1, ret_msg + return 2, ret_msg + + def process_files(self, files, gender, log_file): + # 处理女声的结果 + f_num = 0 + m_num = 0 + o_num = 0 + for file in files: + ret, ret_msg = self.process_one(file, gender) + print("file_name={} ret={}".format(file, ret)) + if ret == 0: + f_num += 1 + elif ret == 1: + m_num += 1 + else: + o_num += 1 + + # 追加写入到文件 + with open(log_file, "a") as f: + for line in ret_msg: + f.write(line + "\n") + + print("f_num={}, m_num={}, o_num={}".format(f_num, m_num, o_num)) + return f_num, m_num, o_num + + def process(self, log_file): + f_num, m_num, o_num = self.process_files(self._female_files, 0, log_file) + f_num1, m_num1, o_num1 = self.process_files(self._male_files, 1, log_file) + self.process_files(self._other_files, 2, log_file) + # 对于女声 + f_acc = f_num / (f_num + f_num1) + f_recall = f_num / len(self._female_files) + print("f_male= acc={} recall={}".format(f_acc, f_recall)) + # 对于男声 + m_acc = m_num1 / (m_num + m_num1) + m_recall = m_num1 / len(self._male_files) + print("m_male= acc={} recall={}".format(m_acc, m_recall)) + + +def process_one(model_dir, filepath, 
log_file): + pm = PredictModel(model_dir, "") + pm.process_files([filepath], 0, log_file) + + +if __name__ == "__main__": + model_dir = sys.argv[1] + feature_dir = sys.argv[2] + log_file = sys.argv[3] + mode = sys.argv[4] + if mode == "one": + if os.path.exists(log_file): + os.unlink(log_file) + process_one(model_dir, feature_dir, log_file) + else: + pm = PredictModel(model_dir, feature_dir) + if os.path.exists(log_file): + os.unlink(log_file) + pm.process(log_file) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val_v1.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val_v1.py new file mode 100644 index 0000000..3f3d455 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val_v1.py @@ -0,0 +1,123 @@ +""" +验证music_gender分类的线上情况 +输入特征文件所在文件夹,一个个文件判定,列出男声的文件列表和女声文件列表 +""" +import os +import sys +import glob +import numpy as np +import psutil +import time +import torch.nn.functional + +os.environ["LRU_CACHE_CAPACITY"] = "1" + +FRAME_LEN = 128 +MFCC_LEN = 80 + +from music_gender_models import * + + +def get_current_memory_gb(): + # 获取当前进程内存占用。 + pid = os.getpid() + p = psutil.Process(pid) + info = p.memory_full_info() + print("cur memory=:{} M".format(info.uss / 1024 / 1024)) + + +def stargy_v2(msg, filename): + """ + 去除不确定之后: 男性分数大于女性分数 2倍以上则判定男,同理去判定女 + :param msg: + :return: + """ + f_score = [] + m_score = [] + for i in range(len(msg)): + if msg[i][2] > 0.3: + continue + f_score.append(msg[i][0]) + m_score.append(msg[i][1]) + if (len(f_score) + len(m_score)) / len(msg) < 0.1: + return 2 + + f_avg = 0 + if len(f_score) > 0: + f_avg = sum(f_score) / len(f_score) + m_avg = 0 + if len(m_score) > 0: + m_avg = sum(m_score) / len(m_score) + + print("{},{},{},{},{},{}".format(filename, f_avg, m_avg, len(m_score), m_avg / f_avg, len(msg) - len(f_score))) + if f_avg > 3 * m_avg: + return 0 + if m_avg > 3 * f_avg: + return 1 + return 2 + + +class PredictModel: + """ + 测试一下模型的效果,将错误的直接输出出来 + """ + + def __init__(self, model_path, features_dir): + self.device = 'cuda' + model = get_models("v5")() + params = torch.load(model_path) + model.load_state_dict(state_dict=params) + model.eval() + self.model = model + self.frame_num = FRAME_LEN + self.batch_size = 128 + self.features_dir = features_dir + + self.files = glob.glob(os.path.join(feature_dir, "*.feature.npy")) # 女_0 + + def process_one(self, file): + + # 构建数据 + mfccs = np.load(file) + data = [] + for i in range(FRAME_LEN, len(mfccs), 128): # 间隔稍微宽一点,减少计算量 + data.append(mfccs[i - FRAME_LEN:i]) + data = torch.from_numpy(np.array(data)) + print("load data ok.... 
shape={}".format(data.shape)) + + # 预测 + scores = [] + with torch.no_grad(): + batch_size = 256 + for i in range(0, len(data), batch_size): + predicts = self.model(data[i:i + batch_size]) + predicts_score = torch.nn.functional.softmax(predicts, dim=1) + _, predicts = predicts.max(dim=1) + print("predict ok...") + # 统计结果 + for j in range(len(predicts)): + # 女-男-其他 + scores.append([predicts_score[j][0], predicts_score[j][1], predicts_score[j][2]]) + return stargy_v2(scores, file) + + def process_files(self, files, log_path): + ret_msg = [] + for file in files: + st = time.time() + ret = self.process_one(file) + print("spend_tm = {} | ret={}".format(time.time() - st, ret)) + ret_msg.append([file, ret]) + with open(log_path, "w") as f: + for line in ret_msg: + f.write("{},{}\n".format(line[0], line[1])) + + def process(self, log_path): + self.process_files(self.files, log_path) + + +if __name__ == "__main__": + model_dir = sys.argv[1] + feature_dir = sys.argv[2] + log_file = sys.argv[3] + pm = PredictModel(model_dir, feature_dir) + pm.process(log_file) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val_v2.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val_v2.py new file mode 100644 index 0000000..982fadb --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val_v2.py @@ -0,0 +1,174 @@ +""" +使用两个模型来验证歌曲级别的准确率和召回率 +生成出结果之后需要使用script/music_voice_class/ana中的代码 v2 代码进行结果分析 +""" +import os +import sys +import glob +import numpy as np +import psutil +import time +import torch.nn.functional + +os.environ["LRU_CACHE_CAPACITY"] = "1" + +FRAME_LEN = 128 +MFCC_LEN = 80 + +import music_voice_models +import music_gender_models_simple + + +def get_current_memory_gb(): + # 获取当前进程内存占用。 + pid = os.getpid() + p = psutil.Process(pid) + info = p.memory_full_info() + print("cur memory=:{} M".format(info.uss / 1024 / 1024)) + + +class PredictModel: + """ + 测试一下模型的效果,将错误的直接输出出来 + """ + + def __init__(self, model_path, model2_path, features_dir): + self.device = 'cuda' + model = music_voice_models.get_models("v5")() + params = torch.load(model_path) + model.load_state_dict(state_dict=params) + model.eval() + model.to(self.device) + + model2 = music_gender_models_simple.get_models("v6")() + params2 = torch.load(model2_path) + model2.load_state_dict(state_dict=params2) + model2.eval() + model2.to(self.device) + + self.model = model # 人声/其他 + self.model2 = model2 # 男女声 + + self.frame_num = FRAME_LEN + self.batch_size = 128 + self.features_dir = features_dir + + self._female_files = glob.glob(os.path.join(feature_dir, "*/female/*.feature.npy")) # 女_0 + self._male_files = glob.glob(os.path.join(features_dir, "*/male/*.feature.npy")) # 男_1 + self._other_files = glob.glob(os.path.join(features_dir, "*/other/*.feature.npy")) # 其他2 + + def process_one(self, file, gender): + + # 构建数据 + mfccs = np.load(file) + data = [] + for i in range(FRAME_LEN, len(mfccs), 128): # 间隔稍微宽一点,减少计算量 + data.append(mfccs[i - FRAME_LEN:i]) + data = torch.from_numpy(np.array(data)) + print("load data ok.... 
shape={}".format(data.shape)) + + # 预测 + female_num = 0 + male_num = 0 + other_num = 0 + + female_sm = [] + male_sm = [] + # filename, gender, idx, female_score, male_score, other_score + ret_msg = [] + with torch.no_grad(): + batch_size = 256 + for i in range(0, len(data), batch_size): + cur_data = data[i:i + batch_size].to(self.device) + predicts = self.model(cur_data) + predicts_score = torch.nn.functional.softmax(predicts, dim=1) + _, predicts = predicts.max(dim=1) + + predicts2 = self.model2(cur_data) + predicts_score2 = torch.nn.functional.softmax(predicts2, dim=1) + _, predicts2 = predicts2.max(dim=1) + + print("predict ok...") + # 统计结果 + for j in range(len(predicts)): + ret_msg.append( + "{},{},{},{},{},{},{}".format(file, gender, i + j, predicts_score[j][0], + predicts_score[j][1], predicts_score2[j][0], + predicts_score2[j][1])) + male_sm.append(predicts_score2[j][1]) + female_sm.append(predicts_score2[j][0]) + if predicts2[j] == 0: + female_num += 1 + if predicts2[j] == 1: + male_num += 1 + if predicts2[j] == 2: + other_num += 1 + print("calc ok...") + print("{},{}".format(sum(female_sm) / len(female_sm), sum(male_sm) / len(male_sm))) + print("torch {},{},{}....".format(female_num, male_num, other_num)) + + # 占比超过一半,则判定为男/女,否则不确定 + tot = female_num + male_num + other_num + if female_num / tot > 0.5: + return 0, ret_msg + if male_num / tot > 0.5: + return 1, ret_msg + return 2, ret_msg + + def process_files(self, files, gender, log_file): + # 处理女声的结果 + f_num = 0 + m_num = 0 + o_num = 0 + for file in files: + ret, ret_msg = self.process_one(file, gender) + print("file_name={} ret={}".format(file, ret)) + if ret == 0: + f_num += 1 + elif ret == 1: + m_num += 1 + else: + o_num += 1 + + # 追加写入到文件 + with open(log_file, "a") as f: + for line in ret_msg: + f.write(line + "\n") + + print("f_num={}, m_num={}, o_num={}".format(f_num, m_num, o_num)) + return f_num, m_num, o_num + + def process(self, log_file): + f_num, m_num, o_num = self.process_files(self._female_files, 0, log_file) + f_num1, m_num1, o_num1 = self.process_files(self._male_files, 1, log_file) + self.process_files(self._other_files, 2, log_file) + # 对于女声 + f_acc = f_num / (f_num + f_num1) + f_recall = f_num / len(self._female_files) + print("f_male= acc={} recall={}".format(f_acc, f_recall)) + # 对于男声 + m_acc = m_num1 / (m_num + m_num1) + m_recall = m_num1 / len(self._male_files) + print("m_male= acc={} recall={}".format(m_acc, m_recall)) + + +def process_one(model_dir, model_dir2, filepath, log_file): + pm = PredictModel(model_dir, model_dir2, "") + pm.process_files([filepath], 0, log_file) + + +if __name__ == "__main__": + model_dir = sys.argv[1] + model_dir2 = sys.argv[2] + feature_dir = sys.argv[3] + log_file = sys.argv[4] + mode = sys.argv[5] + if mode == "one": + if os.path.exists(log_file): + os.unlink(log_file) + process_one(model_dir, model_dir2, feature_dir, log_file) + else: + pm = PredictModel(model_dir, model_dir2, feature_dir) + if os.path.exists(log_file): + os.unlink(log_file) + pm.process(log_file) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val_v3.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val_v3.py new file mode 100644 index 0000000..89211e6 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_class_val_v3.py @@ -0,0 +1,189 @@ +""" +使用两个模型来验证歌曲级别的准确率和召回率 +生成出结果之后需要使用script/music_voice_class/ana中的代码 v3 代码进行结果分析 +""" +import os +import sys +import glob +import numpy as np +import psutil 
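+
+# v3 runs three models per window: model (pure vocal vs. other), model2
+# (vocal-containing vs. other) and model3 (female vs. male). The per-window
+# softmax scores of all three are appended to log_file so the
+# script/music_voice_class/ana v3 code can compute song-level
+# precision/recall offline.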
+import time +import torch.nn.functional + +os.environ["LRU_CACHE_CAPACITY"] = "1" + +FRAME_LEN = 128 +MFCC_LEN = 80 + +import music_voice_models +import music_gender_models_simple + + +def get_current_memory_gb(): + # 获取当前进程内存占用。 + pid = os.getpid() + p = psutil.Process(pid) + info = p.memory_full_info() + print("cur memory=:{} M".format(info.uss / 1024 / 1024)) + + +class PredictModel: + """ + 测试一下模型的效果,将错误的直接输出出来 + """ + + def __init__(self, model_path, model2_path, model3_path, features_dir): + self.device = 'cuda' + model = music_voice_models.get_models("v5")() + params = torch.load(model_path) + model.load_state_dict(state_dict=params) + model.eval() + + model2 = music_voice_models.get_models("v5")() + params = torch.load(model2_path) + model2.load_state_dict(state_dict=params) + model2.eval() + + model3 = music_gender_models_simple.get_models("v5")() + params3 = torch.load(model3_path) + model3.load_state_dict(state_dict=params3) + model3.eval() + + self.model = model # 纯人声/其他 + self.model2 = model2 # 带有人声/其他 + self.model3 = model3 # 男女声 + self.model.to(self.device) + self.model2.to(self.device) + self.model3.to(self.device) + + self.frame_num = FRAME_LEN + self.batch_size = 128 + self.features_dir = features_dir + + self._female_files = glob.glob(os.path.join(feature_dir, "female/*.feature.npy")) # 女_0 + self._male_files = glob.glob(os.path.join(features_dir, "male/*.feature.npy")) # 男_1 + self._other_files = glob.glob(os.path.join(features_dir, "other/*.feature.npy")) # 其他2 + + def process_one(self, file, gender): + + # 构建数据 + mfccs = np.load(file) + data = [] + for i in range(FRAME_LEN, len(mfccs), 128): # 间隔稍微宽一点,减少计算量 + data.append(mfccs[i - FRAME_LEN:i]) + data = torch.from_numpy(np.array(data)) + print("load data ok.... shape={}".format(data.shape)) + + # 预测 + female_num = 0 + male_num = 0 + other_num = 0 + + female_sm = [] + male_sm = [] + # filename, gender, idx, female_score, male_score, other_score + ret_msg = [] + with torch.no_grad(): + batch_size = 256 + for i in range(0, len(data), batch_size): + cur_data = data[i:i + batch_size].to(self.device) + predicts = self.model(cur_data) + predicts_score = torch.nn.functional.softmax(predicts, dim=1) + _, predicts = predicts.max(dim=1) + + predicts2 = self.model2(cur_data) + predicts_score2 = torch.nn.functional.softmax(predicts2, dim=1) + _, predicts2 = predicts2.max(dim=1) + + predicts3 = self.model3(cur_data) + predicts_score3 = torch.nn.functional.softmax(predicts3, dim=1) + _, predicts3 = predicts3.max(dim=1) + + print("predict ok...") + # 统计结果 + for j in range(len(predicts)): + ret_msg.append( + "{},{},{},{},{},{},{},{},{}".format(file, gender, i + j, predicts_score[j][0], + predicts_score[j][1], predicts_score2[j][0], + predicts_score2[j][1], + predicts_score3[j][0], + predicts_score3[j][1], + )) + male_sm.append(predicts_score2[j][1]) + female_sm.append(predicts_score2[j][0]) + if predicts2[j] == 0: + female_num += 1 + if predicts2[j] == 1: + male_num += 1 + if predicts2[j] == 2: + other_num += 1 + print("calc ok...") + print("{},{}".format(sum(female_sm) / len(female_sm), sum(male_sm) / len(male_sm))) + print("torch {},{},{}....".format(female_num, male_num, other_num)) + + # 占比超过一半,则判定为男/女,否则不确定 + tot = female_num + male_num + other_num + if female_num / tot > 0.5: + return 0, ret_msg + if male_num / tot > 0.5: + return 1, ret_msg + return 2, ret_msg + + def process_files(self, files, gender, log_file): + # 处理女声的结果 + f_num = 0 + m_num = 0 + o_num = 0 + for file in files: + ret, ret_msg = self.process_one(file, 
gender) + print("file_name={} ret={}".format(file, ret)) + if ret == 0: + f_num += 1 + elif ret == 1: + m_num += 1 + else: + o_num += 1 + + # 追加写入到文件 + with open(log_file, "a") as f: + for line in ret_msg: + f.write(line + "\n") + + print("f_num={}, m_num={}, o_num={}".format(f_num, m_num, o_num)) + return f_num, m_num, o_num + + def process(self, log_file): + f_num, m_num, o_num = self.process_files(self._female_files, 0, log_file) + f_num1, m_num1, o_num1 = self.process_files(self._male_files, 1, log_file) + self.process_files(self._other_files, 2, log_file) + # 对于女声 + f_acc = f_num / (f_num + f_num1) + f_recall = f_num / len(self._female_files) + print("f_male= acc={} recall={}".format(f_acc, f_recall)) + # 对于男声 + m_acc = m_num1 / (m_num + m_num1) + m_recall = m_num1 / len(self._male_files) + print("m_male= acc={} recall={}".format(m_acc, m_recall)) + + +def process_one(model_dir, model_dir2, model_dir3, filepath, log_file): + pm = PredictModel(model_dir, model_dir2, model_dir3, "") + pm.process_files([filepath], 0, log_file) + + +if __name__ == "__main__": + model_dir = sys.argv[1] + model_dir2 = sys.argv[2] + model_dir3 = sys.argv[3] + feature_dir = sys.argv[4] + log_file = sys.argv[5] + mode = sys.argv[6] + if mode == "one": + if os.path.exists(log_file): + os.unlink(log_file) + process_one(model_dir, model_dir2, model_dir3, feature_dir, log_file) + else: + pm = PredictModel(model_dir, model_dir2, model_dir3, feature_dir) + if os.path.exists(log_file): + os.unlink(log_file) + pm.process(log_file) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_models.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_models.py new file mode 100644 index 0000000..6122035 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_models.py @@ -0,0 +1,283 @@ +""" +模型列表 +""" + +import torch +import torch.nn as nn + +MFCC_LEN = 80 +FRAME_LEN = 128 + + +class MusicVoiceModel(nn.Module): + def __init__(self): + super(MusicVoiceModel, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + nn.Conv2d(24, 32, 3, 2), # (62, 38)--> v3_2新增 + nn.BatchNorm2d(32), + nn.ReLU(), + nn.Conv2d(32, 32, 3, 2), # (62, 38) + nn.BatchNorm2d(32), + nn.ReLU(), + # nn.Conv2d(32, 32, 3, 2), # (30, 18) + # nn.BatchNorm2d(32), + # nn.ReLU(), + nn.Conv2d(32, 16, 3, 2), # (14, 8) + nn.BatchNorm2d(16), + nn.ReLU(), + nn.Conv2d(16, 16, 3, 2), # (6, 3) + # nn.AvgPool2d((14, 8)), + ] + layer2 = [ + nn.Linear(16 * 6 * 3, 3), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 16 * 6 * 3]) + x = self.layer2(x) + return x + + +class MusicVoiceV4Model(nn.Module): + def __init__(self): + super(MusicVoiceV4Model, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (62, 38) -> layers2 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (60, 36) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (58, 34) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (56, 32) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (27, 15) -> layer5 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 32, 3), # (25, 13) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (12, 6) -> layers8 + nn.BatchNorm2d(32), + nn.ReLU(), + + 
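+            # The (H, W) comments track the feature map through unpadded 3x3
+            # convs: out = in - 2 at stride 1, out = floor((in - 3)/2) + 1 at
+            # stride 2, e.g. (25, 13) -> (12, 6) for the block above.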
nn.Conv2d(32, 32, 3), # (10, 4) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (4, 1) -> layers10 + ] + layer2 = [ + nn.Linear(32 * 4 * 1, 3), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 32 * 4 * 1]) + x = self.layer2(x) + return x + + +class MusicVoiceV4AMPModel(nn.Module): + def __init__(self): + super(MusicVoiceV4AMPModel, self).__init__() + layer1 = [ + # (128, 257) + nn.Conv2d(1, 24, 3), # (126, 255) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (62, 127) -> layers2 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (60, 125) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (58, 123) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (56, 121) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (27, 60) -> layer5 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 32, 3), # (25, 58) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (12, 28) -> layers8 + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3), # (10, 26) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (4, 12) -> layers10 + ] + layer2 = [ + nn.Linear(32 * 4 * 12, 3), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, 257]) + x = self.layer1(x) + x = x.view([-1, 32 * 4 * 12]) + x = self.layer2(x) + return x + + +class MusicVoiceV5Model(nn.Module): + def __init__(self): + super(MusicVoiceV5Model, self).__init__() + + def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + conv_bn(1, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + nn.AvgPool2d((4, 3)), + ) + self.fc = nn.Linear(1024, 3) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.model(x) + x = x.view(-1, 1024) + x = self.fc(x) + return x + + +class MusicVoiceV5AMPModel(nn.Module): + def __init__(self): + super(MusicVoiceV5AMPModel, self).__init__() + + def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + # 128, 257 + conv_bn(1, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + 
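+            # Six stride-2 stages shrink the 128x257 input (3x3, padding 1:
+            # out = floor((in - 1)/2) + 1): 128x257 -> 64x129 -> 32x65 ->
+            # 16x33 -> 8x17 -> 4x9 -> 2x5, so the AvgPool2d((2, 5)) below
+            # leaves a single 1024-d vector for the final Linear layer.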
conv_dw(1024, 1024, 1), + conv_dw(1024, 1024, 2), + nn.AvgPool2d((2, 5)), + ) + self.fc = nn.Linear(1024, 3) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, 257]) + x = self.model(x) + x = x.view(-1, 1024) + x = self.fc(x) + return x + + +def get_models(tp): + if tp == "v4": + print("load model v4 ...") + return MusicVoiceV4Model + if tp == "v4_amp": + print("load model v4 ...") + return MusicVoiceV4AMPModel + if tp == "v5": + print("load model v5 ...") + return MusicVoiceV5Model + if tp == "v5_amp": + print("load model v5_amp ...") + return MusicVoiceV5AMPModel + return MusicVoiceModel + +# if __name__ == "__main__": +# mm = get_models("v5")() +# xx = torch.randn((100, 128, 80)) +# predicts = mm(xx) +# _, p_max = predicts.max(dim=1) +# print(predicts[p_max != 2]) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_models_simple.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_models_simple.py new file mode 100644 index 0000000..7585013 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_gender_models_simple.py @@ -0,0 +1,300 @@ +""" +模型列表 +""" + +import torch +import torch.nn as nn + +MFCC_LEN = 80 +FRAME_LEN = 128 + +from mobilenet_v2_custom import MobileNetV2Custom +from mobilenet_v3_custom import mobilenet_v3_large + +class MusicVoiceModel(nn.Module): + def __init__(self): + super(MusicVoiceModel, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + nn.Conv2d(24, 32, 3, 2), # (62, 38)--> v3_2新增 + nn.BatchNorm2d(32), + nn.ReLU(), + nn.Conv2d(32, 32, 3, 2), # (62, 38) + nn.BatchNorm2d(32), + nn.ReLU(), + # nn.Conv2d(32, 32, 3, 2), # (30, 18) + # nn.BatchNorm2d(32), + # nn.ReLU(), + nn.Conv2d(32, 16, 3, 2), # (14, 8) + nn.BatchNorm2d(16), + nn.ReLU(), + nn.Conv2d(16, 16, 3, 2), # (6, 3) + # nn.AvgPool2d((14, 8)), + ] + layer2 = [ + nn.Linear(16 * 6 * 3, 3), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 16 * 6 * 3]) + x = self.layer2(x) + return x + + +class MusicVoiceV4Model(nn.Module): + def __init__(self): + super(MusicVoiceV4Model, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (62, 38) -> layers2 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (60, 36) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (58, 34) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (56, 32) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (27, 15) -> layer5 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 32, 3), # (25, 13) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (12, 6) -> layers8 + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3), # (10, 4) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (4, 1) -> layers10 + ] + layer2 = [ + nn.Linear(32 * 4 * 1, 3), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 32 * 4 * 1]) + x = self.layer2(x) + return x + + +class MusicVoiceV4AMPModel(nn.Module): + def __init__(self): + super(MusicVoiceV4AMPModel, self).__init__() + layer1 = [ + # (128, 257) + nn.Conv2d(1, 24, 3), # (126, 255) + nn.BatchNorm2d(24), + nn.ReLU(), + + 
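+            # Same shape bookkeeping as in music_gender_models.py; the AMP
+            # variants consume FRAME_LEN x 257 spectral frames instead of the
+            # FRAME_LEN x 80 MFCC input used by the other models.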
nn.Conv2d(24, 24, 3, 2), # (62, 127) -> layers2 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (60, 125) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (58, 123) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (56, 121) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (27, 60) -> layer5 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 32, 3), # (25, 58) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (12, 28) -> layers8 + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3), # (10, 26) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (4, 12) -> layers10 + ] + layer2 = [ + nn.Linear(32 * 4 * 12, 3), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, 257]) + x = self.layer1(x) + x = x.view([-1, 32 * 4 * 12]) + x = self.layer2(x) + return x + + +class MusicVoiceV5Model(nn.Module): + def __init__(self): + super(MusicVoiceV5Model, self).__init__() + + def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + conv_bn(1, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + nn.AvgPool2d((4, 3)), + ) + self.fc = nn.Linear(1024, 2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.model(x) + x = x.view(-1, 1024) + x = self.fc(x) + return x + + +class MusicVoiceV5AMPModel(nn.Module): + def __init__(self): + super(MusicVoiceV5AMPModel, self).__init__() + + def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + # 128, 257 + conv_bn(1, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + conv_dw(1024, 1024, 2), + nn.AvgPool2d((2, 5)), + ) + self.fc = nn.Linear(1024, 3) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, 257]) + x = self.model(x) + x = x.view(-1, 1024) + x = self.fc(x) + return x + + +class MobileNetV2Gender(MobileNetV2Custom): + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + return super(MobileNetV2Gender, self).forward(x) + + +def get_models(tp): + if tp == "v4": + print("load model v4 ...") + return MusicVoiceV4Model + if tp == "v4_amp": + print("load model v4 ...") + return MusicVoiceV4AMPModel + if tp == "v5": + print("load 
model v5 ...") + return MusicVoiceV5Model + if tp == "v5_amp": + print("load model v5_amp ...") + return MusicVoiceV5AMPModel + if tp == "v6": + print("load model v6 ...") + return MobileNetV2Gender + if tp == "v7": + print("load model v7") + return mobilenet_v3_large + return MusicVoiceModel + + +# if __name__ == "__main__": +# mm = get_models("v7")() +# xx = torch.randn((1, 1, 128, 80)) +# predicts = mm(xx) +# _, p_max = predicts.max(dim=1) +# print(predicts[p_max != 2]) + diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_voice_class.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_voice_class.py new file mode 100644 index 0000000..daca732 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_voice_class.py @@ -0,0 +1,358 @@ +import torch.nn as nn +import torch +import torch.nn.functional as functional +from tqdm import tqdm +import os +import sys +from torch.optim.lr_scheduler import MultiStepLR +import time +from torch.utils.data import DataLoader +import torch.utils.data as data +import glob +import numpy as np +from torch.optim.rmsprop import RMSprop +from torch.optim.adam import Adam +import librosa + +""" +模型 +""" + +MFCC_LEN = 80 +FRAME_LEN = 128 +RATIO = 0.8 + +TEST_RATE = 1 # 少量数据集测试时可以设置该值,1.0代表全部数据参与 + +from music_voice_models import * + +""" + 数据集 +""" + + +class MusicVoiceDataset(data.Dataset): + """ + pure->1 other->0 + """ + + def __init__(self, root, is_train=True): + """ + :param root: 特征的目录 + """ + self.root = root + self._dataset = [] + self._files = [] + self._is_train = is_train + self._features, self._labels = self.get_db() + print("load data train={} feature_size={} label_size={}".format(is_train, self._features.shape, + self._labels.shape)) + + def __len__(self): + return len(self._labels) + + def __getitem__(self, idx): + feature = self.get_one_data(idx) + label = self._labels[idx] + return feature, label + + def get_item(self, idx): + return self._features[idx], self._labels[idx] + + def get_files(self): + pure_files = glob.glob(os.path.join(self.root, "*/pure/*/*.feature.npy")) + other_files = glob.glob(os.path.join(self.root, "*/other/*/*.feature.npy")) + acc_files = glob.glob(os.path.join(self.root, "*/acc/*/*.feature.npy")) + other_files.extend(acc_files) + return pure_files, other_files + + def get_random_data(self, features, labels, rate): + idx = list(range(0, len(labels))) + np.random.shuffle(idx) + features = np.array(features)[idx] + labels = np.array(labels)[idx] + num = int(len(idx) * rate) + return list(features[:num]), list(labels[:num]) + + def shuffle_train_test_by_song(self, st_idx, features, labels): + """ + 按照歌曲进行切分 + :param st_idx: + :param features: + :param labels: + :return: + """ + print("shuffle_train_test_by_song....") + song_ids = [] + for i in range(st_idx, len(self._files)): + file = self._files[i] + song_id = str(file).split("/")[-1].split("_")[0] + if song_id not in song_ids: + song_ids.append(song_id) + idx = list(range(len(song_ids))) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + new_song_ids = np.array(song_ids)[np.array(idx)] + train_features = [] + train_label = [] + test_features = [] + test_label = [] + for i, feature in enumerate(features): + song_id = str(self._files[feature[0]]).split("/")[-1].split("_")[0] + if song_id in new_song_ids[train_idx:]: + test_features.append(feature) + test_label.append(labels[i]) + else: + train_features.append(feature) + train_label.append(labels[i]) + return train_features, 
train_label, test_features, test_label + + def shuffle_train_test(self, st_idx, features, labels): + """ + 对于歌曲整体shuffle,然后取20%做测试集合 + self._dataset 存储的是所有的数据,我们只需要将部分数据进行处理即可 + :param st_idx + :param features: + :param labels: + :return: + """ + print("shuffle_train_test ....") + idx = list(range(len(self._dataset) - st_idx)) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + train_features = [] + train_label = [] + test_features = [] + test_label = [] + for i, feature in enumerate(features): + cur_idx = feature[0] - st_idx + assert cur_idx >= 0 + if cur_idx in idx[train_idx:]: + test_features.append(feature) + test_label.append(labels[i]) + else: + train_features.append(feature) + train_label.append(labels[i]) + return train_features, train_label, test_features, test_label + + def gen_data(self, files, label): + """ + :return: + """ + st_idx = len(self._dataset) + features = [] + labels = [] + for idx, file in enumerate(files): + dt = np.load(file) + file_idx = len(self._dataset) + self._dataset.append(dt) + self._files.append(file) + for i in range(FRAME_LEN, len(dt)): + features.append([file_idx, i]) # 存储文件位置, 帧位置 + labels.append(label) + + # 对于歌曲乱序之后,按照歌曲分隔训练数据集和测试数据集 + # 由于只对本次添加入的数据进行shuffle,比如本次是人声,则只对人声进行切分 + # 而self_dataset中存储所有数据,所以需要传入idx + # train_x, train_y, test_x, test_y = self.shuffle_train_test(st_idx, features, labels) + train_x, train_y, test_x, test_y = self.shuffle_train_test_by_song(st_idx, features, labels) + print('cur_train:{},{}| cur_test={},{}'.format(len(train_x), len(train_y), len(test_x), len(test_y))) + np.random.seed(10) + train_x, train_y = self.get_random_data(train_x, train_y, TEST_RATE) + np.random.seed(64) + test_x, test_y = self.get_random_data(test_x, test_y, TEST_RATE) + return train_x, train_y, test_x, test_y + + def get_db(self): + pure_files, other_files = self.get_files() + self._dataset = [] + self._files = [] + pure_train_x, pure_train_y, pure_test_x, pure_test_y = self.gen_data(pure_files, 1) # 纯人声是1 + other_train_x, other_train_y, other_test_x, other_test_y = self.gen_data(other_files, 0) + + # 构造为1:1 + # 不确定->暂时试一下 + # min_train_len = min(len(pure_train_y), len(other_train_y)) + # min_test_len = min(len(pure_test_y), len(other_test_y)) + # pure_train_x = pure_train_x[:min_train_len] + # pure_train_y = pure_train_y[:min_train_len] + # other_train_x = other_train_x[:min_train_len] + # other_train_y = other_train_y[:min_train_len] + # + # pure_test_x = pure_test_x[:min_test_len] + # pure_test_y = pure_test_y[:min_test_len] + # other_test_x = other_test_x[:min_test_len] + # other_test_y = other_test_y[:min_test_len] + + # 合并数据 + pure_train_x.extend(other_train_x) + pure_test_x.extend(other_test_x) + pure_train_y.extend(other_train_y) + pure_test_y.extend(other_test_y) + + pure_train_x = np.array(pure_train_x) + pure_test_x = np.array(pure_test_x) + pure_train_y = np.array(pure_train_y) + pure_test_y = np.array(pure_test_y) + if self._is_train: + return pure_train_x, pure_train_y + return pure_test_x, pure_test_y + + def get_one_data(self, idx): + file_idx, frame_idx = self._features[idx] + feature = self._dataset[file_idx][frame_idx - FRAME_LEN: frame_idx] + return feature + + def get_dataset(self): + return self._files + + +""" +处理逻辑 +""" + + +def get_dataloader(root): + batch_size = 256 + thread_num = 24 + + trainset = MusicVoiceDataset(root) + trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=thread_num) + + valset = MusicVoiceDataset(root, False) + valloader = 
DataLoader(valset, batch_size=batch_size, shuffle=False, num_workers=thread_num) + + return trainloader, valloader + + +def train_one_epoch(model, device, loader, optimizer, criterion): + model.train() + + total_num = 0 + total_loss = 0 + correct = 0 + + for mfcces, labels in tqdm(loader): + batch_size = mfcces.size(0) + mfcces = mfcces.to(device) + labels = labels.to(device) + + predicts = model(mfcces) + + optimizer.zero_grad() + loss = criterion(predicts, labels) + loss.backward() + optimizer.step() + + total_num += batch_size + total_loss += loss.item() * batch_size + + _, predicts = predicts.max(dim=1) + correct += predicts.eq(labels).sum().item() + + if total_num != 0: + total_loss = total_loss / total_num + correct = correct / total_num + + return total_loss, correct + + +def val_one_epoch(model, device, loader, criterion): + model.eval() + + total_num = 0 + total_loss = 0 + correct = 0 + + with torch.no_grad(): + for mfcces, labels in loader: + batch_size = mfcces.size(0) + mfcces = mfcces.to(device) + labels = labels.to(device) + predicts = model(mfcces) + + loss = criterion(predicts, labels) + + total_num += batch_size + total_loss += loss.item() * batch_size + + _, predicts = predicts.max(dim=1) + correct += predicts.eq(labels).sum().item() + + if total_num != 0: + total_loss = total_loss / total_num + correct = correct / total_num + + return total_loss, correct + + +def train(model, device, model_path, set_dir): + # 训练配置参数 + max_epoch = 200 + lr = 1e-2 + momentum = 0 + weight_decay = 0 + # 学习率调整参数 + milestones = [1, 30, 50, 100] + gamma = 0.1 + # 模型保存路径 + save_directory = model_path + if not os.path.exists(save_directory): + os.makedirs(save_directory) + + optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) + # optimizer = RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) + # optimizer = Adam(model.parameters(), lr=lr) + criterion = nn.CrossEntropyLoss() + scheduler = MultiStepLR(optimizer, milestones, gamma) + + # 文件地址 + data_dir = set_dir + train_loader, val_loader = get_dataloader(data_dir) + + max_acc = 0 + + for i in range(max_epoch): + start = time.time() + t_loss, t_acc = train_one_epoch(model, device, train_loader, optimizer, criterion) + v_loss, v_acc = val_one_epoch(model, device, val_loader, criterion) + end = time.time() + + scheduler.step(i) + + msg = 't_loss:%f\tt_acc:%.2f' % (t_loss, t_acc * 100) + msg += '\tv_loss:%f\tv_acc:%.2f' % (v_loss, v_acc * 100) + msg += '\ttime:%f\tepoch:%d' % (end - start, i) + print(msg) + + params = model.state_dict() + save_path = os.path.join(save_directory, 'CNN_epoch_' + str(i) + '_' + str(v_acc) + '.pth') + torch.save(params, save_path) + + max_acc = max(max_acc, v_acc) + + print('最大acc为:', max_acc) + + +def main(model_path, set_dir): + device = 'cuda' if torch.cuda.is_available() else 'cpu' + model = get_models("v5")() + model.to(device) + + train(model, device, model_path, set_dir) + + +# def get_num(): +# from torchstat import stat +# model = MusicVoiceV1Model() +# stat(model, (1, 128, 80)) + + +if __name__ == '__main__': + # get_num() + model_path = sys.argv[1] + set_dir = sys.argv[2] + + main(model_path, set_dir) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_voice_class_rate.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_voice_class_rate.py new file mode 100644 index 0000000..39ab90a --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_voice_class_rate.py @@ -0,0 
+1,464 @@ +import torch.nn as nn +import torch +import torch.nn.functional as functional +from tqdm import tqdm +import os +import sys +from torch.optim.lr_scheduler import MultiStepLR +import time +from torch.utils.data import DataLoader +import torch.utils.data as data +import glob +import numpy as np +from torch.optim.rmsprop import RMSprop +from torch.optim.adam import Adam +import librosa + +""" +模型 +""" + +MFCC_LEN = 80 +FRAME_LEN = 128 +RATIO = 0.8 + +TEST_RATE = 1.0 # 少量数据集测试时可以设置该值,1.0代表全部数据参与 + +from music_voice_models import * + +""" + 数据集 +""" + + +class MusicVoiceDataset(data.Dataset): + """ + pure->1 other->0 + """ + + def __init__(self, root, is_train=True): + """ + :param root: 特征的目录 + """ + self._file2rate = self.get_file_rate(os.path.join(root, "file2rate.txt")) + self.root = root + self._dataset = [] + self._files = [] + self._is_train = is_train + self._features, self._labels = self.get_db() + print("load data train={} feature_size={} label_size={}".format(is_train, self._features.shape, + self._labels.shape)) + + def __len__(self): + return len(self._labels) + + def __getitem__(self, idx): + feature, acc_rate, filename = self.get_one_data(idx) + label = self._labels[idx] + return feature, label, acc_rate, filename + + def get_item(self, idx): + return self._features[idx], self._labels[idx] + + def get_file_rate(self, filename): + file2rate = {} + with open(filename, "r") as f: + while True: + line = f.readline() + if not line: + break + line = line.split(",") + file2rate[line[0]] = float(line[1]) + print("get_file_rate {}".format(len(file2rate))) + return file2rate + + def get_files(self): + pure_files = glob.glob(os.path.join(self.root, "*/pure/*/*.feature.npy")) + other_files = glob.glob(os.path.join(self.root, "*/other/*/*.feature.npy")) + acc_files = glob.glob(os.path.join(self.root, "*/acc/*/*.feature.npy")) + pure_rec = glob.glob(os.path.join(self.root, "*/pure_rec/*/*.feature.npy")) + other_files.extend(acc_files) + other_files.extend(pure_rec) + return pure_files, other_files + + def get_random_data(self, features, labels, rate): + idx = list(range(0, len(labels))) + np.random.shuffle(idx) + features = np.array(features)[idx] + labels = np.array(labels)[idx] + num = int(len(idx) * rate) + return list(features[:num]), list(labels[:num]) + + def shuffle_train_test_by_song(self, st_idx, features, labels): + """ + 按照歌曲进行切分 + :param st_idx: + :param features: + :param labels: + :return: + """ + print("shuffle_train_test_by_song....") + song_ids = [] + for i in range(st_idx, len(self._files)): + file = self._files[i] + song_id = str(file).split("/")[-1].split("_")[0] + if song_id not in song_ids: + song_ids.append(song_id) + idx = list(range(len(song_ids))) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + new_song_ids = np.array(song_ids)[np.array(idx)] + train_features = [] + train_label = [] + test_features = [] + test_label = [] + for i, feature in enumerate(features): + song_id = str(self._files[feature[0]]).split("/")[-1].split("_")[0] + if song_id in new_song_ids[train_idx:]: + test_features.append(feature) + test_label.append(labels[i]) + else: + train_features.append(feature) + train_label.append(labels[i]) + return train_features, train_label, test_features, test_label + + def shuffle_train_test(self, st_idx, features, labels): + """ + 对于歌曲整体shuffle,然后取20%做测试集合 + self._dataset 存储的是所有的数据,我们只需要将部分数据进行处理即可 + :param st_idx + :param features: + :param labels: + :return: + """ + print("shuffle_train_test ....") + idx = 
list(range(len(self._dataset) - st_idx)) + np.random.seed(4) + np.random.shuffle(idx) + train_idx = int(len(idx) * RATIO) + train_features = [] + train_label = [] + test_features = [] + test_label = [] + for i, feature in enumerate(features): + cur_idx = feature[0] - st_idx + assert cur_idx >= 0 + if cur_idx in idx[train_idx:]: + test_features.append(feature) + test_label.append(labels[i]) + else: + train_features.append(feature) + train_label.append(labels[i]) + return train_features, train_label, test_features, test_label + + def gen_data(self, files, label): + """ + :return: + """ + st_idx = len(self._dataset) + features = [] + labels = [] + for idx, file in enumerate(files): + dt = np.load(file) + file_idx = len(self._dataset) + self._dataset.append(dt) + self._files.append(file) + for i in range(FRAME_LEN, len(dt)): + features.append([file_idx, i]) # 存储文件位置, 帧位置 + labels.append(label) + + # 对于歌曲乱序之后,按照歌曲分隔训练数据集和测试数据集 + # 由于只对本次添加入的数据进行shuffle,比如本次是人声,则只对人声进行切分 + # 而self_dataset中存储所有数据,所以需要传入idx + # train_x, train_y, test_x, test_y = self.shuffle_train_test(st_idx, features, labels) + train_x, train_y, test_x, test_y = self.shuffle_train_test_by_song(st_idx, features, labels) + print('cur_train:{},{}| cur_test={},{}'.format(len(train_x), len(train_y), len(test_x), len(test_y))) + np.random.seed(10) + train_x, train_y = self.get_random_data(train_x, train_y, TEST_RATE) + np.random.seed(64) + test_x, test_y = self.get_random_data(test_x, test_y, TEST_RATE) + return train_x, train_y, test_x, test_y + + def get_db(self): + pure_files, other_files = self.get_files() + self._dataset = [] + self._files = [] + pure_train_x, pure_train_y, pure_test_x, pure_test_y = self.gen_data(pure_files, 1) # 纯人声是1 + other_train_x, other_train_y, other_test_x, other_test_y = self.gen_data(other_files, 0) + + # 构造为1:1 + # 不确定->暂时试一下 + # min_train_len = min(len(pure_train_y), len(other_train_y)) + # min_test_len = min(len(pure_test_y), len(other_test_y)) + # pure_train_x = pure_train_x[:min_train_len] + # pure_train_y = pure_train_y[:min_train_len] + # other_train_x = other_train_x[:min_train_len] + # other_train_y = other_train_y[:min_train_len] + # + # pure_test_x = pure_test_x[:min_test_len] + # pure_test_y = pure_test_y[:min_test_len] + # other_test_x = other_test_x[:min_test_len] + # other_test_y = other_test_y[:min_test_len] + + # 合并数据 + pure_train_x.extend(other_train_x) + pure_test_x.extend(other_test_x) + pure_train_y.extend(other_train_y) + pure_test_y.extend(other_test_y) + + pure_train_x = np.array(pure_train_x) + pure_test_x = np.array(pure_test_x) + pure_train_y = np.array(pure_train_y) + pure_test_y = np.array(pure_test_y) + if self._is_train: + return pure_train_x, pure_train_y + return pure_test_x, pure_test_y + + def get_one_data(self, idx): + file_idx, frame_idx = self._features[idx] + feature = self._dataset[file_idx][frame_idx - FRAME_LEN: frame_idx] + filename = self._files[file_idx] + acc_rate = -1 + if "_rec.feature.npy" in filename: + acc_rate = 1 + if "_acc.mp4.feature.npy" not in filename and "_rec.feature.npy" not in filename: + acc_rate = self._file2rate[filename] + return feature, acc_rate, filename + + def get_dataset(self): + return self._files + + +""" +处理逻辑 +""" + + +def get_dataloader(root): + batch_size = 256 + thread_num = 24 + + trainset = MusicVoiceDataset(root) + trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=thread_num) + + valset = MusicVoiceDataset(root, False) + valloader = DataLoader(valset, batch_size=batch_size, shuffle=False, 
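+ # is_train=False -> MusicVoiceDataset returns the ~20% of songs held out by shuffle_train_test_by_song (RATIO = 0.8); shuffle stays False since order is irrelevant for evaluation + 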
num_workers=thread_num) + + return trainloader, valloader + + +def rate_func(x): + # 无人声 + if x == -1: + return 1 + # 伴奏占比 0<=x<0.05 + if 0.05 >= x >= 0: + return 1 - x + if x > 0.05: + return x + # 不应该出现 + print("rate_func err!", x) + return 0 + + +def train_one_epoch(model, device, loader, optimizer, criterion): + model.train() + + total_num = 0 + total_loss = 0 + correct = 0 + + correct_dict = { + 0: {0: 0, 1: 0}, # 1=>pure 0=>other + 1: {0: 0, 1: 0}, + } + + err_top_dict = {} + + for mfcces, labels, acc_rates, filenames in tqdm(loader): + batch_size = mfcces.size(0) + mfcces = mfcces.to(device) + labels = labels.to(device) + + predicts = model(mfcces) + + optimizer.zero_grad() + loss = criterion(predicts, labels) + loss *= torch.tensor(list(map(rate_func, acc_rates))).to(device) + loss = loss.mean() + loss.backward() + optimizer.step() + + total_num += batch_size + total_loss += loss.item() * batch_size + + _, predicts = predicts.max(dim=1) + correct += predicts.eq(labels).sum().item() + + # 等于-1和>=0.8的作为无人声 等于0的当作有人声,其他不算 + # 检查数据 + for ii in range(0, len(labels)): + kk = int(labels[ii]) + vv = predicts[ii].item() # 1=>pure,0=>other + + # 0.8>rate > 0.05 这段区间不容易判定,不处理 + if acc_rates[ii] >= 0.8 or acc_rates[ii] <= 0.05: + correct_dict[kk][vv] += 1 + if kk != vv: + key = filenames[ii] + "," + str(labels[ii]) + if key not in err_top_dict.keys(): + err_top_dict[key] = 0 + err_top_dict[key] += 1 + + if total_num != 0: + total_loss = total_loss / total_num + correct = correct / total_num + rate_0 = correct_dict[0][0] / (correct_dict[0][0] + correct_dict[1][0]) + rate_1 = correct_dict[1][1] / (correct_dict[0][1] + correct_dict[1][1]) + print("train_rate:{}|{},{}".format(correct_dict, rate_0, rate_1)) + print("train_rate:----------------------top10的错误文件------------------->") + tot = sum(err_top_dict.values()) + sorted_dict = sorted(err_top_dict.items(), key=lambda item: item[1], reverse=True) + top = 10 + cur = 0 + for i in range(0, top): + cur += sorted_dict[i][1] + print("top10 占比:{}".format(cur / tot)) + for i in range(0, top): + print("{},{}".format(sorted_dict[i][0], sorted_dict[i][1])) + print("---------------------------------------------------------------->") + + return total_loss, correct + + +def val_one_epoch(model, device, loader, criterion): + model.eval() + + total_num = 0 + total_loss = 0 + correct = 0 + correct_dict = { + 0: {0: 0, 1: 0}, # 0=>pure 1=>other + 1: {0: 0, 1: 0}, + } + err_top_dict = {} + with torch.no_grad(): + for mfcces, labels, acc_rates, filenames in loader: + batch_size = mfcces.size(0) + mfcces = mfcces.to(device) + labels = labels.to(device) + predicts = model(mfcces) + + loss = criterion(predicts, labels) + loss *= torch.tensor(list(map(rate_func, acc_rates))).to(device) + loss = loss.mean() + total_num += batch_size + total_loss += loss.item() * batch_size + + _, predicts = predicts.max(dim=1) + correct += predicts.eq(labels).sum().item() + # 等于-1和>=0.8的作为无人声 等于0的当作有人声,其他不算 + # 检查数据 + for ii in range(0, len(labels)): + kk = int(labels[ii]) + vv = predicts[ii].item() # 0=>pure,1=>other + + # 0.8>rate > 0.05 这段区间不容易判定,不处理 + if acc_rates[ii] >= 0.8 or acc_rates[ii] <= 0.05: + correct_dict[kk][vv] += 1 + + if kk != vv: + key = filenames[ii] + "," + str(labels[ii]) + if key not in err_top_dict.keys(): + err_top_dict[key] = 0 + err_top_dict[key] += 1 + + if total_num != 0: + total_loss = total_loss / total_num + correct = correct / total_num + rate_0 = correct_dict[0][0] / (correct_dict[0][0] + correct_dict[1][0]) + rate_1 = correct_dict[1][1] / 
(correct_dict[0][1] + correct_dict[1][1]) + print("val_rate:{}|{},{}".format(correct_dict, rate_0, rate_1)) + print("val_rate:----------------------top10的错误文件------------------->") + tot = sum(err_top_dict.values()) + sorted_dict = sorted(err_top_dict.items(), key=lambda item: item[1], reverse=True) + top = 10 + cur = 0 + for i in range(0, top): + cur += sorted_dict[i][1] + print("top10 占比:{}".format(cur / tot)) + for i in range(0, top): + print("{},{}".format(sorted_dict[i][0], sorted_dict[i][1])) + print("--------------------------------------------------------------->") + return total_loss, correct + + +def train(model, device, model_path, set_dir): + # 训练配置参数 + max_epoch = 200 + lr = 1e-2 + momentum = 0 + weight_decay = 0 + # 学习率调整参数 + milestones = [1, 30, 50, 100] + gamma = 0.1 + # 模型保存路径 + save_directory = model_path + if not os.path.exists(save_directory): + os.makedirs(save_directory) + + optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) + # optimizer = RMSprop(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) + # optimizer = Adam(model.parameters(), lr=lr) + criterion = nn.CrossEntropyLoss(reduction="none") + scheduler = MultiStepLR(optimizer, milestones, gamma) + + # 文件地址 + data_dir = set_dir + train_loader, val_loader = get_dataloader(data_dir) + + max_acc = 0 + + for i in range(max_epoch): + start = time.time() + t_loss, t_acc = train_one_epoch(model, device, train_loader, optimizer, criterion) + v_loss, v_acc = val_one_epoch(model, device, val_loader, criterion) + end = time.time() + + scheduler.step(i) + + msg = 't_loss:%f\tt_acc:%.2f' % (t_loss, t_acc * 100) + msg += '\tv_loss:%f\tv_acc:%.2f' % (v_loss, v_acc * 100) + msg += '\ttime:%f\tepoch:%d' % (end - start, i) + print(msg) + + params = model.state_dict() + save_path = os.path.join(save_directory, 'CNN_epoch_' + str(i) + '_' + str(v_acc) + '.pth') + torch.save(params, save_path) + + max_acc = max(max_acc, v_acc) + + print('最大acc为:', max_acc) + + +def main(model_path, set_dir): + device = 'cuda' if torch.cuda.is_available() else 'cpu' + model = get_models("v5")() + model.to(device) + + train(model, device, model_path, set_dir) + + +# def get_num(): +# from torchstat import stat +# model = MusicVoiceV1Model() +# stat(model, (1, 128, 80)) + + +if __name__ == '__main__': + # get_num() + model_path = sys.argv[1] + set_dir = sys.argv[2] + + main(model_path, set_dir) diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/music_voice_models.py b/AIMeiSheng/voice_classification/train/music_voice_class/music_voice_models.py new file mode 100644 index 0000000..e6f2747 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/music_voice_models.py @@ -0,0 +1,342 @@ +""" +模型列表 +""" + +import torch +import torch.nn as nn + +MFCC_LEN = 80 +FRAME_LEN = 128 + + +class MusicVoiceV1Model(nn.Module): + def __init__(self): + super(MusicVoiceV1Model, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 8, 3), # (126, 78) + nn.BatchNorm2d(8), + nn.ReLU(), + nn.Conv2d(8, 16, 3, 2), # (62, 38) + nn.BatchNorm2d(16), + nn.ReLU(), + nn.Conv2d(16, 16, 3, 2), # (30, 18) + nn.AvgPool2d((30, 18)), + ] + layer2 = [ + nn.Linear(16, 2), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 16]) + x = self.layer2(x) + return x + + +class MusicVoiceV2Model(nn.Module): + def __init__(self): + 
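# relative to V1: wider early channels (24/32 vs 8/16) and one extra stride-2 conv before the global average pool; the classifier head is still Linear(16, 2) + 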
super(MusicVoiceV2Model, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + nn.Conv2d(24, 32, 3, 2), # (62, 38) + nn.BatchNorm2d(32), + nn.ReLU(), + nn.Conv2d(32, 32, 3, 2), # (30, 18) + nn.BatchNorm2d(32), + nn.ReLU(), + nn.Conv2d(32, 16, 3, 2), # (14, 8) + nn.AvgPool2d((14, 8)), + ] + layer2 = [ + nn.Linear(16, 2), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 16]) + x = self.layer2(x) + return x + + +class MusicVoiceV3Model(nn.Module): + def __init__(self): + super(MusicVoiceV3Model, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + nn.Conv2d(24, 32, 3, 2), # (62, 38)--> v3_2新增 + nn.BatchNorm2d(32), + nn.ReLU(), + nn.Conv2d(32, 32, 3, 2), # (62, 38) + nn.BatchNorm2d(32), + nn.ReLU(), + # nn.Conv2d(32, 32, 3, 2), # (30, 18) + # nn.BatchNorm2d(32), + # nn.ReLU(), + nn.Conv2d(32, 16, 3, 2), # (14, 8) + nn.BatchNorm2d(16), + nn.ReLU(), + nn.Conv2d(16, 16, 3, 2), # (6, 3) + # nn.AvgPool2d((14, 8)), + ] + layer2 = [ + nn.Linear(16 * 6 * 3, 2), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 16 * 6 * 3]) + x = self.layer2(x) + return x + + +class MusicVoiceModel(nn.Module): + def __init__(self): + super(MusicVoiceModel, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + nn.Conv2d(24, 24, 3), # (124, 76) + nn.BatchNorm2d(24), + nn.ReLU(), + nn.Conv2d(24, 24, 3), # (122, 74) + nn.BatchNorm2d(24), + nn.ReLU(), + nn.Conv2d(24, 16, 3), # (120, 72) + nn.BatchNorm2d(16), + nn.ReLU(), + + nn.Conv2d(16, 16, 3), # (118, 70) + nn.BatchNorm2d(16), + nn.ReLU(), + nn.Conv2d(16, 16, 3), # (116, 68) + nn.BatchNorm2d(16), + nn.ReLU(), + nn.Conv2d(16, 16, 3), # (114, 66) + nn.BatchNorm2d(16), + nn.ReLU(), + + nn.Conv2d(16, 16, 3, 2), # (56, 32) + nn.BatchNorm2d(16), + nn.ReLU(), + nn.Conv2d(16, 16, 3, 2), # (27, 15) + nn.BatchNorm2d(16), + nn.ReLU(), + nn.Conv2d(16, 8, 3, 2), # (13, 7) + nn.BatchNorm2d(8), + nn.ReLU(), + nn.Conv2d(8, 8, 3, 2), # (6, 3) + ] + layer2 = [ + nn.Linear(8 * 18, 2), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 8 * 18]) + x = self.layer2(x) + return x + + +class MusicVoiceV4Model(nn.Module): + def __init__(self): + super(MusicVoiceV4Model, self).__init__() + layer1 = [ + # (128, 80) + nn.Conv2d(1, 24, 3), # (126, 78) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (62, 38) -> layers2 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (60, 36) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (58, 34) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (56, 32) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (27, 15) -> layer5 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 32, 3), # (25, 13) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (12, 6) -> layers8 + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3), # (10, 4) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (4, 1) -> layers10 + ] + layer2 = [ + nn.Linear(32 * 4 * 1, 2), + ] + self.layer1 = 
nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.layer1(x) + x = x.view([-1, 32 * 4 * 1]) + x = self.layer2(x) + return x + + +class MusicVoiceV4AMPModel(nn.Module): + def __init__(self): + super(MusicVoiceV4AMPModel, self).__init__() + layer1 = [ + # (128, 257) + nn.Conv2d(1, 24, 3), # (126, 255) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (62, 127) -> layers2 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (60, 125) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (58, 123) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3), # (56, 121) + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 24, 3, 2), # (27, 60) -> layer5 + nn.BatchNorm2d(24), + nn.ReLU(), + + nn.Conv2d(24, 32, 3), # (25, 58) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (12, 28) -> layers8 + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3), # (10, 26) + nn.BatchNorm2d(32), + nn.ReLU(), + + nn.Conv2d(32, 32, 3, 2), # (4, 12) -> layers10 + ] + layer2 = [ + nn.Linear(32 * 4 * 12, 2), + ] + self.layer1 = nn.Sequential(*layer1) + self.layer2 = nn.Sequential(*layer2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, 257]) + x = self.layer1(x) + x = x.view([-1, 32 * 4 * 12]) + x = self.layer2(x) + return x + + +class MusicVoiceV5Model(nn.Module): + def __init__(self): + super(MusicVoiceV5Model, self).__init__() + + def conv_bn(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, oup, 3, stride, 1, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True) + ) + + def conv_dw(inp, oup, stride): + return nn.Sequential( + nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False), + nn.BatchNorm2d(inp), + nn.ReLU(inplace=True), + + nn.Conv2d(inp, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + nn.ReLU(inplace=True), + ) + + self.model = nn.Sequential( + conv_bn(1, 32, 2), + conv_dw(32, 64, 1), + conv_dw(64, 128, 2), + conv_dw(128, 128, 1), + conv_dw(128, 256, 2), + conv_dw(256, 256, 1), + conv_dw(256, 512, 2), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 512, 1), + conv_dw(512, 1024, 2), + conv_dw(1024, 1024, 1), + nn.AvgPool2d((4, 3)), + ) + self.fc = nn.Linear(1024, 2) + + def forward(self, x): + x = x.view([-1, 1, FRAME_LEN, MFCC_LEN]) + x = self.model(x) + x = x.view(-1, 1024) + x = self.fc(x) + return x + + +def get_models(tp): + if tp == "v1": + return MusicVoiceV1Model + if tp == "v2": + return MusicVoiceV2Model + if tp == "v3": + print("load model .... v3") + return MusicVoiceV3Model + if tp == "v4": + print("load model .... v4") + return MusicVoiceV4Model + if tp == "v4_amp": + print("load model .... v4_amp") + return MusicVoiceV4AMPModel + if tp == "v5": + print("load model .... v5") + return MusicVoiceV5Model + + return MusicVoiceV1Model
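The two training scripts instantiate these networks through the factory (`model = get_models("v5")()` in `main`), and `music_voice_class_rate.py` additionally converts each window's accompaniment ratio into a per-sample loss weight via `rate_func` together with `CrossEntropyLoss(reduction="none")`. Below is a minimal sketch of that pattern, assuming `music_voice_models.py` is importable and using made-up batch values:

```python
import torch
import torch.nn as nn

from music_voice_models import get_models  # the factory defined above


def rate_func(x):
    # mirrors rate_func in music_voice_class_rate.py: turns a window's
    # accompaniment ratio into a loss weight (-1 = no-vocals case)
    if x == -1:
        return 1
    if 0 <= x <= 0.05:
        return 1 - x
    if x > 0.05:
        return x
    return 0


model = get_models("v5")()             # get_models returns a class, so call it to instantiate
mfcc = torch.randn(4, 128, 80)         # four FRAME_LEN x MFCC_LEN windows (dummy data)
labels = torch.tensor([1, 0, 1, 0])    # pure vocal -> 1, other -> 0
acc_rates = [0.9, -1, 0.02, 0.5]       # hypothetical accompaniment ratios per window

criterion = nn.CrossEntropyLoss(reduction="none")  # keep one loss value per sample
loss = criterion(model(mfcc), labels)              # shape (4,)
loss = (loss * torch.tensor([float(rate_func(r)) for r in acc_rates])).mean()
loss.backward()
```

The `reduction="none"` setting is what keeps one loss value per sample, so the weights can be applied before averaging back to a scalar for the backward pass.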
v5") + return MusicVoiceV5Model + + return MusicVoiceV1Model diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/readme.txt b/AIMeiSheng/voice_classification/train/music_voice_class/readme.txt new file mode 100644 index 0000000..c25d698 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/readme.txt @@ -0,0 +1,5 @@ +训练一个纯人声和其他的分类模型 +主要关注三个文件: +1 music_gender_class_simple.py 训练男女声分类的代码(换不同的数据集,训练对应模型) +2 music_voice_class.py 训练带人声/伴奏的代码 +3 music_voice_class.py 训练纯人声/伴奏的代码 \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/test.py b/AIMeiSheng/voice_classification/train/music_voice_class/test.py new file mode 100644 index 0000000..c7170ca --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/test.py @@ -0,0 +1,1062 @@ +""" +临时脚本,随时删除 +""" +from torchvision.models import MobileNetV2 +import os + + +def get_msg(): + msg_02 = [ + "1221 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_31.wav", + # 音乐声很大 + "897 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_21.wav", + # 音乐声很大 + "280 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/1688849883080769_1.wav", + "254 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/584342372_25.wav", + "184 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/584342372_11.wav", + "128 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/7881299372541512_1.wav", + "122 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/7881299372541512_7.wav", + "113 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324863857272_2.wav", # 修复 + "105 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/7881299372541512_3.wav", + "91 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_13.wav", + "84 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/584342372_21.wav", + "72 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/2251799837114998_13.wav", + "69 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_19.wav", + "61 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/584342372_9.wav", + "54 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/2251799832928172_2.wav", + "45 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4785074276307355_77.wav", + "44 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/584342372_5.wav", + "41 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324861051108_10.wav", + "39 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_33.wav", + "37 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_29.wav", + "36 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694076_3.wav", + "30 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12947848932925749_25.wav", + "25 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/281474996203683_5.wav", + "23 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/281474996465112_2.wav", + "18 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694076_9.wav", + "18 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/8444249326840821_1.wav", + "17 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10414574175871794_1.wav", + "13 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/7881299372541512_5.wav", + ] + msg_20 = [ + "830 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_0.wav", + "507 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_2.wav", + "483 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_4.wav", + "374 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_5.wav", + "163 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/582580300_acc.mp4.wav", + "162 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/3940649698096657_15.wav", + "118 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/844424950319984_0.wav", + "118 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/844424950319984_0.wav", + "96 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/acc/female/582580300_acc.mp4.wav", + "85 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/10414574175871794_8.wav", + "64 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_9.wav", + "58 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/1688849879695015_0.wav", + "57 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/male/2533274809943541_acc.mp4.wav", + "56 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/8725724301904279_0.wav", + "52 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415598515_16.wav", + "38 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/1688849882529806_acc.mp4.wav", + "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/female/6192449415597971_acc.mp4.wav", + "30 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/281474996282111_0.wav", + "30 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/281474996282111_0.wav", + "17 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/2533274809943541_0.wav", + "13 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/562949977421174_22.wav", + "13 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415598515_0.wav", + ] + msg_21 = [ + "585 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_0.wav", + "466 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_4.wav", + "394 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_9.wav", + "241 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_5.wav", + "194 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/male/2251799832901180_acc.mp4.wav", + "172 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_2.wav", + "170 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/844424950319984_0.wav", + "170 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/844424950319984_0.wav", + "148 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/3940649698096657_17.wav", + "46 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_7.wav", + ] + + msg_10 = [ + "2866 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10977524125385053_1.wav", + "2344 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10977524125385053_3.wav", + "947 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956800355_11.wav", + "615 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10977524125385053_5.wav", + "529 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956800355_13.wav", + "516 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399319087018_7.wav", + "489 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/1125899930760402_5.wav", + "488 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/583726605_5.wav", + "408 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/583726605_9.wav", + "400 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_7.wav", + "381 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/1125899930760402_3.wav", + "358 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/male/1970324853320187_1.wav", + "333 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/583726605_7.wav", + "331 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399319087018_17.wav", + "322 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/1688849879701965_2.wav", + "322 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/1688849879701965_2.wav", + "322 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/583726605_3_2.wav", + "304 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/1688849879701965_5.wav", + "304 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/1688849879701965_5.wav", + "303 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/8725724304232173_1.wav", + "290 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/2251799837613441_17.wav", + "285 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/281475000783630_39.wav", + "283 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/281475000783630_9.wav", + "278 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/281475000783630_17.wav", + "243 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_19.wav", + "224 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/281475000783630_5.wav", + "187 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956801312_13.wav", + "181 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956801312_17.wav", + "175 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_17.wav", + "172 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956800355_15.wav", + "154 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/2251799837613441_13.wav", + "143 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/583726605_3.wav", + "135 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_11.wav", + "135 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399319087018_21.wav", + "130 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/2251799837613441_9.wav", + "130 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/1125899930760402_1.wav", + "124 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/4785074275959707_21.wav", + "111 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/2251799837613441_15.wav", + "109 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/4785074275959707_45.wav", + "104 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_15.wav", + "100 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371939299_55.wav", + "100 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956213980_1.wav", + "96 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/3096224769648315_3.wav", + "90 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/4785074275959707_49.wav", + "89 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_67.wav", + "88 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/281475000783630_11.wav", + "86 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/3096224769648836_5.wav", + "82 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_35.wav", + "79 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/281474997973301_2.wav", + "79 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/281474997973301_2.wav", + "77 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/1125899929550020_0.wav", + "77 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_61.wav", + "77 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/1125899929550020_0.wav", + "76 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_23.wav", + "73 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/4785074275959707_27.wav", + "71 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10414574174473429_9.wav", + "71 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_49.wav", + "69 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/844424950319984_1.wav", + "69 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/844424950319984_1.wav", + "65 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399319087018_19.wav", + "64 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6192449415883142_41.wav", + "64 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/3096224769648315_9.wav", + "63 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/3096224769648836_19.wav", + "56 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371939299_49.wav", + "55 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371939299_41.wav", + "53 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_5.wav", + "48 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371939299_57.wav", + "47 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10414574174473429_11.wav", + "40 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_21.wav", + "39 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/1688849879962030_3.wav", + "39 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/1125899928841868_0.wav", + "39 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/1688849879962030_3.wav", + "39 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/1125899928841868_0.wav", + "38 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399319087018_5.wav", + "38 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/3096224769648836_11.wav", + "35 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399319087018_3.wav", + "34 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/582584533_2.wav", + "34 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/281475000783630_29.wav", + "34 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_21.wav", + "34 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/582584533_2.wav", + "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/4785074275959707_53.wav", + "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956800355_3.wav", + "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_57.wav", + "32 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_45.wav", + "32 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_33.wav", + "31 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956800355_5.wav", + "28 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_17.wav", + "23 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/281475000783630_33.wav", + "22 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_13.wav", + "21 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_3.wav", + "21 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/3096224769648836_7.wav", + "20 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371939299_1.wav", + "20 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_31.wav", + "18 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/4785074275959707_39.wav", + "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/1688849879962030_1.wav", + "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_47.wav", + "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/1688849879962030_1.wav", + "15 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371939299_23.wav", + "14 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/582584533_4.wav", + "14 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956801312_11.wav", + ] + + msg_01 = [ + "3203 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_31.wav", + "1816 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694524_15.wav", + "1290 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_13.wav", + "943 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_21.wav", + "856 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694076_5.wav", + "852 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694076_9.wav", + "804 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694076_7.wav", + "770 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694524_9.wav", + "745 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694076_1.wav", + "671 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694076_3.wav", + "621 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_19.wav", + "519 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12947848932925749_21.wav", + "488 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/281474996540139_3.wav", + "477 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4222124674638311_13.wav", + "414 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4222124674638311_7.wav", + + "396 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/2251799832928172_1.wav", + "393 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/580993708_1.wav", + "369 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/8444249325679686_27.wav", + "345 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/7881299372541512_3.wav", + "336 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/281474996465112_2.wav", + "334 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694524_17.wav", + "318 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12947848932925749_9.wav", + "309 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4785074276307355_77.wav", + "268 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694076_11.wav", + "265 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/281474996540139_2.wav", + "262 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_11.wav", + "254 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12947848932925749_11.wav", + "230 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7318349418849499_5.wav", + "223 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4222124674638311_3.wav", + "218 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694524_3.wav", + "217 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/7881299372541512_1.wav", + "215 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/8444249325679686_31.wav", + "203 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_17.wav", + "197 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_29.wav", + "167 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/1688849883080769_2.wav", + "165 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_7.wav", + "164 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/580825128_6.wav", + "158 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/281474997008641_2.wav", + "157 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_3.wav", + "153 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694524_5.wav", + "152 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_5.wav", + "138 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10414574175871794_9.wav", + "136 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12947848932925749_7.wav", + "125 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10414574175865538_15.wav", + "124 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4222124674638311_5.wav", + "118 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_25.wav", + "110 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_23.wav", + "108 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/1970324856348562_5.wav", + "107 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12947848932925749_19.wav", + "105 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/7881299372541512_7.wav", + "104 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/8444249325679686_33.wav", + "102 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_1.wav", + "102 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/281474997008641_6.wav", + "99 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/1970324856348562_4.wav", + "97 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694076_13.wav", + "94 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/281474997008641_5.wav", + "89 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/281474996465112_4.wav", + "84 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2814749786739623_3.wav", + "82 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/8444249326840821_1.wav", + "79 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_15.wav", + "76 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_35.wav", + "76 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4222124674638311_17.wav", + "71 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/844424949505770_4.wav", + "69 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/1688849883080769_1.wav", + "68 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10414574175865538_11.wav", + "66 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/1688849879955498_1.wav", + "64 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324861051108_10.wav", + "63 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/844424949505770_3.wav", + "59 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/281474997008641_4.wav", + "56 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/580825128_2.wav", + "55 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/7881299372541512_5.wav", + "52 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694524_11.wav", + "51 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/580825128_5.wav", + "48 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2814749786739623_2.wav", + "47 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12947848932925749_23.wav", + "46 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/281474997008641_3.wav", + "42 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_9.wav", + "40 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/1688849879954083_6.wav", + "38 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/1970324856350610_1.wav", + "36 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12947848932925749_5.wav", + "35 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/2251799832928172_2.wav", + "35 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694524_1.wav", + "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/584342372_5.wav", + "32 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/580825128_8.wav", + "32 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12947848932925749_13.wav", + "32 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324863857272_2.wav", + "31 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324863857272_3.wav", + "29 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4222124674638311_21.wav", + "28 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324863857272_0.wav", + "27 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/281474997008641_1.wav", + "26 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/844424949806543_1.wav", + "26 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7318349418849499_3.wav", + "26 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4222124674638311_9_2.wav", + "21 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/584342372_11.wav", + "21 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324861051108_6.wav", + "20 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7599824394656914_33.wav", + "18 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274813694524_7.wav", + "18 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/281474996465112_3.wav", + "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/1688849880501970_1.wav", + "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/1688849879949240_2.wav", + ] + return msg_10  # note: msg_01 above is collected for reference only; this function returns msg_10 + + +def cp_test_v1(): + msg = [ + "1076 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/281474996452113_1.wav", # sounds like a female voice + "523 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/male/1970324853320187_1.wav", + "460 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/4222124674638311_13.wav", + "436 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/1688849884324702_23.wav", + "430 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/6755399476482830_33.wav", + "425 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/11540474052027974_7_4.wav", + "420 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/10414574175873734_9.wav", + "418 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/4222124674638311_7.wav", + "363 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_27.wav", + "349 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_3.wav", + "316 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/2533274809930887_1.wav", + "307 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849883796298_5.wav", + "304 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/10414574175873734_3.wav", + "288 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/10414574175873734_13.wav", + "284 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/11540474052027974_11.wav", + "279 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_17.wav", + "278 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2814749791033021_16.wav", + "271 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/7881299372542927_1.wav", + "255 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6755399319087018_17.wav", + "254 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/10414574175873734_7.wav", + "228 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/7881299372542927_15.wav", + "227 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/1688849884324702_15_4.wav", + "226 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/11540474052027974_13.wav", + "223 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6192449415597971_17.wav", + "216 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/562949978433913_35.wav", + "211 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/6755399476482830_29.wav", + "207 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2814749791033021_22.wav", + "193 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/4222124674638311_3.wav", + "188 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6755399319087018_21.wav", + "180 
/data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_19.wav", + "171 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/562949978433913_13_4.wav", + "166 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/281474996464521_0.wav", + "163 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/4222124674638311_5.wav", + "153 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/2251799832928172_1.wav", + "151 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/10414574175873734_5.wav", + "150 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/1688849882728090_4.wav", + "143 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/1688849882728090_3.wav", + "143 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/11540474052027974_5.wav", + "140 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/562949978433913_19.wav", + "139 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/281475000783630_9.wav", + "133 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_17.wav", + "130 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_67.wav", + "129 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_35_2.wav", + "129 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/1688849882728090_2.wav", + "128 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849883796298_7.wav", + "127 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/562949978433913_21.wav", + "125 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/7881299372542927_7.wav", + "123 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/11540474052027974_9.wav", + "120 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/562949978433913_11_2.wav", + "114 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1688849879954498_6.wav", + "111 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/6755399476482830_17.wav", + "109 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/6755399476482830_7.wav", + "99 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/11540474052027974_3.wav", + "95 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/580825128_6.wav", + "93 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/7881299372746776_9.wav", + "91 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/281475000783630_17.wav", + "91 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849883796298_3.wav", + "90 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6755399319087018_3.wav", + "90 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6755399319087018_19.wav", + "88 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/1688849883080769_2.wav", + "87 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2814749791033021_26.wav", + "86 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/1688849882728090_6.wav", + "84 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2814749791033021_24.wav", + "83 
/data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/281474998815784_1.wav", + "83 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/1688849882728090_5.wav", + "77 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/6755399476482830_9.wav", + "73 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_15.wav", + "73 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849884322930_21.wav", + "73 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/6755399319010233_9.wav", + "71 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/281475000783630_39.wav", + "70 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/6755399476482830_1.wav", + "67 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/12384898984234608_15.wav", + "66 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/4222124674638311_17.wav", + "64 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_41.wav", + "64 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2814749791033021_12.wav", + "62 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2251799837613441_17.wav", + "62 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/12384898984234608_11.wav", + "61 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_31.wav", + "57 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/10414574175873734_11.wav", + "54 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1688849879954083_3.wav", + "54 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1688849879949240_2.wav", + "54 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6473924416461347_11.wav", + "53 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/6755399476482830_21.wav", + "53 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/12666373956213980_1.wav", + "51 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/580825128_5.wav", + "51 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/12666373956213980_5.wav", + "51 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_9.wav", + "50 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6192449415299528_25.wav", + "49 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/6755399476482830_31.wav", + "49 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6755399319086968_1.wav", + "45 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_45.wav", + "45 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/1688849884324702_17.wav", + "44 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_11.wav", + "43 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/2251799832928172_2.wav", + "42 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/580993708_1.wav", + "42 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/844424949506044_1.wav", + "40 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/562949978433913_15.wav", + "40 
/data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/1125899928822132_4.wav", + "39 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_21.wav", + "39 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/562949978433913_7.wav", + "37 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/1970324861051108_2.wav", + "35 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/1688849884324702_27.wav", + "33 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_11.wav", + "33 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/1970324861051108_10.wav", + "32 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/12384898984234608_13.wav", + "31 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/281475000783630_27.wav", + "30 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_7.wav", + "30 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6755399319087018_7.wav", + "30 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6192449415597971_3.wav", + "29 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/580825128_2.wav", + "29 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1688849879949240_0.wav", + "27 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/580825128_3.wav", + "26 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/6755399319010233_25.wav", + "25 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849884322930_29.wav", + "25 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6755399319087018_23.wav", + "25 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/3940649698096657_16.wav", + "24 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_13.wav", + "24 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1688849879954083_5.wav", + "23 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/4222124674638311_21.wav", + "23 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849884322930_9.wav", + "23 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/7881299372746776_5.wav", + "22 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_33.wav", + "22 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/4222124674638311_9_2.wav", + "22 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1688849879949240_1.wav", + "21 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/580825128_7.wav", + "21 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_49.wav", + "21 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_15.wav", + "21 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1970324856347918_1.wav", + "20 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/562949978433913_1.wav", + "20 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1970324856347918_2.wav", + "20 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1688849879954083_6.wav", + "20 
/data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/6755399319010233_27.wav", + "19 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_43.wav", + "19 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_21.wav", + "19 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6192449415597971_7.wav", + "18 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/1688849884324702_21.wav", + "18 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849884324256_5.wav", + "18 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/281474998815784_2.wav", + "18 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/5910944689202387_1.wav", + "17 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/2251799835673034_1.wav", + "17 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/1688849883080769_1.wav", + "17 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/11821949029469093_17_2.wav", + "17 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849884322930_19.wav", + "17 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/11540474052027974_7_2.wav", + "16 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/281475000783630_5.wav", + "16 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/12384898984234608_5.wav", + "15 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2814749791033021_18.wav", + "15 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2251799837613441_13.wav", + "15 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/1688849884324702_15.wav", + "15 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6473924416461347_9.wav", + "14 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/580825128_8.wav", + "14 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_47.wav", + "14 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849884324256_7.wav", + "14 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/1125899925978417_2.wav", + "14 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/1125899925978417_1.wav", + "14 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6473924416461347_17.wav", + "14 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/6755399319010233_1.wav", + "13 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/281475000783630_25.wav", + "13 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2814749791033021_14.wav", + "13 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/2251799837447031_5.wav", + "13 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/562949978433913_29.wav", + "13 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6192449415597971_21.wav", + "13 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/male/6192449415299528_23.wav", + "11 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/844424954116115_35.wav", + "11 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/12384898984234608_9.wav", + "11 
/data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/male/1688849882728090_1.wav", + "11 /data/datasets/music_voice_dataset_2000/split_10/av_area_me/pure/female/1688849879954498_5.wav", + "10 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/male/579390945_1.wav", + "10 /data/datasets/music_voice_dataset_2000/split_10/av_area_sea/pure/female/1688849880501970_1.wav", + "10 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/6755399476482830_25.wav", + "10 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/male/1688849884324702_15_2.wav", + "10 /data/datasets/music_voice_dataset_2000/split_10/av_area_sa/pure/female/1688849884324256_1.wav", + "10 /data/datasets/music_voice_dataset_2000/split_10/av_area_in/pure/female/1970324861051108_6.wav", + ] + # msg = get_msg() + for line in msg: + ll = line.strip().split(" ")  # entry format: "<error_count> <wav_path>" + fname = ll[1] + dst_dir = "tmp/" + "/".join(ll[1].split("/")[-4:-1])  # mirror the last three path components (area/class/gender) under tmp/ + if not os.path.exists(dst_dir): + os.makedirs(dst_dir) + cmd = "cp {} {}".format(fname, dst_dir) + print(cmd) + os.system(cmd) + + +def cp_test(): + msg = [ + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/acc/female/6192449415299528_acc.mp4.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/acc/male/4785074276813036_acc.mp4.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/acc/male/8725724304056375_acc.mp4.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/female/10696049147228515_3.feature.npy", + # fixed + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/female/10977524125034331_34.feature.npy", + # fixed + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/female/3940649698096657_7.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/female/4785074276813036_1.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/female/562949979225244_1.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/female/562949979225244_3.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/female/6473924416827142_34.feature.npy", + # fixed: treated as vocals; vocals carried in the background + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/female/7881299372541512_4.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/male/10414574170383861_2.feature.npy", + # wrong: slight vocals in the background, should not count as vocals + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/male/11540474052027974_12.feature.npy", + # fixed + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/male/3096224768333380_0.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/male/3096224769648836_6.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/male/5066859163244423_7.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/male/6192449415439266_6.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/male/6192449415439579_0.feature.npy", + # wrong + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/other/male/6192449415598515_26.feature.npy", + # wrong + 
"/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/female/10977524125034331_21.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/female/4785074276813036_0.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/female/5629499359363469_4.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/female/562949979225244_6.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/female/6473924415865592_1.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/male/11540474052027974_7.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/male/3096224768333380_19.feature.npy", + # 删除该段 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/male/4785074276076639_31.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/male/583726605_3.feature.npy", # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_in/pure/male/6755399318715899_6.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/acc/male/1688849879953430_acc.mp4.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/other/female/1125899926478738_2.feature.npy", + # 错误,是口哨声,本应该认为是其他 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/other/female/2251799832883726_3.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/other/female/281474996464521_4.feature.npy", + # 背景声中带有人声, 剔除 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/other/female/844424949504097_3.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/other/male/1125899925978417_3.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/other/male/281474998837544_0.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/other/male/844424951090072_0.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/pure/female/1688849879955498_1.feature.npy", + # 错误,拉长声音时,容易被误判 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/pure/female/2533274809939469_4.feature.npy", + # 错误,拉长声音被误判 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/pure/female/281474996201383_2.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/pure/female/281474996203683_3.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/pure/female/844424949763020_1.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/pure/male/1125899926477579_2.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/pure/male/281474998837544_1.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_me/pure/male/582584533_1.feature.npy", # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/acc/female/6755399476482830_acc.mp4.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/acc/male/12384898979578969_acc.mp4.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/acc/male/4785074276307355_acc.mp4.feature.npy", + # 错误 + 
"/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/other/female/10414574175875361_8.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/other/female/12384898983720862_16.feature.npy", + # 背景音中存在人声,剔除 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/other/female/2533274813694076_6.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/other/female/7881299372949474_4.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/other/male/10414574174468093_0.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/other/male/12103424002800797_39.feature.npy", + # 错误 + + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/female/10414574175871525_3.feature.npy", + # 错误 呼吸声 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/female/1688849883796298_5.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/female/2251799836929554_21.feature.npy", + # 尾音被判错 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/female/562949978433913_11.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/female/562949978433913_13.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/female/562949978433913_3.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/female/7318349418849613_11.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/female/7318349418849613_3.feature.npy", + # 拉尾音,判断错误 + + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/10133099197881676_11.feature.npy", + # 呼吸声判断错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/10133099198707076_21.feature.npy", + # 尾音判断错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/10133099198707076_35.feature.npy", + # 尾音判断错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/12103424002800797_0.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/12103424002987496_11.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/12103424002987496_43.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/12384898979580597_3.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/4222124677424631_43.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/6192449415672938_2.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/6755399476482901_3.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sa/pure/male/7881299371777717_5.feature.npy", + # 错误 + + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sea/acc/male/2533274809750905_acc.mp4.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sea/acc/male/562949972321604_acc.mp4.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sea/other/female/1125899929543426_0.feature.npy", + # 背景音中有一点人声,占比不大 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sea/other/female/1688849880001340_0.feature.npy", + # 错误 + 
"/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sea/other/male/2251799828394027_0.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sea/pure/female/2251799832530659_4.feature.npy", + # 错误 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sea/pure/female/2251799833700683_2.feature.npy", + # 修复 + "/data/datasets/music_voice_dataset_2000/feature_10_bf/av_area_sea/pure/female/562949972751073_0.feature.npy" + # 错误 + + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/2533274813694076_6.wav",2949], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/11540474052027974_12.wav",1900], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/281474998837544_0.wav",1533], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/3940649698096657_7.wav",955], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/7881299372541512_4.wav",776], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/2251799832530659_4.wav",656], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/2251799833700683_2.wav",650], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/4785074276813036_1.wav",604], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/562949979225244_1.wav",497], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/10414574174468093_0.wav",399], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12103424002800797_0.wav",292], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/5629499359363469_4.wav",273], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/562949979225244_3.wav",260], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_3.wav",157], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/male/8725724304056375_acc.mp4.wav",154], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/11540474052027974_7.wav",148], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/male/4785074276813036_acc.mp4.wav",140], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/3096224769648836_6.wav",128], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10414574175871525_3.wav",124], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_35.wav",120], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/female/6192449415299528_acc.mp4.wav",119], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12103424002800797_39.wav",117], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/4785074276813036_0.wav",111], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7318349418849613_3.wav",101], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978433913_13.wav",97], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/1688849883796298_5.wav",93], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/male/4785074276307355_acc.mp4.wav",90], + # 
["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/male/12384898979578969_acc.mp4.wav",81], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415439579_0.wav",81], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2251799836929554_21.wav",73], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/583726605_3.wav",71], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415439266_6.wav",64], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/acc/male/562949972321604_acc.mp4.wav",63], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12103424002987496_11.wav",60], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12384898979580597_3.wav",51], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/4222124677424631_43.wav",48], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978433913_11.wav",45], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/281474996464521_4.wav",43], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/1688849879955498_1.wav",38], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/acc/male/2533274809750905_acc.mp4.wav",36], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/7881299372949474_4.wav",31], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/10977524125034331_21.wav",31], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/male/2251799828394027_0.wav",28], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/1125899926478738_2.wav",27], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/1125899926477579_2.wav",26], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371777717_5.wav",25], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/3096224768333380_0.wav",25], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/281474998837544_1.wav",24], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/3096224768333380_19.wav",21], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12103424002987496_43.wav",20], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/10414574170383861_2.wav",18], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6192449415672938_2.wav",16], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099197881676_11.wav",15], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/4785074276076639_31.wav",14], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/281474996201383_2.wav",13], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978433913_3.wav",11], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/6755399476482830_acc.mp4.wav",11], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/844424949763020_1.wav",11], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/6473924415865592_1.wav",11], + # 
["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7318349418849613_11.wav",10], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415598515_26.wav",10], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_21.wav",9], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/2533274809939469_4.wav",9], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482901_3.wav",8], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/844424949504097_3.wav",7], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/acc/male/1688849879953430_acc.mp4.wav",7], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/562949979225244_6.wav",7], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/582584533_1.wav",6], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/6473924416827142_34.wav",4], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/562949972751073_0.wav",3], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/12384898983720862_16.wav",3], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/10414574175875361_8.wav",3], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10977524125034331_34.wav",3], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/281474996203683_3.wav",2], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/844424951090072_0.wav",2], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/2251799832883726_3.wav",2], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399318715899_6.wav",2], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/5066859163244423_7.wav",2], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/female/1688849880001340_0.wav",1], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/female/1125899929543426_0.wav",1], + # ["/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/1125899925978417_3.wav",1], + ] + for line in msg: + # ll = line.strip().split(" ") + fname = line[0] + dst_dir = "tmp/" + "/".join(line[0].split("/")[-4:-1]) + if not os.path.exists(dst_dir): + os.makedirs(dst_dir) + cmd = "cp {} {}".format(fname, dst_dir) + print(cmd) + os.system(cmd) + + +def cp(): + msg = [ + "316 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/10133099198706260_8.wav", # 已经修复 + "270 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/1125899930243712_3.wav", # 已经修复 + "263 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/female/1125899925848222_1.wav", + # 其他中本身存在声音,从数据集中剔除 + "182 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/8444249325679417_27.wav", + # 已经修复 + "172 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978632898_1.wav", # 已经修复 + "146 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/male/579390945_3.wav", # 已经修复 + "138 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324861051108_8.wav", # 已经修复 + "133 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/5910944690229224_7.wav", # fixed + "128 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_5.wav", # fixed + "115 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/281474996464521_0.wav", # fixed + "113 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/acc/female/582580300_acc.mp4.wav", # wrong + "111 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371777717_7.wav", # fixed + "109 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/1125899926246215_0.wav", # fixed + "101 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/5910944690229224_5.wav", # fixed + "99 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/6755399319009886_44.wav", # fixed + "98 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/7318349418849613_0.wav", + # sounds a lot like a child crying and was classified as vocals, but it should still count as wrong; it belongs to accompaniment + "94 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/562949979225244_2.wav", # fixed + "94 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/5629499359363469_2.wav", # fixed + "92 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/male/3096224768334838_acc.mp4.wav", # wrong + "90 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/584342372_11.wav", # fixed + "88 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/10414574170383861_1.wav", # fixed + "87 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/844424954116115_36.wav", + # wrong: [very faint] harmony in the accompaniment + "83 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/5066859163732658_0.wav", # fixed + "83 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/584342372_12.wav", # fixed + "79 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/female/6192449415677716_acc.mp4.wav", + # wrong + "75 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/584342372_10.wav", # wrong + "72 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/6192449415883142_acc.mp4.wav", + # this accompaniment itself contains very obvious vocals + "69 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2533274814209302_5.wav", # fixed + "68 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/8725724304231937_11.wav", # fixed + "66 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/7881299371777717_0.wav", # fixed + "66 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/2251799832886752_0.wav", + # remove: informal and atypical vocals, classified as vocals + "66 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/562949979225244_0.wav", # fixed + "66 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/male/10696049147228515_acc.mp4.wav", + # wrong + "64 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/10977524125034331_11.wav", # fixed + "60 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/female/281474996203259_4.wav", + # fixed: informal vocals classified as vocals + "59 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/7881299372947989_2.wav", # fixed + "59 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/844424956075496_1.wav", # fixed + "55 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/1125899928841868_2.wav", # wrong + "53 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_1.wav", # fixed + "50 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/4785074276813036_4.wav", # fixed + "49 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/562949979225244_4.wav", # fixed + "49 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415439266_4.wav", # fixed + "47 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/5066859163244423_9.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/female/6192449415439266_acc.mp4.wav", + # wrong - a little background sound carried in the accompaniment + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/female/6755399319428674_acc.mp4.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_0.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_2.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_4.wav", + # wrong: instrument sounds a bit like vocals + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/3940649698096657_1.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/3940649698096657_5.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/562949979225244_5.wav", # wrong: sounds a bit like vocals + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/562949979225244_7.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/584342372_16.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/584342372_20.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/6755399477050620_7.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/844424956075496_0.wav", # fixed + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/8725724304056375_4.wav", + # wrong: instrument sounds a bit like vocals + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/3096224768333380_6.wav", + # wrong: sounds like someone inhaling + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/3096224769806508_6.wav", + # wrong: a bit like an inhaling sound + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/583726605_12.wav", # wrong: instrument sound resembles vocals + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_11.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_3.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_7.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_9.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415299528_26.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415439266_2.wav", + # wrong: rather faint harmony in the accompaniment + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415598515_16.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/8725724301904279_0.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/10977524125034331_7.wav", # wrong + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324863857272_0.wav", # fixed + 
"/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/281475001556604_14.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/3940649698096657_0.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/3940649698096657_4.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/5629499359363469_8.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/6473924416827142_3.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/8444249325964995_3.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/8444249325964995_9.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/10414574171783815_15.wav", + # 错误,人声比较小导致 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/11540474052027974_1.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/11540474052027974_15.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/11540474052027974_9.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/4785074276075815_1.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/4785074276075815_11.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/4785074276075815_25.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/5629499359096645_20.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6192449415439579_13.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6192449415439579_15.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6192449415439579_5.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6192449415677716_4.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399317989499_0.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399317989499_10.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399318715899_10.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399318930073_2.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/7881299372947989_0.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/acc/male/2251799832882985_acc.mp4.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/1688849879695015_0.wav", + # 错误,尾部有一点点人 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/2251799832885209_0.wav", + # 伴奏中带的一点点人声 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/281474996458245_0.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/562949972938693_0.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/844424950319984_0.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/1688849879954498_1.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/281474996203683_2.wav", # 修复 + 
"/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/844424950535206_2.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/844424950535206_4.wav", # 修复 + # 下午开始 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/11821949029468621_acc.mp4.wav", + # 隐约像人声 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/281475000783630_acc.mp4.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/3377699745184645_acc.mp4.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/5066859163242046_acc.mp4.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/male/1688849883796298_acc.mp4.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/male/2533274813694076_acc.mp4.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/2251799836929554_6.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/2533274816528676_12.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/7599824394656914_16.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/7599824394656914_2.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/1125899930243712_46.wav", + # 存在争议,已经删除 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/11821949029470168_0.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12103424002987496_12.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12103424002987496_18.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12103424002987496_34.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12103424002987496_44.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12947848933509127_6.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/2814749791033021_19.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/844424954116115_10.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/844424954116115_6.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10414574175871525_1.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10414574175873734_9.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10696049152650551_36.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12384898983715412_3.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2251799836929554_1.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4222124674638311_9.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978433913_1.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978433913_15.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978433913_17.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7881299372949474_3.wav", # 错误 + 
"/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099197881676_9.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_47.wav", + # 错误_拉长尾音被误判 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_57.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_61.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/11821949029469093_17.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/11821949029469093_35.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12103424002987496_41.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12384898979580597_5.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956801312_13.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/1688849884324702_15.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6192449415672938_9.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482830_33.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371777717_1.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371777717_3.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371939299_59.wav", # 修复_直接删除了 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/acc/female/281474993643001_acc.mp4.wav", + # 伴奏有点像人声 + + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/female/2533274809750905_4.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/male/2251799828394027_2.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/male/2251799828394323_4.wav", # 修复 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/male/2251799828401178_3.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/1125899929543426_2.wav", # 错误 + "/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/2251799833675526_0.wav" + # "46 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978433913_1.wav", + # "46 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10414574175873734_9.wav", + # "43 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/844424954116115_6.wav", # 修复 + # "43 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/male/2533274813694076_acc.mp4.wav", + # "42 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/1125899930243712_46.wav", + # "41 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978433913_17.wav", + # "38 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371939299_59.wav", + # "36 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/11821949029470168_0.wav", + # "46 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/281474996203683_2.wav", + # "42 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/844424950535206_4.wav", + # "45 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/11540474052027974_15.wav", + # "44 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_4.wav", # 修复 + # "44 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_0.wav", # 修复 + # "43 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/female/6192449415439266_acc.mp4.wav", + # "40 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/3940649698096657_0.wav", + # "40 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/3940649698096657_5.wav", + # "39 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/6755399477050620_7.wav", + # "37 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_11.wav", + # "35 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6192449415439579_15.wav", + # "34 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_7.wav", + # "38 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/1125899929543426_2.wav", + # "36 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/female/2533274809750905_4.wav", + # "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/2251799836929554_1.wav", + # "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/male/1688849883796298_acc.mp4.wav", + # "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/acc/male/2251799832882985_acc.mp4.wav", + # "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/5629499359096645_20.wav", + # "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/11540474052027974_9.wav", + # "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/11540474052027974_1.wav", + # "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_3.wav", + # "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/844424956075496_0.wav", # 不典型的3,掺杂伴奏中的人声,剔除 + # "33 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/562949979225244_5.wav", # 伴奏中的人声 + # "32 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6192449415672938_9.wav", + # "31 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399318715899_10.wav", + # "31 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/8444249325964995_3.wav", + # "31 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/8725724301904279_0.wav", + # "30 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_61.wav", + # "30 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/12384898983715412_3.wav", + # "30 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/male/844424950319984_0.wav", + # "30 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399317989499_10.wav", + # "29 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/11821949029469093_35.wav", + # "29 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099197881676_9.wav", + # "29 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10414574175871525_1.wav", + # "29 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/844424954116115_10.wav", # 修复 + # "28 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/male/2251799828394323_4.wav", + # "28 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/281475000783630_acc.mp4.wav", + # "28 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/2251799832885209_0.wav", + # "27 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371777717_3.wav", # 修复 + # "27 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415299528_26.wav", + # "27 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/583726605_12.wav", + # "26 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/pure/female/2251799833675526_0.wav", + # "26 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/10696049152650551_36.wav", + # "26 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12103424002987496_44.wav", + # "26 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/11821949029468621_acc.mp4.wav", + # "26 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/584342372_20.wav", # 错误 + # "25 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/3377699745184645_acc.mp4.wav", + # "25 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/4785074276075815_11.wav", + # "25 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/281475001556604_14.wav", + # "25 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/584173493_9.wav", + # "24 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/281474996458245_0.wav", + # "24 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/6473924416827142_3.wav", + # "23 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12666373956801312_13.wav", + # "23 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12103424002987496_41.wav", + # "23 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/female/1688849879954498_1.wav", + # "23 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/1688849879695015_0.wav", + # "23 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/1970324863857272_0.wav", + # "22 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_57.wav", + # "22 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/acc/female/5066859163242046_acc.mp4.wav", + # "21 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/12384898979580597_5.wav", + # "21 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415598515_16.wav", + # "20 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12103424002987496_34.wav", + # "20 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/584342372_16.wav", # 错误 + # "19 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/6755399476482830_33.wav", + # "19 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/2533274816528676_12.wav", + # "19 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/2251799836929554_6.wav", + # "19 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/6192449415439266_2.wav", # 错误 + # "18 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/7881299371777717_1.wav", # 修复 + # "18 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/7599824394656914_2.wav", + # "18 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6192449415439579_13.wav", + # "18 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/8725724304056375_4.wav", + # "17 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/10133099198707076_47.wav", + # "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/acc/female/281474993643001_acc.mp4.wav", + # "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/11821949029469093_17.wav", + # "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12103424002987496_18.wav", + # "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/female/7599824394656914_16.wav", + # "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6192449415439579_5.wav", + # "16 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/3096224768333380_6.wav", + # "15 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/4222124674638311_9.wav", + # "15 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12103424002987496_12.wav", + # "14 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/male/2251799828394027_2.wav", + # "14 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/male/1688849884324702_15.wav", + # "14 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/pure/male/844424950535206_2.wav", + # "14 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/3940649698096657_1.wav", + # "14 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/acc/female/6755399319428674_acc.mp4.wav", + # "13 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/7881299372947989_0.wav", # 修复 + # "13 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399317989499_0.wav", + # "13 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6192449415677716_4.wav", + # "13 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/4785074276075815_1.wav", + # "12 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/7881299372949474_3.wav", + # "12 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/4785074276075815_25.wav", + # "12 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/10414574171783815_15.wav", + # "12 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/562949979225244_7.wav", + # "11 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sea/other/male/2251799828401178_3.wav", + # "11 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/12947848933509127_6.wav", + # "11 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/5629499359363469_8.wav", # 修复 + # "11 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/10977524125034331_7.wav", # 错误 + # "11 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/female/10696049147228515_2.wav", + # "10 
/data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/pure/female/562949978433913_15.wav",
+        # "10 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_sa/other/male/2814749791033021_19.wav",
+        # "10 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_me/other/female/562949972938693_0.wav",
+        # "10 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/male/6755399318930073_2.wav",
+        # "10 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/8444249325964995_9.wav",
+        # "10 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/pure/female/3940649698096657_4.wav",
+        # "10 /data/datasets/music_voice_dataset_2000/split_10_bf/av_area_in/other/male/3096224769806508_6.wav",
+    ]
+    for line in msg:
+        ll = line.strip().split(" ")
+        # entries are either "<count> <path>" (the commented history above) or a
+        # bare path; the path is always the last token. The original ll[1]
+        # raised IndexError on bare-path entries.
+        fname = ll[-1]
+        dst_dir = "tmp/" + "/".join(fname.split("/")[-4:-1])
+        if not os.path.exists(dst_dir):
+            os.makedirs(dst_dir)
+        cmd = "cp {} {}".format(fname, dst_dir)
+        print(cmd)
+        os.system(cmd)
+
+
+def calc():
+    msg = [
+    ]
+
+    tot = 0
+    for i in range(len(msg)):
+        file, num = msg[i]
+        tot += num
+
+    # print the cumulative share contributed by each (file, num) entry
+    cur_tot = 0
+    for i in range(len(msg)):
+        cur_tot += msg[i][1]
+        print("idx={}, per={}".format(i, cur_tot / tot))
+
+
+if __name__ == "__main__":
+    # calc()
+    # cp()
+    # cp_test()
+    cp_test_v1()
+    import torchvision.models  # leftover import; unused
diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/test_frame.py b/AIMeiSheng/voice_classification/train/music_voice_class/test_frame.py
new file mode 100644
index 0000000..aa1a1e1
--- /dev/null
+++ b/AIMeiSheng/voice_classification/train/music_voice_class/test_frame.py
@@ -0,0 +1,74 @@
+"""
+Collect the misclassified audio files.
+"""
+import torch
+from torch import nn
+import librosa
+import numpy as np
+import os
+import shutil
+import sys
+import time
+FRAME_LEN = 128
+MFCC_LEN = 80
+os.environ["LRU_CACHE_CAPACITY"] = "1"
+
+from music_voice_models import *
+from music_voice_class import MusicVoiceDataset
+
+
+class PredictModel:
+    """
+    Evaluate the model and print the misclassified samples directly.
+    """
+
+    def __init__(self, model_path, features_dir):
+        self.device = 'cuda'  # note: not used below; this script runs on CPU
+        model = get_models("v5")()
+        params = torch.load(model_path)
+        model.load_state_dict(state_dict=params)
+        model.eval()
+        self.model = model
+        self.frame_num = FRAME_LEN
+        self.batch_size = 256
+        self.features_dir = features_dir
+
+    def process(self):
+        mvd = MusicVoiceDataset(self.features_dir, True)
+        datasets = mvd.get_dataset()
+        llen = len(mvd)
+
+        tm = time.time()
+        with torch.no_grad():
+            for i in range(0, llen, self.batch_size):
+                cur_batch = self.batch_size
+                if cur_batch + i > llen:
+                    cur_batch = llen - i
+                data = []
+                label = []
+                for j in range(i, i + cur_batch):
+                    fe, la = mvd[j]
+                    data.append(fe)
+                    label.append(la)
+                data = torch.from_numpy(np.array(data))
+                predicts = self.model(data)
+                _, predicts = predicts.max(dim=1)
+                for ii in range(0, cur_batch):
+                    if predicts[ii] != label[ii]:
+                        ff, ll = mvd.get_item(ii + i)
+                        filename = datasets[ff[0]]
+                        # dttt = np.load(filename)
+                        # path,frame_idx,label
+                        print("{},{},{}".format(filename, ff[1], ll))
+                        # print("path={} err_msg={}|max_len={}, real_label={}".format(filename, ff, len(dttt), ll))
+
+                if i % (self.batch_size * 100) == 0:
+                    print("cur_process={}/{} sp={}".format(i, llen, time.time() - tm))
+
+
+if __name__ == "__main__":
+    # model_dir = sys.argv[1]
+    model_dir = "/data/datasets/music_voice_dataset_2000/models/v5_005_prod/CNN_epoch_0_0.8320833711933235.pth"
+    feature_dir = "/data/datasets/music_voice_dataset_2000/feature_005"
+    pm = PredictModel(model_dir, feature_dir)
pm.process() diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/test_gender_frame.py b/AIMeiSheng/voice_classification/train/music_voice_class/test_gender_frame.py new file mode 100644 index 0000000..cf865b9 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/test_gender_frame.py @@ -0,0 +1,73 @@ +""" +拿到分错的音频 +""" +import torch +from torch import nn +import librosa +import numpy as np +import os +import shutil +import sys +import time + +FRAME_LEN = 128 +MFCC_LEN = 80 +os.environ["LRU_CACHE_CAPACITY"] = "1" + +from music_gender_models import * +from music_gender_class import MusicVoiceDataset + + +class PredictModel: + """ + 测试一下模型的效果,将错误的直接输出出来 + """ + + def __init__(self, model_path, features_dir): + self.device = 'cuda' + model = get_models("v4")() + params = torch.load(model_path) + model.load_state_dict(state_dict=params) + model.eval() + self.model = model + self.frame_num = FRAME_LEN + self.batch_size = 256 + self.features_dir = features_dir + + def process(self): + mvd = MusicVoiceDataset(self.features_dir, False) + datasets = mvd.get_dataset() + llen = mvd.__len__() + + tm = time.time() + with torch.no_grad(): + for i in range(0, llen, self.batch_size): + cur_batch = self.batch_size + if cur_batch + i > llen: + cur_batch = llen - i + data = [] + label = [] + for j in range(i, i + cur_batch): + fe, la = mvd.__getitem__(j) + data.append(fe) + label.append(la) + data = torch.from_numpy(np.array(data)) + predicts = self.model(data) + _, predicts = predicts.max(dim=1) + for ii in range(0, cur_batch): + if predicts[ii] != label[ii]: + ff, ll = mvd.get_item(ii + i) + filename = datasets[ff[0]] + # 文件名, 帧号, 人工标签, 预期标签 + print("{},{},{},{}".format(filename, ff[1], ll, predicts[ii])) + + if i % (self.batch_size * 100) == 0: + print("cur_process={}/{} sp={}".format(i, llen, time.time() - tm)) + + +if __name__ == "__main__": + # model_dir = sys.argv[1] + model_dir = "/data/datasets/music_voice_dataset_2000/feature_10_bf/models/gender_test_v4/CNN_epoch_5_0.9352189705967032.pth" + feature_dir = "/data/datasets/music_voice_dataset_2000/feature_10_bf" + pm = PredictModel(model_dir, feature_dir) + pm.process() diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/test_rate_frame.py b/AIMeiSheng/voice_classification/train/music_voice_class/test_rate_frame.py new file mode 100644 index 0000000..04a0c59 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/test_rate_frame.py @@ -0,0 +1,84 @@ +""" +拿到分错的音频 +""" +import torch +from torch import nn +import librosa +import numpy as np +import os +import shutil +import sys +import time + +FRAME_LEN = 128 +MFCC_LEN = 80 +os.environ["LRU_CACHE_CAPACITY"] = "1" + +from music_voice_models import * +from music_voice_class_rate import MusicVoiceDataset + + +class PredictModel: + """ + 测试一下模型的效果,将错误的直接输出出来 + """ + + def __init__(self, model_path, features_dir): + self.device = 'cuda' + model = get_models("v5")() + params = torch.load(model_path) + model.load_state_dict(state_dict=params) + model.eval() + self.model = model + self.model.to(self.device) + self.frame_num = FRAME_LEN + self.batch_size = 256 + self.features_dir = features_dir + + def process(self): + mvd = MusicVoiceDataset(self.features_dir, False) + datasets = mvd.get_dataset() + llen = mvd.__len__() + + tm = time.time() + with torch.no_grad(): + for i in range(0, llen, self.batch_size): + cur_batch = self.batch_size + if cur_batch + i > llen: + cur_batch = llen - i + data = [] + label = [] + acc_rates = [] + filenames 
= [] + for j in range(i, i + cur_batch): + fe, la, acc_rate, filename = mvd.__getitem__(j) + data.append(fe) + label.append(la) + acc_rates.append(acc_rate) + filenames.append(filename) + + data = torch.from_numpy(np.array(data)).to(self.device) + predicts = self.model(data) + _, predicts = predicts.max(dim=1) + for ii in range(0, cur_batch): + if 0.05 < acc_rates[ii] < 0.8: + continue + + if predicts[ii] != label[ii]: + ff, ll = mvd.get_item(ii + i) + filename = datasets[ff[0]] + # dttt = np.load(filename) + # path,frame_idx,label + print("{},{},{}".format(filename, ff[1], ll)) + # print("path={} err_msg={}|max_len={}, real_label={}".format(filename, ff, len(dttt), ll)) + + if i % (self.batch_size * 100) == 0: + print("cur_process={}/{} sp={}".format(i, llen, time.time() - tm)) + + +if __name__ == "__main__": + # model_dir = sys.argv[1] + model_dir = "/data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod_rec/CNN_epoch_1_0.8822981570144947.pth" + feature_dir = "/data/datasets/music_voice_dataset_2000/feature_rec_005" + pm = PredictModel(model_dir, feature_dir) + pm.process() diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/test_simple_gender_frame.py b/AIMeiSheng/voice_classification/train/music_voice_class/test_simple_gender_frame.py new file mode 100644 index 0000000..376c6b4 --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/test_simple_gender_frame.py @@ -0,0 +1,88 @@ +""" +拿到分错的音频 +""" +import torch +from torch import nn +import librosa +import numpy as np +import os +import shutil +import sys +import time + +FRAME_LEN = 128 +MFCC_LEN = 80 +os.environ["LRU_CACHE_CAPACITY"] = "1" + +from music_gender_models_simple import * +from music_gender_class_simple import MusicVoiceDataset +from music_gender_class_simple import MusicVoiceDatasetV1 + + +class PredictModel: + """ + 测试一下模型的效果,将错误的直接输出出来 + """ + + def __init__(self, model_path, features_dir): + self.device = 'cuda' + model = get_models("v6")() + params = torch.load(model_path) + model.load_state_dict(state_dict=params) + model.eval() + model.to(self.device) + self.model = model + self.frame_num = FRAME_LEN + self.batch_size = 256 + self.features_dir = features_dir + + def process(self): + mvd = MusicVoiceDatasetV1(self.features_dir, False) + # mvd = MusicVoiceDataset(self.features_dir, False) + datasets = mvd.get_dataset() + llen = mvd.__len__() + + tm = time.time() + tot_num = 0 + err_num = 0 + with torch.no_grad(): + for i in range(0, llen, self.batch_size): + cur_batch = self.batch_size + if cur_batch + i > llen: + cur_batch = llen - i + data = [] + label = [] + for j in range(i, i + cur_batch): + fe, la = mvd.__getitem__(j) + data.append(fe) + label.append(la) + data = torch.from_numpy(np.array(data)).to(self.device) + predicts = self.model(data) + # _, predicts = predicts.max(dim=1) + predicts_score = torch.nn.functional.softmax(predicts, dim=1) + predicts_score = predicts_score.to("cpu").numpy() + tot_num += cur_batch + for ii in range(0, cur_batch): + female_score = predicts_score[ii][0] + # if predicts[ii] != label[ii]: + if not (int(female_score > 0.5) ^ (label[ii] == 1)): + err_num += 1 + ff, ll = mvd.get_item(ii + i) + filename = datasets[ff[0]] + # 文件名, 帧号, 人工标签, 预期标签, 女性分数 + print("{},{},{},{},{},{}".format(filename, ff[1], ll, predicts[ii], int(female_score > 0.5), + female_score)) + + if i % (self.batch_size * 100) == 0: + print("cur_process={}/{} sp={}".format(i, llen, time.time() - tm)) + + print("rate={}/{}={}".format(tot_num - err_num, tot_num, (tot_num - 
err_num) / tot_num)) + + +if __name__ == "__main__": + # model_dir = sys.argv[1] + # model_dir = "/data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v5_simple_test/CNN_epoch_5_0.9349034720602855.pth" + model_dir = "/data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_prod_rmspop_2/CNN_epoch_12_0.9522509979793997.pth" + feature_dir = "/data/datasets/music_voice_dataset_full/feature_no2000" + pm = PredictModel(model_dir, feature_dir) + pm.process() \ No newline at end of file diff --git a/AIMeiSheng/voice_classification/train/music_voice_class/v3_ops_misc.py b/AIMeiSheng/voice_classification/train/music_voice_class/v3_ops_misc.py new file mode 100644 index 0000000..acbe4ef --- /dev/null +++ b/AIMeiSheng/voice_classification/train/music_voice_class/v3_ops_misc.py @@ -0,0 +1,72 @@ +import torch +from torch import nn, Tensor +from typing import Any, Callable, List, Optional, Sequence + + +class ConvNormActivation(torch.nn.Sequential): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + stride: int = 1, + padding: Optional[int] = None, + groups: int = 1, + norm_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.BatchNorm2d, + activation_layer: Optional[Callable[..., torch.nn.Module]] = torch.nn.ReLU, + dilation: int = 1, + inplace: bool = True, + ) -> None: + if padding is None: + padding = (kernel_size - 1) // 2 * dilation + layers = [torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, + dilation=dilation, groups=groups, bias=norm_layer is None)] + if norm_layer is not None: + layers.append(norm_layer(out_channels)) + if activation_layer is not None: + layers.append(activation_layer(inplace=inplace)) + super().__init__(*layers) + self.out_channels = out_channels + + +class SqueezeExcitation(torch.nn.Module): + def __init__( + self, + input_channels: int, + squeeze_channels: int, + activation: Callable[..., torch.nn.Module] = torch.nn.ReLU, + scale_activation: Callable[..., torch.nn.Module] = torch.nn.Sigmoid, + ) -> None: + super().__init__() + self.avgpool = torch.nn.AdaptiveAvgPool2d(1) + self.fc1 = torch.nn.Conv2d(input_channels, squeeze_channels, 1) + self.fc2 = torch.nn.Conv2d(squeeze_channels, input_channels, 1) + self.activation = activation() + self.scale_activation = scale_activation() + + def _scale(self, input: Tensor) -> Tensor: + scale = self.avgpool(input) + scale = self.fc1(scale) + scale = self.activation(scale) + scale = self.fc2(scale) + return self.scale_activation(scale) + + def forward(self, input: Tensor) -> Tensor: + scale = self._scale(input) + return scale * input + + +def _make_divisible(v: float, divisor: int, min_value: Optional[int] = None) -> int: + """ + This function is taken from the original tf repo. + It ensures that all layers have a channel number that is divisible by 8 + It can be seen here: + https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py + """ + if min_value is None: + min_value = divisor + new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. 
+ if new_v < 0.9 * v: + new_v += divisor + return new_v diff --git "a/AIMeiSheng/voice_classification/train/music_voice_class/\346\250\241\345\236\213\350\260\203\346\225\264\350\256\260\345\275\225" "b/AIMeiSheng/voice_classification/train/music_voice_class/\346\250\241\345\236\213\350\260\203\346\225\264\350\256\260\345\275\225" new file mode 100644 index 0000000..d2cf99c --- /dev/null +++ "b/AIMeiSheng/voice_classification/train/music_voice_class/\346\250\241\345\236\213\350\260\203\346\225\264\350\256\260\345\275\225" @@ -0,0 +1,888 @@ +1 先小数据量试水模型结构 & 参数 + 基本保证在1轮在1分钟以内 + 1 调节结构 & 参数 + base情况下: + 使用v2模型结构,10% 训练数据进行验证模型 + load data train=True feature_size=(217562, 2) label_size=(217562,) + load data train=False feature_size=(54391, 2) label_size=(54391,) + t_loss:0.449561 t_acc:78.91 v_loss:0.318363 v_acc:84.85 time:63.418361 epoch:0 + t_loss:0.353835 t_acc:83.94 v_loss:0.263710 v_acc:87.25 time:64.326015 epoch:1 + t_loss:0.316482 t_acc:85.88 v_loss:0.171938 v_acc:92.69 time:64.213856 epoch:2 + t_loss:0.294969 t_acc:86.98 v_loss:0.180633 v_acc:92.19 time:63.653347 epoch:3 + t_loss:0.279277 t_acc:87.91 v_loss:0.188967 v_acc:91.95 time:62.561779 epoch:4 + t_loss:0.269135 t_acc:88.41 v_loss:0.183673 v_acc:92.61 time:62.442590 epoch:5 + t_loss:0.261508 t_acc:88.81 v_loss:0.131333 v_acc:95.32 time:62.433702 epoch:6 + t_loss:0.248857 t_acc:89.43 v_loss:0.308053 v_acc:85.35 time:62.477476 epoch:7 + t_loss:0.241682 t_acc:89.81 v_loss:0.145480 v_acc:94.63 time:62.635452 epoch:8 + t_loss:0.243857 t_acc:89.65 v_loss:0.123565 v_acc:95.37 time:62.681790 epoch:9 + t_loss:0.241291 t_acc:89.83 v_loss:0.171195 v_acc:93.18 time:62.517998 epoch:10 + + t_loss:0.213777 t_acc:91.21 v_loss:0.162857 v_acc:93.94 time:62.629202 epoch:66 + t_loss:0.213624 t_acc:91.20 v_loss:0.156899 v_acc:94.20 time:62.549534 epoch:67 + 结论-> 复杂度不够,需要加深模型 + + 增加复杂度: + 当训练集过拟合之后,测试集表现变差 + 猜测: 训练集存在问题? 
+ t_loss:0.390758 t_acc:82.23 v_loss:0.207184 v_acc:90.15 time:65.252627 epoch:0 + t_loss:0.263802 t_acc:88.45 v_loss:0.299487 v_acc:84.91 time:65.534486 epoch:1 + t_loss:0.229928 t_acc:90.23 v_loss:0.199293 v_acc:91.71 time:65.572901 epoch:2 + t_loss:0.209117 t_acc:91.33 v_loss:0.133716 v_acc:95.42 time:65.615580 epoch:3 + t_loss:0.193563 t_acc:92.05 v_loss:0.249096 v_acc:90.03 time:65.625524 epoch:4 + t_loss:0.182014 t_acc:92.60 v_loss:0.088716 v_acc:97.08 time:65.745589 epoch:5 + + 数据集处理之后 + load data train=True feature_size=(217562, 2) label_size=(217562,) + load data train=False feature_size=(54391, 2) label_size=(54391,) + t_loss:0.372757 t_acc:83.56 v_loss:0.262712 v_acc:88.55 time:64.657586 epoch:0 + t_loss:0.237446 t_acc:89.79 v_loss:0.229696 v_acc:90.23 time:64.794124 epoch:1 + t_loss:0.210335 t_acc:91.21 v_loss:0.201309 v_acc:91.48 time:65.402407 epoch:2 + t_loss:0.190796 t_acc:92.14 v_loss:0.427703 v_acc:85.39 time:65.630273 epoch:3 + t_loss:0.177163 t_acc:92.77 v_loss:0.395974 v_acc:81.41 time:65.661027 epoch:4 + t_loss:0.166219 t_acc:93.30 v_loss:0.168076 v_acc:92.93 time:65.716892 epoch:5 + t_loss:0.155935 t_acc:93.80 v_loss:0.159153 v_acc:93.44 time:65.568116 epoch:6 + t_loss:0.147620 t_acc:94.13 v_loss:0.169707 v_acc:93.28 time:65.585254 epoch:7 + t_loss:0.140627 t_acc:94.44 v_loss:0.165090 v_acc:93.18 time:65.722044 epoch:8 + t_loss:0.134832 t_acc:94.68 v_loss:0.219670 v_acc:91.39 time:65.591995 epoch:9 + t_loss:0.129087 t_acc:94.92 v_loss:0.123388 v_acc:95.17 time:65.554105 epoch:10 + t_loss:0.096197 t_acc:96.47 v_loss:0.101193 v_acc:96.25 time:65.915924 epoch:36 + + 纯人声/伴奏分类最终效果:--> 取370轮作为最终结果 + t_loss:0.032227 t_acc:98.97 v_loss:0.031318 v_acc:99.01 time:625.441707 epoch:7 + t_loss:0.009864 t_acc:99.76 v_loss:0.008986 v_acc:99.80 time:625.957975 epoch:51 + t_loss:0.008323 t_acc:99.82 v_loss:0.007603 v_acc:99.86 time:627.456355 epoch:370 + + + # 修改一下数据获取方式,上面还是会有问题,会出现歌曲的重叠 + # 取20%的歌曲切分开 + 训练集拟合速度很慢,同时测试集合表现不佳 + t_loss:0.099110 t_acc:96.39 v_loss:0.169146 v_acc:93.58 time:66.992251 epoch:57 + + # 试着加大模型复杂度 + 单纯去掉了avgPool->直接用DNN来承接 + t_loss:0.062487 t_acc:97.74 v_loss:0.143869 v_acc:94.88 time:122.709533 epoch:11 + 在上面的基础上加一层CNN,目的是将输出的大小缩小到10以内 + t_loss:0.037297 t_acc:98.68 v_loss:0.129751 v_acc:95.57 time:123.484769 epoch:13 + # 可以再加深一层试试,过拟合了 + t_loss:0.030694 t_acc:98.94 v_loss:0.150477 v_acc:95.00 time:118.676794 epoch:12 + # 尝试砍掉一层大的 + t_loss:0.053908 t_acc:98.04 v_loss:0.123287 v_acc:95.71 time:62.914505 epoch:6 + # 尝试砍掉一层CNN,换成DNN + + # 恢复成当前状态: + CNN_epoch_9_0.9549072164948453.pth -> CNN_v3.pth + 查看下错误的样本 + + # 目前最佳模型为v3在全量数据集上的表现为: + t_loss:0.026421 t_acc:99.03 v_loss:0.134751 v_acc:96.30 time:640.373347 epoch:1 + # 最佳模型上使用新数据集的效果 + t_loss:0.039758 t_acc:98.52 v_loss:0.257718 v_acc:94.18 time:1444.300765 epoch:1 + + # 10%数据表现 + # 换方案,判别纯音乐/人声[允许带有伴奏] + t_loss:0.017819 t_acc:99.45 v_loss:0.098521 v_acc:98.34 time:174.100219 epoch:13 + # 上面基础上使用三分类男/女/纯音乐 + t_loss:0.020218 t_acc:99.43 v_loss:0.225065 v_acc:95.70 time:175.335642 epoch:14 + + # 100%数据表现 + 判别纯音乐/人声[允许带有伴奏] + t_loss:0.024891 t_acc:99.20 v_loss:0.133731 v_acc:98.25 time:1731.272228 epoch:0 + # 上面基础上使用三分类男/女/纯音乐 + t_loss:0.023616 t_acc:99.22 v_loss:0.281031 v_acc:95.77 time:1477.138014 epoch:1 + + # 当前模型目录: + 纯音乐=> /data/datasets/music_voice_dataset_2000/voice_10_bf_models_prod/CNN_epoch_0_0.9825138916688112.pth + 性别:/data/datasets/music_voice_dataset_2000/gender_10_bf_models_prod/CNN_epoch_1_0.9576633220592798.pth + + # v4 %10 数据表现 + 判别纯音乐/人声 + 基本到10轮的时候就过拟合了。6,7轮的时候还能用 + t_loss:0.065584 
t_acc:97.86 v_loss:0.100188 v_acc:98.04 time:413.426260 epoch:0 + + # v4 %100 数据表现 + 判别纯音乐/人声 + 当前目录:/data/datasets/music_voice_dataset_2000/voice_10_bf_models_test_v4 + t_loss:0.015261 t_acc:99.49 v_loss:0.165193 v_acc:98.25 time:4067.698646 epoch:0 + t_loss:0.001386 t_acc:99.96 v_loss:0.205789 v_acc:98.39 time:4068.362716 epoch:1 + + 看一下错误样本 + 目前来看,样本中存在一些问题,比如标注错误或者声音太小却没有被拉伸 + 不过也存在分类错误的样本 + 现在使用v3的模型过一下训练集和测试集,看一下错误样本的情况 + + # 重新标注后,v4 100% 数据情况如下: + t_loss:0.000614 t_acc:99.99 v_loss:0.023665 v_acc:99.57 time:2757.327274 epoch:1 + # 原来是按照歌曲段划分测试集和训练集,现在使用歌曲级别,保证同一首歌同一个类别不同时在训练集和测试集中 + 模型地址: /data/datasets/music_voice_dataset_2000/voice_10_bf_models_prod_v4 + t_loss:0.000156 t_acc:100.00 v_loss:0.054263 v_acc:99.11 time:4072.099376 epoch:2 + + # 使用4000首歌曲训练男女声分类的效果 + 模型地址:/data/datasets/music_voice_dataset_full/feature_no2000_v1_train_gender_models/test_5 + t_loss:0.127433 t_acc:94.49 v_loss:0.264918 v_acc:90.16 time:6022.119970 epoch:5 + + # 歌曲段级别划分测试和训练集,v4全量 + t_loss:0.001140 t_acc:99.97 v_loss:0.152362 v_acc:97.64 time:1354.751538 epoch:2 + # 使用歌曲级别划分v4全量 + 模型地址:/data/datasets/music_voice_dataset_2000/feature_10_bf/gender_prod_v4 + t_loss:0.000432 t_acc:99.99 v_loss:0.481137 v_acc:94.66 time:1364.221850 epoch:3 + + # 查看一下错误样本 + 发现错误的基本是sa的歌曲,且错误时集中在几首歌曲上. + 增加sa的样本数量 + 查看效果: + 测试一下: 模型地址为: /data/datasets/music_voice_dataset_2000/feature_10_bf/models/gender_test_v4 + t_loss:0.000462 t_acc:99.99 v_loss:0.539443 v_acc:93.57 time:2872.799518 epoch:3 + 效果变不好,查看下原因 + 从效果上看,基本都是男女声分类导致,且聚集在特定的几首歌曲中,也就是一错基本就错一首歌。 + 歌曲大多数伴奏的声音比较大,不纯净。 + + # 实验一下幅度谱的效果: v4基础上精标数据 + 模型地址: /data/datasets/music_voice_dataset_2000/models/gender_v5_amp + t_loss:0.065925 t_acc:97.63 v_loss:0.182696 v_acc:93.66 time:1010.414945 epoch:2 + + # 实验一下男女声分类的情况, 精标数据 + 模型地址:/data/datasets/music_voice_dataset_2000/models/gender_simple + t_loss:0.001239 t_acc:99.98 v_loss:0.588768 v_acc:89.84 time:230.209819 epoch:11 + + +第一阶段: + 4000首歌曲,10%数据(帧移)训练出来,v5模型 + 地址: /data/datasets/music_voice_dataset_full/models/v1_train_model_v5_01 + t_loss:0.116909 t_acc:95.12 v_loss:0.252035 v_acc:90.51 time:5696.957614 epoch:21 + + 4000首歌曲,100%数据,帧移32, v5模型 + 地址: /data/datasets/music_voice_dataset_full/models/v1_train_model_v5_32_1 + 尚未有结果 + +本周主要: + 1 观察 纯音乐/含人声 错误数据,并修复错误标注,结果: + 使用v4模型,验证集准确率达到99% + t_loss:0.000156 t_acc:100.00 v_loss:0.054263 v_acc:99.11 time:4072.099376 epoch:2 + 2 调整男女声和纯音乐 三分类模型 + v4添加sa之前: + t_loss:0.000432 t_acc:99.99 v_loss:0.481137 v_acc:94.66 time:1364.221850 epoch:3 + 添加sa数据之后: + 模型地址: /data/datasets/music_voice_dataset_2000/models/v4_prod + {0: {0: 213726, 1: 19302, 2: 2188}, 1: {0: 21025, 1: 185287, 2: 336}, 2: {0: 3277, 1: 1937, 2: 403881}} + t_loss:0.001005 t_acc:99.97 v_loss:0.392775 v_acc:94.35 time:4356.904859 epoch:3 + 运行一下v5的三分类情况 + 模型地址: /data/datasets/music_voice_dataset_2000/models/v5_prod + {0: {0: 213289, 1: 17473, 2: 4454}, 1: {0: 14995, 1: 191496, 2: 157}, 2: {0: 2463, 1: 2466, 2: 404166}} + t_loss:0.000289 t_acc:99.99 v_loss:0.366252 v_acc:95.06 time:4268.140554 epoch:5 + 结论: v5确实比v4的效果要好. 
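+    A sketch of the song-level train/validation split used above, so that no song
+    contributes frames to both sets (from memory -- the id rule, basename before
+    the last "_", is an assumption, not code from this repo):
+
+        import os, random
+
+        def split_by_song(feature_files, val_ratio=0.2, seed=0):
+            # group frame-feature files by song id
+            by_song = {}
+            for path in feature_files:
+                song_id = os.path.basename(path).rsplit("_", 1)[0]
+                by_song.setdefault(song_id, []).append(path)
+            songs = sorted(by_song)
+            random.Random(seed).shuffle(songs)
+            n_val = int(len(songs) * val_ratio)
+            train = [p for s in songs[n_val:] for p in by_song[s]]
+            val = [p for s in songs[:n_val] for p in by_song[s]]
+            return train, val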
+ + 3 使用包含人声/不包含人声 判定模型, 对4000首男女声做分类,然后在此基础上做训练。查看效果。 + 之前使用v4的时候训练集基本不拟合,所以本次使用v5模型 + 3.1 帧移为1,取10%数据训练,结果为: + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_train_model_v5_01/CNN_epoch_21_0.9051419197812333.pth + t_loss:0.116909 t_acc:95.12 v_loss:0.252035 v_acc:90.51 time:5696.957614 epoch:21 + 3.2 帧移为32,取100%数据训练,结果为: + 模型地址:/data/datasets/music_voice_dataset_full/models/v1_train_model_v5_32_1/CNN_epoch_33_0.9000197692808634.pth + {0: {0: 98870, 1: 15131, 2: 655}, 1: {0: 15384, 1: 97608, 2: 1664}, 2: {0: 991, 1: 565, 2: 113100}} + t_loss:0.015330 t_acc:99.49 v_loss:0.532758 v_acc:90.00 time:1009.850993 epoch:33 + 3.2基础上,学习率调低重新训练一下,并没有什么明显提升,有下降,没有提升,没啥意义 + 地址: /data/datasets/music_voice_dataset_full/models/v1_train_model_v5_32_1_tt + + 3.3 帧移为8,取100%数据训练,结果为: + 地址: /data/datasets/music_voice_dataset_full/models/v1_train_model_v5_8_1 + 等待结果========>gpu_2 + {0: {0: 395811, 1: 55748, 2: 2570}, 1: {0: 64317, 1: 383493, 2: 6319}, 2: {0: 3963, 1: 2203, 2: 447963}} + t_loss:0.151807 t_acc:93.46 v_loss:0.238958 v_acc:90.08 time:8110.130126 epoch:6 + 看一下线上情况: + + + + 4 歌曲级别调整参数 + 1 3.1 的模型在v1_test的结果 + 地址:/data/jianli.yang/voice_classification/train/music_voice_class/v1_test/v5_01.log + 3.2 的模型在v1_test的结果 + 地址:/data/jianli.yang/voice_classification/train/music_voice_class/v1_test/v5_32_1.log + + 第一条路: + 验证现在模型在线上表现. + 现有模型: v5_01和v5_32_1 + 需要做的: 1 调节好判定规则, 然后取一部分线上真实干声数据,非精品池,查看准确率 + 目前来看,v5_32_1比v5_01要好,所以,跑个v5_8_1的数据 + + ## 训练一个 >= 0.8则认为是非人声的模型,对4000首歌曲进行切分 + 模型地址: /data/datasets/music_voice_dataset_2000/models/voice_0.8_v4_prod + 结果: t_loss:0.000945 t_acc:99.97 v_loss:0.313700 v_acc:96.66 time:1413.452663 epoch:1 + + + ## 添加作品之后再次训练: + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_train_rec_model_v5_32_1/CNN_epoch_14_0.8820793874225047.pth + 跑测试环境的结果:/data/datasets/music_voice_dataset_full/feature_no2000_v1_test v1_test/rec_v5_32_1.log + {0: {0: 95170, 1: 16565, 2: 8326}, 1: {0: 10599, 1: 108194, 2: 1268}, 2: {0: 5281, 1: 434, 2: 114346}} + t_loss:0.225400 t_acc:90.70 v_loss:0.291986 v_acc:88.21 time:2058.023302 epoch:14 + 等待结果 ====> gpu-2 + + + 第二条路: + 训练纯人声/伴奏 的模型,使用幅度谱特征 + 测试一下100%效果: + 模型地址:/data/datasets/music_voice_dataset_2000/models/mvc_v4_amp_prod + 分析结果: + +整理目前的思路: + 从现在来看是在伴奏声音较大的情况下,男女声分类的难度比较大。 + 思路1: 在4000首训练中添加作品信息,相当于增加数据样本,然后验证效果。 + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_train_rec_model_v5_32_1/CNN_epoch_14_0.8820793874225047.pth + 训练结果: + {0: {0: 95170, 1: 16565, 2: 8326}, 1: {0: 10599, 1: 108194, 2: 1268}, 2: {0: 5281, 1: 434, 2: 114346}} + t_loss:0.225400 t_acc:90.70 v_loss:0.291986 v_acc:88.21 time:2058.023302 epoch:14 + + 等待结果: + 验证在测试集合上歌曲级别准确率: + 结果地址: /data/jianli.yang/voice_classification/train/music_voice_class/v1_test/rec_v5_32_1.log + 验证在线上集合上的歌曲级别准确率: + 非常低,这个不合理--> 找时间分析一下 + female: acc=0.9797979797979798|recall=0.37890625 + male: acc=0.9568965517241379|recall=0.8604651162790697 + + 思路2: 在精准标注数据集上做纯人声/其他的分类情况 + 模型地址: /data/datasets/music_voice_dataset_2000/models/mvc_v4_amp_prod/CNN_epoch_0_0.8474216263460809.pth + 结果: t_loss:0.132001 t_acc:94.35 v_loss:0.654083 v_acc:84.74 time:8217.338682 epoch:0 + 分析错误数据: + + + +当前其他模型结果回收: + 实验名称: 人声/其他分类 + 目的: 训练一个比例在0.8以及以上的会被认为是其他,以下是人声 + 结果: + t_loss:0.014844 t_acc:99.49 v_loss:0.265037 v_acc:96.64 time:1426.662130 epoch:0 + 模型地址: /data/datasets/music_voice_dataset_2000/models/voice_0.8_v4_prod/CNN_epoch_0_0.966378517385332.pth + + + + +实验了一个方式: + 1 使用精确的数据去训练一个人声/其他的模型 + v5模型:/data/datasets/music_voice_dataset_2000/models/voice_v5_prod + t_loss:0.000473 t_acc:99.99 
v_loss:0.059781 v_acc:99.20 time:2275.380936 epoch:3 + + 2 使用4000首歌曲去训练男女声 + 2.1 使用纯净的人声去设计 + v5模型: 间隔32: /data/datasets/music_voice_dataset_full/models/v1_train_model_v5_32_1_simple_tt + {0: {0: 154930, 1: 23811, 2: 0}, 1: {0: 23255, 1: 155486, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.175862 t_acc:92.00 v_loss:0.297609 v_acc:86.83 time:2006.342807 epoch:5 + 2.2 使用rec去设计 + v5模型: 间隔32: /data/datasets/music_voice_dataset_full/models/v1_train_rec_model_v5_32_1_simple + {0: {0: 153254, 1: 15592, 2: 0}, 1: {0: 18554, 1: 150292, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.057036 t_acc:97.70 v_loss:0.386268 v_acc:89.89 time:2133.461049 epoch:22 + +今天尝试使用两个模型来决策结果: +1 使用精标数据集来判断人声 v5模型 +2 使用4000首歌曲训练男女声 v5模型,间隔为32的数据集 +线上随机取的数据集表现如下: + ff:184,fm:0,fo:72 + mm:173,mf:0,mo:85 + female: acc=1.0|recall=0.71875 + male: acc=1.0|recall=0.6705426356589147 + +相同参数下直接训练出的三分类模型结果: + +v5模型,间隔为32的4000首歌曲数据集 ==> s1 +ff:174,fm:2,fo:80 +mm:76,mf:2,mo:180 +female: acc=0.9886363636363636|recall=0.6796875 +male: acc=0.9743589743589743|recall=0.29457364341085274 + +v5模型,间隔为32的4000首歌曲 + 作品 ==> s2 +ff:130,fm:2,fo:124 +mm:101,mf:2,mo:155 +female: acc=0.9848484848484849|recall=0.5078125 +male: acc=0.9805825242718447|recall=0.39147286821705424 + +v5模型,间隔为1的精标数据集 ==> s3 +ff:186,fm:5,fo:65 +mm:154,mf:6,mo:98 +female: acc=0.96875|recall=0.7265625 +male: acc=0.9685534591194969|recall=0.5968992248062015 + + +使用精标数据集训练出的人声伴奏模型分人声,然后使用 s3数据调整,得到的结果 +ff:177,fm:1,fo:77 +mm:175,mf:3,mo:79 +female: acc=0.9833333333333333|recall=0.6941176470588235 +male: acc=0.9943181818181818|recall=0.6809338521400778 + +使用精标数据集训练出的人声伴奏模型分人声,然后使用 s2数据调整,得到的结果 +ff:201,fm:2,fo:52 +mm:180,mf:1,mo:76 +female: acc=0.995049504950495|recall=0.788235294117647 +male: acc=0.989010989010989|recall=0.7003891050583657 + + +明日工作: + 1 再随机下载400首线上歌曲,查看效果,先下载下来标注下,然后统计 + 2 + + +验证线上数据v1的效果: +1 模型: + 人声伴奏判断模型:/data/datasets/music_voice_dataset_2000/models/voice_v5_prod/CNN_epoch_3_0.9919894003121993.pth + 男女声模型: /data/datasets/music_voice_dataset_full/models/v1_train_model_v5_32_1_simple_tt/CNN_epoch_5_0.8683402241231727.pth + 结果路径: /data/jianli.yang/voice_classification/train/music_voice_class/v1_test/online_data_v1/v5_prod_v5_32_1.log + +目前一共有三组验证数据集: + 1 v1_test + 2 online_data + 3 online_data_v1 + +三个可用模型,两种情况: +模型地址: +人声伴奏: /data/datasets/music_voice_dataset_2000/voice_10_bf_models_prod_v4/CNN_epoch_2_0.9910721598820542.pth [v4模型,暂未使用] +人声伴奏:/data/datasets/music_voice_dataset_2000/models/voice_v5_prod/CNN_epoch_3_0.9919894003121993.pth +男女声[4000干声]: /data/datasets/music_voice_dataset_full/models/v1_train_model_v5_32_1_simple_tt/CNN_epoch_5_0.8683402241231727.pth +男女声[4000干声+作品]: /data/datasets/music_voice_dataset_full/models/v1_train_rec_model_v5_32_1_simple/CNN_epoch_22_0.8988841903272805.pth +1 人声伴奏 + 男女声[4000干声] +1 人声伴奏 + 男女声[4000干声+作品] + +所以一共会有6个输出文件 +1. 1+1 => 结果地址: v1_test/v5_prod_v5_32_1_simple.log +ff:1509,fm:4,fo:586 +mm:1303,mf:3,mo:694 +female: acc=0.998015873015873|recall=0.7189137684611719 +male: acc=0.9969395562356542|recall=0.6515 + +2. 1+2 => 结果地址: v1_test/v5_prod_v5_32_1_simple_rec.log +ff:1510,fm:5,fo:584 +mm:1506,mf:9,mo:485 +female: acc=0.9940750493745886|recall=0.7193901858027633 +male: acc=0.9966909331568498|recall=0.753 +3. 2+1 => 结果地址: v1_test/online_data/v5_prod_v5_32_1_simple.log + +ff:136,fm:0,fo:120 +mm:173,mf:0,mo:85 +female: acc=1.0|recall=0.53125 +male: acc=1.0|recall=0.6705426356589147 + + +4. 
2+2 => 结果地址: v1_test/online_data/v5_prod_v5_32_1_simple_rec.log +ff:138,fm:2,fo:116 +mm:174,mf:2,mo:82 +female: acc=0.9857142857142858|recall=0.5390625 +male: acc=0.9886363636363636|recall=0.6744186046511628 +前面四个调整参数 & 最后验证结果 +---------------------------------------------------------------------- +5. 3+1 => 结果地址: v1_test/online_data_v1/v5_prod_v5_32_1_simple.log +ff:119,fm:1,fo:104 +mm:124,mf:0,mo:92 +female: acc=1.0|recall=0.53125 +male: acc=0.992|recall=0.5740740740740741 + +6. 3+2 => 结果地址: v1_test/online_data_v1/v5_prod_v5_32_1_simple_rec.log +ff:136,fm:2,fo:86 +mm:142,mf:0,mo:74 +female: acc=1.0|recall=0.6071428571428571 +male: acc=0.9861111111111112|recall=0.6574074074074074 + +结论: 使用非作品的效果更好一点 +目前方案1: + 使用精标数据集去训练 纯音乐/人声 模型 [模型结构为v5] 准确率: 99.2% + 使用4000首作品,帧移为32帧,训练男/女声 模型[模型结构为v5] 准确率:86.83% + +目前方向: + 优化男女声模型 + 使用v5精标数据集在其验证集上的效果为:89.2% + 相同模型使用4000首歌曲训练出来的结果在精标验证集上的效果为: 91.67% + 重新标注4000首歌曲后训练出来的结果在精标验证集上的效果为:93.2% + 重新标注8000首歌曲后训练出来的结果在精标验证集上的效果为:95.48% + 重新标注8000首歌曲后使用MobilenetV2训练处理的结果在精标验证集上的效果为: 0.9628% +两类工作: + 1 使用模型重新标注数据: + 1.1 标注4000首干声数据 => 完成 + 使用其训练v5: + 地址为:/data/datasets/music_voice_dataset_full/models/v1_train_32_model_v5_32_1_simple/ + {0: {0: 128726, 1: 8779, 2: 0}, 1: {0: 9561, 1: 127944, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.054495 t_acc:97.83 v_loss:0.224657 v_acc:93.33 time:1470.237090 epoch:7 + 1.2 标注4000首作品数据: + 地址为:/data/datasets/music_voice_dataset_full/models/v1_train_32_model_v5_32_1_simple_rec/ + 等待结果: + {0: {0: 237128, 1: 21590, 2: 0}, 1: {0: 19365, 1: 239353, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.152439 t_acc:93.59 v_loss:0.196681 v_acc:92.09 time:1457.347468 epoch:3 + ------> + {0: {0: 238471, 1: 20247, 2: 0}, 1: {0: 21094, 1: 237624, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.008027 t_acc:99.73 v_loss:0.554205 v_acc:92.01 time:2930.421768 epoch:24 + + 1.3 标注8000首干声数据 + 地址为:/data/datasets/music_voice_dataset_full/models/v1_8000_32_model_v5_32_1_simple/ + 等待结果: + {0: {0: 249347, 1: 17200, 2: 0}, 1: {0: 18164, 1: 248383, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.071399 t_acc:97.16 v_loss:0.207060 v_acc:93.37 time:3213.233419 epoch:6 + ---------- + {0: {0: 249292, 1: 17255, 2: 0}, 1: {0: 17648, 1: 248899, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.010262 t_acc:99.63 v_loss:0.390779 v_acc:93.45 time:3169.252249 epoch:15 + 1.4 标注8000首干声+作品数据 + 地址为: /data/datasets/music_voice_dataset_full/models/v1_8000_32_model_v5_32_1_simple_rec/ + 等待结果: + t_loss:0.098285 t_acc:95.88 v_loss:0.203232 v_acc:92.53 time:30417.976227 epoch:7 + loss已经开始升高了,过拟合开始了。 + 1.5 标注8000首干声+2000首线上数据 + 地址为: /data/datasets/music_voice_dataset_full/models/v1_8000_2000_32_model_v5_32_1_simple/ + 等待结果: + t_loss:0.068190 t_acc:97.32 v_loss:0.251922 v_acc:92.32 time:2021.508124 epoch:13 + 开始过拟合了 + + + + 2 使用重新标注好的数据再次实验 + 选用模型: + 人声伴奏:/data/datasets/music_voice_dataset_2000/models/voice_v5_prod/CNN_epoch_3_0.9919894003121993.pth + 男女声[4000干声]: /data/datasets/music_voice_dataset_full/models/v1_train_32_model_v5_32_1_simple/CNN_epoch_7_0.9333115159448747.pth + + 三组测试数据集: + 1 v1_test + 2 online_data + 3 online_data_v1 + + 实验: + 三个输出文件: + 1 v1_test => 结果地址: v1_test/v5_prod_v5_32_1_simple_relabeled.log + ff:1590,fm:3,fo:506 + mm:1290,mf:4,mo:706 + female: acc=0.9974905897114178|recall=0.7575035731300619 + male: acc=0.9976798143851509|recall=0.645 + 2 online_data => v1_test/online_data/v5_prod_v5_32_1_simple_relabeled.log + ff:143,fm:0,fo:113 + mm:166,mf:0,mo:92 + female: acc=1.0|recall=0.55859375 + male: acc=1.0|recall=0.6434108527131783 + + 3 online_data_v1 => 
v1_test/online_data_v1/v5_prod_v5_32_1_simple_relabeled.log + ff:130,fm:1,fo:93 + mm:119,mf:0,mo:97 + female: acc=1.0|recall=0.5803571428571429 + male: acc=0.9916666666666667|recall=0.5509259259259259 + + 相同参数下,微弱提升. + 目前等待着1.2 和 1.3 的结果 + + 训练个三分类看看,如果可行的话,接下来可以直接用三分类来做. + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_train_32_model_v5_32_1_three + {0: {0: 130134, 1: 8717, 2: 1441}, 1: {0: 8921, 1: 129080, 2: 2291}, 2: {0: 1314, 1: 1257, 2: 137721}} + t_loss:0.020372 t_acc:99.30 v_loss:0.272711 v_acc:94.31 time:1156.574923 epoch:13 + + + 查看三分类的最终结果: + 三组测试数据集: + 1 v1_test + 2 online_data => v1_test/online_data/v5_prod_v5_32_1_gender_relabeled.log + 3 online_data_v1 =>v1_test/online_data_v1/v5_prod_v5_32_1_gender_relabeled.log +三分类效果略弱于两个模型叠加的结果 +----------------------------------------------------------------------------------------- +确定使用两个模型的方案: + +验证下面两个分类模型的效果: + 1 4000首干声+伴奏 + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_train_32_model_v5_32_1_simple_rec/CNN_epoch_24_0.9201041288197961.pth + + 2 8000首干声 + 模型地址:/data/datasets/music_voice_dataset_full/models/v1_8000_32_model_v5_32_1_simple/CNN_epoch_15_0.934527494213028.pth + +实验1: + 11 v1_test => 结果地址: v1_test/v5_prod_v5_32_1_simple_rec_relabeled.log + ff:1669,fm:3,fo:427 + mm:1449,mf:1,mo:550 + female: acc=0.9994011976047904|recall=0.7951405431157694 + male: acc=0.9979338842975206|recall=0.7245 + 12 v1_test => 结果地址: v1_test/v5_prod_v5_32_1_simple_8k_relabeled.log + 注意: 这是自己玩自己,不具有啥参考性 + ff:2015,fm:0,fo:84 + mm:1917,mf:0,mo:83 + female: acc=1.0|recall=0.9599809433063363 + male: acc=1.0|recall=0.9585 + + 21 online_data => v1_test/online_data/v5_prod_v5_32_1_simple_rec_relabeled.log + ff:149,fm:2,fo:105 + mm:188,mf:0,mo:70 + female: acc=1.0|recall=0.58203125 + male: acc=0.9894736842105263|recall=0.7286821705426356 + + 22 online_data => v1_test/online_data/v5_prod_v5_32_1_simple_8k_relabeled.log + ff:145,fm:3,fo:108 + mm:192,mf:0,mo:66 + female: acc=1.0|recall=0.56640625 + male: acc=0.9846153846153847|recall=0.7441860465116279 + 微调阈值后: + ff:145,fm:2,fo:109 + mm:186,mf:0,mo:72 + female: acc=1.0|recall=0.56640625 + male: acc=0.9893617021276596|recall=0.7209302325581395 + + 31 online_data_v1 => v1_test/online_data_v1/v5_prod_v5_32_1_simple_rec_relabeled.log + ff:141,fm:2,fo:81 + mm:140,mf:1,mo:75 + female: acc=0.9929577464788732|recall=0.6294642857142857 + male: acc=0.9859154929577465|recall=0.6481481481481481 + + 32 online_data_v1 => v1_test/online_data_v1/v5_prod_v5_32_1_simple_8k_relabeled.log + ff:139,fm:2,fo:83 + mm:148,mf:0,mo:68 + female: acc=1.0|recall=0.6205357142857143 + male: acc=0.9866666666666667|recall=0.6851851851851852 + 微调阈值后0.9,0.09: + ff:139,fm:1,fo:84 + mm:144,mf:0,mo:72 + female: acc=1.0|recall=0.6205357142857143 + male: acc=0.993103448275862|recall=0.6666666666666666 + + 8k数据进行训练召回率online_data和online_data_v1上均有提升,但是增加rec并未发现有提升。 + +线上数据验证效果: + 在online_data上 + 4k干声: + female: acc=1.0|recall=0.55859375 + male: acc=1.0|recall=0.6434108527131783 + 8k干声: + female: acc=1.0|recall=0.56640625 + male: acc=0.9846153846153847|recall=0.7441860465116279 + 8k干声新方案: + female: acc=1.0|recall=0.6733067729083665 + male: acc=0.9948717948717949|recall=0.7404580152671756 + + 在online_data_v1上: + 4k干声: + female: acc=1.0|recall=0.5803571428571429 + male: acc=0.9916666666666667|recall=0.5509259259259259 + 8k干声: + female: acc=1.0|recall=0.6205357142857143 + male: acc=0.993103448275862|recall=0.6666666666666666 + 8k干声新方案: + female: acc=1.0|recall=0.6954545454545454 + male: acc=0.9935897435897436|recall=0.7045454545454546 + 
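+    How the acc/recall lines in this log are computed from the counters
+    (ff = female clip predicted female, fm = female predicted male, fo = female
+    rejected as "other"; mm/mf/mo analogously). "acc" here is precision over the
+    clips predicted as that gender; e.g. ff:136,fm:0,fo:120 gives female recall
+    136/256 = 0.53125, matching the numbers above. Sketch:
+
+        def gender_metrics(ff, fm, fo, mm, mf, mo):
+            female_acc = ff / (ff + mf)          # precision of "female" predictions
+            female_recall = ff / (ff + fm + fo)
+            male_acc = mm / (mm + fm)            # precision of "male" predictions
+            male_recall = mm / (mm + mf + mo)
+            return female_acc, female_recall, male_acc, male_recall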
+Current plan:
+    1 The plan is now fixed as:
+        voice model + gender model
+        Option 1: the voice model is lenient when judging audio -- as long as any vocal is present in the accompaniment, it counts as vocal;
+            then optimize the gender model.
+    Progress on the option-1 track:
+        1 trained on 4000 dry-vocal songs
+            validation-set result: 93.33%
+            model path: /data/datasets/music_voice_dataset_full/models/v1_train_32_model_v5_32_1_simple/CNN_epoch_7_0.9333115159448747.pth
+            online: at 99% accuracy, recall is around 55%
+        2 trained on 4000 dry vocals + recordings
+            model path: /data/datasets/music_voice_dataset_full/models/v1_train_32_model_v5_32_1_simple_rec/CNN_epoch_24_0.9201041288197961.pth
+            online: at 99% accuracy, recall did not improve
+        3 trained on 8000 dry vocals
+            model path: /data/datasets/music_voice_dataset_full/models/v1_8000_32_model_v5_32_1_simple/CNN_epoch_15_0.934527494213028.pth
+            online: at 99% accuracy, recall is around 60%
+
+Conclusion: 8k dry vocals clearly improves recall over 4k, so the current sample size still looks insufficient.
+
+    Next:
+        1 train a separate model on the samples the 8k model gets wrong, as a hard-case model to protect recall
+            model path: /data/datasets/music_voice_dataset_full/models/v1_8000_err_32_model_v5_32_1_simple
+            validation accuracy is very low, only 65%
+        1.1 label 2000 online clips, take the parts scoring below 0.9 and above 0.1, add the matching part of the 8k set, train, and check
+            result path:
+                v1_test/online_data_v2_full/v5_prod_v5_32_1_simple_8k_relabeled.log
+            Not good; this feels like a data-volume problem.
+            Two routes:
+                1 label the 2000 online clips and merge them into the 8000-song set for training
+                    step 1:
+                        auto-label the vocal segments
+                    step 2:
+                        train gender on 8000 + 2000
+
+        Option 2: the voice model is strict -- any accompaniment mixed into the vocal means it is judged non-vocal; in that case optimize the voice model first.
+            Voice-model decision features:
+                1 MFCC features
+                2 spectral features
+            Train the <=0.005 model
+                start with MFCC features
+                model path: /data/datasets/music_voice_dataset_2000/models/v5_005_prod
+                results:
+                    t_loss:0.102047 t_acc:95.63 v_loss:1.063264 v_acc:83.21 time:1699.988639 epoch:0
+                    t_loss:0.006602 t_acc:99.77 v_loss:1.264120 v_acc:82.94 time:1682.336411 epoch:1
+                error analysis:
+                    the goal is to settle on a target
+
+    2 Goal: train a model that only calls a frame vocal when the accompaniment ratio is very small
+        1 accompaniment ratio acc_rate in 0-0.05: treat as vocal, loss *= (1 - acc_rate)
+          acc_rate above 0.05: treat as accompaniment, loss *= (1 - acc_rate)
+          no vocal at all: acc_rate = -1, in which case loss *= 1
+        2 at validation time, frames with acc_rate in (0.05, 0.8) do not take part in the accuracy computation
+        model path: /data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod
+        val_rate:{0: {0: 222569, 1: 1847}, 1: {0: 48781, 1: 173245}}|0.8202284871936614,0.9894512599090763
+        Model characteristics: pure vocal is easily rejected, but other audio rarely leaks in; usable for now.
+        model path: /data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod/CNN_epoch_2_0.8271522530771435.pth
+        158 segments at >=0.8, 1475 at -1, 1516 in 0-0.05; the proportions are small, so the validation numbers may be unreliable.
+        Current idea:
+            take the dry-vocal segments with <=0.05, cut the matching recording segments as negative samples, add them to training, and check
+            /data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod_rec
+            val_rate:{0: {0: 542490, 1: 34872}, 1: {0: 84808, 1: 176415}}|0.8648042875953693,0.8349543511905607
+
+            Use it to label the 8k dry vocals and train a pure-dry-vocal model, to guarantee high precision.
+            Labeling done: ma_label_32_strict.txt
+            Train a gender model on it:
+                model path: /data/datasets/music_voice_dataset_full/models/v1_8000_strict_32_model_v5_32_1_simple/
+                {0: {0: 149735, 1: 10124, 2: 0}, 1: {0: 10437, 1: 149422, 2: 0}, 2: {0: 0, 1: 0, 2: 0}}
+                t_loss:0.004969 t_acc:99.84 v_loss:0.475428 v_acc:93.57 time:2084.087150 epoch:24
+                Not good.
+                Retry with an adjusted learning rate:
+                model path: /data/datasets/music_voice_dataset_full/models/v1_8000_strict_64_model_v5_32_1_simple/
+                {0: {0: 79000, 1: 6325, 2: 0}, 1: {0: 5360, 1: 79965, 2: 0}, 2: {0: 0, 1: 0, 2: 0}}
+                t_loss:0.122782 t_acc:95.18 v_loss:0.187568 v_acc:93.15 time:1434.555420 epoch:9
+
+                Even lower.
+
+        Experiment:
+            model path: /data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod/CNN_epoch_0_0.8399920319282035.pth
+            check it on the three test sets:
+            set 1: online_data, result: v1_test/online_data/v5_rate_prod_v5_32_1_simple_rec_relabeled.log
+            set 2: online_data_v1, result: v1_test/online_data_v1/v5_rate_prod_v5_32_1_simple_rec_relabeled.log
+            set 3: online_data_v2_full, result: v1_test/online_data_v2/v5_rate_prod_v5_32_1_simple_rec_relabeled.log
+            waiting for results.....
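+    Sketch of the acc_rate-weighted loss described in point 2 above (the exact
+    implementation may differ; this is the idea):
+
+        import torch
+        import torch.nn.functional as F
+
+        def rate_weighted_loss(logits, labels, acc_rates):
+            # per-frame cross entropy, down-weighted by the accompaniment ratio;
+            # acc_rate == -1 (no vocal at all) keeps full weight
+            per_sample = F.cross_entropy(logits, labels, reduction="none")
+            weights = torch.where(acc_rates < 0,
+                                  torch.ones_like(acc_rates),
+                                  1.0 - acc_rates)
+            return (per_sample * weights).mean()
+
+    At validation time, frames with 0.05 < acc_rate < 0.8 are simply skipped when
+    computing accuracy, which is also what test_rate_frame.py above does.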
+ + +今日工作: + 1 训练8000+2000的干声的情况以及观察结果 + 模型地址: + 2 构建干声判别模型 + 数据集使用作品+干声的方式 + + 3 等待结果: + 1 等待8000干声+作品的结果 + 2 等待8000干声使用干声判别模型切分后的结果 + + + GMM实验一下,先用4000首歌曲的情况试一下 + 模型地址:/data/datasets/music_voice_dataset_full/models/v1_4000_64_gender_gmm + + +本周工作: + 一共两个方向: + 1 纯净人声模型 + 纯净人声分类(保证准确) + 2 非纯净人声模型 + 非纯净人声分类(保证召回率) + + 对于1: + 目标是制作出准确率98%以上的男女声分类模型 + 方案是两个模型: + A: 纯净人声(<=0.05) 和 (>=0.8) 以上的分类情况要到99% + B: 纯净人声(<=0.05)模型标注8k干声数据,纯净人声的男女声分类要求验证集在98% + 周一先做这个: + 实验1: A模型先训练. + 数据集: /data/datasets/music_voice_dataset_2000/feature_rec_005 + 备注: 在原来标注的基础上增加了作品的人声段当作非纯人声且伴奏占比系数为1 + 当前结果: + val_rate:{0: {0: 542490, 1: 34872}, 1: {0: 84808, 1: 176415}}|0.8648042875953693,0.8349543511905607 + 0是非纯净人声,1是纯净人声 + 先看下错误情况: + 代码编写错误,导致数据标注错误, + 修复错误后: + val_rate:{0: {0: 459650, 1: 3144}, 1: {0: 47598, 1: 213625}}|0.9061642431315649,0.9854960810817045 + 观察top10错误文件,基本是1=>0,听完后感觉错误样本中基本是能听到音乐或者环境噪声。可以将其标注为0.1 + 验证全部的验证集 + 模型: /data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod_rec/CNN_epoch_1_0.8822981570144947.pth + 错误样本分布情况来看,基本是纯净人声被分为非纯净人声,而带伴奏被分为纯净人声的占比很小,且从错误样本的观察来看,两者很像,都是人声比较清晰 + 的前提下,可以听到音乐声。理论上这些可以归咎到0.1-0.8中,因此认定该模型可用。 + 接下来的操作中: + 1 用上述模型去标注8k干声数据 + /data/datasets/music_voice_dataset_full/feature_no2000/ma_label_32_strict_v1.txt + 2 利用新的8k干声数据做训练 + 全量数据实验一下: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v5_simple_test/ + {0: {0: 152726, 1: 10232, 2: 0}, 1: {0: 10984, 1: 151974, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.089455 t_acc:96.50 v_loss:0.192765 v_acc:93.49 time:1602.044117 epoch:5 + 从96%开始走下坡路,可以分析一下情况。 + 看一下错误样本的情况: + 存在标注错误的样本,修复后重新训练. + 100%实验一下: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v5_simple_test_v1 + t_loss:0.095087 t_acc:96.18 v_loss:0.165563 v_acc:93.80 time:987.646289 epoch:4 + t_loss:0.073731 t_acc:97.03 v_loss:0.178664 v_acc:93.76 time:1845.602625 epoch:6 + + 增加模型复杂度试试: + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_test_v1 + 10%数据试一下, + t_loss:0.090942 t_acc:96.55 v_loss:0.208635 v_acc:92.27 time:268.393310 epoch:14 + 模型地址:/data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_prod_v1 + 100% 数据试一下: + t_loss:0.046150 t_acc:98.19 v_loss:0.204861 v_acc:93.90 time:1945.790238 epoch:11 + + 换一下优化函数: + 10%试一下: + t_loss:0.073767 t_acc:96.97 v_loss:0.161529 v_acc:94.36 time:267.835021 epoch:11 + + 调整分类数量为2: + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_test_rmspop + t_loss:0.059705 t_acc:97.57 v_loss:0.182167 v_acc:94.31 time:204.898006 epoch:12 + 调整分类数量为2: + 100%数据试一下: + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_prod_rmspop_2 + {0: {0: 154495, 1: 7833, 2: 0}, 1: {0: 7669, 1: 154659, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.027944 t_acc:98.88 v_loss:0.189234 v_acc:95.23 time:2228.385751 epoch:12 + + 100%数据试一下: + 地址: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_prod_rmspop + t_loss:0.031015 t_acc:98.77 v_loss:0.176232 v_acc:95.36 time:3032.555058 epoch:11 + + + 结论是: 使用mobilnetV2比mobilenetV1好一点点,很微弱,换优化函数有明显提升,验证集效果提升了1.46% + 接下来: + 1 验证下mobilenetV1 使用rmsprop优化效果。 + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v5_simple_test_v1_rmspop + 10%数据实验一下: + t_loss:0.002758 t_acc:99.91 v_loss:0.485332 v_acc:94.10 time:115.288640 epoch:22 + 从这里来看,确实比mobilnetV2要差一点点。 + + 观察一下当前组合的准确率和召回率 + 人声判定模型 : /data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod_rec/CNN_epoch_1_0.8822981570144947.pth + 
男女声判定模型:/data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_prod_rmspop/CNN_epoch_11_0.9535816371790449.pth + 数据集: + online_data + 结果地址: v1_test/online_data/v5_rate_prod_v6_32_1_simple_relabeled.log + online_data_v1 + 结果地址: v1_test/online_data_v1/v5_rate_prod_v6_32_1_simple_relabeled.log + 结果在上面对比。提升较为明显。 + + 接下来思路: + 1 模型以及优化器调整 + baseline: rmsprop(94.31%) + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_test_rmspop + 测试1: 使用adam优化器(94.54%) + {0: {0: 15373, 1: 905, 2: 0}, 1: {0: 874, 1: 15404, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.071549 t_acc:97.08 v_loss:0.156906 v_acc:94.54 time:337.212903 epoch:11 + 测试2: 调整学习率 + [5,10] -> e-2,e-3,e-4, 无明显提升,基本还是在94.20附近 + 测试3:mobilnetV3+adam + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v7_simple_test_adam + t_loss:0.089159 t_acc:96.39 v_loss:0.153217 v_acc:94.23 time:369.031749 epoch:8 + 无提升 + + 使用v6 + adam 跑一下rate的模型: + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_prod_adam + {0: {0: 154468, 1: 7860, 2: 0}, 1: {0: 7210, 1: 155118, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.040941 t_acc:98.36 v_loss:0.160263 v_acc:95.36 time:1169.141896 epoch:6 + + + 2 用v6+adam 跑一下干声模型: + baseline: v5+sgd(93.45%) + 模型地址: /data/datasets/music_voice_dataset_full/models/v1_8000_32_model_v6_32_1_simple + {0: {0: 253835, 1: 13091, 2: 0}, 1: {0: 12863, 1: 254063, 2: 0}, 2: {0: 0, 1: 0, 2: 0}} + t_loss:0.040572 t_acc:98.38 v_loss:0.174166 v_acc:95.14 time:1826.355314 epoch:6 + + +目前纯净人声最佳分类模型(95.36%):/data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_prod_adam/CNN_epoch_6_0.9535816371790449.pth +目前非纯净人声最佳分类模型(95.13%):/data/datasets/music_voice_dataset_full/models/v1_8000_32_model_v6_32_1_simple/CNN_epoch_6_0.9513835295175442.pth +接下来思路: + 仍旧先从纯净人声入手,看一下错误样本的情况. + 当前最佳模型结构为: mobileNet_v2 + adam优化器 + 错误样本: + logs/v6_rmspop_pure_8k_test.log + 1 使用FocalLoss进行一下困难样本处理 + 10% 测试一下: + 模型地址:/data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_test_adam + gpu-1/jianli_1 100%数据 实验ohem_20 + + + + + + + + + + 对于2: + 因为8k的样本的准确率比较高,查看下错误的样本具体情况 + 8k样本数据训练出来在精标数据集上的表现为95.48% + 错误的帧为: logs/test_8000_relabeled_gender.log + 目的是为了分析错误的帧的情况: + 从当前情况看,错误的原因是音频本身就比较像女声 + + 三个模型: + 纯人声/其他: /data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod_rec/CNN_epoch_1_0.8822981570144947.pth + 人声/其他: /data/datasets/music_voice_dataset_2000/models/voice_v5_prod/CNN_epoch_3_0.9919894003121993.pth + 男女声: /data/datasets/music_voice_dataset_full/models/v1_8000_32_model_v5_32_1_simple/CNN_epoch_15_0.934527494213028.pth + + 两个验证集: + 1 online_data + v1_test/online_data/v5_rate_v5_prod_v5_8000_32_1_simple_relabeled.log + 2 online_data_v1 + v1_test/online_data_v1/v5_rate_v5_prod_v5_8000_32_1_simple_relabeled.log + 没啥效果. 
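+    Standard focal-loss sketch for the hard-sample experiment mentioned above
+    (gamma/alpha values are assumptions -- the values actually tried are not
+    recorded here):
+
+        import torch
+        import torch.nn.functional as F
+
+        def focal_loss(logits, labels, gamma=2.0, alpha=None):
+            log_p = F.log_softmax(logits, dim=1)
+            log_pt = log_p.gather(1, labels.unsqueeze(1)).squeeze(1)
+            pt = log_pt.exp()
+            loss = -((1.0 - pt) ** gamma) * log_pt   # down-weight easy samples
+            if alpha is not None:                    # optional per-class weights, shape (C,)
+                loss = loss * alpha.gather(0, labels)
+            return loss.mean()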
+
+
+Verify:
+    pure vocal/other (vocal accuracy 98.5%, other accuracy 90.6%): /data/datasets/music_voice_dataset_2000/models/v5_005_rate_prod_rec/CNN_epoch_1_0.8822981570144947.pth
+    current best pure-vocal gender classifier (95.36%): /data/datasets/music_voice_dataset_full/models/v1_8000_32_rate_v5_model_v6_simple_prod_adam/CNN_epoch_6_0.9535816371790449.pth
+    vocal/other (99%): /data/datasets/music_voice_dataset_2000/models/voice_v5_prod/CNN_epoch_3_0.9919894003121993.pth
+    current best non-pure-vocal gender classifier (95.13%): /data/datasets/music_voice_dataset_full/models/v1_8000_32_model_v6_32_1_simple/CNN_epoch_6_0.9513835295175442.pth
+
+    online_data:
+        v1_test/online_data/v5_prod_v6_32_1_simple_relabeled.log
+    online_data_v1:
+        v1_test/online_data_v1/v5_prod_v6_32_1_simple_relabeled.log
+    online_data_v2_full:
+        v1_test/online_data_v2_full/v5_prod_v6_32_1_simple_relabeled.log
+    With thresholds tuned so that accuracy stays above 99% on all three datasets:
+    Detailed numbers:
+    Two-model combination:
+        online_data:
+            ff:195,fm:1,fo:55
+            mm:209,mf:0,mo:53
+            female: acc=1.0|recall=0.7768924302788844
+            male: acc=0.9952380952380953|recall=0.7977099236641222
+        online_data_v1:
+            ff:180,fm:1,fo:39
+            mm:159,mf:0,mo:61
+            female: acc=1.0|recall=0.8181818181818182
+            male: acc=0.99375|recall=0.7227272727272728
+        online_data_v2_full:
+            ff:908,fm:7,fo:151
+            mm:871,mf:7,mo:231
+            female: acc=0.9923497267759562|recall=0.851782363977486
+            male: acc=0.9920273348519362|recall=0.7853922452660054
+    Single tuned model:
+        online_data:
+            ff:187,fm:1,fo:63
+            mm:205,mf:0,mo:57
+            female: acc=1.0|recall=0.7450199203187251
+            male: acc=0.9951456310679612|recall=0.7824427480916031
+        online_data_v1:
+            ff:178,fm:1,fo:41
+            mm:153,mf:0,mo:67
+            female: acc=1.0|recall=0.8090909090909091
+            male: acc=0.9935064935064936|recall=0.6954545454545454
+        online_data_v2_full:
+            ff:890,fm:5,fo:171
+            mm:853,mf:7,mo:249
+            female: acc=0.992196209587514|recall=0.8348968105065666
+            male: acc=0.9941724941724942|recall=0.7691614066726781
+
+
+Ideas:
+    1 improve accuracy on the hard samples
+
+------------------------------------------------------------------>>>
+Scripts currently used in production:
+1 music_gender_class_simple.py    # gender classification
+2 music_gender_class_val_v2.py    # predicts online data with the voice-classification and gender-classification model pair
+3 music_voice_class.py            # vocal-with-accompaniment / accompaniment classification
+4 music_voice_class_rate.py       # (pure) vocal / accompaniment classification
+
+
+
diff --git a/AIMeiSheng/webui_play_huisen_fast.py b/AIMeiSheng/webui_play_huisen_fast.py
new file mode 100644
index 0000000..89a0d85
--- /dev/null
+++ b/AIMeiSheng/webui_play_huisen_fast.py
@@ -0,0 +1,174 @@
+"""
+Singing voice-conversion web page (based on 3.0)
+1. Upload an audio clip
+2. Choose male or female voice
+3. Pick a song
+"""
diff --git a/AIMeiSheng/webui_play_huisen_fast.py b/AIMeiSheng/webui_play_huisen_fast.py new file mode 100644 index 0000000..89a0d85 --- /dev/null +++ b/AIMeiSheng/webui_play_huisen_fast.py @@ -0,0 +1,174 @@
+"""
+Build a singing voice (timbre) conversion web page, based on 3.0:
+1. upload an audio clip
+2. select male or female voice
+3. select a song
+"""
+
+import os
+import time
+import glob
+import hashlib
+import shutil
+import gradio as gr
+from meisheng_svc import load_model, meisheng_svc
+
+##--init--
+embed_model, hubert_model = load_model()  # pre-load the models
+# gs_draw_volume_exe = "/data/gpu_env_common/bin/draw_volume"
+gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer"
+
+song_folder = "./data_meisheng/"     # where the songs are stored
+gs_work_dir = "./data_meisheng/tmp"  # temporary working directory
+abs_path = "/data/bingxiao.fang/voice_conversion/SVC_MEISHENG/Retrieval-based-Voice-Conversion-WebUI/data_meisheng/tmp/"
+
+
+def get_song_map():
+    """Collect the song names under the female/ and male/ folders."""
+    female_song_names = []
+    for song in glob.glob(song_folder + "female/*"):
+        female_song_names.append(song.replace(song_folder + "female/", ""))
+    male_song_names = []
+    for song in glob.glob(song_folder + "male/*"):
+        male_song_names.append(song.replace(song_folder + "male/", ""))
+    return {
+        "female": female_song_names,
+        "male": male_song_names,
+    }
+
+
+gs_song_map = get_song_map()
+gs_song_list_dropdown = None
+
+
+def song_select(gender):
+    # first output refreshes the dropdown choices, second sets its selected value
+    return gs_song_list_dropdown.update(choices=gs_song_map[gender]), gs_song_map[gender][0]
+
+
+def get_file_md5(filename):
+    with open(filename, "rb") as fp:
+        return hashlib.md5(fp.read()).hexdigest()
+
+
+def mix(in_path, acc_path, dst_path):
+    # transcode the SVC output to 44.1 kHz stereo
+    svc_442_file = in_path + "_442.wav"
+    st = time.time()
+    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(in_path, svc_442_file)
+    os.system(cmd)
+    if not os.path.exists(svc_442_file):
+        return -1
+    print("transcode,{},sp={}".format(in_path, time.time() - st))
+
+    # mix the converted vocal with the accompaniment
+    st = time.time()
+    cmd = "{} {} {} {} 1".format(gs_simple_mixer_path, svc_442_file, acc_path, dst_path)
+    os.system(cmd)
+    print("mixer,{},sp={}".format(in_path, time.time() - st))
+
+
+def process_svc(song_wav, target_wav, svc_out_path):
+    # the song keeps its original path; target and output are rebased into the work dir
+    target_wav = abs_path + os.path.basename(target_wav)
+    svc_out_path = abs_path + os.path.basename(svc_out_path)
+    embed_npy = target_wav[:-4] + '.npy'  # where the speaker embedding (.npy) is stored
+    meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy)
+    print("svc finished!!")
+
+
+def get_svc(train_audio_data, gender, song_name):
+    '''
+    :param train_audio_data: audio carrying the target timbre
+    :param gender: gender selection
+    :param song_name: song name
+    :return: path of the final mixed song
+    '''
+    if os.path.exists(gs_work_dir):  # clear the temporary directory
+        shutil.rmtree(gs_work_dir)
+    os.makedirs(gs_work_dir)
+
+    # move the target-timbre audio into the working directory
+    target_wav = train_audio_data
+    f_dst = os.path.join(gs_work_dir, os.path.basename(target_wav))
+    print("dir :", f_dst)
+    shutil.move(target_wav, f_dst)
+    target_wav = f_dst
+
+    # song vocal and accompaniment
+    inf_audio_path = "{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)  # vocal
+    inf_acc_path = "{}{}/{}/acc.wav".format(song_folder, gender, song_name)  # accompaniment
+    inf_out_path = os.path.join(gs_work_dir, "svc.wav")  # inference output file
+
+    print("svc out: {}".format(inf_out_path))
+
+    st = time.time()
+    song_wav, target_wav, svc_out_path = inf_audio_path, target_wav, inf_out_path
+
+    # copy the song vocal into the workspace
+    cmd = f"cp {song_wav} {gs_work_dir}"
+    os.system(cmd)
+
+    # svc
+    print("start inference...")
+    print("inputMsg:", song_wav, target_wav, svc_out_path)
+    process_svc(song_wav, target_wav, svc_out_path)
+
+    # post-processing: add reverberation
+    print("add reverberation...")
+    svc_out_path_effect = svc_out_path[:-4] + '_effect.wav'
+    cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}"
+    print("cmd :", cmd)
+    os.system(cmd)
+
+    # merge the vocal with the accompaniment
+    print("add acc...")
+    out_path = svc_out_path_effect[:-4] + '_music.wav'
+    mix(svc_out_path_effect, inf_acc_path, out_path)
+
+    print("time cost = {}".format(time.time() - st))
+
+    return out_path
+
+
+def main():
+    app = gr.Blocks()
+    with app:
+        # header / intro
+        gr.Markdown(value="""
+                    ### Sing with your own voice
+                    #### Instructions: record about 15s of audio on your phone, drag it onto this page, click generate, and wait about 15s
+                    #### The audio must not contain music; wav/m4a/mp4 and similar formats are supported
+
+                    Author: StarMaker audio & video team
+                    """)
+        train_audio = gr.Audio(label="input_audio", type="filepath")
+        gender = gr.Radio(choices=["female", "male"], value="female", label="gender")
+        global gs_song_list_dropdown
+        gs_song_list_dropdown = gr.Dropdown(choices=gs_song_map["female"], interactive=True, label="song list")
+        # switching gender refreshes both the dropdown choices and its selected value
+        gender.change(song_select, inputs=[gender], outputs=[gs_song_list_dropdown, gs_song_list_dropdown])
+        gen_btn = gr.Button("generate", variant="primary")
+
+        output_audio = gr.Audio(label="output", type="filepath")
+        gen_btn.click(fn=get_svc, inputs=[train_audio, gender, gs_song_list_dropdown], outputs=output_audio)
+
+    # queue(concurrency_count=1) ensures only one job runs on the server at a time
+    app.queue(concurrency_count=1, max_size=2044).launch(server_name="0.0.0.0", inbrowser=True, quiet=True,
+                                                         server_port=6768)
+
+
+if __name__ == '__main__':
+    main()