diff --git a/mnn_demo/src/CRvcLiteOnline.cpp b/mnn_demo/src/CRvcLiteOnline.cpp index 4f9c833..241c6b8 100644 --- a/mnn_demo/src/CRvcLiteOnline.cpp +++ b/mnn_demo/src/CRvcLiteOnline.cpp @@ -1,811 +1,811 @@ // // Created by Administrator on 2023/11/29. // #include #include #include #include "CRvcLiteOnline.h" #include "Hubert.h" #include "CSynthesizer.h" #include "espyin-v1.0/ESPYIN.h" #include "ThreadPool.h" #include "CRvcCircleBuffer.h" #include "FfmpegResampler.h" #include inline bool file_exists (const std::string& name) { return ( access( name.c_str(), F_OK ) != -1 ); } // size代表了buf的长度 void stereo2mono(float *input, int size, float *output) { for (int i = 0; i < size - 1; i += 2) { output[i / 2] = (input[i] + input[i + 1]) / 2; } } void mono2stereo(float *input, int size, float *output) { for (int i = 0; i < size; i++) { output[2 * i] = input[i]; output[2 * i + 1] = input[i]; } } CRvcLiteOnline::CRvcLiteOnline() { init_variable(); m_init = false; m_switch_model = false; // 输入部分需要的变量 // 要求输入的时间片长度,采样点数 m_input_block_frame = int(gs_block_time * gs_src_samplerate); // 推理时额外需要的长度 m_input_extra_frame = int(gs_extra_time * gs_src_samplerate); int zc = gs_src_samplerate / 100; // 10ms的点数 int input_corssfade_frame = int(gs_crossfade_time * gs_src_samplerate); // 推理时使用的buffer长度 m_input_predict_buf_frame = int(ceil((m_input_extra_frame + input_corssfade_frame + m_input_block_frame) * 1.0 / zc) * zc); // 推理时使用的buffer m_input_predict_buf = new float[m_input_predict_buf_frame]; memset(m_input_predict_buf, 0, sizeof(float) * m_input_predict_buf_frame); // 输出部分需要的变量 m_crossfade_frame = int(gs_crossfade_time * gs_dst_samplerate); m_output_block_frame = int(gs_block_time * gs_dst_samplerate); int output_extra_frame = int(gs_extra_time * gs_dst_samplerate); zc = gs_dst_samplerate / 100; m_output_cache_buf_frame = int(ceil((m_output_block_frame + m_crossfade_frame + output_extra_frame) * 1.0 / zc) * zc); m_output_cache_buf = new float[m_output_cache_buf_frame]; memset(m_output_cache_buf, 0, sizeof(float) * m_output_cache_buf_frame); m_crossfade_buf = new float[m_crossfade_frame]; memset(m_crossfade_buf, 0, sizeof(float) * m_crossfade_frame); // 对于模型的输入和输出进行缓存 // 此处是写死的和模型有关 m_hubert_ret.resize(1); m_hubert_ret[0].resize(gs_hubert_frame); for (int i = 0; i < gs_hubert_frame; i++) { m_hubert_ret[0][i].resize(gs_hubert_dim); } // synth模型的输入 m_synth_input.resize(1); m_synth_input[0].resize(gs_synth_input_frame); for (int i = 0; i < gs_synth_input_frame; i++) { m_synth_input[0][i].resize(gs_synth_input_dim); } m_synth_out.resize(1); m_synth_out[0].resize(1); m_synth_out[0][0].resize(gs_synth_output_frame); } CRvcLiteOnline::~CRvcLiteOnline() { uninit(); } /**********************************对内函数*********************************************/ void CRvcLiteOnline::uninit() { if (m_input_predict_buf != NULL) { delete[] m_input_predict_buf; m_input_predict_buf = NULL; } if (m_output_cache_buf != NULL) { delete[] m_output_cache_buf; m_output_cache_buf = NULL; } if (m_crossfade_buf != NULL) { delete[] m_crossfade_buf; m_crossfade_buf = NULL; } init_variable(); } void CRvcLiteOnline::get_pyin_f0() { for (int i = 0; i < m_input_predict_buf_frame; i += 160) { m_es_pyin->process(m_input_predict_buf + i); } m_f0_data.clear(); ESFeatureSet feats = m_es_pyin->getRemainingFeatures(); if (!feats.empty()) { m_f0_data.resize(feats[4].size()); for (size_t i = 0; i < feats[4].size(); ++i) { // JL_DEBUG m_f0_data[i] = feats[4][i].values[0]; if (m_f0_data[i] < 0) { m_f0_data[i] = 0; } } } m_es_pyin->reset(); get_f0_post(); } void CRvcLiteOnline::get_f0_post() { int f0_min = 50; int f0_max = 1100; float f0_mel_min = 1127 * log2(1 + f0_min * 1.0 / 700); float f0_mel_max = 1127 * log2(1 + f0_max * 1.0 / 700); m_f0_coarse_data.clear(); m_f0_coarse_data.resize(m_f0_data.size()); for (int i = 0; i < m_f0_data.size(); i++) { float f0_mel = 1127 * log2(1 + m_f0_data[i] / 700); if (f0_mel > 0) { f0_mel = (f0_mel - f0_mel_min) * 254.f / (f0_mel_max - f0_mel_min) + 1; } if (f0_mel <= 1) { f0_mel = 1; } else if (f0_mel > 255) { f0_mel = 255; } m_f0_coarse_data[i] = float(int(f0_mel + 0.5)); } } void CRvcLiteOnline::init_variable() { m_init = false; m_switch_model = false; // 缓存使用的数据 // 要求输入的时间片长度,采样点数 m_input_block_frame = 0; m_input_extra_frame = 0; m_input_predict_buf_frame = 0; m_input_predict_buf = nullptr; m_f0_data.clear(); m_f0_coarse_data.clear(); m_crossfade_frame = 0; m_output_block_frame = 0; m_output_cache_buf_frame = 0; m_crossfade_buf = nullptr; m_output_cache_buf = nullptr; // 各个实例的返回结果 m_hubert_ret.clear(); m_synth_input.clear(); m_synth_out.clear(); m_fade_in = true; } /**********************************对外函数*********************************************/ int CRvcLiteOnline::init(const char *hubert_model_path) { if (m_init) { return ERR_RVC_LITE_REINIT; } m_hubert_inst = std::make_shared(); m_synthesizer_inst = std::make_shared(); m_hubert_inst->init(hubert_model_path); // m_synthesizer_inst->init(synth_model_path); // 要求stepSize必须是2^n m_es_pyin = std::make_shared(16000, 160, 1024, 50, 1100); m_init = true; m_switch_model = false; m_fade_in = true; return ERR_RVC_LITE_SUCCESS; } int CRvcLiteOnline::switch_synth_model(const char *synth_model_path) { if (!m_init) { return ERR_RVC_LITE_NOT_INIT; } if (file_exists(synth_model_path)) { m_synthesizer_inst = std::make_shared(); m_synthesizer_inst->init(synth_model_path); m_switch_model = true; return ERR_RVC_LITE_SUCCESS; } return ERR_RVC_LITE_MODEL_NOT_EXISTS; } void CRvcLiteOnline::reset() { memset(m_input_predict_buf, 0, sizeof(float) * m_input_predict_buf_frame); memset(m_crossfade_buf, 0, sizeof(float) * m_crossfade_frame); memset(m_output_cache_buf, 0, sizeof(float) * m_output_cache_buf_frame); m_fade_in = true; } int CRvcLiteOnline::process_block(float *in_buf, int in_len, float *out_buf, int out_len) { if (!m_init) { return ERR_RVC_LITE_NOT_INIT; } if (!m_switch_model) { return ERR_RVC_LITE_NOT_SWITCH_MODEL; } // 外部数据产生不连贯,比如做了reset的时候,需要做fade_in if (m_fade_in) { for(int i = 0; i < in_len; i++) { float rate = i * 1.0 / in_len; in_buf[i] = in_buf[i] * rate; } m_fade_in = false; } // 剔除尾部的block的数据 memcpy(m_input_predict_buf, m_input_predict_buf + in_len, sizeof(float) * (m_input_predict_buf_frame - in_len)); // 向尾部填充in_buf的数据 memcpy(m_input_predict_buf + (m_input_predict_buf_frame - in_len), in_buf, sizeof(float) * in_len); // 提取f0特征序列 struct timeval start; struct timeval end; gettimeofday(&start, NULL); get_pyin_f0(); gettimeofday(&end, NULL); LOGE("CRvcLiteOnline", "get pyin sp = %f ms\n", (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0); // 推理hubert gettimeofday(&start, NULL); m_hubert_inst->process(m_input_predict_buf, m_hubert_ret); gettimeofday(&end, NULL); LOGE("CRvcLiteOnline", "m_hubert_inst sp = %f ms\n", (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0); // 合成语音 for (int i = 0; i < gs_synth_input_frame; i++) { // 拷贝数据 1,gs_hubert_frame,258 for (int j = 0; j < gs_hubert_dim; j++) { m_synth_input[0][i][j] = m_hubert_ret[0][i][j]; } m_synth_input[0][i][256] = m_f0_coarse_data[i]; m_synth_input[0][i][257] = m_f0_data[i]; } gettimeofday(&start, NULL); m_synthesizer_inst->process(m_synth_input, m_synth_out); gettimeofday(&end, NULL); LOGE("CRvcLiteOnline", "m_synthesizer_inst sp = %f ms\n", (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0); // 将结果全部放到缓存中 memcpy(m_output_cache_buf, m_output_cache_buf + gs_synth_output_frame, sizeof(float) * (m_output_cache_buf_frame - gs_synth_output_frame)); memcpy(m_output_cache_buf + (m_output_cache_buf_frame - gs_synth_output_frame), m_synth_out[0][0].data(), sizeof(float) * gs_synth_output_frame); int start_pos = m_output_cache_buf_frame - m_crossfade_frame - out_len; memcpy(out_buf, m_output_cache_buf + start_pos, sizeof(float) * out_len); // 对头部数据做fade_in以及fadeout for (int i = 0; i < m_crossfade_frame; i++) { float rate = float(i * 1.f / m_crossfade_frame); out_buf[i] = rate * out_buf[i] + m_crossfade_buf[i] * (1 - rate); } memcpy(m_crossfade_buf, m_output_cache_buf + (m_output_cache_buf_frame - m_crossfade_frame), sizeof(float) * m_crossfade_frame); return 0; } int CRvcLiteOnline::get_latency_ms() { return gs_crossfade_time * 1000; } /*******************************对内的类**************************************/ CResample::CResample() { m_resample_inst = nullptr; } CResample::~CResample() { } int CResample::init(int in_samplerate, int out_samplerate, int in_channel, int out_channel) { // 只是通道数不一致时走自驱逻辑 m_in_channel = in_channel; m_out_channel = out_channel; if (in_samplerate == out_samplerate && in_channel != out_channel) { m_resample_inst = nullptr; } else { m_resample_inst = std::make_shared(); return m_resample_inst->init(in_samplerate, out_samplerate, in_channel, out_channel); } return ERR_RVC_LITE_SUCCESS; } int CResample::get_out_samples(int num) { if (m_resample_inst) { return m_resample_inst->get_out_samples(num); } return num; } void CResample::reset() { if (m_resample_inst) { return m_resample_inst->reset(); } } int CResample::get_latency() { if (m_resample_inst) { return m_resample_inst->get_latency(); } return 0; } int CResample::resample(float *in_buf, int in_num, float *out_buf, int &out_num) { if (m_resample_inst) { return m_resample_inst->resample(in_buf, in_num, out_buf, out_num); } if (m_in_channel == 2 && m_out_channel == 1) { if (out_num < in_num) { return ERR_RVC_LITE_RT_RESAMPLE_OUTBUF_SHORT; } stereo2mono(in_buf, in_num, out_buf); return ERR_RVC_LITE_SUCCESS; } if (m_in_channel == 1 && m_out_channel == 2) { if (out_num < in_num) { return ERR_RVC_LITE_RT_RESAMPLE_OUTBUF_SHORT; } mono2stereo(in_buf, in_num, out_buf); return ERR_RVC_LITE_SUCCESS; } return ERR_RVC_LITE_SUCCESS; } /*******************************对外的类***************************************/ /*******************************对内函数***************************************/ void CRvcLiteOnlineRealTime::init_variable() { m_init = false; m_rvc_stop = true; m_sample_rate = 44100; m_channel = 1; m_synth_path = ""; m_new_synth_path = ""; m_syn_state = RVC_LITE_RT_SYN_STATE_DEFAULT; } /*******************************对外函数***************************************/ CRvcLiteOnlineRealTime::CRvcLiteOnlineRealTime() { init_variable(); } CRvcLiteOnlineRealTime::~CRvcLiteOnlineRealTime() { uninit(); } int CRvcLiteOnlineRealTime::init(const char *hubert_model_path, int sample_rate, int channel) { if (m_init) { return ERR_RVC_LITE_RT_REINIT; } if (sample_rate < 16000) { return ERR_RVC_LITE_RT_INPUT_SAMPLE_ERR; } init_variable(); m_sample_rate = sample_rate; m_channel = channel; m_synth_path = ""; m_new_synth_path = ""; m_syn_state = RVC_LITE_RT_SYN_STATE_DEFAULT; int output_one_sec_number = m_sample_rate * m_channel; // 临时使用的数据 int latency_len = gs_crossfade_time * m_sample_rate * m_channel; CThreadPool::Task task = std::bind(&CRvcLiteOnlineRealTime::rvc_process, this); m_rvc_inst = std::make_shared(); int err = m_rvc_inst->init(hubert_model_path); if (ERR_RVC_LITE_SUCCESS != err) { goto exit; } // 重采样部分 m_resample_queue = std::make_shared(sample_rate * 3 * m_channel); m_resample16 = std::make_shared(); err = m_resample16->init(m_sample_rate, gs_src_samplerate, m_channel, 1); if (ERR_RVC_LITE_SUCCESS != err) { goto exit; } m_resample2src = std::make_shared(); err = m_resample2src->init(gs_dst_samplerate, m_sample_rate, 1, m_channel); if (ERR_RVC_LITE_SUCCESS != err) { goto exit; } m_resample_buf_max_len = 2048; // 此时空间最大是2048,保证不超即可 m_resample_in_buf = std::shared_ptr(new float[m_resample_buf_max_len], std::default_delete()); m_resample_out_buf = std::shared_ptr(new float[m_resample_buf_max_len], std::default_delete()); // 核心处理部分 m_input_tmp_buf_len = gs_src_samplerate; m_output_tmp_buf_len = gs_dst_samplerate; m_input_tmp_buf = std::shared_ptr(new float[m_input_tmp_buf_len], std::default_delete()); m_output_tmp_buf = std::shared_ptr(new float[m_output_tmp_buf_len], std::default_delete()); memset(m_input_tmp_buf.get(), 0, sizeof(float) * m_input_tmp_buf_len); memset(m_output_tmp_buf.get(), 0, sizeof(float) * m_output_tmp_buf_len); // 循环buffer m_input_queue = std::make_shared(m_input_tmp_buf_len * 3); // 对外的是目标的采样率和通道数的数据 m_out_queue = std::make_shared(output_one_sec_number * 3); m_latency_queue = std::make_shared(latency_len); // 提前塞入两组,保证延迟稳定在2s for (int i = 0; i < 2; i++) { // 塞入1s数据 for (int j = 0; j < output_one_sec_number / m_output_tmp_buf_len; j++) { m_out_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len); } m_out_queue->push(m_output_tmp_buf.get(), output_one_sec_number % m_output_tmp_buf_len); } // 算法本身有延迟,所有为了保证延迟一致,在无效果的时候需要添加该延迟 for (int j = 0; j < latency_len / m_output_tmp_buf_len; j++) { m_latency_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len); } m_latency_queue->push(m_output_tmp_buf.get(), latency_len % m_output_tmp_buf_len); // 开始处理线程 m_thread_pool = std::make_shared(); m_thread_pool->start(1); m_rvc_stop = false; m_thread_pool->run(task); m_init = true; exit: if (ERR_RVC_LITE_SUCCESS != err) { m_init = true; uninit(); } return err; } int CRvcLiteOnlineRealTime::switch_synth(const char *synth_model_path) { if (!m_init) { return ERR_RVC_LITE_RT_NOT_INIT; } { std::unique_lock lock(m_rvc_mutex); m_new_synth_path = synth_model_path; } return ERR_RVC_LITE_SUCCESS; } int CRvcLiteOnlineRealTime::process(float *in_buf, int in_len, float *out_buf, int out_len) { if (!m_init) { return ERR_RVC_LITE_RT_NOT_INIT; } // 写入数据 { std::unique_lock lock(m_rvc_mutex); m_resample_queue->push(in_buf, in_len); m_rvc_cond.notify_all(); } memset(out_buf, 0, sizeof(float) * out_len); int tmp_out_len = out_len; // 获取数据 { std::unique_lock lock(m_rvc_mutex); m_out_queue->pop(out_buf, tmp_out_len); } if (tmp_out_len != out_len) { return ERR_RVC_LITE_RT_NOT_ENOUGH_DATA; } return ERR_RVC_LITE_SUCCESS; } void CRvcLiteOnlineRealTime::reset() { if (!m_init) { return; } { std::unique_lock lock(m_rvc_mutex); m_resample_queue->reset(); m_resample16->reset(); m_resample2src->reset(); m_input_queue->reset(); m_out_queue->reset(); m_rvc_inst->reset(); m_latency_queue->reset(); // 提前塞入两组,保证延迟稳定在2s int output_one_sec_number = m_sample_rate * m_channel; // 临时使用的数据 memset(m_output_tmp_buf.get(), 0, sizeof(float) * m_output_tmp_buf_len); for (int i = 0; i < 2; i++) { for (int j = 0; j < output_one_sec_number / m_output_tmp_buf_len; j++) { m_out_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len); } m_out_queue->push(m_output_tmp_buf.get(), output_one_sec_number % m_output_tmp_buf_len); } // 算法本身有延迟,所有为了保证延迟一致,在无效果的时候需要添加该延迟 int latency_len = gs_crossfade_time * m_sample_rate * m_channel; for (int j = 0; j < latency_len / m_output_tmp_buf_len; j++) { m_latency_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len); } m_latency_queue->push(m_output_tmp_buf.get(), latency_len % m_output_tmp_buf_len); } } void CRvcLiteOnlineRealTime::flush(float *&out_buf, int &len) { // 将内部的所有的数据吐出来 /** * 先停止 */ stop(); // 无音色转换的情况 int resample_in_len = 0; int resample_out_len = 0; if(m_syn_state == RVC_LITE_RT_SYN_STATE_DEFAULT) { while (m_resample_queue->size() > 0) { resample_in_len = m_resample_buf_max_len; m_resample_queue->pop(m_resample_in_buf.get(), resample_in_len); m_latency_queue->push(m_resample_in_buf.get(), resample_in_len); m_latency_queue->pop(m_resample_in_buf.get(), resample_in_len); m_out_queue->push(m_resample_in_buf.get(), resample_in_len); } - + while(m_latency_queue->size() > 0) { resample_in_len = m_resample_buf_max_len; m_latency_queue->pop(m_resample_in_buf.get(), resample_in_len); m_out_queue->push(m_resample_in_buf.get(), resample_in_len); } len = m_out_queue->size(); out_buf = new float[len]; m_out_queue->pop(out_buf, len); return; } // 有音色转换的情况 while (m_resample_queue->size() > 0) { resample_in_len = m_resample_buf_max_len; m_resample_queue->pop(m_resample_in_buf.get(), resample_in_len); // 输入的数据需要考虑channel resample_out_len = m_resample16->get_out_samples(resample_in_len / m_channel); m_resample16->resample(m_resample_in_buf.get(), resample_in_len / m_channel, m_resample_out_buf.get(), resample_out_len); // 输出是16k单声道,不需要考虑 m_input_queue->push(m_resample_out_buf.get(), resample_out_len); } memset(m_input_tmp_buf.get(), 0, sizeof(float) * m_input_tmp_buf_len); int add_size = m_input_tmp_buf_len - m_input_queue->size() % m_input_tmp_buf_len; if (add_size != 0 && add_size < m_input_tmp_buf_len) { m_input_queue->push(m_input_tmp_buf.get(), add_size); } int num = m_input_queue->size() / m_input_tmp_buf_len; for (int i = 0; i < num; i++) { rvc_process_step(); } // 将所有数据拷贝出来 len = m_out_queue->size(); out_buf = new float[len]; m_out_queue->pop(out_buf, len); } int CRvcLiteOnlineRealTime::get_latency_ms() { return m_rvc_inst->get_latency_ms() + 2000; } /*******************************对内函数***************************************/ void CRvcLiteOnlineRealTime::uninit() { if (!m_init) { return; } stop(); } void CRvcLiteOnlineRealTime::stop() { // 释放thread_pool的数据,先通知一下rvc_process,防止是在等待中 m_rvc_stop = true; if (m_thread_pool) { m_rvc_cond.notify_all(); m_thread_pool->stop(); } } void CRvcLiteOnlineRealTime::rvc_process_step() { struct timeval start; struct timeval end; int sample_out_len = 0; // 开始处理 if (m_input_queue->size() < m_input_tmp_buf_len) { return; } gettimeofday(&start, NULL); m_input_queue->pop(m_input_tmp_buf.get(), m_input_tmp_buf_len); m_rvc_inst->process_block(m_input_tmp_buf.get(), m_input_tmp_buf_len, m_output_tmp_buf.get(), m_output_tmp_buf_len); gettimeofday(&end, NULL); LOGD("RvcLite", "rvc_process process sp %f ms", (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0); // 重采样 // 考虑到此处采样率变大,但是最多也不到两倍,但是通道数有可能扩展到两倍,所以按照1/4进行设置 gettimeofday(&start, NULL); bool last = false; int step = m_resample_buf_max_len / 4; for (int i = 0; i < m_output_tmp_buf_len; i += step) { if (i + step >= m_output_tmp_buf_len) { step = m_output_tmp_buf_len - i; last = true; } // 此时的输入是单声道,采样点数量和总长度一致 sample_out_len = m_resample2src->get_out_samples(step); m_resample2src->resample(m_output_tmp_buf.get() + i, step, m_resample_out_buf.get(), sample_out_len); // 从有到无 if(last && m_syn_state == RVC_LITE_RT_SYN_STATE_EFFECT2DEFAULT) { // 因为不加音效也需要延迟对齐,所以此处只要做fade_out就行了 for(int ii =0; ii < sample_out_len * m_channel; ii+=m_channel) { float rate = ii * 1.0 / step; for(int jj = 0; jj < m_channel; jj++) { m_resample_out_buf.get()[ii+jj] = m_resample_out_buf.get()[ii+jj] * (1 - rate); } } m_syn_state = RVC_LITE_RT_SYN_STATE_BEFORE_DEFAULT; } { std::unique_lock lock(m_rvc_mutex); m_out_queue->push(m_resample_out_buf.get(), sample_out_len * m_channel); } } gettimeofday(&end, NULL); LOGD("RvcLite", "rvc_process re_resample sp %f ms", (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0); printf("finish ...\n"); } void CRvcLiteOnlineRealTime::rvc_process() { int sample_in_len; int sample_out_len = 0; while (!m_rvc_stop) { { // 重采样 std::unique_lock lock(m_rvc_mutex); if (m_resample_queue->size() < m_resample_buf_max_len) { // 睡眠前检查下情况 if (m_rvc_stop) { return; } m_rvc_cond.wait(lock); continue; } sample_in_len = m_resample_buf_max_len; m_resample_queue->pop(m_resample_in_buf.get(), sample_in_len); } /** * 此处有三种情况: * 因为无论哪种变换,有延迟的存在,导致输入的数据都是需要塞0进去,所以对当前的数据做fade_out即可 * 1. 无到有:对无到有的部分做个fade_out,对下一帧要塞入音效器的部分做fade_in * 2. 有到无:对无到有的部分做个fade_out,对下一帧要塞入音效器的部分做fade_in * 3. 有到有[这个不用考虑,内部自己做了处理] */ if (m_synth_path != m_new_synth_path) { // 从无到有,此时对本帧做fade_out,对下一帧输入做fade_in if(m_synth_path.empty() && !m_new_synth_path.empty()) { m_syn_state = RVC_LITE_RT_SYN_STATE_DEFAULT2EFFECT; } // 从有到无 if (!m_synth_path.empty() && m_new_synth_path.empty()) { m_syn_state = RVC_LITE_RT_SYN_STATE_EFFECT2DEFAULT; } { std::unique_lock lock(m_rvc_mutex); m_synth_path = m_new_synth_path; } m_rvc_inst->switch_synth_model(m_new_synth_path.c_str()); } // 刚切过来第一次做效果 if(m_syn_state == RVC_LITE_RT_SYN_STATE_BEFORE_DEFAULT) { // 刚从有到无,需要清空数据,以及对输入的队列添加fade_in m_latency_queue->reset(); // 算法本身有延迟,所有为了保证延迟一致,在无效果的时候需要添加该延迟 memset(m_output_tmp_buf.get(), 0, sizeof(float) * m_output_tmp_buf_len); int latency_len = gs_crossfade_time * m_sample_rate * m_channel; for (int j = 0; j < latency_len / m_output_tmp_buf_len; j++) { m_latency_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len); } m_latency_queue->push(m_output_tmp_buf.get(), latency_len % m_output_tmp_buf_len); // 对输入做fade_in for(int i = 0; i < sample_in_len; i+=m_channel) { float rate = i * 1.0 / sample_in_len; for(int j = 0; j < m_channel; j++) { m_resample_in_buf.get()[i+j] *= rate; } } m_syn_state = RVC_LITE_RT_SYN_STATE_DEFAULT; } // 不做效果 if(m_syn_state == RVC_LITE_RT_SYN_STATE_DEFAULT) { m_latency_queue->push(m_resample_in_buf.get(), sample_in_len); m_latency_queue->pop(m_resample_in_buf.get(), sample_in_len); { std::unique_lock lock(m_rvc_mutex); m_out_queue->push(m_resample_in_buf.get(), sample_in_len); } continue; } // 从无到有的转换 if (m_syn_state == RVC_LITE_RT_SYN_STATE_DEFAULT2EFFECT) { // 做fade_out for(int i = 0; i < sample_in_len; i+=m_channel) { float rate = i * 1.0 / sample_in_len; for(int j = 0; j < m_channel; j++) { m_resample_in_buf.get()[i+j] *= 1 - rate; } } m_latency_queue->push(m_resample_in_buf.get(), sample_in_len); m_latency_queue->pop(m_resample_in_buf.get(), sample_in_len); { std::unique_lock lock(m_rvc_mutex); m_out_queue->push(m_resample_in_buf.get(), sample_in_len); } // 此时对于rvc来说输入的数据不连贯了,所以清空内部数据重新搞 m_syn_state = RVC_LITE_RT_SYN_STATE_EFFECT; m_rvc_inst->reset(); continue; } // 重采样到16k,此处采样率变低,所以不会出现sample_out_len > sample_in_len的情况 sample_out_len = m_resample16->get_out_samples(sample_in_len / m_channel); m_resample16->resample(m_resample_in_buf.get(), sample_in_len / m_channel, m_resample_out_buf.get(), sample_out_len); m_input_queue->push(m_resample_out_buf.get(), sample_out_len); rvc_process_step(); } } \ No newline at end of file diff --git a/mnn_demo/third_party/espyin-v1.0/ESPYIN.h b/mnn_demo/third_party/espyin-v1.0/ESPYIN.h index b3c57ef..5d159a7 100644 --- a/mnn_demo/third_party/espyin-v1.0/ESPYIN.h +++ b/mnn_demo/third_party/espyin-v1.0/ESPYIN.h @@ -1,65 +1,65 @@ /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */ /* pYIN - A fundamental frequency estimator for monophonic audio Centre for Digital Music, Queen Mary, University of London. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. See the file COPYING included with this distribution for more information. */ #ifndef _ESPYIN_H_ #define _ESPYIN_H_ #include "ESYin.h" #include using std::map; struct ESFeature { std::vector values; }; typedef std::vector ESFeatureList; typedef std::map ESFeatureSet; class ESPYIN { public: ESPYIN(float inputSampleRate, size_t stepSize, size_t blockSize, size_t fmin, size_t fmax); - virtual ~ESPYIN(); + ~ESPYIN(); void reset(); void updata(int reserve_frame_num); ESFeatureSet process(const float * const inputBuffers); ESFeatureSet getRemainingFeatures(int reso_type=1); int getFrames(); protected: size_t m_stepSize; size_t m_blockSize; float m_fmin; float m_fmax; ESYin m_yin; mutable int m_oF0Candidates; mutable int m_oF0Probs; mutable int m_oVoicedProb; mutable int m_oCandidateSalience; mutable int m_oSmoothedPitchTrack; float m_threshDistr; float m_outputUnvoiced; vector > > m_pitchProb; }; #endif