diff --git a/mnn_demo/inc/CRvcLiteOnline.h b/mnn_demo/inc/CRvcLiteOnline.h
index ad62799..b4100aa 100644
--- a/mnn_demo/inc/CRvcLiteOnline.h
+++ b/mnn_demo/inc/CRvcLiteOnline.h
@@ -1,309 +1,318 @@
 //
 // Created by jianli.yang on 2023/11/29.
 //
 
 #ifndef MNN_DEMO_CRVCLITEONLINE_H
 #define MNN_DEMO_CRVCLITEONLINE_H
 
 
 #define DEBUG
 
 #ifdef __ANDROID__
 #include <android/log.h>
 
 #ifdef STRELEASE
 #define LOGD(...)
 #define LOGE(...)
 #else
 #define LOGD(TAG, ...) __android_log_print(ANDROID_LOG_DEBUG  , TAG, __VA_ARGS__)
 #define LOGE(TAG, ...) __android_log_print(ANDROID_LOG_ERROR  , TAG, __VA_ARGS__)
 #endif
 
 #else
 #ifdef DEBUG
 #define LOGD(TAG, ...)  printf("\nDebug: %s",TAG);printf(__VA_ARGS__);
 #define LOGE(TAG, ...)  printf("\nError: %s",TAG);printf(__VA_ARGS__);
 #else
 #define LOGD(TAG, ...)
 #define LOGE(TAG, ...)
 #endif
 
 #endif
 
 
 #include <mutex>
 #include <string>
 #include <memory>
 #include <vector>
 #include <condition_variable>
 
 #define gs_src_samplerate 16000
 #define gs_dst_samplerate 32000
 #define gs_crossfade_time 0.08  // 单位是s
 #define gs_block_time 1
 #define gs_extra_time 1
 #define gs_hubert_frame 206 // 和模型相关
 #define gs_hubert_dim 256 // 和模型相关
 #define gs_synth_input_frame 205 // 和模型相关
 #define gs_synth_input_dim 258 // 和模型相关
 #define gs_synth_output_frame 35840 // 和模型相关
 
 enum {
     ERR_RVC_LITE_SUCCESS = 0,
     ERR_RVC_LITE_NOT_INIT = 1,
     ERR_RVC_LITE_REINIT = 2,
     ERR_RVC_LITE_RT_REINIT = 3,
     ERR_RVC_LITE_RT_NOT_INIT = 4,
     ERR_RVC_LITE_RT_NOT_ENOUGH_DATA = 5,
     ERR_RVC_LITE_RT_INPUT_SAMPLE_ERR = 6, // 采样率小于16000
     ERR_RVC_LITE_RT_RESAMPLE_OUTBUF_SHORT = 7, // 重采样后的buf太短
-    ERR_RVC_LITE_NOT_SWITCH_MODEL = 8, // 重采样后的buf太短
+    ERR_RVC_LITE_NOT_SWITCH_MODEL = 8, // 没有选择音色模型
     ERR_RVC_LITE_MODEL_NOT_EXISTS = 9, // 没有人声模型
     ERR_RVC_LITE_BLOCK_TOO_LONG = 10, // 区块过大
 };
 
 const int RVC_LITE_RT_SYN_STATE_DEFAULT = 0;
 const int RVC_LITE_RT_SYN_STATE_EFFECT = 1;
 const int RVC_LITE_RT_SYN_STATE_DEFAULT2EFFECT = 2;
 const int RVC_LITE_RT_SYN_STATE_EFFECT2DEFAULT = 3;
 const int RVC_LITE_RT_SYN_STATE_BEFORE_DEFAULT = 4;
 
 class Hubert;
 
 class CSynthesizer;
 
 class ESPYIN;
 
 class CThreadPool;
 
 class CRvcCircleBuffer;
 
 class CFfmpegResampler;
 
 /**
  * Rvc轻量化实时推理代码
  * 要求输入16k的音频数据，输出是目标采样率的数据
  */
 class CRvcLiteOnline {
 
 public:
     CRvcLiteOnline();
 
     ~CRvcLiteOnline();
 
 private:
     void uninit();
 
     void get_f0_post();
 
     void get_pyin_f0();
 
     void init_variable();
 
 public:
     /**
      * 初始化函数
      * @param hubert_model_path
      * @return 0 表示正常
      */
     int init(const char *hubert_model_path);
 
     /**
      * 换音色模型
      * @param synth_model_path
      * @return
      */
     int switch_synth_model(const char* synth_model_path);
 
     /**
+     * Set the pitch shift (key change); the value is clamped to [-12, 12].
+     * @param key requested key change
+     */
+    void set_up_key(int key);
+
+    /**
      * 处理定长的一帧数据
      * 要求输入单声道16k音频
      * @param in_buf
      * @param in_len 长度小于等于gs_src_samplerate,最佳是gs_src_samplerate
      * @param out_buf
      * @param out_len 小于等于gs_dst_samplerate,最佳是gs_dst_samplerate[和输入有关,如果是32k，则恰好是输入的两倍]
      * @return 0 表示正常
      */
     int process_block(float *in_buf, int in_len, float *out_buf, int out_len);
 
     /**
      * 清空存储
      * @return
      */
     void reset();
 
 
     /**
      * 获取延迟时间
      * @return
      */
     int get_latency_ms();
 
 
 private:
     // 是否进行过init
     bool m_init;
     bool m_switch_model;
     std::shared_ptr<Hubert> m_hubert_inst;
     std::shared_ptr<CSynthesizer> m_synthesizer_inst;
     std::shared_ptr<ESPYIN> m_es_pyin;
 
     // 缓存使用的数据
     // 要求输入的时间片长度,采样点数
     int m_input_block_frame;
     // 推理时额外需要的长度
     int m_input_extra_frame;
     // 推理时使用的buffer长度
     int m_input_predict_buf_frame;
     // 推理时使用的buffer
     float *m_input_predict_buf;
 
     std::vector<float> m_f0_data;
     std::vector<float> m_f0_coarse_data;
     // 输出的情况
     int m_crossfade_frame;
     int m_output_block_frame;
     int m_output_cache_buf_frame;
     float *m_crossfade_buf;
     float *m_output_cache_buf;
 
     bool m_fade_in;
+
+    float m_f0_new_up_key;
+    float m_f0_up_key;
     // 各个实例的返回结果
     std::vector<std::vector<std::vector<float>>> m_hubert_ret;
     std::vector<std::vector<std::vector<float>>> m_synth_input;
     std::vector<std::vector<std::vector<float>>> m_synth_out;
 };
 
 class CResample {
 public:
     CResample();
     ~CResample();
 public:
     int init(int in_samplerate, int out_samplerate, int in_channel=1, int out_channel=1);
     // 返回的是单通道的采样点数
     int get_out_samples(int num);
     int get_latency();
     void reset();
     // 不考虑让内部缓存的情况，有多少拿多少,in_num和out_num均是单通道采样点个数
     int resample(float * in_buf, int in_num, float * out_buf, int & out_num);
 
 private:
     std::shared_ptr<CFfmpegResampler> m_resample_inst;
     int m_in_channel;
     int m_out_channel;
 };
 
 /**
  * 实时处理的类
  * 入一帧出一帧，允许非常短的帧做输入，延迟较高，在2s左右
  * 思路:
  * 1. 构造函数设置变量
  * 2. init初始化环境，开启处理线程
  * 3. process，每次送一帧，触发一次判断逻辑
  * 4. flush函数将输入的未处理的数据全部处理一次，联合之前没有被取出的数据一起刷出来
  * 5. 析构时关闭处理线程，并释放所有空间
  */
 class CRvcLiteOnlineRealTime {
 public:
     CRvcLiteOnlineRealTime();
 
     ~CRvcLiteOnlineRealTime();
 
 private:
     void init_variable();
 
     void rvc_process();
 
     void rvc_process_step();
 
     void uninit();
 
     void stop();
 
 public:
 
     /**
      * 初始化函数
      * @param hubert_model_path
      * @param sample_rate
      * @param channel
      * @return
      */
     int init(const char *hubert_model_path, int sample_rate, int channel);
 
     /**
      * 切换音色
      * @param synth_model_path
      * @return
      */
     int switch_synth(const char *synth_model_path);
 
     /**
      * 清空缓存
      */
     void reset();
 
     /**
      * 入一帧，出一帧，要求长度一致
      * 两者可以是同一块buffer
      * @param in_buf
      * @param in_len
      * @param out_buf
      * @param out_len
      * @return
      */
     int process(float *in_buf, int in_len, float *out_buf, int out_len);
 
     /**
      * 将所有处理好的结果获取出来
      * 因为不确定还有多少，所以由内部来开辟空间,外部进行释放
      * @return
      */
     void flush(float *&out_buf, int &len);
 
     /**
      * 获取延迟时间
      */
     int get_latency_ms();
 
 private:
     int m_sample_rate;
     int m_channel;
 
     std::shared_ptr<CRvcCircleBuffer> m_resample_queue;
     std::shared_ptr<CRvcCircleBuffer> m_input_queue;
     std::shared_ptr<CRvcCircleBuffer> m_out_queue;
     int m_input_tmp_buf_len;
     int m_output_tmp_buf_len;
     std::shared_ptr<float> m_input_tmp_buf;
     std::shared_ptr<float> m_output_tmp_buf;
 
     std::shared_ptr<CRvcLiteOnline> m_rvc_inst;
     std::shared_ptr<CThreadPool> m_thread_pool;
 
     // 逻辑变量
     bool m_init;
     // 处理线程相关
     bool m_rvc_stop;
     std::mutex m_rvc_mutex;
     std::condition_variable m_rvc_cond;
 
     // 重采样相关
     std::shared_ptr<CResample> m_resample16;
     std::shared_ptr<CResample> m_resample2src;
 
     int m_resample_buf_max_len;
     std::shared_ptr<float> m_resample_in_buf;
     std::shared_ptr<float> m_resample_out_buf;
 
     // 切换音色
     std::string m_synth_path;
     std::string m_new_synth_path;
 
     // 合成的状态
     int m_syn_state;
     // 延迟器
     std::shared_ptr<CRvcCircleBuffer> m_latency_queue;
 };
 
 #endif //MNN_DEMO_CRVCLITEONLINE_H
diff --git a/mnn_demo/inc/CRvcLiteOnlineV2.h b/mnn_demo/inc/CRvcLiteOnlineV2.h
new file mode 100644
index 0000000..5abeb15
--- /dev/null
+++ b/mnn_demo/inc/CRvcLiteOnlineV2.h
@@ -0,0 +1,105 @@
+//
+// Created by Administrator on 2024/1/22.
+//
+
+#ifndef MNN_DEMO_CRVCLITEONLINEV2_H
+#define MNN_DEMO_CRVCLITEONLINEV2_H
+#include "CRvcLiteSynthesizer.h"
+
+const int CRVC_V2_STATE_DEFAULT = 0;
+const int CRVC_V2_STATE_EFFECT = 1;
+const int CRVC_V2_STATE_DEFAULT2EFFECT = 2;
+const int CRVC_V2_STATE_EFFECT2DEFAULT = 3;
+
+/**
+ * Usage:
+ * After initialization, push, pop and switch_model can all be handled asynchronously (per the original author's note).
+ * For a concrete example see the usage in main.cpp.
+ */
+class CRvcLiteOnlineV2
+{
+public:
+    CRvcLiteOnlineV2();
+    ~CRvcLiteOnlineV2();
+
+private:
+    void set_cur_state(bool reset);
+
+public:
+    /**
+     * Initialize with the Hubert model, the sample rate and the channel count.
+     * @param hubert_model
+     * @param sample_rate
+     * @param channel
+     * @return 0 means success
+     */
+    int init(const char* hubert_model, int sample_rate, int channel);
+
+    /**
+     * Set the voice model path; an error code is returned if the voice model does not exist.
+     * @param synth_model
+     * @return 0 means success
+     */
+    int switch_model(const char* synth_model);
+
+    /**
+     * Set the pitch shift; the range is [-12, 12].
+     * Only takes effect when a voice model is present; otherwise it has no effect.
+     * The setting is kept across voice-model switches, and a value set while no voice model is present takes effect once one is.
+     * @param key
+     */
+    void set_up_key(int key);
+
+    /**
+     * Clear the cached data.
+     */
+    void reset();
+
+    /**
+     * Feed in voice data; blocking.
+     * @param buf address of the voice data [to save space this buffer is modified; the first frame after reset gets a fade-in]
+     * @param len buffer length, i.e. sample*channel
+     * @param last true marks the last frame: inference is run regardless, so that the result can be fetched
+     * @return 0 means success
+     */
+    int push(float* buf, int len, bool last=false);
+
+    /**
+     * Return the total amount of data currently available internally,
+     * counted as frame*channel.
+     * @return
+     */
+    int size();
+
+    /**
+     * Fetch the processed result.
+     * @param buf destination buffer address
+     * @param len length of buf; if less than len is available internally, len is updated on return to the amount returned; if more is available, at most len is returned
+     * len is a buffer length, i.e. frame*channel
+     */
+    void pop(float* buf, int& len);
+
+public:
+    // processing logic
+    std::shared_ptr<CRvcLiteSynthesizer> m_rvc_inst;
+    // input queue
+    std::shared_ptr<CRvcCircleBuffer> m_in_queue;
+    // output queue
+    std::shared_ptr<CRvcCircleBuffer> m_out_queue;
+    std::shared_ptr<float> m_tmp_in_buf;
+    std::shared_ptr<float> m_tmp_out_buf;
+    int m_tmp_buf_len;
+    int m_block_len;
+    bool m_reset;
+    std::string m_syn_model;
+    std::string m_new_syn_model;
+    int m_sync_state;
+    int m_fade_len;
+    int m_channel;
+    // gap between input and output
+    int m_input_latency_output_frame;
+    std::mutex m_rvc_mutex;
+};
+
+
+#endif //MNN_DEMO_CRVCLITEONLINEV2_H
diff --git a/mnn_demo/inc/CRvcLiteSynthesizer.h b/mnn_demo/inc/CRvcLiteSynthesizer.h
index bac8c21..cedcb2e 100644
--- a/mnn_demo/inc/CRvcLiteSynthesizer.h
+++ b/mnn_demo/inc/CRvcLiteSynthesizer.h
@@ -1,58 +1,82 @@
 //
 // Created by Administrator on 2024/1/21.
 //
 
 #ifndef MNN_DEMO_CRVCLITESYNTHESIZER_H
 #define MNN_DEMO_CRVCLITESYNTHESIZER_H
 #include "CRvcLiteOnline.h"
 
 class CRvcLiteSynthesizer
 {
 public:
     CRvcLiteSynthesizer();
     ~CRvcLiteSynthesizer();
 
 public:
     /**
      * 初始化
      * @param hubert_model 语义模型地址
-     * @param synth_model 音色模型地址
      * @param sample_rate 采样率
      * @param channel 通道数
      * @return 0 表示正常
      */
-    int init(const char* hubert_model, const char* synth_model, int sample_rate, int channel);
+    int init(const char* hubert_model, int sample_rate, int channel);
+
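+    /**
+     * Select the voice (timbre) model used for synthesis.
+     * @param synth_model path to the timbre model file
+     * @return an int status code; presumably 0 (ERR_RVC_LITE_SUCCESS) on success,
+     *         as with init() -- confirm against the implementation
+     */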
+    int switch_model(const char* synth_model);
+
+    /**
+     * 设置变调，范围是[-12, 12]
+     * 有人声模型才生效，否则不生效
+     * 换人声模型，该状态不会丢失，并且在无人声的时候设置之后，有人声模型后也会生效
+     * @param key
+     */
+    void set_up_key(int key);
+
+    /**
+     * reset，清空内部数据
+     */
+    void reset();
 
     /**
      * 处理逻辑
      * @param in_buf 输入的buf
      * @param in_len 输入的Buf长度，frame*channel，建议输入小于等于1s的音频长度，尽量的大就好
      * @param out_buf 输出的buf
      * @param out_len 输出的buf长度, frame*channel
      * 注意: 此处有可能出现输出的长度不一定等于in_len,输出的值会小于等于out_len,但是是连续的，所以out_len可以适当比in_len大一些，从而保证都能搞出来
      * @return
      */
     int process(float* in_buf, int in_len, float* out_buf, int &out_len);
 
-    // 获取实时率，处理1s数据的真实耗时/1s
+    /**
+     * 获取实时率，处理1s数据的真实耗时/1s
+     * @return
+     */
     float get_rtf();
 
 private:
     std::shared_ptr<CRvcLiteOnline> m_rvc_inst;
     std::shared_ptr<CResample> m_resample2_16;
     std::shared_ptr<CResample> m_resample2src;
     int m_channel;
     int m_sample_rate;
     std::shared_ptr<float> m_buf_tmp_16k;
     int m_buf_tmp_16k_len;
     int m_buf_tmp_16k_cap;
     std::shared_ptr<float> m_buf_tmp_32k;
     int m_buf_tmp_32k_len;
     int m_buf_tmp_32k_cap;
     std::shared_ptr<float> m_buf_tmp_src;
     int m_buf_tmp_src_len;
     int m_buf_tmp_src_cap;
+    bool m_first;
 };
 
 
 #endif //MNN_DEMO_CRVCLITESYNTHESIZER_H
diff --git a/mnn_demo/main.cpp b/mnn_demo/main.cpp
index 8aa637d..0d6b685 100644
--- a/mnn_demo/main.cpp
+++ b/mnn_demo/main.cpp
@@ -1,221 +1,285 @@
 #include <sys/time.h>
 #include <thread>
 #include <chrono>
 #include "src/Hubert.h"
 #include "src/CSynthesizer.h"
 #include "CRvcLiteSynthesizer.h"
+#include "CRvcLiteOnlineV2.h"
 int test_hubert() {
     const char *hubert_model_path = "/mnt/d/dataset/svc/models/mnn/hubert_test_v1_fp16.mnn";
     Hubert hubert;
     int err_code = hubert.init(hubert_model_path);
     std::vector<float> input(33280, 0.1);
     std::vector<std::vector<std::vector<float>>> ret;
     ret.resize(1);
     ret[0].resize(205);
     for (int i = 0; i < 205; i++) {
         ret[0][i].resize(256);
     }
     float time = hubert.process(input.data(), ret);
     return 0;
 }
 
 int test_contentvec() {
     const char *contentvec_model_path = "/mnt/d/dataset/svc/models/mnn/contentvec_test_fp16.mnn";
     CSynthesizer contentVec;
     int err_code = contentVec.init(contentvec_model_path);
     std::vector<std::vector<std::vector<float>>> input(1);
     input[0].resize(205);
     for (int i = 0; i < 205; i++) {
         for (int j = 0; j < 258; j++) {
             if (j == 256) {
                 input[0][i].push_back(0.2);
             } else if (j == 257) {
                 input[0][i].push_back(1.0);
             } else {
                 input[0][i].push_back(0.1);
             }
         }
     }
 
     std::vector<std::vector<std::vector<float>>> ret;
     ret.resize(1);
     for (int i = 0; i < 1; i++) {
         ret[i].resize(1);
         ret[i][0].resize(35840);
     }
 
     float tot = 0.f;
     for (int i = 0; i < 10; i++) {
         float time = contentVec.process(input, ret);
         tot += time;
     }
     printf("time: %f \n", tot / 100.f);
     return 0;
 }
 
 #include "CRvcLiteOnline.h"
 #include "av_waves/waves/inc/STWaveFile.h"
 
 void test() {
     const char *hubert_model_path = "/mnt/d/dataset/svc/models/mnn/hubert_test_v2_fp16.mnn";
     const char *contentvec_model_path = "/mnt/d/dataset/svc/models/mnn/contentvec_test_fp16.mnn";
     const char *in_wav = "/mnt/d/dataset/svc/dataset/tests/rainy_day321_01_16.wav";
 //    const char *in_wav = "/mnt/d/code/develop/svc/Retrieval-based-Voice-Conversion-WebUI/online/1_1.wav";
     const char *out_wav = "/mnt/d/dataset/svc/dataset/tests/rainy_day321_01_cpp_v1.wav";
 
     CRvcLiteOnline rvc_inst;
     rvc_inst.init(hubert_model_path);
 
     // 读取音频文件, 要求16k,单声道
     STCWaveFile wav_inst(in_wav, false);
     int sample_rate = wav_inst.GetSampleRate();
     int channel = wav_inst.GetChannels();
     int len = wav_inst.GetTotalFrames() * channel;
     float *data = new float[len];
     float *outdata = new float[len * 2];
     wav_inst.ReadFrameAsfloat(data, wav_inst.GetTotalFrames());
     int step = sample_rate;
     printf("start ..\n");
     for (int i = 0; i < len; i += step) {
         if (i + step > len) {
             step = len - i;
         }
         struct timeval start;
         struct timeval end;
         gettimeofday(&start, NULL);
         rvc_inst.process_block(data + i, step, outdata + 2 * i, 2 * step);
         gettimeofday(&end, NULL);
         printf("sp = %f ms\n", (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0);
     }
     STCWaveFile wav_out_inst(out_wav, true);
     wav_out_inst.SetSampleRate(32000);
     wav_out_inst.SetChannels(1);
     wav_out_inst.SetSampleFormat(SF_IEEE_FLOAT);
     wav_out_inst.SetupDone();
     wav_out_inst.WriteFrame(outdata, len * 2);
     printf("finish2 ....\n");
 }
 
 
 void test_rvc_lite_synth()
 {
     const char *hubert_model_path = "/mnt/d/dataset/svc/models/layers_3/layer3_contentvec.mnn";
     const char *syz_model = "/mnt/d/dataset/svc/models/layers_3/layer3_syz.mnn";
     const char *out_wav = "/mnt/d/dataset/tmp/i_out3.wav";
     const char *in_wav = "/mnt/d/dataset/tmp/t1.wav";
 
     STCWaveFile wav_inst(in_wav, false);
     int sample_rate = wav_inst.GetSampleRate();
     int channel = wav_inst.GetChannels();
     int len = wav_inst.GetTotalFrames() * channel;
     float *data = new float[len];
     float *outdata = new float[len];
     wav_inst.ReadFrameAsfloat(data, wav_inst.GetTotalFrames());
     CRvcLiteSynthesizer m_rvc_inst;
-    int err = m_rvc_inst.init(hubert_model_path, syz_model, sample_rate, channel);
+    int err = m_rvc_inst.init(hubert_model_path, sample_rate, channel);
     printf("init err=%d!\n", err);
     printf("rtf=%f\n", m_rvc_inst.get_rtf());
     int step = sample_rate * channel - 100 * channel;
     int out_len = 0;
     for(int i = 0; i < len; i+=step)
     {
         if (i + step > len) {
             step = len - i;
         }
         int out_step = step;
         err = m_rvc_inst.process(data+i, step, outdata+out_len, out_step);
         if(err != ERR_RVC_LITE_SUCCESS)
         {
-            printf("process err!\n");
+            printf("process err=%d!\n", err);
             return ;
         }
         out_len += out_step;
     }
     STCWaveFile wav_out_inst(out_wav, true);
     wav_out_inst.SetSampleRate(sample_rate);
     wav_out_inst.SetChannels(channel);
     wav_out_inst.SetSampleFormat(SF_IEEE_FLOAT);
     wav_out_inst.SetupDone();
     wav_out_inst.WriteFrame(outdata, wav_inst.GetTotalFrames());
     delete[] data;
     delete[] outdata;
 }
 
+void test_rvc_lite_v2()
+{
+    // Smoke test for CRvcLiteOnlineV2: stream a wav through push()/pop(), switching model and resetting mid-way.
+    const char *hubert_model_path = "/mnt/d/dataset/svc/models/layers_3/layer3_contentvec.mnn";
+    const char *syz_model = "/mnt/d/dataset/svc/models/layers_3/layer3_syz.mnn";
+    const char *out_wav = "/mnt/d/dataset/tmp/i_out_01_r.wav";
+    const char *in_wav = "/mnt/d/dataset/tmp/t1.wav";
+
+    STCWaveFile wav_inst(in_wav, false);
+    int sample_rate = wav_inst.GetSampleRate();
+    int channel = wav_inst.GetChannels();
+    int len = wav_inst.GetTotalFrames() * channel;
+    // Vectors own the buffers (no leak on the early return) and zero-fill them (no garbage in the output tail).
+    std::vector<float> data(len);
+    std::vector<float> outdata(len);
+    wav_inst.ReadFrameAsfloat(data.data(), wav_inst.GetTotalFrames());
+    CRvcLiteOnlineV2 m_rvc_inst;
+    int err = m_rvc_inst.init(hubert_model_path, sample_rate, channel);
+    printf("init err=%d!\n", err);
+    int step = sample_rate * channel - 100 * channel;
+    int out_len = 0;
+    bool last = false;
+    int flag = 0;
+    for(int i = 0; i < len; i+=step)
+    {
+        // ">=" so the final block is flagged as last even when it ends exactly at len.
+        if (i + step >= len) {
+            step = len - i;
+            last = true;
+        }
+        err = m_rvc_inst.push(data.data()+i, step, last);
+        if(err != ERR_RVC_LITE_SUCCESS)
+        {
+            printf("process err=%d!\n", err);
+            return ;
+        }
+
+        if (i >= len / 3 && flag == 0)
+        {
+            flag = 1;
+            m_rvc_inst.switch_model(syz_model);
+        }
+        if (i >= len / 2 && flag == 1)
+        {
+            flag = 2;
+            m_rvc_inst.reset();
+        }
+
+        // Offer pop() up to 2*step floats, but never more than outdata can still hold.
+        int out_step = 2 * step;
+        if (out_step > len - out_len) out_step = len - out_len;
+        m_rvc_inst.pop(outdata.data()+out_len, out_step);
+        out_len += out_step;
+    }
+    STCWaveFile wav_out_inst(out_wav, true);
+    wav_out_inst.SetSampleRate(sample_rate);
+    wav_out_inst.SetChannels(channel);
+    wav_out_inst.SetSampleFormat(SF_IEEE_FLOAT);
+    wav_out_inst.SetupDone();
+    // Same length as the input; frames pop() never produced are silence thanks to the zero-filled buffer.
+    wav_out_inst.WriteFrame(outdata.data(), wav_inst.GetTotalFrames());
+}
 
 void test_rvc_lite_online() {
 //    const char *hubert_model_path = "/mnt/d/dataset/svc/models/mnn/hubert_test_v2_fp16.mnn";
 //    const char *hubert_model_path = "/mnt/d/dataset/svc/models/layer6_bingxiao_v1/mnn/layers6_checkpoint_14_1660000_1_hubert.mnn";
     const char *hubert_model_path = "/mnt/d/dataset/svc/models/layers_3/layer3_contentvec.mnn";
 //    const char *contentvec_model_path = "/mnt/d/dataset/svc/models/mnn/contentvec_test_fp16.mnn";
 //    const char *syz_model = "/mnt/d/dataset/svc/models/layer6_bingxiao_v1/mnn/xusong_v1_6hubert_hifix_syz_base_vctk_kd_32k_hubert6_jianli_e225_s62775_205.mnn";
     const char *xs_model = "/mnt/d/dataset/svc/models/layers_3/layer3_xusong.mnn";
     const char *syz_model = "/mnt/d/dataset/svc/models/layers_3/layer3_syz.mnn";
 //    const char *contentvec_model_path = "/mnt/d/dataset/svc/models/layer6_bingxiao_v1/mnn/xiafan_fp16.mnn";
 
 //    const char *in_wav = "/mnt/d/dataset/svc/dataset/tests/rainy_day321_01.wav";
     const char *in_wav = "/mnt/d/dataset/tmp/t1.wav";
 //    const char* in_wav = "/mnt/d/dataset/svc/dataset/短数据样本/男声/qiankun.wav";
 //    const char* in_wav = "/mnt/d/dataset/tmp/i.wav";
 //    const char *in_wav = "/mnt/d/code/develop/svc/Retrieval-based-Voice-Conversion-WebUI/online/1_1.wav";
 //    const char *out_wav = "/mnt/d/dataset/svc/dataset/tests/rainy_day321_01_cpp_v4.wav";
 //    const char *out_wav = "/mnt/d/dataset/svc/dataset/tests/qiankun_412_v4.wav";
     const char *out_wav = "/mnt/d/dataset/tmp/i_out2.wav";
 
     // 读取音频文件, 要求16k,单声道
     STCWaveFile wav_inst(in_wav, false);
     int sample_rate = wav_inst.GetSampleRate();
     int channel = wav_inst.GetChannels();
     int len = wav_inst.GetTotalFrames() * channel;
     float *data = new float[len];
     float *outdata = new float[len];
 
     CRvcLiteOnlineRealTime rvc_inst;
     rvc_inst.init(hubert_model_path, sample_rate, channel);
 
     wav_inst.ReadFrameAsfloat(data, wav_inst.GetTotalFrames());
     int step = 1024;
     printf("start ..\n");
     bool flag = true;
     rvc_inst.switch_synth(syz_model);
     for (int i = 0; i < len; i += step) {
         if (i + step > len) {
             step = len - i;
         }
         struct timeval start;
         struct timeval end;
         gettimeofday(&start, NULL);
         int ret = rvc_inst.process(data + i, step, outdata+i, step);
         std::this_thread::sleep_for(std::chrono::milliseconds (15));
         gettimeofday(&end, NULL);
         printf("ret = %d, sp = %f ms step=%d\n", ret,
                (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0, step);
 
         if (flag && i >= len / 3) {
             flag = false;
             rvc_inst.reset();
 //            rvc_inst.switch_synth(xs_model);
         }
     }
     STCWaveFile wav_out_inst(out_wav, true);
     wav_out_inst.SetSampleRate(sample_rate);
     wav_out_inst.SetChannels(channel);
     wav_out_inst.SetSampleFormat(SF_IEEE_FLOAT);
     wav_out_inst.SetupDone();
     wav_out_inst.WriteFrame(outdata, wav_inst.GetTotalFrames());
 
     float* flush_data;
     int flush_len;
     rvc_inst.flush(flush_data, flush_len);
     wav_out_inst.WriteFrame(flush_data, flush_len/channel);
     printf("finish2 ....\n");
 }
 
 int main() {
 //    int ret_hubert = test_hubert();
 //    int ret_contentvec = test_contentvec();
 //    test();
 //    test();
 //    test_rvc_lite_online();
-    test_rvc_lite_synth();
+//    test_rvc_lite_synth();
+    test_rvc_lite_v2();
     return 0;
 }
diff --git a/mnn_demo/src/CRvcLiteOnline.cpp b/mnn_demo/src/CRvcLiteOnline.cpp
index 241c6b8..f9067f7 100644
--- a/mnn_demo/src/CRvcLiteOnline.cpp
+++ b/mnn_demo/src/CRvcLiteOnline.cpp
@@ -1,811 +1,831 @@
 //
 // Created by Administrator on 2023/11/29.
 //
 
 #include <cmath>
 #include <cstring>
 #include <sys/time.h>
 #include "CRvcLiteOnline.h"
 #include "Hubert.h"
 #include "CSynthesizer.h"
 #include "espyin-v1.0/ESPYIN.h"
 #include "ThreadPool.h"
 #include "CRvcCircleBuffer.h"
 #include "FfmpegResampler.h"
 #include <unistd.h>
 
 inline bool file_exists (const std::string& name) {
     return ( access( name.c_str(), F_OK ) != -1 );
 }
 
 // size代表了buf的长度
 void stereo2mono(float *input, int size, float *output) {
     for (int i = 0; i < size - 1; i += 2) {
         output[i / 2] = (input[i] + input[i + 1]) / 2;
     }
 }
 
 void mono2stereo(float *input, int size, float *output) {
     for (int i = 0; i < size; i++) {
         output[2 * i] = input[i];
         output[2 * i + 1] = input[i];
     }
 }
 
 
 CRvcLiteOnline::CRvcLiteOnline() {
     init_variable();
     m_init = false;
     m_switch_model = false;
 
     // 输入部分需要的变量
     // 要求输入的时间片长度,采样点数
     m_input_block_frame = int(gs_block_time * gs_src_samplerate);
     // 推理时额外需要的长度
     m_input_extra_frame = int(gs_extra_time * gs_src_samplerate);
     int zc = gs_src_samplerate / 100; // 10ms的点数
     int input_corssfade_frame = int(gs_crossfade_time * gs_src_samplerate);
 
     // 推理时使用的buffer长度
     m_input_predict_buf_frame = int(ceil((m_input_extra_frame + input_corssfade_frame + m_input_block_frame)
                                          * 1.0 / zc) * zc);
     // 推理时使用的buffer
     m_input_predict_buf = new float[m_input_predict_buf_frame];
     memset(m_input_predict_buf, 0, sizeof(float) * m_input_predict_buf_frame);
 
     // 输出部分需要的变量
     m_crossfade_frame = int(gs_crossfade_time * gs_dst_samplerate);
     m_output_block_frame = int(gs_block_time * gs_dst_samplerate);
     int output_extra_frame = int(gs_extra_time * gs_dst_samplerate);
     zc = gs_dst_samplerate / 100;
     m_output_cache_buf_frame = int(ceil((m_output_block_frame + m_crossfade_frame + output_extra_frame)
                                         * 1.0 / zc) * zc);
     m_output_cache_buf = new float[m_output_cache_buf_frame];
     memset(m_output_cache_buf, 0, sizeof(float) * m_output_cache_buf_frame);
     m_crossfade_buf = new float[m_crossfade_frame];
     memset(m_crossfade_buf, 0, sizeof(float) * m_crossfade_frame);
 
     // 对于模型的输入和输出进行缓存
     // 此处是写死的和模型有关
     m_hubert_ret.resize(1);
     m_hubert_ret[0].resize(gs_hubert_frame);
     for (int i = 0; i < gs_hubert_frame; i++) {
         m_hubert_ret[0][i].resize(gs_hubert_dim);
     }
 
     // synth模型的输入
     m_synth_input.resize(1);
     m_synth_input[0].resize(gs_synth_input_frame);
     for (int i = 0; i < gs_synth_input_frame; i++) {
         m_synth_input[0][i].resize(gs_synth_input_dim);
     }
 
     m_synth_out.resize(1);
     m_synth_out[0].resize(1);
     m_synth_out[0][0].resize(gs_synth_output_frame);
 }
 
 CRvcLiteOnline::~CRvcLiteOnline() {
     uninit();
 }
 
 /**********************************对内函数*********************************************/
 void CRvcLiteOnline::uninit() {
     if (m_input_predict_buf != NULL) {
         delete[] m_input_predict_buf;
         m_input_predict_buf = NULL;
     }
     if (m_output_cache_buf != NULL) {
         delete[] m_output_cache_buf;
         m_output_cache_buf = NULL;
     }
     if (m_crossfade_buf != NULL) {
         delete[] m_crossfade_buf;
         m_crossfade_buf = NULL;
     }
     init_variable();
 }
 
 void CRvcLiteOnline::get_pyin_f0() {
     for (int i = 0; i < m_input_predict_buf_frame; i += 160) {
         m_es_pyin->process(m_input_predict_buf + i);
     }
     m_f0_data.clear();
     ESFeatureSet feats = m_es_pyin->getRemainingFeatures();
     if (!feats.empty()) {
         m_f0_data.resize(feats[4].size());
         for (size_t i = 0; i < feats[4].size(); ++i) {
-            // JL_DEBUG
-            m_f0_data[i] = feats[4][i].values[0];
+            // 设置变调
+            m_f0_data[i] = feats[4][i].values[0] * m_f0_up_key;
             if (m_f0_data[i] < 0) {
                 m_f0_data[i] = 0;
             }
         }
     }
     m_es_pyin->reset();
     get_f0_post();
 }
 
 void CRvcLiteOnline::get_f0_post() {
     int f0_min = 50;
     int f0_max = 1100;
     float f0_mel_min = 1127 * log2(1 + f0_min * 1.0 / 700);
     float f0_mel_max = 1127 * log2(1 + f0_max * 1.0 / 700);
     m_f0_coarse_data.clear();
     m_f0_coarse_data.resize(m_f0_data.size());
     for (int i = 0; i < m_f0_data.size(); i++) {
         float f0_mel = 1127 * log2(1 + m_f0_data[i] / 700);
         if (f0_mel > 0) {
             f0_mel = (f0_mel - f0_mel_min) * 254.f / (f0_mel_max - f0_mel_min) + 1;
         }
         if (f0_mel <= 1) {
             f0_mel = 1;
         } else if (f0_mel > 255) {
             f0_mel = 255;
         }
         m_f0_coarse_data[i] = float(int(f0_mel + 0.5));
     }
 }
 
 void CRvcLiteOnline::init_variable() {
     m_init = false;
     m_switch_model = false;
     // 缓存使用的数据
     // 要求输入的时间片长度,采样点数
     m_input_block_frame = 0;
     m_input_extra_frame = 0;
     m_input_predict_buf_frame = 0;
     m_input_predict_buf = nullptr;
 
     m_f0_data.clear();
     m_f0_coarse_data.clear();
 
     m_crossfade_frame = 0;
     m_output_block_frame = 0;
     m_output_cache_buf_frame = 0;
     m_crossfade_buf = nullptr;
     m_output_cache_buf = nullptr;
 
     // 各个实例的返回结果
     m_hubert_ret.clear();
     m_synth_input.clear();
     m_synth_out.clear();
 
     m_fade_in = true;
+    m_f0_up_key = 1.f;
+    m_f0_new_up_key = 1.f;
 }
 
 /**********************************对外函数*********************************************/
 int CRvcLiteOnline::init(const char *hubert_model_path) {
     if (m_init) {
         return ERR_RVC_LITE_REINIT;
     }
 
     m_hubert_inst = std::make_shared<Hubert>();
     m_synthesizer_inst = std::make_shared<CSynthesizer>();
     m_hubert_inst->init(hubert_model_path);
 //    m_synthesizer_inst->init(synth_model_path);
     // 要求stepSize必须是2^n
     m_es_pyin = std::make_shared<ESPYIN>(16000, 160, 1024, 50, 1100);
 
     m_init = true;
     m_switch_model = false;
     m_fade_in = true;
+    m_f0_up_key = 1.f;
+    m_f0_new_up_key = 1.f;
     return ERR_RVC_LITE_SUCCESS;
 }
 
 int CRvcLiteOnline::switch_synth_model(const char *synth_model_path) {
     if (!m_init) {
         return ERR_RVC_LITE_NOT_INIT;
     }
 
     if (file_exists(synth_model_path))
     {
         m_synthesizer_inst = std::make_shared<CSynthesizer>();
         m_synthesizer_inst->init(synth_model_path);
         m_switch_model = true;
         return ERR_RVC_LITE_SUCCESS;
     }
     return ERR_RVC_LITE_MODEL_NOT_EXISTS;
 }
 
+void CRvcLiteOnline::set_up_key(int key)
+{
+    // Limit the requested shift to [-12, 12] before converting it to a ratio.
+    int semitones = key;
+    if (semitones < -12) {
+        semitones = -12;
+    } else if (semitones > 12) {
+        semitones = 12;
+    }
+    // Store the ratio 2^(semitones/12) as the pending value; process_block()
+    // copies it into m_f0_up_key right before it extracts f0.
+    m_f0_new_up_key = pow(2, semitones / 12.f);
+}
+
 
 void CRvcLiteOnline::reset() {
     memset(m_input_predict_buf, 0, sizeof(float) * m_input_predict_buf_frame);
     memset(m_crossfade_buf, 0, sizeof(float) * m_crossfade_frame);
     memset(m_output_cache_buf, 0, sizeof(float) * m_output_cache_buf_frame);
     m_fade_in = true;
 }
 
 int CRvcLiteOnline::process_block(float *in_buf, int in_len, float *out_buf, int out_len) {
     if (!m_init) {
         return ERR_RVC_LITE_NOT_INIT;
     }
 
     if (!m_switch_model)
     {
         return ERR_RVC_LITE_NOT_SWITCH_MODEL;
     }
 
     // 外部数据产生不连贯，比如做了reset的时候，需要做fade_in
     if (m_fade_in)
     {
         for(int i = 0; i < in_len; i++)
         {
             float rate = i * 1.0 / in_len;
             in_buf[i] = in_buf[i] * rate;
         }
         m_fade_in = false;
     }
 
     // 剔除尾部的block的数据
     memcpy(m_input_predict_buf, m_input_predict_buf + in_len,
            sizeof(float) * (m_input_predict_buf_frame - in_len));
     // 向尾部填充in_buf的数据
     memcpy(m_input_predict_buf + (m_input_predict_buf_frame - in_len), in_buf,
            sizeof(float) * in_len);
 
     // 提取f0特征序列
     struct timeval start;
     struct timeval end;
     gettimeofday(&start, NULL);
+    m_f0_up_key = m_f0_new_up_key;
     get_pyin_f0();
     gettimeofday(&end, NULL);
     LOGE("CRvcLiteOnline", "get pyin sp = %f ms\n",
          (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0);
 
     // 推理hubert
     gettimeofday(&start, NULL);
     m_hubert_inst->process(m_input_predict_buf, m_hubert_ret);
     gettimeofday(&end, NULL);
     LOGE("CRvcLiteOnline", "m_hubert_inst sp = %f ms\n",
          (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0);
 
     // 合成语音
     for (int i = 0; i < gs_synth_input_frame; i++) {
         // 拷贝数据 1,gs_hubert_frame,258
         for (int j = 0; j < gs_hubert_dim; j++) {
             m_synth_input[0][i][j] = m_hubert_ret[0][i][j];
         }
         m_synth_input[0][i][256] = m_f0_coarse_data[i];
         m_synth_input[0][i][257] = m_f0_data[i];
     }
     gettimeofday(&start, NULL);
     m_synthesizer_inst->process(m_synth_input, m_synth_out);
     gettimeofday(&end, NULL);
     LOGE("CRvcLiteOnline", "m_synthesizer_inst sp = %f ms\n",
          (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0);
 
     // 将结果全部放到缓存中
     memcpy(m_output_cache_buf, m_output_cache_buf + gs_synth_output_frame,
            sizeof(float) * (m_output_cache_buf_frame - gs_synth_output_frame));
     memcpy(m_output_cache_buf + (m_output_cache_buf_frame - gs_synth_output_frame),
            m_synth_out[0][0].data(), sizeof(float) * gs_synth_output_frame);
 
     int start_pos = m_output_cache_buf_frame - m_crossfade_frame - out_len;
     memcpy(out_buf, m_output_cache_buf + start_pos, sizeof(float) * out_len);
     // 对头部数据做fade_in以及fadeout
     for (int i = 0; i < m_crossfade_frame; i++) {
         float rate = float(i * 1.f / m_crossfade_frame);
         out_buf[i] = rate * out_buf[i] + m_crossfade_buf[i] * (1 - rate);
     }
     memcpy(m_crossfade_buf, m_output_cache_buf + (m_output_cache_buf_frame - m_crossfade_frame),
            sizeof(float) * m_crossfade_frame);
 
     return 0;
 }
 
 int CRvcLiteOnline::get_latency_ms() {
-    return gs_crossfade_time * 1000;
+    // 此处除了block的延迟，还有推理时hubert理论上应该获取208，实际获取205帧，所以少的30ms
+    return gs_crossfade_time * 1000 + 30;
 }
 
 
 
 /*******************************对内的类**************************************/
 CResample::CResample()
 {
     m_resample_inst = nullptr;
 }
 
 CResample::~CResample()
 {
 
 }
 
 int CResample::init(int in_samplerate, int out_samplerate, int in_channel, int out_channel)
 {
     // 只是通道数不一致时走自驱逻辑
     m_in_channel = in_channel;
     m_out_channel = out_channel;
     if (in_samplerate == out_samplerate && in_channel != out_channel) {
         m_resample_inst = nullptr;
     }
     else {
         m_resample_inst = std::make_shared<CFfmpegResampler>();
         return m_resample_inst->init(in_samplerate, out_samplerate, in_channel, out_channel);
     }
     return ERR_RVC_LITE_SUCCESS;
 }
 
 int CResample::get_out_samples(int num)
 {
     if (m_resample_inst)
     {
         return m_resample_inst->get_out_samples(num);
     }
     return num;
 }
 
 void CResample::reset()
 {
     if (m_resample_inst)
     {
         return m_resample_inst->reset();
     }
 }
 
 int CResample::get_latency()
 {
     if (m_resample_inst)
     {
         return m_resample_inst->get_latency();
     }
     return 0;
 }
 
 int CResample::resample(float *in_buf, int in_num, float *out_buf, int &out_num) {
     if (m_resample_inst) {
         return m_resample_inst->resample(in_buf, in_num, out_buf, out_num);
     }
 
     if (m_in_channel == 2 && m_out_channel == 1) {
         if (out_num < in_num) {
             return ERR_RVC_LITE_RT_RESAMPLE_OUTBUF_SHORT;
         }
         stereo2mono(in_buf, in_num, out_buf);
         return ERR_RVC_LITE_SUCCESS;
     }
 
     if (m_in_channel == 1 && m_out_channel == 2) {
         if (out_num < in_num) {
             return ERR_RVC_LITE_RT_RESAMPLE_OUTBUF_SHORT;
         }
         mono2stereo(in_buf, in_num, out_buf);
         return ERR_RVC_LITE_SUCCESS;
     }
     return ERR_RVC_LITE_SUCCESS;
 }
 
 /*******************************对外的类***************************************/
 
 
 
 
 /*******************************对内函数***************************************/
 void CRvcLiteOnlineRealTime::init_variable() {
     m_init = false;
     m_rvc_stop = true;
     m_sample_rate = 44100;
     m_channel = 1;
     m_synth_path = "";
     m_new_synth_path = "";
     m_syn_state = RVC_LITE_RT_SYN_STATE_DEFAULT;
 }
 
 /*******************************对外函数***************************************/
 CRvcLiteOnlineRealTime::CRvcLiteOnlineRealTime() {
     init_variable();
 }
 
 CRvcLiteOnlineRealTime::~CRvcLiteOnlineRealTime() {
     uninit();
 }
 
 int CRvcLiteOnlineRealTime::init(const char *hubert_model_path, int sample_rate, int channel) {
     if (m_init) {
         return ERR_RVC_LITE_RT_REINIT;
     }
 
     if (sample_rate < 16000) {
         return ERR_RVC_LITE_RT_INPUT_SAMPLE_ERR;
     }
     init_variable();
     m_sample_rate = sample_rate;
     m_channel = channel;
     m_synth_path = "";
     m_new_synth_path = "";
     m_syn_state = RVC_LITE_RT_SYN_STATE_DEFAULT;
     int output_one_sec_number = m_sample_rate * m_channel; // 临时使用的数据
     int latency_len = gs_crossfade_time * m_sample_rate * m_channel;
     CThreadPool::Task task = std::bind(&CRvcLiteOnlineRealTime::rvc_process, this);
 
     m_rvc_inst = std::make_shared<CRvcLiteOnline>();
     int err = m_rvc_inst->init(hubert_model_path);
     if (ERR_RVC_LITE_SUCCESS != err) {
         goto exit;
     }
 
     // 重采样部分
     m_resample_queue = std::make_shared<CRvcCircleBuffer>(sample_rate * 3 * m_channel);
     m_resample16 = std::make_shared<CResample>();
     err = m_resample16->init(m_sample_rate, gs_src_samplerate, m_channel, 1);
     if (ERR_RVC_LITE_SUCCESS != err) {
         goto exit;
     }
 
     m_resample2src = std::make_shared<CResample>();
     err = m_resample2src->init(gs_dst_samplerate, m_sample_rate, 1, m_channel);
     if (ERR_RVC_LITE_SUCCESS != err) {
         goto exit;
     }
     m_resample_buf_max_len = 2048; // 此时空间最大是2048，保证不超即可
     m_resample_in_buf = std::shared_ptr<float>(new float[m_resample_buf_max_len], std::default_delete<float[]>());
     m_resample_out_buf = std::shared_ptr<float>(new float[m_resample_buf_max_len], std::default_delete<float[]>());
 
     // 核心处理部分
     m_input_tmp_buf_len = gs_src_samplerate;
     m_output_tmp_buf_len = gs_dst_samplerate;
     m_input_tmp_buf = std::shared_ptr<float>(new float[m_input_tmp_buf_len], std::default_delete<float[]>());
     m_output_tmp_buf = std::shared_ptr<float>(new float[m_output_tmp_buf_len], std::default_delete<float[]>());
     memset(m_input_tmp_buf.get(), 0, sizeof(float) * m_input_tmp_buf_len);
     memset(m_output_tmp_buf.get(), 0, sizeof(float) * m_output_tmp_buf_len);
 
     // 循环buffer
     m_input_queue = std::make_shared<CRvcCircleBuffer>(m_input_tmp_buf_len * 3);
     // 对外的是目标的采样率和通道数的数据
     m_out_queue = std::make_shared<CRvcCircleBuffer>(output_one_sec_number * 3);
     m_latency_queue = std::make_shared<CRvcCircleBuffer>(latency_len);
     // 提前塞入两组，保证延迟稳定在2s
     for (int i = 0; i < 2; i++) {
         // 塞入1s数据
         for (int j = 0; j < output_one_sec_number / m_output_tmp_buf_len; j++) {
             m_out_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len);
         }
         m_out_queue->push(m_output_tmp_buf.get(), output_one_sec_number % m_output_tmp_buf_len);
     }
     // 算法本身有延迟，所有为了保证延迟一致，在无效果的时候需要添加该延迟
     for (int j = 0; j < latency_len / m_output_tmp_buf_len; j++) {
         m_latency_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len);
     }
     m_latency_queue->push(m_output_tmp_buf.get(), latency_len % m_output_tmp_buf_len);
 
     // 开始处理线程
     m_thread_pool = std::make_shared<CThreadPool>();
     m_thread_pool->start(1);
     m_rvc_stop = false;
     m_thread_pool->run(task);
 
     m_init = true;
     exit:
     if (ERR_RVC_LITE_SUCCESS != err) {
         m_init = true;
         uninit();
     }
     return err;
 }
 
 int CRvcLiteOnlineRealTime::switch_synth(const char *synth_model_path) {
     if (!m_init) {
         return ERR_RVC_LITE_RT_NOT_INIT;
     }
 
     {
         std::unique_lock<std::mutex> lock(m_rvc_mutex);
         m_new_synth_path = synth_model_path;
     }
     return ERR_RVC_LITE_SUCCESS;
 }
 
 
 int CRvcLiteOnlineRealTime::process(float *in_buf, int in_len, float *out_buf, int out_len) {
     if (!m_init) {
         return ERR_RVC_LITE_RT_NOT_INIT;
     }
 
     // 写入数据
     {
         std::unique_lock<std::mutex> lock(m_rvc_mutex);
         m_resample_queue->push(in_buf, in_len);
         m_rvc_cond.notify_all();
     }
     memset(out_buf, 0, sizeof(float) * out_len);
     int tmp_out_len = out_len;
 
     // 获取数据
     {
         std::unique_lock<std::mutex> lock(m_rvc_mutex);
         m_out_queue->pop(out_buf, tmp_out_len);
     }
 
     if (tmp_out_len != out_len) {
         return ERR_RVC_LITE_RT_NOT_ENOUGH_DATA;
     }
     return ERR_RVC_LITE_SUCCESS;
 }
 
 void CRvcLiteOnlineRealTime::reset() {
     if (!m_init) {
         return;
     }
 
     {
         std::unique_lock<std::mutex> lock(m_rvc_mutex);
         m_resample_queue->reset();
         m_resample16->reset();
         m_resample2src->reset();
         m_input_queue->reset();
         m_out_queue->reset();
         m_rvc_inst->reset();
         m_latency_queue->reset();
         // 提前塞入两组，保证延迟稳定在2s
         int output_one_sec_number = m_sample_rate * m_channel; // 临时使用的数据
         memset(m_output_tmp_buf.get(), 0, sizeof(float) * m_output_tmp_buf_len);
         for (int i = 0; i < 2; i++) {
             for (int j = 0; j < output_one_sec_number / m_output_tmp_buf_len; j++) {
                 m_out_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len);
             }
             m_out_queue->push(m_output_tmp_buf.get(), output_one_sec_number % m_output_tmp_buf_len);
         }
         // 算法本身有延迟，所有为了保证延迟一致，在无效果的时候需要添加该延迟
         int latency_len = gs_crossfade_time * m_sample_rate * m_channel;
         for (int j = 0; j < latency_len / m_output_tmp_buf_len; j++) {
             m_latency_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len);
         }
         m_latency_queue->push(m_output_tmp_buf.get(), latency_len % m_output_tmp_buf_len);
     }
 }
 
 void CRvcLiteOnlineRealTime::flush(float *&out_buf, int &len) {
     // 将内部的所有的数据吐出来
     /**
      * 先停止
      */
     stop();
 
     // 无音色转换的情况
     int resample_in_len = 0;
     int resample_out_len = 0;
     if(m_syn_state == RVC_LITE_RT_SYN_STATE_DEFAULT)
     {
         while (m_resample_queue->size() > 0) {
             resample_in_len = m_resample_buf_max_len;
             m_resample_queue->pop(m_resample_in_buf.get(), resample_in_len);
             m_latency_queue->push(m_resample_in_buf.get(), resample_in_len);
             m_latency_queue->pop(m_resample_in_buf.get(), resample_in_len);
             m_out_queue->push(m_resample_in_buf.get(), resample_in_len);
         }
 
         while(m_latency_queue->size() > 0)
         {
             resample_in_len = m_resample_buf_max_len;
             m_latency_queue->pop(m_resample_in_buf.get(), resample_in_len);
             m_out_queue->push(m_resample_in_buf.get(), resample_in_len);
         }
 
         len = m_out_queue->size();
         out_buf = new float[len];
         m_out_queue->pop(out_buf, len);
         return;
     }
 
     // 有音色转换的情况
     while (m_resample_queue->size() > 0) {
         resample_in_len = m_resample_buf_max_len;
         m_resample_queue->pop(m_resample_in_buf.get(), resample_in_len);
         // 输入的数据需要考虑channel
         resample_out_len = m_resample16->get_out_samples(resample_in_len / m_channel);
         m_resample16->resample(m_resample_in_buf.get(), resample_in_len / m_channel, m_resample_out_buf.get(),
                                resample_out_len);
         // 输出是16k单声道，不需要考虑
         m_input_queue->push(m_resample_out_buf.get(), resample_out_len);
     }
     memset(m_input_tmp_buf.get(), 0, sizeof(float) * m_input_tmp_buf_len);
     int add_size = m_input_tmp_buf_len - m_input_queue->size() % m_input_tmp_buf_len;
     if (add_size != 0 && add_size < m_input_tmp_buf_len) {
         m_input_queue->push(m_input_tmp_buf.get(), add_size);
     }
     int num = m_input_queue->size() / m_input_tmp_buf_len;
     for (int i = 0; i < num; i++) {
         rvc_process_step();
     }
 
     // 将所有数据拷贝出来
     len = m_out_queue->size();
     out_buf = new float[len];
     m_out_queue->pop(out_buf, len);
 }
 
 int CRvcLiteOnlineRealTime::get_latency_ms() {
     return m_rvc_inst->get_latency_ms() + 2000;
 }
 
 /*******************************对内函数***************************************/
 void CRvcLiteOnlineRealTime::uninit() {
     if (!m_init) {
         return;
     }
     stop();
 }
 
 void CRvcLiteOnlineRealTime::stop() {
     // 释放thread_pool的数据,先通知一下rvc_process,防止是在等待中
     m_rvc_stop = true;
     if (m_thread_pool) {
         m_rvc_cond.notify_all();
         m_thread_pool->stop();
     }
 }
 
 void CRvcLiteOnlineRealTime::rvc_process_step() {
 
     struct timeval start;
     struct timeval end;
     int sample_out_len = 0;
     // 开始处理
     if (m_input_queue->size() < m_input_tmp_buf_len) {
         return;
     }
     gettimeofday(&start, NULL);
     m_input_queue->pop(m_input_tmp_buf.get(), m_input_tmp_buf_len);
     m_rvc_inst->process_block(m_input_tmp_buf.get(), m_input_tmp_buf_len,
                               m_output_tmp_buf.get(), m_output_tmp_buf_len);
     gettimeofday(&end, NULL);
     LOGD("RvcLite", "rvc_process process sp %f ms",
          (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0);
 
     // 重采样
     // 考虑到此处采样率变大，但是最多也不到两倍，但是通道数有可能扩展到两倍，所以按照1/4进行设置
     gettimeofday(&start, NULL);
     bool last = false;
     int step = m_resample_buf_max_len / 4;
     for (int i = 0; i < m_output_tmp_buf_len; i += step) {
         if (i + step >= m_output_tmp_buf_len) {
             step = m_output_tmp_buf_len - i;
             last = true;
         }
         // 此时的输入是单声道，采样点数量和总长度一致
         sample_out_len = m_resample2src->get_out_samples(step);
         m_resample2src->resample(m_output_tmp_buf.get() + i, step, m_resample_out_buf.get(), sample_out_len);
 
         // 从有到无
         if(last && m_syn_state == RVC_LITE_RT_SYN_STATE_EFFECT2DEFAULT)
         {
             // 因为不加音效也需要延迟对齐，所以此处只要做fade_out就行了
             for(int ii =0; ii < sample_out_len * m_channel; ii+=m_channel)
             {
                 float rate = ii * 1.0 / step;
                 for(int jj = 0; jj < m_channel; jj++)
                 {
                     m_resample_out_buf.get()[ii+jj] = m_resample_out_buf.get()[ii+jj] * (1 - rate);
                 }
             }
             m_syn_state = RVC_LITE_RT_SYN_STATE_BEFORE_DEFAULT;
         }
 
         {
             std::unique_lock<std::mutex> lock(m_rvc_mutex);
             m_out_queue->push(m_resample_out_buf.get(), sample_out_len * m_channel);
         }
     }
     gettimeofday(&end, NULL);
     LOGD("RvcLite", "rvc_process re_resample sp %f ms",
          (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0);
     printf("finish ...\n");
 }
 
 void CRvcLiteOnlineRealTime::rvc_process() {
     int sample_in_len;
     int sample_out_len = 0;
     while (!m_rvc_stop) {
         {
             // 重采样
             std::unique_lock<std::mutex> lock(m_rvc_mutex);
             if (m_resample_queue->size() < m_resample_buf_max_len) {
                 // 睡眠前检查下情况
                 if (m_rvc_stop) {
                     return;
                 }
                 m_rvc_cond.wait(lock);
                 continue;
             }
             sample_in_len = m_resample_buf_max_len;
             m_resample_queue->pop(m_resample_in_buf.get(), sample_in_len);
         }
 
         /**
          * 此处有三种情况:
          * 因为无论哪种变换，有延迟的存在，导致输入的数据都是需要塞0进去，所以对当前的数据做fade_out即可
          * 1. 无到有:对无到有的部分做个fade_out,对下一帧要塞入音效器的部分做fade_in
          * 2. 有到无:对无到有的部分做个fade_out,对下一帧要塞入音效器的部分做fade_in
          * 3. 有到有[这个不用考虑，内部自己做了处理]
          */
         if (m_synth_path != m_new_synth_path) {
 
             // 从无到有，此时对本帧做fade_out,对下一帧输入做fade_in
             if(m_synth_path.empty() && !m_new_synth_path.empty())
             {
                 m_syn_state = RVC_LITE_RT_SYN_STATE_DEFAULT2EFFECT;
             }
 
             // 从有到无
             if (!m_synth_path.empty() && m_new_synth_path.empty())
             {
                 m_syn_state = RVC_LITE_RT_SYN_STATE_EFFECT2DEFAULT;
             }
 
             {
                 std::unique_lock<std::mutex> lock(m_rvc_mutex);
                 m_synth_path = m_new_synth_path;
             }
             m_rvc_inst->switch_synth_model(m_new_synth_path.c_str());
         }
 
         // 刚切过来第一次做效果
         if(m_syn_state == RVC_LITE_RT_SYN_STATE_BEFORE_DEFAULT)
         {
             // 刚从有到无，需要清空数据,以及对输入的队列添加fade_in
             m_latency_queue->reset();
             // 算法本身有延迟，所有为了保证延迟一致，在无效果的时候需要添加该延迟
             memset(m_output_tmp_buf.get(), 0, sizeof(float) * m_output_tmp_buf_len);
             int latency_len = gs_crossfade_time * m_sample_rate * m_channel;
             for (int j = 0; j < latency_len / m_output_tmp_buf_len; j++) {
                 m_latency_queue->push(m_output_tmp_buf.get(), m_output_tmp_buf_len);
             }
             m_latency_queue->push(m_output_tmp_buf.get(), latency_len % m_output_tmp_buf_len);
 
             // 对输入做fade_in
             for(int i = 0; i < sample_in_len; i+=m_channel)
             {
                 float rate = i * 1.0 / sample_in_len;
                 for(int j = 0; j < m_channel; j++)
                 {
                     m_resample_in_buf.get()[i+j] *= rate;
                 }
             }
             m_syn_state = RVC_LITE_RT_SYN_STATE_DEFAULT;
         }
 
         // 不做效果
         if(m_syn_state == RVC_LITE_RT_SYN_STATE_DEFAULT)
         {
             m_latency_queue->push(m_resample_in_buf.get(), sample_in_len);
             m_latency_queue->pop(m_resample_in_buf.get(), sample_in_len);
             {
                 std::unique_lock<std::mutex> lock(m_rvc_mutex);
                 m_out_queue->push(m_resample_in_buf.get(), sample_in_len);
             }
             continue;
         }
 
         // 从无到有的转换
         if (m_syn_state == RVC_LITE_RT_SYN_STATE_DEFAULT2EFFECT)
         {
             // 做fade_out
             for(int i = 0; i < sample_in_len; i+=m_channel)
             {
                 float rate = i * 1.0 / sample_in_len;
                 for(int j = 0; j < m_channel; j++)
                 {
                     m_resample_in_buf.get()[i+j] *= 1 - rate;
                 }
             }
             m_latency_queue->push(m_resample_in_buf.get(), sample_in_len);
             m_latency_queue->pop(m_resample_in_buf.get(), sample_in_len);
             {
                 std::unique_lock<std::mutex> lock(m_rvc_mutex);
                 m_out_queue->push(m_resample_in_buf.get(), sample_in_len);
             }
 
             // 此时对于rvc来说输入的数据不连贯了，所以清空内部数据重新搞
             m_syn_state = RVC_LITE_RT_SYN_STATE_EFFECT;
             m_rvc_inst->reset();
             continue;
         }
 
         // 重采样到16k,此处采样率变低，所以不会出现sample_out_len > sample_in_len的情况
         sample_out_len = m_resample16->get_out_samples(sample_in_len / m_channel);
         m_resample16->resample(m_resample_in_buf.get(), sample_in_len / m_channel, m_resample_out_buf.get(),
                                sample_out_len);
         m_input_queue->push(m_resample_out_buf.get(), sample_out_len);
         rvc_process_step();
     }
 }
\ No newline at end of file
diff --git a/mnn_demo/src/CRvcLiteOnlineV2.cpp b/mnn_demo/src/CRvcLiteOnlineV2.cpp
new file mode 100644
index 0000000..0269a7d
--- /dev/null
+++ b/mnn_demo/src/CRvcLiteOnlineV2.cpp
@@ -0,0 +1,215 @@
+//
+// Created by Administrator on 2024/1/22.
+//
+
+#include "CRvcLiteOnlineV2.h"
+#include "CRvcCircleBuffer.h"
+#include <unistd.h>
+
+inline bool file_exists1(const std::string &name) {  // true when the path exists (POSIX access() probe)
+    return 0 == access(name.c_str(), F_OK);  // access() yields 0 on success and -1 on failure
+}
+
+CRvcLiteOnlineV2::CRvcLiteOnlineV2()
+{
+    // Nothing is set up here; the members are assigned in init().
+}
+
+CRvcLiteOnlineV2::~CRvcLiteOnlineV2()
+{
+    // No explicit cleanup: the buffers, queues and synthesizer created in init() are held by std::shared_ptr.
+}
+/*****************************************对内函数***************************************************************/
+void CRvcLiteOnlineV2::set_cur_state(bool reset)
+{
+    /**
+     * Apply a pending model change (m_new_syn_model) and pick the matching transition state.
+     * @param reset true when push() has just cleared the queues; no crossfade is then
+     *              needed, so the target state is entered directly.
+     * Three transitions exist:
+     *   none  -> model: unprocessed signal fades out, effect fades in (DEFAULT2EFFECT)
+     *   model -> none : effect fades out, unprocessed signal fades in (EFFECT2DEFAULT)
+     *   model -> model: state is kept; only the synthesizer model is swapped
+     */
+    if (m_syn_model == m_new_syn_model)
+    {
+        return;
+    }
+
+    const bool had_model = !m_syn_model.empty();
+    const bool has_model = !m_new_syn_model.empty();
+    if (!had_model && has_model)
+    {
+        // After a reset there is nothing to fade from, so go straight to the effect.
+        m_sync_state = reset ? CRVC_V2_STATE_EFFECT : CRVC_V2_STATE_DEFAULT2EFFECT;
+    }
+    else if (had_model && !has_model)
+    {
+        // After a reset there is nothing to fade from, so go straight to pass-through.
+        m_sync_state = reset ? CRVC_V2_STATE_DEFAULT : CRVC_V2_STATE_EFFECT2DEFAULT;
+    }
+
+    m_syn_model = m_new_syn_model;
+    if (has_model)
+    {
+        // Previously a model -> model change fell through both branches, so the new
+        // model was never loaded and m_syn_model never caught up with the request.
+        m_rvc_inst->switch_model(m_syn_model.c_str());
+    }
+}
+
+/*****************************************对外函数***************************************************************/
+int CRvcLiteOnlineV2::init(const char *hubert_model, int sample_rate, int channel)
+{   // Allocates the queues/scratch buffers used by push()/pop(); returns the synthesizer's init() result.
+    m_rvc_inst = std::make_shared<CRvcLiteSynthesizer>();
+    // One second minus 100 frames of interleaved samples (presumably keeps a block under the synthesizer's length limit).
+    m_block_len = sample_rate * channel - 100 * channel;
+    m_tmp_buf_len = m_block_len * 2;
+    m_reset = true; // the first push() starts from cleared queues
+    m_syn_model = "";
+    m_new_syn_model = "";
+    m_sync_state = CRVC_V2_STATE_DEFAULT;
+    m_fade_len = int(sample_rate * 0.05) * channel; // 50 ms crossfade, in interleaved samples
+    m_channel = channel;
+    // Value-initialise (note the "()") so a crossfade that reads past the samples written so far sees zeros.
+    m_tmp_in_buf = std::shared_ptr<float>(new float[m_tmp_buf_len](), std::default_delete<float[]>());
+    m_tmp_out_buf = std::shared_ptr<float>(new float[m_tmp_buf_len](), std::default_delete<float[]>());
+    m_in_queue =  std::make_shared<CRvcCircleBuffer>(m_tmp_buf_len * 2);
+    m_out_queue =  std::make_shared<CRvcCircleBuffer>(m_tmp_buf_len * 2);
+    m_input_latency_output_frame = 0;
+    return m_rvc_inst->init(hubert_model, sample_rate, channel);
+}
+
+int CRvcLiteOnlineV2::switch_model(const char *synth_model)
+{
+    // Requests a model change that the next push() applies; an empty (or null) path means "no effect".
+    // Compare contents: the old `synth_model != ""` compared addresses, so "" was checked on disk and rejected.
+    const std::string path = (synth_model != nullptr) ? synth_model : "";
+    if (!path.empty() && !file_exists1(path)) {
+        return ERR_RVC_LITE_MODEL_NOT_EXISTS; // a non-empty path must exist on disk
+    }
+    m_new_syn_model = path;
+    return ERR_RVC_LITE_SUCCESS;
+}
+
+void CRvcLiteOnlineV2::set_up_key(int key)
+{
+    // Forwarded as-is; the original note says the callee is thread-safe (NOTE(review): not verifiable here - confirm).
+    m_rvc_inst->set_up_key(key);
+}
+
+void CRvcLiteOnlineV2::reset()
+{
+    m_reset = true; // only raises a flag; the next push() clears both queues and resets the synthesizer
+}
+
+
+int CRvcLiteOnlineV2::push(float *buf, int len, bool last) // queue input, run the effect block-wise, fill m_out_queue
+{
+    bool reset = m_reset; // snapshot taken before the flag is cleared; set_cur_state() needs it
+    if (m_reset)
+    {
+        m_reset = false;
+        m_input_latency_output_frame = 0;
+        m_in_queue->reset();
+        m_out_queue->reset();
+        m_rvc_inst->reset();
+    }
+    // Apply any pending model switch; `reset` tells it whether a crossfade is still meaningful.
+    set_cur_state(reset);
+    // Pass-through: with no effect active the input goes straight to the output queue.
+    if (CRVC_V2_STATE_DEFAULT == m_sync_state)
+    {
+        std::unique_lock<std::mutex> lock(m_rvc_mutex);
+        m_out_queue->push(buf, len);
+        return ERR_RVC_LITE_SUCCESS;
+    }
+
+    // Whatever the transition, run the model first and decide what to do with its output afterwards.
+    m_in_queue->push(buf, len);
+    while(m_in_queue->size() >= m_block_len || last) { // with `last` set, a final short block is processed too
+        if (m_in_queue->size() <= 0) // with `last` set, the loop ends here once the queue is drained
+        {
+            return ERR_RVC_LITE_SUCCESS;
+        }
+
+        int cur_in_len = m_block_len;
+        int cur_out_len = m_block_len;
+        m_in_queue->pop(m_tmp_in_buf.get(), cur_in_len); // presumably updates cur_in_len to the count actually read - confirm
+        int err = m_rvc_inst->process(m_tmp_in_buf.get(), cur_in_len, m_tmp_out_buf.get(), cur_out_len);
+        if (err != ERR_RVC_LITE_SUCCESS) {
+            return err;
+        }
+
+        // Effect -> default: fade the effect out and the unprocessed input in.
+        if (m_sync_state == CRVC_V2_STATE_EFFECT2DEFAULT)
+        {
+            // m_rvc_inst delays its output (even with the leading silent frames trimmed), so its input and
+            // output heads do not line up: the output lags the input, while the unprocessed path has no lag.
+            // The input head must therefore be matched against the output sample it really corresponds to.
+            // E.g. input 1,2,3,4,5 -> output l1,l2,1,2,3 (l1,l2 = latency samples): input 1,2 pairs with output + latency offset.
+            for(int i = 0; i < m_fade_len; i+=m_channel)
+            {
+                float rate = i * 1.0 / m_fade_len;
+                for(int j = 0; j < m_channel; j+=1)
+                {
+                    m_tmp_in_buf.get()[i+j] =  m_tmp_in_buf.get()[i+j] * rate + m_tmp_out_buf.get()[i+j+m_input_latency_output_frame] * (1 - rate);
+                }
+            }
+            {
+                std::unique_lock<std::mutex> lock(m_rvc_mutex);
+                // Emit the pending latency-sized chunk of effect output first, then the crossfaded input block.
+                m_out_queue->push(m_tmp_out_buf.get(), m_input_latency_output_frame);
+                m_out_queue->push(m_tmp_in_buf.get(), cur_in_len);
+            }
+
+            m_sync_state = CRVC_V2_STATE_DEFAULT;
+            m_input_latency_output_frame = 0;
+            // Whatever input is still queued is forwarded to the output unprocessed.
+            while(m_in_queue->size() > 0)
+            {
+                cur_in_len = m_block_len;
+                m_in_queue->pop(m_tmp_in_buf.get(), cur_in_len);
+                {
+                    std::unique_lock<std::mutex> lock(m_rvc_mutex);
+                    m_out_queue->push(m_tmp_in_buf.get(), cur_in_len);
+                }
+            }
+            return ERR_RVC_LITE_SUCCESS;
+        }
+
+        // Default -> effect: fade the effect in and the unprocessed input out.
+        if (m_sync_state == CRVC_V2_STATE_DEFAULT2EFFECT)
+        {
+            for(int i = 0; i < m_fade_len; i+=m_channel)
+            {
+                float rate = i * 1.0 / m_fade_len;
+                for(int j = 0; j < m_channel; j+=1)
+                {
+                    m_tmp_out_buf.get()[i+j] =  m_tmp_out_buf.get()[i+j] * rate + m_tmp_in_buf.get()[i+j] * (1 - rate);
+                }
+            }
+            // Transition done: steady effect state from now on.
+            m_sync_state = CRVC_V2_STATE_EFFECT;
+        }
+
+        // Input and output lengths of the effect path can differ; accumulate the difference.
+        m_input_latency_output_frame += cur_in_len - cur_out_len;
+
+        // Publish the processed samples under the lock.
+        {
+            std::unique_lock<std::mutex> lock(m_rvc_mutex);
+            m_out_queue->push(m_tmp_out_buf.get(), cur_out_len);
+        }
+    }
+    return ERR_RVC_LITE_SUCCESS;
+}
+
+int CRvcLiteOnlineV2::size() {  // element count currently reported by the output queue
+    std::unique_lock<std::mutex> lock(m_rvc_mutex);  // same mutex pop() and push()'s output writes hold; was read unlocked
+    return m_out_queue->size();
+}
+
+void CRvcLiteOnlineV2::pop(float *buf, int &len)
+{
+    std::unique_lock<std::mutex> lock(m_rvc_mutex); // same mutex push() takes when writing m_out_queue
+    m_out_queue->pop(buf, len); // len is passed by reference; presumably updated to the count actually read - confirm
+}
diff --git a/mnn_demo/src/CRvcLiteSynthesizer.cpp b/mnn_demo/src/CRvcLiteSynthesizer.cpp
index 9bce8d7..6ff952b 100644
--- a/mnn_demo/src/CRvcLiteSynthesizer.cpp
+++ b/mnn_demo/src/CRvcLiteSynthesizer.cpp
@@ -1,106 +1,128 @@
 //
 // Created by Administrator on 2024/1/21.
 //
 
 #include "CRvcLiteSynthesizer.h"
 #include <cstring>
 #include <sys/time.h>
 
 CRvcLiteSynthesizer::CRvcLiteSynthesizer(){}
 
 CRvcLiteSynthesizer::~CRvcLiteSynthesizer() {}
 
-int CRvcLiteSynthesizer::init(const char *hubert_model, const char *synth_model, int sample_rate, int channel)
+int CRvcLiteSynthesizer::init(const char *hubert_model, int sample_rate, int channel)
 {
     m_rvc_inst = std::make_shared<CRvcLiteOnline>();
     int err = m_rvc_inst->init(hubert_model);
     if (err != ERR_RVC_LITE_SUCCESS)
     {
         return err;
     }
-    err = m_rvc_inst->switch_synth_model(synth_model);
-    if (err != ERR_RVC_LITE_SUCCESS)
-    {
-        return err;
-    }
+    // The synth model is no longer loaded here; callers select it afterwards through switch_model().
     m_resample2_16 = std::make_shared<CResample>();
     m_resample2_16->init(sample_rate, gs_src_samplerate, channel, 1);
     m_resample2src = std::make_shared<CResample>();
     m_resample2src->init(gs_dst_samplerate, sample_rate, 1, channel);
 
     m_channel = channel;
     m_sample_rate = sample_rate;
 
     m_buf_tmp_16k_len = 0;
     m_buf_tmp_16k_cap = 0;
     m_buf_tmp_32k_len = 0;
     m_buf_tmp_32k_cap = 0;
     m_buf_tmp_src_len = 0;
     m_buf_tmp_src_cap = 0;
+    m_first = true;  // the next process() call drops the leading latency samples from its output
     return ERR_RVC_LITE_SUCCESS;
 }
 
+int CRvcLiteSynthesizer::switch_model(const char *synth_model) {  // switches the synth model on the wrapped engine
+    if (!m_rvc_inst) return ERR_RVC_LITE_NOT_INIT;  // init() not called yet: report it instead of dereferencing null
+    return m_rvc_inst->switch_synth_model(synth_model);  // the engine's error code is returned unchanged
+}
+
+// Forwards key unchanged to CRvcLiteOnline::set_up_key (presumably a pitch shift - confirm there). Requires a prior init().
+void CRvcLiteSynthesizer::set_up_key(int key) {
+    (*m_rvc_inst).set_up_key(key);
+}
+
+// Resets the wrapped engine and re-arms the first-block flag. Requires a prior init().
+void CRvcLiteSynthesizer::reset() {
+    (*m_rvc_inst).reset();  // CRvcLiteOnline::reset() on the pointee, not shared_ptr::reset()
+    m_first = true;         // the next process() call trims the leading latency samples again
+}
+
 int CRvcLiteSynthesizer::process(float *in_buf, int in_len, float *out_buf, int &out_len) {
     // 1 重采样 2 推理 3 再次重采样
     int resample_out_len = m_resample2_16->get_out_samples(in_len / m_channel);
     // 控制逻辑，不能超过该长度
     if (resample_out_len > gs_src_samplerate) {
         return ERR_RVC_LITE_BLOCK_TOO_LONG;
     }
 
     if (m_buf_tmp_16k_cap < resample_out_len) {
         m_buf_tmp_16k_cap = resample_out_len;
         m_buf_tmp_16k = std::shared_ptr<float>(new float[m_buf_tmp_16k_cap], std::default_delete<float[]>());
     }
     m_buf_tmp_16k_len = resample_out_len;
     int err = m_resample2_16->resample(in_buf, in_len / m_channel, m_buf_tmp_16k.get(), m_buf_tmp_16k_len);
     if (err != ERR_RVC_LITE_SUCCESS) {
         return err;
     }
     if (m_buf_tmp_32k_cap < m_buf_tmp_16k_len * 2) {
         m_buf_tmp_32k_cap = m_buf_tmp_16k_len * 2;
         m_buf_tmp_32k = std::shared_ptr<float>(new float[m_buf_tmp_32k_cap], std::default_delete<float[]>());
     }
     m_buf_tmp_32k_len = m_buf_tmp_16k_len * 2;
 
     // 推理
     err = m_rvc_inst->process_block(m_buf_tmp_16k.get(), m_buf_tmp_16k_len, m_buf_tmp_32k.get(), m_buf_tmp_32k_len);
     if (err != ERR_RVC_LITE_SUCCESS) {
         return err;
     }
     // 重采样回来
     int out_frame = m_resample2src->get_out_samples(m_buf_tmp_32k_len);
     if (m_buf_tmp_src_cap < out_frame * m_channel) {
         m_buf_tmp_src_cap = out_frame * m_channel;
         m_buf_tmp_src = std::shared_ptr<float>(new float[m_buf_tmp_src_cap], std::default_delete<float[]>());
     }
     m_buf_tmp_src_len = out_frame;
     err = m_resample2src->resample(m_buf_tmp_32k.get(), m_buf_tmp_32k_len, m_buf_tmp_src.get(), m_buf_tmp_src_len);
     if (err != ERR_RVC_LITE_SUCCESS) {
         return err;
     }
 
     // 取较小的值
     if (out_len > m_buf_tmp_src_len * m_channel)
     {
         out_len = m_buf_tmp_src_len * m_channel;
     }
 
-    memcpy(out_buf, m_buf_tmp_src.get(), sizeof(float) * out_len);
+    // First block after init()/reset(): drop the engine's leading latency samples from the output.
+    int latency_frame = 0;
+    if (m_first) {
+        m_first = false;
+        latency_frame = int(m_rvc_inst->get_latency_ms() * 1.0 / 1000 * m_sample_rate) * m_channel;
+        if (latency_frame > out_len) latency_frame = out_len;  // clamp: a short first block must not make out_len negative (memcpy size would wrap)
+        out_len -= latency_frame;
+    }
+    memcpy(out_buf, m_buf_tmp_src.get()+latency_frame, sizeof(float) * out_len);
     return ERR_RVC_LITE_SUCCESS;
 }
 
+
 float CRvcLiteSynthesizer::get_rtf()
 {
     struct timeval start;
     struct timeval end;
     gettimeofday(&start, NULL);
     int in_len = m_sample_rate * m_channel - 100 *m_channel;
     int out_len = in_len;
-    float* in_buf = new float[in_len];
-    process(in_buf, in_len, in_buf, in_len);
+    float* in_buf = new float[in_len]();  // value-initialised (silence): process() must not read indeterminate floats
+    process(in_buf, in_len, in_buf, out_len);  // out_len carries the output capacity; in_len stays the input length
     delete [] in_buf;
     gettimeofday(&end, NULL);
     double sp = (end.tv_sec - start.tv_sec) * 1000.0 + (end.tv_usec - start.tv_usec) / 1000.0;
     return sp / 1000;
 }
\ No newline at end of file