diff --git a/AIMeiSheng/._readme_meisheng.md b/AIMeiSheng/._readme_meisheng.md
deleted file mode 100644
index 50212ca..0000000
Binary files a/AIMeiSheng/._readme_meisheng.md and /dev/null differ
diff --git a/AIMeiSheng/docker_demo/common.py b/AIMeiSheng/docker_demo/common.py
index 098cba8..64aba31 100644
--- a/AIMeiSheng/docker_demo/common.py
+++ b/AIMeiSheng/docker_demo/common.py
@@ -1,121 +1,122 @@
import os
import sys
import time
# import logging
import urllib, urllib.request

# test / production environment
gs_prod = True
# if len(sys.argv) > 1 and sys.argv[1] == "prod":
#     gs_prod = True
# print(gs_prod)

gs_tmp_dir = "/tmp/ai_meisheng_tmp"
gs_model_dir = "/tmp/ai_meisheng_models"
gs_resource_cache_dir = "/tmp/ai_meisheng_resource_cache"

gs_embed_model_path = os.path.join(gs_model_dir, "RawNet3/models/weights/model.pt")
gs_svc_model_path = os.path.join(gs_model_dir, "weights/xusong_v2_org_version_alldata_embed_spkenx200x_double_e14_s90706.pth")
gs_hubert_model_path = os.path.join(gs_model_dir, "hubert.pt")
gs_rmvpe_model_path = os.path.join(gs_model_dir, "rmvpe.pt")
gs_embed_model_spk_path = os.path.join(gs_model_dir, "SpeakerEncoder/pretrained_model/best_model.pth.tar")
gs_embed_config_spk_path = os.path.join(gs_model_dir, "SpeakerEncoder/pretrained_model/config.json")

# errcode
gs_err_code_success = 0
gs_err_code_download_vocal = 100
gs_err_code_download_svc_url = 101
gs_err_code_svc_process = 102
gs_err_code_transcode = 103
gs_err_code_volume_adjust = 104
gs_err_code_upload = 105
gs_err_code_params = 106
gs_err_code_pending = 107
gs_err_code_target_silence = 108
gs_err_code_too_many_connections = 429
+gs_err_code_gender_classify = 430

gs_redis_conf = {
    "host": "av-credis.starmaker.co",
    "port": 6379,
    "pwd": "lKoWEhz%jxTO",
}

gs_server_redis_conf = {
    "producer": "test_ai_meisheng_producer",  # input queue
    "ai_meisheng_key_prefix": "test_ai_meisheng_key_",  # stores per-task result status
}
if gs_prod:
    gs_server_redis_conf = {
        "producer": "ai_meisheng_producer",  # input queue
        "ai_meisheng_key_prefix": "ai_meisheng_key_",  # stores per-task result status
    }

gs_feishu_conf = {
    "url": "http://sg-prod-songbook-webmp-1:8000/api/feishu/people",
    "users": [
        "18810833785",  # 杨建利
        "17778007843",  # 王健军
        "18612496315"  # 郭子豪
    ]
}


def download2disk(url, dst_path):
    try:
        urllib.request.urlretrieve(url, dst_path)
        return os.path.exists(dst_path)
    except Exception as ex:
        print(f"download url={url} error", ex)
    return False


def exec_cmd(cmd):
    # gs_logger.info(cmd)
    print(cmd)
    ret = os.system(cmd)
    if ret != 0:
        return False
    return True


def exec_cmd_and_result(cmd):
    r = os.popen(cmd)
    text = r.read()
    r.close()
    return text


def upload_file2cos(key, file_path, region='ap-singapore', bucket_name='av-audit-sync-sg-1256122840'):
    """
    Upload a file to COS
    :param key: object key in the bucket
    :param file_path: local file path
    :param region: region
    :param bucket_name: bucket name
    :return:
    """
    gs_coscmd = "coscmd"
    gs_coscmd_conf = "~/.cos.conf"

    cmd = "{} -c {} -r {} -b {} upload {} {}".format(gs_coscmd, gs_coscmd_conf, region, bucket_name, file_path, key)
    if exec_cmd(cmd):
        cmd = "{} -c {} -r {} -b {} info {}".format(gs_coscmd, gs_coscmd_conf, region, bucket_name, key) \
              + "| grep Content-Length |awk \'{print $2}\'"
        res_str = exec_cmd_and_result(cmd)
        # logging.info("{},res={}".format(key, res_str))
        size = float(res_str)
        if size > 0:
            return True
        return False
    return False


def check_input(input_data):
    key_list = ["record_song_url", "target_url", "start", "end", "vocal_loudness", "female_recording_url", "male_recording_url"]
    for key in key_list:
        if key not in input_data.keys():
            return False
    return True
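Note on the common.py hunk: `gs_err_code_gender_classify = 430` extends the error-code table for the new gender-classification failure path. As a hedged usage sketch of how `check_input` and these codes combine on the caller side (the `handle_request` wrapper is hypothetical, not part of this diff):

```python
# Hypothetical caller-side sketch, not part of this diff: validate the payload
# with check_input and map a malformed request to gs_err_code_params.
from AIMeiSheng.docker_demo.common import (
    check_input, gs_err_code_params, gs_err_code_success,
)

def handle_request(input_data: dict) -> int:
    if not check_input(input_data):  # a required key is missing
        return gs_err_code_params
    # ... download, svc processing, transcode, upload would run here ...
    return gs_err_code_success
```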
diff --git a/AIMeiSheng/docker_demo/svc_online.py b/AIMeiSheng/docker_demo/svc_online.py
index a52ab24..f12143f 100644
--- a/AIMeiSheng/docker_demo/svc_online.py
+++ b/AIMeiSheng/docker_demo/svc_online.py
@@ -1,190 +1,194 @@
# -*- coding: UTF-8 -*-
"""
Core SVC processing logic
"""
import os
import time
import socket
import shutil
import hashlib
from AIMeiSheng.meisheng_svc_final import load_model, process_svc_online
from AIMeiSheng.cos_similar_ui_zoom import cos_similar
from AIMeiSheng.meisheng_env_preparex import meisheng_env_prepare
from AIMeiSheng.voice_classification.online.voice_class_online_fang import VoiceClass, download_volume_balanced
from AIMeiSheng.docker_demo.common import *
import logging

hostname = socket.gethostname()
log_file_name = f"{os.path.dirname(os.path.abspath(__file__))}/av_meisheng_{hostname}.log"

# set up the logger
svc_offline_logger = logging.getLogger("svc_offline")
file_handler = logging.FileHandler(log_file_name)
file_handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S')
file_handler.setFormatter(formatter)

if gs_prod:
    svc_offline_logger.addHandler(file_handler)

if os.path.exists(gs_tmp_dir):
    shutil.rmtree(gs_tmp_dir)
os.makedirs(gs_model_dir, exist_ok=True)
os.makedirs(gs_resource_cache_dir, exist_ok=True)

# preset parameters
gs_gender_models_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/hub/voice_classification/models.zip"
gs_volume_bin_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/dataset/AIMeiSheng/ebur128_tool"


class GSWorkerAttr:
    def __init__(self, input_data):
        # extract the input resources
        vocal_url = input_data["record_song_url"]
        target_url = input_data["target_url"]
        start = input_data["start"]  # in ms
        end = input_data["end"]  # in ms
        vocal_loudness = input_data["vocal_loudness"]
        female_recording_url = input_data["female_recording_url"]
        male_recording_url = input_data["male_recording_url"]

        self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest()
        self.tmp_dir = os.path.join(gs_tmp_dir, self.distinct_id)
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.makedirs(self.tmp_dir)

        self.vocal_url = vocal_url
        self.target_url = target_url
        ext = vocal_url.split(".")[-1]
        self.vocal_path = os.path.join(self.tmp_dir, self.distinct_id + f"_in.{ext}")
        self.target_wav_path = os.path.join(self.tmp_dir, self.distinct_id + "_out.wav")
        self.target_wav_ad_path = os.path.join(self.tmp_dir, self.distinct_id + "_out_ad.wav")
        self.target_path = os.path.join(self.tmp_dir, self.distinct_id + "_out.m4a")

        self.female_svc_source_url = female_recording_url
        self.male_svc_source_url = male_recording_url

        ext = female_recording_url.split(".")[-1]
        self.female_svc_source_path = os.path.join(gs_resource_cache_dir,
                                                   hashlib.md5(female_recording_url.encode()).hexdigest() + "." + ext)
        ext = male_recording_url.split(".")[-1]
        self.male_svc_source_path = os.path.join(gs_resource_cache_dir,
                                                 hashlib.md5(male_recording_url.encode()).hexdigest() + "." + ext)

        self.st_tm = start
        self.ed_tm = end
        self.target_loudness = vocal_loudness

    def log_info_name(self):
        return f"d_id={self.distinct_id}, vocal_url={self.vocal_url}"

    def rm_cache(self):
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)

def init_gender_model():
    """
    Download the gender model
    :return:
    """
    dst_model_dir = os.path.join(gs_model_dir, "voice_classification")
    if not os.path.exists(dst_model_dir):
        dst_zip_path = os.path.join(gs_model_dir, "models.zip")
        if not download2disk(gs_gender_models_url, dst_zip_path):
            svc_offline_logger.fatal(f"download gender_model err={gs_gender_models_url}")
        cmd = f"cd {gs_model_dir}; unzip {dst_zip_path}; mv models voice_classification; rm -f {dst_zip_path}"
        os.system(cmd)
        if not os.path.exists(dst_model_dir):
            svc_offline_logger.fatal(f"unzip {dst_zip_path} err")

    music_voice_pure_model = os.path.join(dst_model_dir, "voice_005_rec_v5.pth")
    music_voice_no_pure_model = os.path.join(dst_model_dir, "voice_10_v5.pth")
    gender_pure_model = os.path.join(dst_model_dir, "gender_8k_ratev5_v6_adam.pth")
    gender_no_pure_model = os.path.join(dst_model_dir, "gender_8k_v6_adam.pth")
    vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model)
    return vc


def init_svc_model():
    meisheng_env_prepare(logging, gs_model_dir)
    embed_model, hubert_model = load_model()
    cs_sim = cos_similar()
    return embed_model, hubert_model, cs_sim


def download_volume_adjustment():
    """
    Download the volume adjustment tool
    :return:
    """
    volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
    if not os.path.exists(volume_bin_path):
        if not download2disk(gs_volume_bin_url, volume_bin_path):
            svc_offline_logger.fatal(f"download volume_bin err={gs_volume_bin_url}")
    os.system(f"chmod +x {volume_bin_path}")


def volume_adjustment(wav_path, target_loudness, out_path):
    """
    Volume adjustment
    :param wav_path:
    :param target_loudness:
    :param out_path:
    :return:
    """
    volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
    cmd = f"{volume_bin_path} {wav_path} {target_loudness} {out_path}"
    os.system(cmd)


class SVCOnline:
    def __init__(self):
        st = time.time()
        self.gender_model = init_gender_model()
        self.embed_model, self.hubert_model, self.cs_sim = init_svc_model()
        download_volume_adjustment()
        download_volume_balanced()
        svc_offline_logger.info(f"svc init finished, sp = {time.time() - st}")

    def gender_process(self, worker_attr):
        st = time.time()
        gender, female_rate, is_pure = self.gender_model.process(worker_attr.vocal_path)
        svc_offline_logger.info(
            f"{worker_attr.vocal_url}, gender={gender}, female_rate={female_rate}, is_pure={is_pure}, "
            f"gender_process sp = {time.time() - st}")
        if gender == 0:
            gender = 'female'
        elif gender == 1:
            gender = 'male'
+        elif female_rate == None:
+            gender = 'male'
+            return gender, gs_err_code_gender_classify
        elif female_rate > 0.5:
            gender = 'female'
        else:
            gender = 'male'
+        svc_offline_logger.info(f"{worker_attr.vocal_url}, modified gender={gender}")
        # err = gs_err_code_success
        # if female_rate == -1:
        #     err = gs_err_code_target_silence
        return gender, gs_err_code_success

    def process(self, worker_attr):
        gender, err = self.gender_process(worker_attr)
        if err != gs_err_code_success:
            return gender, err
        song_path = worker_attr.female_svc_source_path
        if gender == "male":
            song_path = worker_attr.male_svc_source_path

        params = {'gender': gender, 'tst': worker_attr.st_tm, "tnd": worker_attr.ed_tm, 'delay': 0, 'song_path': None}
        st = time.time()
        err_code = process_svc_online(song_path, worker_attr.vocal_path, worker_attr.target_wav_path,
                                      self.embed_model, self.hubert_model, self.cs_sim, params)
        svc_offline_logger.info(f"{worker_attr.vocal_url}, err_code={err_code} process svc sp = {time.time() - st}")
        return gender, err_code
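The `female_rate == None` branch above is the behavioral core of this change: when the classifier returns neither a clear 0/1 label nor a usable `female_rate`, the worker now defaults to 'male' and reports the new `gs_err_code_gender_classify` instead of proceeding. A minimal, self-contained distillation of the resulting decision logic (`resolve_gender` is a hypothetical name; the constants mirror docker_demo/common.py):

```python
# Sketch of SVCOnline.gender_process's branching after this diff.
gs_err_code_success = 0
gs_err_code_gender_classify = 430

def resolve_gender(gender, female_rate):
    if gender == 0:
        return "female", gs_err_code_success
    if gender == 1:
        return "male", gs_err_code_success
    if female_rate is None:  # classifier produced no usable rate
        return "male", gs_err_code_gender_classify
    if female_rate > 0.5:
        return "female", gs_err_code_success
    return "male", gs_err_code_success

assert resolve_gender(2, None) == ("male", 430)
assert resolve_gender(2, 0.8) == ("female", 0)
```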
svc_offline_logger.info(f"{worker_attr.vocal_url}, err_code={err_code} process svc sp = {time.time() - st}") return gender, err_code diff --git a/AIMeiSheng/vc_infer_pipeline_org_embed_spk.py b/AIMeiSheng/vc_infer_pipeline_org_embed_spk.py index 076184f..f1e8f48 100644 --- a/AIMeiSheng/vc_infer_pipeline_org_embed_spk.py +++ b/AIMeiSheng/vc_infer_pipeline_org_embed_spk.py @@ -1,778 +1,781 @@ import numpy as np, parselmouth, torch, pdb, sys, os from time import time as ttime import torch.nn.functional as F import scipy.signal as signal import pyworld, os, traceback, faiss, librosa, torchcrepe from scipy import signal from functools import lru_cache now_dir = os.getcwd() sys.path.append(now_dir) bh, ah = signal.butter(N=5, Wn=48, btype="high", fs=16000) input_audio_path2wav = {} fidx = 0 import threading import concurrent.futures @lru_cache def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period): audio = input_audio_path2wav[input_audio_path] f0, t = pyworld.harvest( audio, fs=fs, f0_ceil=f0max, f0_floor=f0min, frame_period=frame_period, ) f0 = pyworld.stonemask(audio, f0, t, fs) return f0 def change_rms(data1, sr1, data2, sr2, rate): # 1是输入音频,2是输出音频,rate是2的占比 # print(data1.max(),data2.max()) rms1 = librosa.feature.rms( y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2 ) # 每半秒一个点 rms2 = librosa.feature.rms(y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2) rms1 = torch.from_numpy(rms1) rms1 = F.interpolate( rms1.unsqueeze(0), size=data2.shape[0], mode="linear" ).squeeze() rms2 = torch.from_numpy(rms2) rms2 = F.interpolate( rms2.unsqueeze(0), size=data2.shape[0], mode="linear" ).squeeze() rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-6) data2 *= ( torch.pow(rms1, torch.tensor(1 - rate)) * torch.pow(rms2, torch.tensor(rate - 1)) ).numpy() return data2 class VC(object): def __init__(self, tgt_sr, config): self.x_pad, self.x_query, self.x_center, self.x_max, self.is_half = ( config.x_pad, ##config会根据设备配置不通知如:3 config.x_query, # 10 等于x_max-x_center)*2 config.x_center, #60 config.x_max, #65 config.is_half, ) self.sr = 16000 # hubert输入采样率 self.window = 160 # 每帧点数 self.t_pad = self.sr * self.x_pad # 每条前后pad时间 self.t_pad_tgt = tgt_sr * self.x_pad self.t_pad2 = self.t_pad * 2 self.t_query = self.sr * self.x_query # 查询切点前后查询时间, self.t_center = self.sr * self.x_center # 查询切点位置 self.t_max = self.sr * self.x_max # 免查询时长阈值 self.device = config.device def get_f0( self, input_audio_path, x, p_len, f0_up_key, f0_method, filter_radius, inp_f0=None, ): global input_audio_path2wav time_step = self.window / self.sr * 1000 f0_min = 50 f0_max = 1100 f0_mel_min = 1127 * np.log(1 + f0_min / 700) f0_mel_max = 1127 * np.log(1 + f0_max / 700) if f0_method == "pm": f0 = ( parselmouth.Sound(x, self.sr) .to_pitch_ac( time_step=time_step / 1000, voicing_threshold=0.6, pitch_floor=f0_min, pitch_ceiling=f0_max, ) .selected_array["frequency"] ) pad_size = (p_len - len(f0) + 1) // 2 if pad_size > 0 or p_len - len(f0) - pad_size > 0: f0 = np.pad( f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant" ) elif f0_method == "harvest": input_audio_path2wav[input_audio_path] = x.astype(np.double) f0 = cache_harvest_f0(input_audio_path, self.sr, f0_max, f0_min, 10) if filter_radius > 2: f0 = signal.medfilt(f0, 3) elif f0_method == "crepe": model = "full" # Pick a batch size that doesn't cause memory errors on your gpu batch_size = 512 # Compute pitch using first gpu audio = torch.tensor(np.copy(x))[None].float() f0, pd = torchcrepe.predict( audio, self.sr, self.window, f0_min, f0_max, model, 
                batch_size=batch_size,
                device=self.device,
                return_periodicity=True,
            )
            pd = torchcrepe.filter.median(pd, 3)
            f0 = torchcrepe.filter.mean(f0, 3)
            f0[pd < 0.1] = 0
            f0 = f0[0].cpu().numpy()
        elif f0_method == "rmvpe":
            if hasattr(self, "model_rmvpe") == False:
                from lib.rmvpe import RMVPE

                print("loading rmvpe model")
                self.model_rmvpe = RMVPE(
                    "rmvpe.pt", is_half=self.is_half, device=self.device
                )
            f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
        else:  # for meisheng
            self.model_rmvpe = f0_method
            f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)

        # pitch modification starts here fang
        valid_f0 = f0[f0 > 50]
        mean_pitch_cur = np.mean(valid_f0[:min(len(valid_f0), 500)])
-        #print("@@f0_up_key:",f0_up_key)
        deta = 0
        if(f0_up_key > 50 ):
            deta = -mean_pitch_cur + f0_up_key
            #print("$$$$$$$$$fangxxxxx pitch shift: ",deta)
-        f0_up_key = int(np.log2(deta/(mean_pitch_cur + 1) + 1) * 12)  ## method 2 fang
-        if( abs(f0_up_key) <= 8 ):
+        f0_up_key = np.log2(deta/(mean_pitch_cur + 1) + 1) * 12
+        if np.isnan(f0_up_key):
            f0_up_key = 0
-        elif f0_up_key > 8:
+        f0_up_key = int(f0_up_key)
+        #f0_up_key = int(np.log2(deta/(mean_pitch_cur + 1) + 1) * 12)  ## method 2 fang
+        if( f0_up_key >= 12 ):
            f0_up_key = 12
-        elif f0_up_key < -8:
+        elif f0_up_key < -12:
            f0_up_key = -12
+        else:
+            f0_up_key = 0
        #if( abs(f0_up_key) < 3 ):
        #    f0_up_key = 0
-        f0_up_key = max(min(12,f0_up_key),-12)
+        # f0_up_key = max(min(12,f0_up_key),-12)
        #print("f0_up_key: ",f0_up_key)

        f0 *= pow(2, f0_up_key / 12)  # this applies the pitch change fang; I set it to 0
        # with open("test.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
        tf0 = self.sr // self.window  # f0 points per second
        if inp_f0 is not None:
            delta_t = np.round(
                (inp_f0[:, 0].max() - inp_f0[:, 0].min()) * tf0 + 1
            ).astype("int16")
            replace_f0 = np.interp(
                list(range(delta_t)), inp_f0[:, 0] * 100, inp_f0[:, 1]
            )
            shape = f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)].shape[0]
            f0[self.x_pad * tf0 : self.x_pad * tf0 + len(replace_f0)] = replace_f0[
                :shape
            ]
        # with open("test_opt.txt","w")as f:f.write("\n".join([str(i)for i in f0.tolist()]))
        f0bak = f0.copy()
        f0_mel = 1127 * np.log(1 + f0 / 700)
        f0_mel[f0_mel > 0] = (f0_mel[f0_mel > 0] - f0_mel_min) * 254 / (
            f0_mel_max - f0_mel_min
        ) + 1
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > 255] = 255
        f0_coarse = np.rint(f0_mel).astype(int)

        return f0_coarse, f0bak  # 1-0

    def vc(
        self,
        model,
        net_g,
        sid,
        audio0,
        pitch,
        pitchf,
        times,
        index,
        big_npy,
        index_rate,
        version,
        protect,
    ):  # ,file_index,file_big_npy
        feats = torch.from_numpy(audio0)
        if self.is_half:
            feats = feats.half()
        else:
            feats = feats.float()
        if feats.dim() == 2:  # double channels
            feats = feats.mean(-1)
        assert feats.dim() == 1, feats.dim()
        feats = feats.view(1, -1)
        padding_mask = torch.BoolTensor(feats.shape).to(self.device).fill_(False)
        #print("@@@feats: ",feats.shape)
        #print("@@@padding_mask: ",padding_mask.shape)
        inputs = {
            "source": feats.to(self.device),
            "padding_mask": padding_mask,
            "output_layer": 9 if version == "v1" else 12,
            #"output_layer": 6 if version == "v1" else 12,
        }
        t0 = ttime()
        #'''
        with torch.no_grad():
            logits = model.extract_features(**inputs)
            feats = model.final_proj(logits[0]) if version == "v1" else logits[0]  # why does v1 need this projection, a dimension issue??? fang
        #'''
        #print("@@@feats: ",feats.shape)
        '''
        global fidx
        feats_name = f"./feats_{fidx}.pt"
        fidx += 1
        torch.save(feats, feats_name)
        feats = torch.load(feats_name)
        #'''
        if protect < 0.5 and pitch != None and pitchf != None:
            feats0 = feats.clone()
        if (
            isinstance(index, type(None)) == False
            and isinstance(big_npy, type(None)) == False
            and index_rate != 0
        ):
            npy = feats[0].cpu().numpy()
            if self.is_half:
                npy = npy.astype("float32")

            # _, I = index.search(npy, 1)
            # npy = big_npy[I.squeeze()]

            score, ix = index.search(npy, k=8)
            weight = np.square(1 / score)
            weight /= weight.sum(axis=1, keepdims=True)
            npy = np.sum(big_npy[ix] * np.expand_dims(weight, axis=2), axis=1)

            if self.is_half:
                npy = npy.astype("float16")
            feats = (
                torch.from_numpy(npy).unsqueeze(0).to(self.device) * index_rate
                + (1 - index_rate) * feats
            )  # combine the index features with the actual audio features as the input fang

        #print("@@@feats: ",feats.shape)
        feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
        if protect < 0.5 and pitch != None and pitchf != None:
            feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(
                0, 2, 1
            )  # dim 1 of feats0 is doubled by interpolation fang
        t1 = ttime()
        p_len = audio0.shape[0] // self.window  # frame-wise pitch fang
        if feats.shape[1] < p_len:
            p_len = feats.shape[1]
            if pitch != None and pitchf != None:
                pitch = pitch[:, :p_len]
                pitchf = pitchf[:, :p_len]

        if protect < 0.5 and pitch != None and pitchf != None:
            pitchff = pitchf.clone()
            pitchff[pitchf > 0] = 1
            pitchff[pitchf < 1] = protect
            pitchff = pitchff.unsqueeze(-1)
            feats = feats * pitchff + feats0 * (1 - pitchff)
            feats = feats.to(feats0.dtype)
        p_len = torch.tensor([p_len], device=self.device).long()
        #print("###feats:",feats.shape,"pitch:",pitch.shape,"p_len:",p_len)
        with torch.no_grad():
            if pitch != None and pitchf != None:
                audio1 = (
                    (net_g.infer(feats, p_len, pitch, pitchf, sid)[0][0, 0])
                    .data.cpu()
                    .float()
                    .numpy()
                )
            else:
                audio1 = (
                    (net_g.infer(feats, p_len, sid)[0][0, 0]).data.cpu().float().numpy()
                )
        del feats, p_len, padding_mask
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        t2 = ttime()
        times[0] += t1 - t0
        times[2] += t2 - t1
        return audio1

    def pipeline(
        self,
        model,
        net_g,
        sid,
        audio,  # input wav
        input_audio_path,  # input wav name
        times,
        f0_up_key,
        f0_method,  # f0 method
        file_index,  # index path
        # file_big_npy,
        index_rate,
        if_f0,
        filter_radius,
        tgt_sr,
        resample_sr,
        rms_mix_rate,
        version,
        protect,
        f0_file=None,
    ):
        if (
            file_index != ""  # the .index file is not empty fang
            # and file_big_npy != ""
            # and os.path.exists(file_big_npy) == True
            and os.path.exists(file_index) == True
            and index_rate != 0
        ):
            try:
                index = faiss.read_index(file_index)
                # big_npy = np.load(file_big_npy)
                big_npy = index.reconstruct_n(0, index.ntotal)
            except:
                traceback.print_exc()
                index = big_npy = None
        else:
            index = big_npy = None
        #print("####audio 1:",audio.shape)
        audio = signal.filtfilt(bh, ah, audio)
        #print("####audio 2:",audio.shape)
        audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
        opt_ts = []
        #print("###t_max:",self.t_max)
        #print("###window:",self.window,"self.t_query:",self.t_query,"self.t_pad2:",self.t_pad2)
        if audio_pad.shape[0] > self.t_max:
            audio_sum = np.zeros_like(audio)
            for i in range(self.window):
                audio_sum += audio_pad[i : i - self.window]  # a rolling sum: each idx holds the sum over the previous frame fang
            for t in range(self.t_center, audio.shape[0], self.t_center):  # one frame per minute?? fang
                opt_ts.append(
                    t
                    - self.t_query
                    + np.where(
                        np.abs(audio_sum[t - self.t_query : t + self.t_query])
                        == np.abs(audio_sum[t - self.t_query : t + self.t_query]).min()
                    )[0][0]
                )  # store the index of the minimum within [t - self.t_query, t + self.t_query] fang
        s = 0
        audio_opt = []
        t = None
        t1 = ttime()
        audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
        p_len = audio_pad.shape[0] // self.window
        inp_f0 = None
        if hasattr(f0_file, "name") == True:
            try:
                with open(f0_file.name, "r") as f:
                    lines = f.read().strip("\n").split("\n")
                inp_f0 = []
                for line in lines:
                    inp_f0.append([float(i) for i in line.split(",")])
                inp_f0 = np.array(inp_f0, dtype="float32")
            except:
                traceback.print_exc()
        #sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
        sid_embed = np.load(sid)
        sid = torch.FloatTensor(sid_embed).to(self.device).half()
        pitch, pitchf = None, None
        if if_f0 == 1:
            pitch, pitchf = self.get_f0(
                input_audio_path,
                audio_pad,
                p_len,
                f0_up_key,
                f0_method,
                filter_radius,
                inp_f0,
            )
            pitch = pitch[:p_len]
            pitchf = pitchf[:p_len]
            if self.device == "mps":
                pitchf = pitchf.astype(np.float32)
            pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
            pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
        #print("&&&&pitch: ",pitchf)
        t2 = ttime()
        times[1] += t2 - t1
        #print("####len(audio_pad):",len(audio_pad))
        #print("###pitch:", pitch.shape)
        for t in opt_ts:  # run inference segment by segment; a segment is about 60 s here fang
            t = t // self.window * self.window
            if if_f0 == 1:
                audio_opt.append(
                    self.vc(
                        model,
                        net_g,
                        sid,
                        audio_pad[s : t + self.t_pad2 + self.window],
                        pitch[:, s // self.window : (t + self.t_pad2) // self.window],
                        pitchf[:, s // self.window : (t + self.t_pad2) // self.window],
                        times,
                        index,
                        big_npy,
                        index_rate,
                        version,
                        protect,
                    )[self.t_pad_tgt : -self.t_pad_tgt]
                )
            else:
                audio_opt.append(
                    self.vc(
                        model,
                        net_g,
                        sid,
                        audio_pad[s : t + self.t_pad2 + self.window],
                        None,
                        None,
                        times,
                        index,
                        big_npy,
                        index_rate,
                        version,
                        protect,
                    )[self.t_pad_tgt : -self.t_pad_tgt]
                )
            s = t
        if if_f0 == 1:  # what follows handles the last segment fang
            audio_opt.append(
                self.vc(
                    model,
                    net_g,
                    sid,
                    audio_pad[t:],
                    pitch[:, t // self.window :] if t is not None else pitch,
                    pitchf[:, t // self.window :] if t is not None else pitchf,
                    times,
                    index,
                    big_npy,
                    index_rate,
                    version,
                    protect,
                )[self.t_pad_tgt : -self.t_pad_tgt]
            )
        else:
            audio_opt.append(
                self.vc(
                    model,
                    net_g,
                    sid,
                    audio_pad[t:],
                    None,
                    None,
                    times,
                    index,
                    big_npy,
                    index_rate,
                    version,
                    protect,
                )[self.t_pad_tgt : -self.t_pad_tgt]
            )
        audio_opt = np.concatenate(audio_opt)
        if rms_mix_rate != 1:
            audio_opt = change_rms(audio, 16000, audio_opt, tgt_sr, rms_mix_rate)
        if resample_sr >= 16000 and tgt_sr != resample_sr:
            audio_opt = librosa.resample(
                audio_opt, orig_sr=tgt_sr, target_sr=resample_sr
            )
        audio_max = np.abs(audio_opt).max() / 0.99
        max_int16 = 32768
        if audio_max > 1:
            max_int16 /= audio_max
        audio_opt = (audio_opt * max_int16).astype(np.int16)
        del pitch, pitchf, sid
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        return audio_opt

    def infer_core_fang(self, para1, para2, para3, idx,
                        model, net_g, sid, times, index, big_npy, index_rate, version, protect):
        return [
            self.vc(
                model,
                net_g,
                sid,
                para1,
                para2,
                para3,
                # audio_pad[s: t + self.t_pad2 + self.window],
                # pitch[:, s // self.window: (t + self.t_pad2) // self.window],
                # pitchf[:, s // self.window: (t + self.t_pad2) // self.window],
                times,
                index,
                big_npy,
                index_rate,
                version,
                protect,
            )[self.t_pad_tgt: -self.t_pad_tgt], idx]

    def ThreadPool_process_core(self, func_process, params1, params2, params3,
                                model,
                                net_g,
                                sid,
                                # audio_pad[s: t + self.t_pad2 + self.window],
                                # pitch[:, s // self.window: (t + self.t_pad2) // self.window],
                                # pitchf[:, s // self.window: (t + self.t_pad2) // self.window],
                                times,
                                index,
                                big_npy,
                                index_rate,
                                version,
                                protect
                                ):
        num_threads = 2
        futures = []
        sort_ret = {}
        with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
            for idx in range(len(params1)):
                para1 = params1[idx]
                para2 = params2[idx]
                para3 = params3[idx]
                ret = executor.submit(self.infer_core_fang, para1, para2, para3, idx,
                                      model, net_g, sid, times, index, big_npy,
                                      index_rate, version, protect)
                futures.append(ret)

            cnt = 0
            for future in concurrent.futures.as_completed(futures):
                cnt += 1
                #print(f"process finished {cnt}, and index :{future.result()[1]}")
                #print(future.result())  # result
                # print(future.result()[1])  # index
                sort_ret[str(future.result()[1])] = future.result()[0]

        fea_list = []
        for idx in range(len(sort_ret)):
            fea_list.append(sort_ret[str(idx)])

        return fea_list

    def pipeline_mulprocess(
        self,
        model,
        net_g,
        sid,
        audio,  # input wav
        input_audio_path,  # input wav name
        times,
        f0_up_key,
        f0_method,  # f0 method
        file_index,  # index path
        # file_big_npy,
        index_rate,
        if_f0,
        filter_radius,
        tgt_sr,
        resample_sr,
        rms_mix_rate,
        version,
        protect,
        f0_file=None,
    ):
        if (
            file_index != ""  # the .index file is not empty fang
            # and file_big_npy != ""
            # and os.path.exists(file_big_npy) == True
            and os.path.exists(file_index) == True
            and index_rate != 0
        ):
            try:
                index = faiss.read_index(file_index)
                # big_npy = np.load(file_big_npy)
                big_npy = index.reconstruct_n(0, index.ntotal)
            except:
                traceback.print_exc()
                index = big_npy = None
        else:
            index = big_npy = None

        audio = signal.filtfilt(bh, ah, audio)
        audio_pad = np.pad(audio, (self.window // 2, self.window // 2), mode="reflect")
        opt_ts = []
        if audio_pad.shape[0] > self.t_max:
            audio_sum = np.zeros_like(audio)
            for i in range(self.window):
                audio_sum += audio_pad[i: i - self.window]  # a rolling sum: each idx holds the sum over the previous frame fang
            for t in range(self.t_center, audio.shape[0], self.t_center):  # one frame per minute?? fang
                opt_ts.append(
                    t
                    - self.t_query
                    + np.where(
                        np.abs(audio_sum[t - self.t_query: t + self.t_query])
                        == np.abs(audio_sum[t - self.t_query: t + self.t_query]).min()
                    )[0][0]
                )  # store the index of the minimum within [t - self.t_query, t + self.t_query] fang

        s = 0
        t = None
        t1 = ttime()
        audio_pad = np.pad(audio, (self.t_pad, self.t_pad), mode="reflect")
        p_len = audio_pad.shape[0] // self.window
        inp_f0 = None
        if hasattr(f0_file, "name") == True:
            try:
                with open(f0_file.name, "r") as f:
                    lines = f.read().strip("\n").split("\n")
                inp_f0 = []
                for line in lines:
                    inp_f0.append([float(i) for i in line.split(",")])
                inp_f0 = np.array(inp_f0, dtype="float32")
            except:
                traceback.print_exc()
        # sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
        sid_embed = np.load(sid)
        embed_npy_spk = sid[:-4] + '_spk.npy'
        sid_spk_embed = np.load(embed_npy_spk)
        print("555555sid_embed:", np.shape(sid_embed), 'type:', type(sid_embed))
        print('sid_spk_embed:', np.shape(sid_spk_embed), 'type:', type(sid_spk_embed))
        sid_embed = np.concatenate((sid_embed, sid_spk_embed), axis=0)
        print('sid_embed:', np.shape(sid_embed), 'type:', type(sid_embed))
        sid = torch.FloatTensor(sid_embed).to(self.device).half()
        #sid_embed = np.load(sid)
        #sid = torch.FloatTensor(sid_embed).to(self.device).half()
        print('sid:', sid.shape)

        pitch, pitchf = None, None
        #'''
        if if_f0 == 1:
            pitch, pitchf = self.get_f0(
                input_audio_path,
                audio_pad,
                p_len,
                f0_up_key,
                f0_method,
                filter_radius,
                inp_f0,
            )
            pitch = pitch[:p_len]
            pitchf = pitchf[:p_len]
            if self.device == "mps":
                pitchf = pitchf.astype(np.float32)
            pitch = torch.tensor(pitch, device=self.device).unsqueeze(0).long()
            pitchf = torch.tensor(pitchf, device=self.device).unsqueeze(0).float()
        #'''
        '''
        pitch_name = "./pitch_pitchf.npz"
        #np.savez(pitch_name, pitch = pitch.detach().cpu().numpy(), pitchf = pitchf.detach().cpu().numpy())
        npz_obj = np.load(pitch_name)  # file extension is npz
        pitch, pitchf = npz_obj['pitch'], npz_obj['pitchf']
        pitch = torch.tensor(pitch, device=self.device).long()
        pitchf = torch.tensor(pitchf, device=self.device).float()
        #'''
        t2 = ttime()
        times[1] += t2 - t1

        audio_opt = []
        audio_pad_list = []
        pitch_list = []
        pitchf_list = []
        for t in opt_ts:  # run inference segment by segment; a segment is about 60 s here fang
            t = t // self.window * self.window
            audio_pad_list.append(audio_pad[s: t + self.t_pad2 + self.window])
            pitch_list.append(pitch[:, s // self.window: (t + self.t_pad2) // self.window])
            pitchf_list.append(pitchf[:, s // self.window: (t + self.t_pad2) // self.window])
            s = t

        audio_pad_list.append(audio_pad[t:])
        pitch_list.append(pitch[:, t // self.window:] if t is not None else pitch)
        pitchf_list.append(pitchf[:, t // self.window:] if t is not None else pitchf)

        audio_opt = self.ThreadPool_process_core(self.infer_core_fang, audio_pad_list, pitch_list, pitchf_list,
                                                 model,
                                                 net_g,
                                                 sid,
                                                 times,
                                                 index,
                                                 big_npy,
                                                 index_rate,
                                                 version,
                                                 protect
                                                 )
        '''
        if if_f0 == 1:  # what follows handles the last segment fang
            audio_opt.append(
                self.vc(
                    model,
                    net_g,
                    sid,
                    audio_pad[t:],
                    pitch[:, t // self.window:] if t is not None else pitch,
                    pitchf[:, t // self.window:] if t is not None else pitchf,
                    times,
                    index,
                    big_npy,
                    index_rate,
                    version,
                    protect,
                )[self.t_pad_tgt: -self.t_pad_tgt]
            )
        else:
            audio_opt.append(
                self.vc(
                    model,
                    net_g,
                    sid,
                    audio_pad[t:],
                    None,
                    None,
                    times,
                    index,
                    big_npy,
                    index_rate,
                    version,
                    protect,
                )[self.t_pad_tgt: -self.t_pad_tgt]
            )
        #'''
        audio_opt = np.concatenate(audio_opt)
        if rms_mix_rate != 1:
            audio_opt = change_rms(audio, 16000, audio_opt, tgt_sr, rms_mix_rate)
        if resample_sr >= 16000 and tgt_sr != resample_sr:
            audio_opt = librosa.resample(
                audio_opt, orig_sr=tgt_sr, target_sr=resample_sr
            )
        audio_max = np.abs(audio_opt).max() / 0.99
        max_int16 = 32768
        if audio_max > 1:
            max_int16 /= audio_max
        audio_opt = (audio_opt * max_int16).astype(np.int16)
        del pitch, pitchf, sid
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        return audio_opt
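The other substantive change in this diff is the pitch-shift policy in `get_f0`: the semitone offset is now computed as a float first, guarded against NaN (silent input yields an undefined mean pitch), and then either clamped to a full octave (+/-12 semitones) or dropped to 0; the old +/-8 thresholds and the trailing `max(min(...))` clamp are gone. A hedged sketch of just that arithmetic (`semitone_shift` is an illustrative name; the values below are examples):

```python
import numpy as np

def semitone_shift(mean_pitch_cur, f0_up_key):
    # Mirrors the post-diff logic in VC.get_f0: f0_up_key above 50 is treated
    # as a target pitch in Hz; NaN guards to 0; only offsets reaching a full
    # octave survive (clamped to +/-12), and smaller offsets are zeroed out.
    deta = 0
    if f0_up_key > 50:
        deta = -mean_pitch_cur + f0_up_key
    shift = np.log2(deta / (mean_pitch_cur + 1) + 1) * 12
    if np.isnan(shift):
        return 0
    shift = int(shift)
    if shift >= 12:
        return 12
    elif shift < -12:
        return -12
    return 0

# e.g. mean pitch 220 Hz, target 230 Hz: well under an octave, so no shift
assert semitone_shift(220.0, 230.0) == 0
# a target two octaves up is clamped to a single octave
assert semitone_shift(110.0, 440.0) == 12
```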