diff --git a/AutoCoverTool/online/tone_shift_one.py b/AutoCoverTool/online/tone_shift_one.py
index e395c8d..9a422f8 100644
--- a/AutoCoverTool/online/tone_shift_one.py
+++ b/AutoCoverTool/online/tone_shift_one.py
@@ -1,328 +1,338 @@
 """
 Processing by pitch shift:
 1. Download
 2. Separate vocals and accompaniment
 3. Pitch-shift the vocals by +2/-2; the accompaniment keeps its original pitch
 4. Remix
 """
 import os
 import json
 import shutil
 import librosa
 import logging
 import numpy as np
 from ref.music_remover.separate_interface import SeparateInterface
 from online.inference_worker import upload_file2cos, gs_state_use, gs_state_finish, gs_state_default
 from online.common import *
+from ref.online.voice_class_online import VoiceClass
 
 logging.basicConfig(filename='/tmp/tone_shift_one.log', level=logging.INFO)
 
 gs_tone_shift_exe = "/opt/soft/bin/tone_shift_exe"
 gs_simple_mixer_path = "/opt/soft/bin/simple_mixer"
 gs_err_code_success = 0
 gs_err_code_tone_shift = 1
 gs_err_code_mix = 2
 gs_err_code_transcode = 3
 gs_err_code_upload = 4
 gs_err_code_download = 5
 gs_err_code_trans_to_mp3 = 6
 gs_err_code_separate = 7
 gs_err_code_duration_too_long = 8
 gs_err_code_duration_no_vocal = 9
 gs_err_code_duration_err = 10
 gs_err_code_transcode_acc = 11
 gs_err_code_upload_acc = 12
 gs_err_code_download_acc = 13
 gs_err_code_download_vocal = 14
 gs_err_code_transcode_acc_v1 = 15
 gs_err_code_transcode_vocal_v1 = 16
 gs_err_code_silence_no_data = 17
 gs_err_code_silence_no_process = 18
 
 
 def exec_cmd(cmd):
     r = os.popen(cmd)
     text = r.read()
     r.close()
     return text
 
 
 def get_d(audio_path):
     cmd = "ffprobe -v quiet -print_format json -show_format -show_streams {}".format(audio_path)
     data = exec_cmd(cmd)
     data = json.loads(data)
     # returns the duration in seconds
     if 'format' in data.keys() and 'duration' in data['format']:
         return float(data["format"]["duration"])
     return -1
 
 
 def get_mean_power(audio_path):
     sr = 44100
     audio, sr = librosa.load(audio_path, sr=sr, mono=True)
     mm = np.mean(np.abs(audio))
     return mm
 
 
 class ToneShift:
     def __init__(self):
         self.separate_inst = SeparateInterface()
+        model_path = "./models"
+        music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth")
+        music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth")
+        gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth")
+        gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth")
+
+        self.voice_class = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model,
+                                      gender_no_pure_model)
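+        # NOTE: these four .pth files are the models listed in ref/online/readme.md;
+        # "./models" is resolved against the worker's current working directory,
+        # so the process must be started from a directory that contains them.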
 
     def update_state(self, song_id, state):
         sql = "update svc_queue_table set state={},update_time={} where song_id = {}". \
             format(state, int(time.time()), song_id)
         banned_user_map['db'] = "av_db"
         update_db(sql, banned_user_map)
 
     def get_url_by_id(self, song_id):
         sql = "select song_id, url from svc_queue_table where song_id={}".format(song_id)
         banned_user_map["db"] = "av_db"
         data = get_data_by_mysql(sql)
         if len(data) == 0:
             return None, None
         return str(data[0][0]), data[0][1]
 
     def get_one_data_logic(self):
         """
         Fetch by song_src priority: 5, then 4, then 3.
         :return:
         """
         song_src_arr = [5, 4, 3]
         for song_src in song_src_arr:
             song_id, song_url = self.get_one_data(song_src=song_src)
             if song_id is not None:
                 return song_id, song_url
         return None, None
 
     def get_one_data(self, song_src=3):
         sql = "select song_id, url from svc_queue_table where state = 0 and song_src={} order by create_time asc limit 1".format(
             song_src)
         banned_user_map["db"] = "av_db"
         data = get_data_by_mysql(sql, banned_user_map)
         if len(data) == 0:
             return None, None
         song_id, song_url = data[0]
         if song_id != "":
             self.update_state(song_id, gs_state_use)
         return str(song_id), song_url
 
     def pre_process(self, work_dir, song_url):
         """
         Create the working directory and download the source data.
         :return:
         """
         if "?sign=" in song_url:
             return gs_err_code_download
         ext = str(song_url).split(".")[-1]
         dst_file = "{}/src_origin.{}".format(work_dir, ext)
         cmd = "wget {} -O {}".format(song_url, dst_file)
         os.system(cmd)
         if not os.path.exists(dst_file):
             return gs_err_code_download
         duration = get_d(dst_file)
         if duration < 0:
             return gs_err_code_duration_err
         print("Duration:", dst_file, duration)
         if duration > 20 * 60:
             return gs_err_code_duration_too_long
         dst_mp3_file = "{}/src.wav".format(work_dir)
         cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} ".format(dst_file, dst_mp3_file)
         os.system(cmd)
         if not os.path.exists(dst_mp3_file):
             return gs_err_code_trans_to_mp3
         return gs_err_code_success
 
     def tone_shift_one(self, in_file, dst_file, pitch):
         cmd = "{} {} {} {}".format(gs_tone_shift_exe, in_file, dst_file, pitch)
         os.system(cmd)
         return os.path.exists(dst_file)
 
     def mix(self, cid, vocal_path, acc_path, tp):
         if tp == 1:
             vocal_pitch = 2
             acc_pitch = 0
         else:
             vocal_pitch = -2
             acc_pitch = 0
         vocal_path_2 = vocal_path.replace(".wav", "_{}.wav".format(vocal_pitch))
         acc_path_2 = acc_path.replace(".wav", "_{}.wav".format(acc_pitch))
         err = self.tone_shift_one(vocal_path, vocal_path_2, vocal_pitch)
         if not err:
-            return gs_err_code_tone_shift, None
+            return gs_err_code_tone_shift, None, None
+        gender, female_rate = self.voice_class.process_one(vocal_path_2)
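+        # gender: 0 = female, 1 = male, 2 = other (constants in voice_class_online.py);
+        # note that classification runs on the already pitch-shifted vocal track.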
         err = self.tone_shift_one(acc_path, acc_path_2, acc_pitch)
         if not err:
-            return gs_err_code_tone_shift, None
+            return gs_err_code_tone_shift, None, None
         base_dir = os.path.dirname(vocal_path)
         mix_path = "{}/mix_{}_{}.wav".format(base_dir, vocal_pitch, acc_pitch)
         cmd = "{} {} {} {}".format(gs_simple_mixer_path, vocal_path_2, acc_path_2, mix_path)
         print("exec_cmd={}".format(cmd))
         os.system(cmd)
         if not os.path.exists(mix_path):
-            return gs_err_code_mix, None
+            return gs_err_code_mix, None, None
 
         # transcode
         mix_path_mp3 = mix_path.replace(".wav", ".mp4")
         cmd = "ffmpeg -i {} -b:a 128k -c:a aac -ar 44100 -ac 2 -y {} -loglevel fatal".format(mix_path, mix_path_mp3)
         os.system(cmd)
         if not os.path.exists(mix_path_mp3):
-            return gs_err_code_transcode, None
+            return gs_err_code_transcode, None, None
 
         # upload to COS
         mix_name = os.path.basename(mix_path_mp3)
         key = "av_res/svc_res_tone_shift/{}/{}".format(str(cid), mix_name)
         if not upload_file2cos(key, mix_path_mp3):
-            return gs_err_code_upload, None
-        return gs_err_code_success, key
+            return gs_err_code_upload, None, None
+        return gs_err_code_success, key, gender
 
     def upload_acc(self, cid, acc_path):
         # transcode
         mix_path_aac = acc_path.replace(".wav", ".m4a")
         cmd = "ffmpeg -i {} -b:a 128k -c:a aac -ar 44100 -ac 2 -y {} -loglevel fatal".format(acc_path, mix_path_aac)
         os.system(cmd)
         if not os.path.exists(mix_path_aac):
             return gs_err_code_transcode_acc, None
         # upload
         mix_name = os.path.basename(mix_path_aac)
         key = "av_res/svc_res_tone_shift/{}/{}".format(str(cid), mix_name)
         if not upload_file2cos(key, mix_path_aac):
             return gs_err_code_upload_acc, None
         return gs_err_code_success, key
 
     def process_one(self, cid, work_dir):
         """
         :param cid:
         :param work_dir:
         :return:
         """
         src_mp3 = os.path.join(work_dir, "src.wav")
         vocal_path = os.path.join(work_dir, "vocal.wav")
         acc_path = os.path.join(work_dir, "acc.wav")
         if not (os.path.exists(vocal_path) and os.path.exists(acc_path)):
             if not self.separate_inst.process(cid, src_mp3, vocal_path, acc_path):
                 return gs_err_code_separate, []
             if not os.path.exists(vocal_path) or not os.path.exists(acc_path):
                 return gs_err_code_separate, []
 
         # When the mean energy of the vocal track is below a threshold, treat the song
         # as having no vocals (0.01 was the empirical bound from sample analysis; see
         # the sketch after this file).
         # Samples without vocals: [0.0056, 0.0003]; with vocals (current minimum): [0.046, 0.049].
         print("power:{},{}".format(cid, get_mean_power(vocal_path)))
         if get_mean_power(vocal_path) < 0.02:
             return gs_err_code_duration_no_vocal, []
 
-        err, type1_mix_mp3 = self.mix(cid, vocal_path, acc_path, 1)
+        err, type1_mix_mp3, gender = self.mix(cid, vocal_path, acc_path, 1)
         if err != gs_err_code_success:
             return err, []
-        err, type2_mix_mp3 = self.mix(cid, vocal_path, acc_path, 2)
+        err, type2_mix_mp3, gender2 = self.mix(cid, vocal_path, acc_path, 2)
         if err != gs_err_code_success:
             return err, []
 
         # upload the accompaniment file
         # err, acc_path_m4a = self.upload_acc(cid, acc_path)
         # if err != gs_err_code_success:
         #     return err, []
-        return gs_err_code_success, [type1_mix_mp3, type2_mix_mp3]
+        return gs_err_code_success, [type1_mix_mp3, type2_mix_mp3, str(gender), str(gender2)]
 
     def download_and_transcode(self, url, local_path, local_path_wav):
         cmd = "wget {} -O {}".format(url, local_path)
         os.system(cmd)
         if not os.path.exists(local_path):
             return -1
 
         cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {}".format(local_path, local_path_wav)
         os.system(cmd)
         if not os.path.exists(local_path_wav):
             return -2
         return 0
 
     def get_data_from_mysql(self, cid, work_dir):
         sql = "select starmaker_songid,task_url,complete_url,voice_url from starmaker_musicbook.silence where starmaker_songid={} order by task_id desc limit 1".format(
             cid)
         data = get_data_by_mysql(sql, banned_user_map)
         if len(data) == 0:
             return gs_err_code_silence_no_data
         song_id, task_url, complete_url, voice_url = data[0]
         if complete_url != "" and voice_url != "":
             """
             Download the vocal and accompaniment tracks.
             """
             ext = str(complete_url).split(".")[-1]
             acc_dst_file = os.path.join(work_dir, "acc.{}".format(ext))
             acc_wav_dst_file = os.path.join(work_dir, "acc.wav")
             err = self.download_and_transcode(complete_url, acc_dst_file, acc_wav_dst_file)
             os.unlink(acc_dst_file)
             if err == -1:
                 return gs_err_code_download_acc
             if err == -2:
                 return gs_err_code_transcode_acc_v1
 
             ext = str(voice_url).split(".")[-1]
             vocal_dst_file = os.path.join(work_dir, "vocal.{}".format(ext))
             vocal_wav_dst_file = os.path.join(work_dir, "vocal.wav")
             err = self.download_and_transcode(voice_url, vocal_dst_file, vocal_wav_dst_file)
             os.unlink(vocal_dst_file)
             if err == -1:
                 return gs_err_code_download_vocal
             if err == -2:
                 return gs_err_code_transcode_vocal_v1
             return gs_err_code_success
         return gs_err_code_silence_no_process
     def process_worker(self):
         logging.info("start process_worker .....")
         base_dir = "/tmp/tone_shift_one"
         if not os.path.exists(base_dir):
             os.makedirs(base_dir)
 
         while True:
             worker_st = time.time()
             cid, song_url = self.get_one_data_logic()
-            # cid, song_url = self.get_url_by_id('175210503076374799')
+            # cid, song_url = self.get_url_by_id('611752105030548048')
             if cid is None:
                 time.sleep(5)
                 logging.info("get one data is None ...")
                 continue
 
             work_dir = os.path.join(base_dir, str(cid))
             if os.path.exists(work_dir):
                 shutil.rmtree(work_dir)
             os.makedirs(work_dir)
 
             # First check whether the silence database already has finished results
             # for this song; if so, just download them directly.
             err = self.get_data_from_mysql(cid, work_dir)
             if err != gs_err_code_success:
                 # clear the disk
                 shutil.rmtree(work_dir)
                 os.makedirs(work_dir)
                 err = self.pre_process(work_dir, song_url)
                 if err != gs_err_code_success:
                     self.update_state(str(cid), -err)
                     continue
 
             st = time.time()
             err, data = self.process_one(str(cid), work_dir)
             logging.info("process_finish,{},{}".format(cid, time.time() - st))
             if err == gs_err_code_success and len(data) != 0:
                 sql = "update svc_queue_table set state={},update_time={},svc_url=\"{}\" where song_id = {}". \
                     format(gs_state_finish, int(time.time()), ",".join(data), str(cid))
                 banned_user_map['db'] = "av_db"
                 update_db(sql, banned_user_map)
             else:
                 self.update_state(str(cid), -err)
             shutil.rmtree(work_dir)
             logging.info("process_finish,{},{}".format(cid, time.time() - worker_st))
 
 
 if __name__ == '__main__':
     ts = ToneShift()
     ts.process_worker()
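The no-vocal gate in `process_one` deserves a standalone illustration: the separated vocal stem is judged silent purely by mean absolute amplitude. A minimal sketch of the same check, assuming a mono 44.1 kHz load exactly as in `get_mean_power` (`has_vocals` is a hypothetical helper, not part of the patch):

```python
import librosa
import numpy as np

NO_VOCAL_THRESHOLD = 0.02  # cut-off used by ToneShift.process_one


def has_vocals(vocal_wav_path):
    """Compare the mean absolute amplitude of the mono 44.1 kHz stem against
    the empirical threshold; observed values were ~0.0003-0.0056 for silent
    stems and >= 0.046 for stems that actually contain vocals."""
    audio, _ = librosa.load(vocal_wav_path, sr=44100, mono=True)
    return float(np.mean(np.abs(audio))) >= NO_VOCAL_THRESHOLD
```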
diff --git a/AutoCoverTool/ref/online/common.py b/AutoCoverTool/ref/online/common.py
new file mode 100644
index 0000000..af3487a
--- /dev/null
+++ b/AutoCoverTool/ref/online/common.py
@@ -0,0 +1,93 @@
+# -*- coding: utf-8 -*-
+"""
+CPU-core binding for worker processes.
+A script may be started several times; each instance binds to its own core(s),
+and two instances never bind to the same core.
+Each process either picks n idle cores automatically or binds to the core ids
+passed in by the caller.
+"""
+
+import time
+import psutil
+import os
+import sys
+import hashlib
+import fcntl
+
+"""
+Automatically pick idle cores.
+"""
+
+
+def exec_cmd_ints(cmd):
+    """
+    Run cmd and parse its stdout lines as integers.
+    :param cmd:
+    :return:
+    """
+    r = os.popen(cmd)
+    lines = r.readlines()
+    ids = []
+    for line in lines:
+        line = line.strip()
+        if line.isdigit():
+            id = int(float(line))
+            ids.append(id)
+    return ids
+
+
+def get_idle_kernel(n=1):
+    cur_id = os.getpid()
+    name = os.path.basename(sys.argv[0])
+    command = "ps -ef | grep {} |grep python | awk \'{{print $2}}\'".format(name)
+    print(command)
+    ids = exec_cmd_ints(command)
+
+    print(ids, cur_id)
+    # collect every core that is already bound by another instance
+    count = psutil.cpu_count()
+    used = [False] * (count // n)
+    command = "pidstat | grep {} | awk \'{{print $(NF-1)}}\'"
+    for i in range(0, len(ids)):
+        if cur_id != ids[i]:
+            cmd = command.format(ids[i])
+            kers = exec_cmd_ints(cmd)
+            for ker in kers:
+                ker = ker // n
+                used[ker] = True
+    print(used)
+    # pick n available cores
+    for i in range(0, len(used)):
+        if not used[i]:
+            res = []
+            cur_i = i * n
+            for idx in range(cur_i, cur_i + n):
+                if idx < count:
+                    res.append(idx)
+            return res
+    return [0]  # fallback: every slot is taken; bind to core 0 (was a bare 0, which cpu_affinity rejects)
+
+
+def bind_kernel(n=1, kernel=[]):
+    p = psutil.Process()
+
+    # take the lock
+    name = hashlib.md5(os.path.basename(sys.argv[0]).encode('utf-8')).hexdigest()
+    name = os.path.join("/tmp", name + ".lock")
+    if not os.path.exists(name):
+        with open(name, "w") as f:
+            f.write("0")
+    file = open(name)
+    fcntl.flock(file.fileno(), fcntl.LOCK_EX)  # exclusive lock
+    print("lock file --- {}".format(name))
+    if len(kernel) > 0:
+        kernels = kernel
+    else:
+        kernels = get_idle_kernel(n)
+    p.cpu_affinity(kernels)  # bind to the chosen cores
+    print("bind_kernel", kernels)
+    file.close()  # release the lock
+    print("unlock file --- {}".format(name))
+
+
+def calc_forever():
+    for i in range(0, 10000):
+        time.sleep(1000)
\ No newline at end of file
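How a worker is meant to use this module, as a sketch. It mirrors the calls that are currently commented out near the top of voice_class_online.py; pinning PyTorch to one thread alongside the one bound core is that file's own convention:

```python
import torch
from common import bind_kernel

torch.set_num_threads(1)  # keep PyTorch on the single bound core
bind_kernel(1)            # claim one idle core, serialized by the /tmp file lock

# ... load models and enter the processing loop ...
```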
diff --git a/AutoCoverTool/ref/online/mobilenet_v2_custom.py b/AutoCoverTool/ref/online/mobilenet_v2_custom.py
new file mode 100644
index 0000000..57b1227
--- /dev/null
+++ b/AutoCoverTool/ref/online/mobilenet_v2_custom.py
@@ -0,0 +1,142 @@
+"""
+Copied straight out of the torchvision code base.
+Reason: the stock mobilenet_v2 only accepts 3-channel image input, which does
+not fit our use case, so it was copied here and modified.
+"""
+
+from torch import nn
+
+
+def _make_divisible(v, divisor, min_value=None):
+    """
+    This function is taken from the original tf repo.
+    It ensures that all layers have a channel number that is divisible by 8
+    It can be seen here:
+    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
+    :param v:
+    :param divisor:
+    :param min_value:
+    :return:
+    """
+    if min_value is None:
+        min_value = divisor
+    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+    # Make sure that round down does not go down by more than 10%.
+    if new_v < 0.9 * v:
+        new_v += divisor
+    return new_v
+
+
+class ConvBNReLU(nn.Sequential):
+    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
+        padding = (kernel_size - 1) // 2
+        super(ConvBNReLU, self).__init__(
+            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
+            nn.BatchNorm2d(out_planes),
+            nn.ReLU6(inplace=True)
+        )
+
+
+class InvertedResidual(nn.Module):
+    def __init__(self, inp, oup, stride, expand_ratio):
+        super(InvertedResidual, self).__init__()
+        self.stride = stride
+        assert stride in [1, 2]
+
+        hidden_dim = int(round(inp * expand_ratio))
+        self.use_res_connect = self.stride == 1 and inp == oup
+
+        layers = []
+        if expand_ratio != 1:
+            # pw
+            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
+        layers.extend([
+            # dw
+            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
+            # pw-linear
+            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+            nn.BatchNorm2d(oup),
+        ])
+        self.conv = nn.Sequential(*layers)
+
+    def forward(self, x):
+        if self.use_res_connect:
+            return x + self.conv(x)
+        else:
+            return self.conv(x)
+
+
+class MobileNetV2Custom(nn.Module):
+    def __init__(self, num_classes=2, in_channel=1, width_mult=1.0, inverted_residual_setting=None, round_nearest=8):
+        """
+        MobileNet V2 main class
+
+        Args:
+            num_classes (int): Number of classes
+            in_channel (int): Number of input channels (the stock model hard-codes 3)
+            width_mult (float): Width multiplier - adjusts number of channels in each layer by this amount
+            inverted_residual_setting: Network structure
+            round_nearest (int): Round the number of channels in each layer to be a multiple of this number
+            Set to 1 to turn off rounding
+        """
+        super(MobileNetV2Custom, self).__init__()
+        block = InvertedResidual
+        input_channel = 32
+        last_channel = 1280
+
+        if inverted_residual_setting is None:
+            inverted_residual_setting = [
+                # t, c, n, s
+                [1, 16, 1, 1],
+                [6, 24, 2, 2],
+                [6, 32, 3, 2],
+                [6, 64, 4, 2],
+                [6, 96, 3, 1],
+                [6, 160, 3, 2],
+                [6, 320, 1, 1],
+            ]
+
+        # only check the first element, assuming user knows t,c,n,s are required
+        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
+            raise ValueError("inverted_residual_setting should be non-empty "
+                             "or a 4-element list, got {}".format(inverted_residual_setting))
+
+        # building first layer
+        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
+        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)
+        # the modification: in_channel used to be hard-coded to 3
+        features = [ConvBNReLU(in_channel, input_channel, stride=2)]
+        # building inverted residual blocks
+        for t, c, n, s in inverted_residual_setting:
+            output_channel = _make_divisible(c * width_mult, round_nearest)
+            for i in range(n):
+                stride = s if i == 0 else 1
+                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
+                input_channel = output_channel
+        # building last several layers
+        features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
+        # make it nn.Sequential
+        self.features = nn.Sequential(*features)
+
+        # building classifier
+        self.classifier = nn.Sequential(
+            nn.Dropout(0.2),
+            nn.Linear(self.last_channel, num_classes),
+        )
+
+        # weight initialization
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.ones_(m.weight)
+                nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                nn.init.zeros_(m.bias)
+
+    def forward(self, x):
+        x = self.features(x)
+        x = x.mean([2, 3])
+        x = self.classifier(x)
+        return x
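A quick sanity check of the one-channel change, as a sketch; the input shape follows the FRAME_LEN=128, MFCC_LEN=80 windows used by model.py:

```python
import torch
from mobilenet_v2_custom import MobileNetV2Custom

# One 128x80 MFCC window with a single input channel; the stock torchvision
# MobileNetV2 would require 3 channels here.
model = MobileNetV2Custom(num_classes=2, in_channel=1)
model.eval()
with torch.no_grad():
    x = torch.randn(1, 1, 128, 80)
    logits = model(x)
print(logits.shape)  # torch.Size([1, 2])
```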
"https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/bin/bin.zip" +unzip bin.zip +rm -f bin.zip +export PATH=$PATH:/home/worker/bin # 需要写入到.zshrc中 +sudo yum install libsndfile-devel + +# 以下使用手动安装即可 +conda create -n voice_class python=3.7 -y +conda activate voice_class +pip3 install librosa +pip3 install psutil +pip3 install torch==1.5 torchvision torchaudio +``` + +# 使用说明 +``` +下载模型并解压后,按照voice_class_online.py中的运行方式运行即可 +``` + +# 注意: +目前代码中限制了CPU的核心数量,只允许占用一个核,建议根据核心的情况多开几个进程做处理 + +# 性能测试(不加性能限制的情况下在GPU-2机器上测试得到): +20个线上样本(男10,女10) + +CPU情况:spend_time:tot=31.91|transcode=5.92|vb=3.12|gen_feature=3.5|predict=18.94 +GPU情况:spend_time:tot=15.64|transcode=6.34|vb=4.17|gen_feature=3.3|predict=1.443 diff --git a/AutoCoverTool/ref/online/resource/female/4785074274851990.mp4 b/AutoCoverTool/ref/online/resource/female/4785074274851990.mp4 new file mode 100644 index 0000000..9b225ee Binary files /dev/null and b/AutoCoverTool/ref/online/resource/female/4785074274851990.mp4 differ diff --git a/AutoCoverTool/ref/online/voice_class_online.py b/AutoCoverTool/ref/online/voice_class_online.py new file mode 100644 index 0000000..6041c94 --- /dev/null +++ b/AutoCoverTool/ref/online/voice_class_online.py @@ -0,0 +1,420 @@ +""" +男女声分类在线工具 +1 转码为16bit单声道 +2 均衡化 +3 模型分类 +""" + +import os +import sys +import librosa +import shutil +import logging +import time +import torch.nn.functional as F +import numpy as np +from model import * +# from common import bind_kernel + +logging.basicConfig(level=logging.INFO) + +os.environ["LRU_CACHE_CAPACITY"] = "1" + +# torch.set_num_threads(1) +# bind_kernel(1) + +""" +临时用一下,全局使用的变量 +""" + +transcode_time = 0 +vb_time = 0 +mfcc_time = 0 +predict_time = 0 + +""" +错误码 +""" +ERR_CODE_SUCCESS = 0 # 处理成功 +ERR_CODE_NO_FILE = -1 # 文件不存在 +ERR_CODE_TRANSCODE = -2 # 转码失败 +ERR_CODE_VOLUME_BALANCED = -3 # 均衡化失败 +ERR_CODE_FEATURE_TOO_SHORT = -4 # 特征文件太短 + +""" +常量 +""" + +FRAME_LEN = 128 +MFCC_LEN = 80 + +EBUR128_BIN = "/opt/soft/bin/standard_audio_no_cut" +# EBUR128_BIN = "/Users/yangjianli/linux/opt/soft/bin/standard_audio_no_cut" +GENDER_FEMALE = 0 +GENDER_MALE = 1 +GENDER_OTHER = 2 +""" +通用函数 +""" + + +def exec_cmd(cmd): + ret = os.system(cmd) + if ret != 0: + return False + return True + + +""" +业务需要的函数 +""" + + +def get_one_mfcc(file_url): + st = time.time() + data, sr = librosa.load(file_url, sr=16000) + if len(data) < 512: + return [] + mfcc = librosa.feature.mfcc(y=data, sr=sr, n_fft=512, hop_length=256, n_mfcc=MFCC_LEN) + mfcc = mfcc.transpose() + print("get_one_mfcc:spend_time={}".format(time.time() - st)) + global mfcc_time + mfcc_time += time.time() - st + return mfcc + + +def volume_balanced(src, dst): + st = time.time() + cmd = "{} {} {}".format(EBUR128_BIN, src, dst) + logging.info(cmd) + exec_cmd(cmd) + if not os.path.exists(dst): + logging.error("volume_balanced:cmd={}".format(cmd)) + print("volume_balanced:spend_time={}".format(time.time() - st)) + + global vb_time + vb_time += time.time() - st + return os.path.exists(dst) + + +def transcode(src, dst): + st = time.time() + cmd = "ffmpeg -loglevel quiet -i {} -ar 16000 -ac 1 {}".format(src, dst) + logging.info(cmd) + exec_cmd(cmd) + if not os.path.exists(dst): + logging.error("transcode:cmd={}".format(cmd)) + print("transcode:spend_time={}".format(time.time() - st)) + global transcode_time + transcode_time += time.time() - st + return os.path.exists(dst) + + +class VoiceClass: + + def __init__(self, music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, 
diff --git a/AutoCoverTool/ref/online/voice_class_online.py b/AutoCoverTool/ref/online/voice_class_online.py
new file mode 100644
index 0000000..6041c94
--- /dev/null
+++ b/AutoCoverTool/ref/online/voice_class_online.py
@@ -0,0 +1,420 @@
+"""
+Online male/female voice classification tool.
+1 Transcode to 16-bit mono
+2 Loudness-balance
+3 Classify with the models
+"""
+
+import os
+import sys
+import librosa
+import shutil
+import logging
+import time
+import torch.nn.functional as F
+import numpy as np
+from model import *
+# from common import bind_kernel
+
+logging.basicConfig(level=logging.INFO)
+
+os.environ["LRU_CACHE_CAPACITY"] = "1"
+
+# torch.set_num_threads(1)
+# bind_kernel(1)
+
+"""
+Temporary global state: timing accumulators.
+"""
+
+transcode_time = 0
+vb_time = 0
+mfcc_time = 0
+predict_time = 0
+
+"""
+Error codes
+"""
+ERR_CODE_SUCCESS = 0  # processed successfully
+ERR_CODE_NO_FILE = -1  # file does not exist
+ERR_CODE_TRANSCODE = -2  # transcode failed
+ERR_CODE_VOLUME_BALANCED = -3  # loudness balancing failed
+ERR_CODE_FEATURE_TOO_SHORT = -4  # feature sequence too short
+
+"""
+Constants
+"""
+
+FRAME_LEN = 128
+MFCC_LEN = 80
+
+EBUR128_BIN = "/opt/soft/bin/standard_audio_no_cut"
+# EBUR128_BIN = "/Users/yangjianli/linux/opt/soft/bin/standard_audio_no_cut"
+GENDER_FEMALE = 0
+GENDER_MALE = 1
+GENDER_OTHER = 2
+"""
+Generic helpers
+"""
+
+
+def exec_cmd(cmd):
+    ret = os.system(cmd)
+    if ret != 0:
+        return False
+    return True
+
+
+"""
+Business-logic helpers
+"""
+
+
+def get_one_mfcc(file_url):
+    st = time.time()
+    data, sr = librosa.load(file_url, sr=16000)
+    if len(data) < 512:
+        return []
+    mfcc = librosa.feature.mfcc(y=data, sr=sr, n_fft=512, hop_length=256, n_mfcc=MFCC_LEN)
+    mfcc = mfcc.transpose()
+    print("get_one_mfcc:spend_time={}".format(time.time() - st))
+    global mfcc_time
+    mfcc_time += time.time() - st
+    return mfcc
+
+
+def volume_balanced(src, dst):
+    st = time.time()
+    cmd = "{} {} {}".format(EBUR128_BIN, src, dst)
+    logging.info(cmd)
+    exec_cmd(cmd)
+    if not os.path.exists(dst):
+        logging.error("volume_balanced:cmd={}".format(cmd))
+    print("volume_balanced:spend_time={}".format(time.time() - st))
+
+    global vb_time
+    vb_time += time.time() - st
+    return os.path.exists(dst)
+
+
+def transcode(src, dst):
+    st = time.time()
+    cmd = "ffmpeg -loglevel quiet -i {} -ar 16000 -ac 1 {}".format(src, dst)
+    logging.info(cmd)
+    exec_cmd(cmd)
+    if not os.path.exists(dst):
+        logging.error("transcode:cmd={}".format(cmd))
+    print("transcode:spend_time={}".format(time.time() - st))
+    global transcode_time
+    transcode_time += time.time() - st
+    return os.path.exists(dst)
+
+
+class VoiceClass:
+
+    def __init__(self, music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model):
+        """
+        Four models:
+        :param music_voice_pure_model: separates pure vocals from everything else
+        :param music_voice_no_pure_model: separates vocals-with-music from everything else
+        :param gender_pure_model: male/female on pure vocals
+        :param gender_no_pure_model: male/female on vocals with music
+        """
+        st = time.time()
+        self.device = "cpu"
+        self.batch_size = 256
+        self.music_voice_pure_model = load_model(MusicVoiceV5Model, music_voice_pure_model, self.device)
+        self.music_voice_no_pure_model = load_model(MusicVoiceV5Model, music_voice_no_pure_model, self.device)
+        self.gender_pure_model = load_model(MobileNetV2Gender, gender_pure_model, self.device)
+        self.gender_no_pure_model = load_model(MobileNetV2Gender, gender_no_pure_model, self.device)
+        logging.info("load model ok ! spend_time={}".format(time.time() - st))
+
+    def batch_predict(self, model, features):
+        st = time.time()
+        scores = []
+        with torch.no_grad():
+            for i in range(0, len(features), self.batch_size):
+                cur_data = features[i:i + self.batch_size].to(self.device)
+                predicts = model(cur_data)
+                predicts_score = F.softmax(predicts, dim=1)
+                scores.extend(predicts_score.cpu().numpy())
+        ret = np.array(scores)
+        global predict_time
+        predict_time += time.time() - st
+        return ret
+
+    def predict_pure(self, filename, features):
+        scores = self.batch_predict(self.music_voice_pure_model, features)
+        new_features = []
+        for idx, score in enumerate(scores):
+            if score[0] > 0.5:  # scored as non-vocal
+                continue
+            new_features.append(features[idx].numpy())
+
+        # too few vocal windows to classify
+        # these thresholds are tunable
+        new_feature_len = len(new_features)
+        new_feature_rate = len(new_features) / len(features)
+        if new_feature_len < 4 or new_feature_rate < 0.4:
+            logging.warning(
+                "filename={}|predict_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate)
+            )
+            return GENDER_OTHER, -1
+        new_features = torch.from_numpy(np.array(new_features))
+        scores = self.batch_predict(self.gender_pure_model, new_features)
+        f_avg = sum(scores[:, 0]) / len(scores)
+        m_avg = sum(scores[:, 1]) / len(scores)
+        female_rate = f_avg / (f_avg + m_avg)
+        if female_rate > 0.65:
+            return GENDER_FEMALE, female_rate
+        if female_rate < 0.12:
+            return GENDER_MALE, female_rate
+        logging.warning(
+            "filename={}|predict_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate)
+        )
+        return GENDER_OTHER, female_rate
+
+    def predict_no_pure(self, filename, features):
+        scores = self.batch_predict(self.music_voice_no_pure_model, features)
+        new_features = []
+        for idx, score in enumerate(scores):
+            if score[0] > 0.5:  # scored as non-vocal
+                continue
+            new_features.append(features[idx].numpy())
+
+        # too few vocal windows to classify
+        # these thresholds are tunable
+        new_feature_len = len(new_features)
+        new_feature_rate = len(new_features) / len(features)
+        if new_feature_len < 4 or new_feature_rate < 0.4:
+            logging.warning(
+                "filename={}|predict_no_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate)
+            )
+            return GENDER_OTHER, -1
+        new_features = torch.from_numpy(np.array(new_features))
+        scores = self.batch_predict(self.gender_no_pure_model, new_features)
+        f_avg = sum(scores[:, 0]) / len(scores)
+        m_avg = sum(scores[:, 1]) / len(scores)
+        female_rate = f_avg / (f_avg + m_avg)
+        if female_rate > 0.75:
+            return GENDER_FEMALE, female_rate
+        if female_rate < 0.1:
+            return GENDER_MALE, female_rate
+        logging.warning(
+            "filename={}|predict_no_pure|other|len={}|rate={}".format(filename, new_feature_len, new_feature_rate)
+        )
+        return GENDER_OTHER, female_rate
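+    # predict_pure/predict_no_pure share the same two-stage structure: first drop
+    # windows the vocal detector scores as non-vocal, then average the gender
+    # softmax over what remains; only the female_rate cut-offs differ
+    # (0.65/0.12 on the pure path vs 0.75/0.1 on the no-pure path).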
+    def predict(self, filename, features):
+        st = time.time()
+        new_features = []
+        for i in range(FRAME_LEN, len(features), FRAME_LEN):
+            new_features.append(features[i - FRAME_LEN: i])
+        new_features = torch.from_numpy(np.array(new_features))
+        gender, rate = self.predict_pure(filename, new_features)
+        if gender == GENDER_OTHER:
+            logging.info("start no pure process...")
+            return self.predict_no_pure(filename, new_features)
+        print("predict|spend_time={}".format(time.time() - st))
+        return gender, rate
+
+    def process_one_logic(self, filename, file_path, cache_dir):
+        tmp_wav = os.path.join(cache_dir, "tmp.wav")
+        tmp_vb_wav = os.path.join(cache_dir, "tmp_vb.wav")
+        if not transcode(file_path, tmp_wav):
+            return ERR_CODE_TRANSCODE
+        if not volume_balanced(tmp_wav, tmp_vb_wav):
+            return ERR_CODE_VOLUME_BALANCED
+        features = get_one_mfcc(tmp_vb_wav)
+        if len(features) < FRAME_LEN:
+            logging.error("feature too short|file_path={}".format(file_path))
+            return ERR_CODE_FEATURE_TOO_SHORT
+        return self.predict(filename, features)
+
+    def process_one(self, file_path):
+        base_dir = os.path.dirname(file_path)
+        filename = os.path.splitext(file_path)[0]
+        cache_dir = os.path.join(base_dir, filename + "_cache")
+        if os.path.exists(cache_dir):
+            shutil.rmtree(cache_dir)
+        os.makedirs(cache_dir)
+        ret = self.process_one_logic(filename, file_path, cache_dir)
+        shutil.rmtree(cache_dir)
+        return ret
+
+    def process(self, file_path):
+        # NOTE: process_one() returns a bare int error code on failure and a
+        # (gender, female_rate) tuple on success, so this unpacking raises a
+        # TypeError on the failure path; callers should check the type first.
+        gender, female_rate = self.process_one(file_path)
+        logging.info("{}|gender={}|female_rate={}".format(file_path, gender, female_rate))
+        return gender, female_rate
+
+    def process_by_feature(self, feature_file):
+        """
+        Process a pre-computed feature file directly.
+        :param feature_file:
+        :return:
+        """
+        filename = os.path.splitext(feature_file)[0]
+        features = np.load(feature_file)
+        gender, female_rate = self.predict(filename, features)
+        return gender, female_rate
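The shape arithmetic behind `predict()` is easy to miss: at 16 kHz with hop 256, MFCC frames arrive at 62.5 per second, so each FRAME_LEN window covers 128 / 62.5 = 2.048 s of audio. A sketch of the same non-overlapping windowing (`window_features` is a hypothetical helper mirroring the loop in `VoiceClass.predict`; frames not covered by the loop are dropped):

```python
import numpy as np

SR = 16000       # sample rate used by get_one_mfcc
HOP = 256        # hop_length -> SR / HOP = 62.5 MFCC frames per second
FRAME_LEN = 128  # frames per model window -> 128 / 62.5 = 2.048 s of audio
MFCC_LEN = 80    # MFCC coefficients per frame


def window_features(features):
    """Split an (n_frames, MFCC_LEN) MFCC matrix into the non-overlapping
    (FRAME_LEN, MFCC_LEN) windows that VoiceClass.predict() feeds the models."""
    windows = [features[i - FRAME_LEN: i]
               for i in range(FRAME_LEN, len(features), FRAME_LEN)]
    return np.array(windows)
```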
print("process|spend_tm=={}".format(time.time() - st)) + + global transcode_time, vb_time, mfcc_time, predict_time + print("spend_time:tot={}|transcode={}|vb={}|gen_feature={}|predict={}".format(time.time() - tot_st, transcode_time, + vb_time, mfcc_time, predict_time)) + f_f = ret_map[0][0] + f_m = ret_map[0][1] + f_o = ret_map[0][2] + m_f = ret_map[1][0] + m_m = ret_map[1][1] + m_o = ret_map[1][2] + o_f = ret_map[2][0] + o_m = ret_map[2][1] + o_o = ret_map[2][2] + + print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o)) + print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o)) + print("om:{},of:{},oo:{}".format(o_m, o_f, o_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f + o_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m + o_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + +def test_all(): + import glob + base_dir = "/data/datasets/music_voice_dataset_full/online_data_v3_top200" + female = glob.glob(os.path.join(base_dir, "female/*mp4")) + male = glob.glob(os.path.join(base_dir, "male/*mp4")) + other = glob.glob(os.path.join(base_dir, "other/*mp4")) + model_path = "/data/jianli.yang/voice_classification/online/models" + music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth") + music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth") + gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth") + gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth") + vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model) + + tot_st = time.time() + ret_map = { + 0: {0: 0, 1: 0, 2: 0}, + 1: {0: 0, 1: 0, 2: 0}, + 2: {0: 0, 1: 0, 2: 0} + } + for file in female: + st = time.time() + print("------------------------------>>>>>") + gender, female_score = vc.process(file) + ret_map[0][gender] += 1 + if gender != 0: + print("err:female->{}|{}|{}".format(gender, file, female_score)) + print("process|spend_tm=={}".format(time.time() - st)) + + for file in male: + st = time.time() + print("------------------------------>>>>>") + gender, female_score = vc.process(file) + ret_map[1][gender] += 1 + if gender != 1: + print("err:male->{}|{}|{}".format(gender, file, female_score)) + print("process|spend_tm=={}".format(time.time() - st)) + + for file in other: + st = time.time() + print("------------------------------>>>>>") + gender, female_score = vc.process(file) + ret_map[2][gender] += 1 + if gender != 2: + print("err:other->{}|{}|{}".format(gender, file, female_score)) + print("process|spend_tm=={}".format(time.time() - st)) + + global transcode_time, vb_time, mfcc_time, predict_time + print("spend_time:tot={}|transcode={}|vb={}|gen_feature={}|predict={}".format(time.time() - tot_st, transcode_time, + vb_time, mfcc_time, predict_time)) + f_f = ret_map[0][0] + f_m = ret_map[0][1] + f_o = ret_map[0][2] + m_f = ret_map[1][0] + m_m = ret_map[1][1] + m_o = ret_map[1][2] + o_f = ret_map[2][0] + o_m = ret_map[2][1] + o_o = ret_map[2][2] + + print("ff:{},fm:{},fo:{}".format(f_f, f_m, f_o)) + print("mm:{},mf:{},mo:{}".format(m_m, m_f, m_o)) + print("om:{},of:{},oo:{}".format(o_m, o_f, o_o)) + # 女性准确率和召回率 + f_acc = f_f / (f_f + m_f + o_f) + f_recall = f_f / (f_f + f_m + f_o) + # 男性准确率和召回率 + m_acc = m_m / (m_m + f_m + o_m) + m_recall = m_m / (m_m + m_f + m_o) + print("female: acc={}|recall={}".format(f_acc, f_recall)) + print("male: acc={}|recall={}".format(m_acc, m_recall)) + + +if 
+ __name__ == "__main__":
+    # test_all()
+    # test_all_feature()
+    model_path = sys.argv[1]
+    voice_path = sys.argv[2]
+    music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth")
+    music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth")
+    gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth")
+    gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth")
+    vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model)
+    for i in range(0, 1):
+        st = time.time()
+        print("------------------------------>>>>>")
+        vc.process(voice_path)
+        print("process|spend_tm=={}".format(time.time() - st))
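One caveat for integrators, and it applies equally to the `self.voice_class.process_one(...)` call added to `ToneShift.mix`: `VoiceClass.process_one` returns a bare int error code on failure but a `(gender, female_rate)` tuple on success, so unpacking the result directly raises a `TypeError` on the failure path. A defensive wrapper, as a sketch (`classify_gender` is a hypothetical helper, not part of the patch):

```python
from voice_class_online import VoiceClass, GENDER_OTHER


def classify_gender(vc, path):
    """Return (gender, female_rate); map the int error codes that
    VoiceClass.process_one() returns on failure to (GENDER_OTHER, -1)."""
    ret = vc.process_one(path)
    if isinstance(ret, tuple):
        return ret
    # ret is one of the negative ERR_CODE_* values (transcode, balancing, ...)
    return GENDER_OTHER, -1
```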