Page MenuHomePhabricator

No OneTemporary

diff --git a/AIMeiSheng/meisheng_svc_final.py b/AIMeiSheng/meisheng_svc_final.py
index 9a5d94f..89c57f5 100644
--- a/AIMeiSheng/meisheng_svc_final.py
+++ b/AIMeiSheng/meisheng_svc_final.py
@@ -1,227 +1,227 @@
import os
import sys
sys.path.append(os.path.dirname(__file__))
import time
import shutil
import glob
import hashlib
import librosa
import soundfile
import gradio as gr
import pandas as pd
import numpy as np
from AIMeiSheng.RawNet3.infererence_fang_meisheng import get_embed, get_embed_model
from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main, load_hubert, get_vc, get_rmvpe
from gender_classify import load_gender_model
from AIMeiSheng.docker_demo.common import gs_svc_model_path, gs_embed_model_path, gs_rmvpe_model_path, gs_err_code_target_silence
from slicex.slice_set_silence import del_noise
# Executable used to mix the converted vocal with the accompaniment.
gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer"
# Name of the temporary workspace.
tmp_workspace_name = "batch_test_ocean_fi"
# Folder that holds the songs.
song_folder = "./data_meisheng/"
# Path of the temporary workspace (relative to the current working directory).
gs_work_dir = "./data_meisheng/{}".format(tmp_workspace_name)
# Model weights file.
pth_model_path = (
    "./weights/"
    "xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth"
)
# Directory containing this source file.
cur_dir = os.path.abspath(os.path.dirname(__file__))
# Workspace path anchored at this file's directory, with a trailing slash so
# that file names can be appended by plain string concatenation.
abs_path = "{}/".format(os.path.join(cur_dir, song_folder, tmp_workspace_name))
# Pitch extractor; assigned by load_model() or lazily on first use.
f0_method = None
def mix(in_path, acc_path, dst_path):
    """Transcode a vocal file to 44.1 kHz stereo and mix it with an accompaniment.

    Both external tools are started with argument lists instead of a shell
    command string, so paths containing spaces or shell metacharacters are
    passed through unchanged.

    :param in_path: vocal file to transcode; the transcoded copy is written
        next to it as ``<in_path>_442.wav``
    :param acc_path: accompaniment file handed to the mixer
    :param dst_path: output path handed to the mixer
    :return: -1 when a tool could not be started or the transcoded file is
        missing, otherwise None
    """
    import subprocess  # local import so the block does not depend on file-level edits

    # Transcode the SVC output to 44.1 kHz, 2 channels.
    svc_442_file = in_path + "_442.wav"
    st = time.time()
    transcode_cmd = [
        "ffmpeg", "-i", in_path,
        "-ar", "44100", "-ac", "2",
        "-y", svc_442_file,
        "-loglevel", "fatal",
    ]
    try:
        # The exit status is not inspected; success is judged by the output file.
        subprocess.run(transcode_cmd, check=False)
    except OSError as err:
        print("transcode failed to start,{},err={}".format(in_path, err))
        return -1
    if not os.path.exists(svc_442_file):
        return -1
    print("transcode,{},sp={}".format(in_path, time.time() - st))

    # Mix vocal and accompaniment.
    st = time.time()
    mixer_cmd = [gs_simple_mixer_path, svc_442_file, acc_path, dst_path, "1"]
    try:
        subprocess.run(mixer_cmd, check=False)
    except OSError as err:
        print("mixer failed to start,{},err={}".format(in_path, err))
        return -1
    print("mixer,{},sp={}".format(in_path, time.time() - st))
def load_model():
    """Preload the models used for conversion.

    Side effects: ``get_vc`` is called with ``gs_svc_model_path`` and its
    return value is discarded; the module-level ``f0_method`` is set to the
    object returned by ``get_rmvpe(gs_rmvpe_model_path)``.

    :return: tuple ``(embed_model, hubert_model)``
    """
    global f0_method

    embed_md = get_embed_model(gs_embed_model_path)
    hubert_md = load_hubert()
    get_vc(gs_svc_model_path)
    f0_method = get_rmvpe(gs_rmvpe_model_path)

    print("model preload finish!!!")
    return embed_md, hubert_md
def meisheng_init():
    """Load every model up front.

    :return: tuple ``(embed_model, hubert_model, gender_model)``
    """
    # load_model() runs first, then the gender model is loaded.
    preloaded = load_model()
    return (*preloaded, load_gender_model())
def _mean_voiced_pitch(f0_model, samples):
    """Return the mean of the F0 values strictly between 50 and 600, or NaN if there are none."""
    f0 = f0_model.infer_from_audio(samples, thred=0.03)
    voiced = f0[(f0 > 50) & (f0 < 600)]
    if len(voiced) == 0:
        # Same result np.mean gives for an empty selection, without the RuntimeWarning.
        return np.nan
    return np.mean(voiced)


def pyin_process_single_rmvpe(input_file):
    """Estimate the mean pitch of an audio file.

    The file is loaded at 16 kHz. Files longer than 15 seconds are measured
    on their first and last 15 seconds only: when the two means differ by
    more than 55 the lower one is returned, otherwise their average.

    :param input_file: path of the audio file
    :return: mean pitch; NaN when no F0 value falls in the accepted range
    """
    global f0_method
    if f0_method is None:
        # Pass the model path exactly as load_model() does.
        f0_method = get_rmvpe(gs_rmvpe_model_path)

    rate = 16000
    # Read the audio file.
    y, sr = librosa.load(input_file, sr=rate)
    lim_s = 15  # seconds analysed at each end of a long file
    if len(y) / sr <= lim_s:
        return _mean_voiced_pitch(f0_method, y)

    mean_pitch1 = _mean_voiced_pitch(f0_method, y[:sr * lim_s])
    mean_pitch2 = _mean_voiced_pitch(f0_method, y[-sr * lim_s:])
    if abs(mean_pitch1 - mean_pitch2) > 55:
        return min(mean_pitch1, mean_pitch2)
    return (mean_pitch1 + mean_pitch2) / 2
def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras):
    """Convert ``song_wav`` using the voice in ``target_wav``.

    :param song_wav: source vocal passed to ``svc_main`` and ``del_noise``
    :param target_wav: recording of the target voice
    :param svc_out_path: where the converted audio is written
    :param embed_npy: path passed to ``get_embed`` and ``svc_main`` for the embedding
    :param embed_md: embedding model passed to ``get_embed``
    :param hubert_md: model passed to ``svc_main``
    :param paras: extra parameters forwarded to ``svc_main`` and ``del_noise``
    :return: 0 on success, ``gs_err_code_target_silence`` when the target
        recording yields no usable pitch
    """
    # Compute the pitch of the target recording.
    f0up_key = pyin_process_single_rmvpe(target_wav)
    if f0up_key < 40 or np.isnan(f0up_key):  # unvoiced
        return gs_err_code_target_silence

    # Compute the embedding (timbre) of the target recording.
    get_embed(target_wav, embed_npy, embed_md)

    print("svc main start...")
    svc_main(song_wav, svc_out_path, embed_npy, f0up_key, hubert_md, paras)
    print("svc main finished!!")

    # paras is forwarded so del_noise can apply the same tst/tnd trimming.
    del_noise(song_wav, svc_out_path, paras)
    print("del noise in silence")
    return 0
def process_svc_online(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
    """Run ``meisheng_svc`` with the paths exactly as given.

    :return: the error code returned by ``meisheng_svc``
    """
    # Embedding file path: the target path with its last four characters
    # replaced by '.npy'.
    npy_path = "{}.npy".format(target_wav[:-4])
    return meisheng_svc(song_wav, target_wav, svc_out_path, npy_path, embed_md, hubert_md, paras)
def process_svc(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
    """Run ``meisheng_svc`` with target and output relocated into the workspace.

    Only the base names of ``target_wav`` and ``svc_out_path`` are kept; both
    are re-rooted under ``abs_path``. ``song_wav`` is used unchanged.

    :return: the error code returned by ``meisheng_svc``
    """
    target_wav = abs_path + os.path.basename(target_wav)
    svc_out_path = abs_path + os.path.basename(svc_out_path)
    # Embedding file path: the target path with its last four characters
    # replaced by '.npy'.
    embed_npy = target_wav[:-4] + '.npy'
    return meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras)
def get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras):
    '''
    Convert one song to the target voice, then add reverb and mix in the accompaniment.

    :param target_yinse_wav: recording of the target voice
    :param song_name: song name (sub-folder of ``song_folder/<gender>/``)
    :param embed_model: model passed through to process_svc
    :param hubert_model: model passed through to process_svc
    :param paras: other parameters; ``paras['gender']`` selects the song folder
    :return: path of the SVC output file
    '''
    # Empty the temporary workspace, or create it.
    if os.path.exists(gs_work_dir):
        cmd = f"rm -rf {gs_work_dir}/*"
        os.system(cmd)
    else:
        os.makedirs(gs_work_dir)

    gender = paras['gender']  # needed to locate the song

    # Copy the target-voice recording into the workspace.
    f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav))
    shutil.copy(target_yinse_wav, f_dst)
    target_yinse_wav = f_dst

    # Song vocal / accompaniment (the paths may need adjusting).
    song_wav = "{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)
    inf_acc_path = "{}{}/{}/acc.wav".format(song_folder, gender, song_name)
    svc_out_path = os.path.join(gs_work_dir, "svc.wav")  # name of the SVC result
    print("inputMsg:", song_wav, target_yinse_wav, svc_out_path)

    # SVC processing.
    # NOTE(review): process_svc re-roots the output under abs_path, while the
    # steps below read gs_work_dir relative to the working directory -- confirm
    # both resolve to the same file.
    st = time.time()
    print("start inference...")
    similar = process_svc(song_wav, target_yinse_wav, svc_out_path, embed_model, hubert_model, paras)
    if similar != 0:
        # Conversion did not run, so there is nothing to post-process.
        print("svc failed, err code = {}".format(similar))
        return svc_out_path
    print("svc finished!!")
    print("time cost = {}".format(time.time() - st))
    print("out path name {} ".format(svc_out_path))

    # Add reverb.
    print("add reverbration...")
    svc_out_path_effect = svc_out_path[:-4] + '_effect.wav'
    cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}"
    print("cmd :", cmd)
    os.system(cmd)

    # Merge vocal and accompaniment.
    print("add acc...")
    out_path = svc_out_path_effect[:-4] + '_music.wav'
    mix(svc_out_path_effect, inf_acc_path, out_path)
    print("time cost = {}".format(time.time() - st))
    print("out path name {} ".format(out_path))
    return svc_out_path
def meisheng_func(target_yinse_wav, song_name, paras):
    """Load the models, predict the gender of the target voice and run get_svc.

    ``paras['gender']`` is overwritten with ``'female'`` or ``'male'`` before
    get_svc is called. Nothing is returned.
    """
    # Initialisation.
    embed_model, hubert_model, gender_model = meisheng_init()

    # Gender prediction.
    gender, female_rate, is_pure = gender_model.process(target_yinse_wav)
    banner = '====================='
    print(banner)
    print("gender:{}, female_rate:{},is_pure:{}".format(gender, female_rate, is_pure))
    # 0 means female and 1 means male; any other value falls back to female_rate.
    if gender == 0 or (gender != 1 and female_rate > 0.5):
        gender = 'female'
    else:
        gender = 'male'
    print("modified gender:{} ".format(gender))
    print(banner)

    # Main processing; time values in paras are in milliseconds.
    paras['gender'] = gender
    get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras)
if __name__ == '__main__':
    # Target-voice recording; a full path is required.
    # Alternative sample: "./raw/meisheng_yinse/female/changying.wav"
    target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a"
    # Song name.
    song_name = "lost_stars"
    # For a segment-only SVC test use e.g.
    # {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0}.
    paras = dict(gender=None, tst=0, tnd=None, delay=0, song_path=None)
    meisheng_func(target_yinse_wav, song_name, paras)
diff --git a/AIMeiSheng/slicex/slice_set_silence.py b/AIMeiSheng/slicex/slice_set_silence.py
index f1b51b6..d906be9 100644
--- a/AIMeiSheng/slicex/slice_set_silence.py
+++ b/AIMeiSheng/slicex/slice_set_silence.py
@@ -1,59 +1,65 @@
# -*- coding: utf-8 -*-
import librosa # Optional. Use any library you like to read audio files.
import soundfile # Optional. Use any library you like to write audio files.
from slicex.slicer_torch import Slicer
class silce_silence():
    """Mute, in a converted track, the regions the slicer marks in the source track."""

    def __init__(self, sr):
        """Create the slicer for audio sampled at ``sr``."""
        self.slicer = Slicer(
            sr=sr,
            threshold=-40,
            min_length=5000,
            min_interval=300,
            hop_size=10,
            max_sil_kept=500
        )

    def set_silence(self, chunks, sr, target_audio, target_sr):
        '''
        Zero the parts of ``target_audio`` that correspond to marked chunks.

        :param chunks: slicing result of the song wav; each value has a
            ``"slice"`` flag and a ``"split_time"`` string ``"start,end"``
            in samples at ``sr``
        :param sr: sample rate of the song the chunks were computed on
        :param target_audio: SVC output samples; modified in place
        :param target_sr: sample rate of the SVC output
        :return: ``target_audio`` (the same object that was passed in)
        '''
        for chunk in chunks.values():
            tag = chunk["split_time"].split(",")
            # Skip empty intervals and chunks whose flag is not set.
            if tag[0] == tag[1] or not chunk["slice"]:
                continue
            # Rescale the boundaries from the song's rate to the output's rate.
            st = int(int(tag[0]) * target_sr / sr)
            en = min(int(int(tag[1]) * target_sr / sr), len(target_audio))
            target_audio[st:en] = 0
        return target_audio

    def cut(self, audio):
        """Slice ``audio`` and return the slicer's result as a dict."""
        return dict(self.slicer.slice(audio))
def del_noise(wav_in, svc_out, paras=None):
    """Mute the converted track wherever the slicer marks the source track.

    ``svc_out`` is read, modified and written back to the same path.

    :param wav_in: source song; loaded at its native sample rate
    :param svc_out: converted audio file, overwritten with the muted result
    :param paras: optional dict with ``'tst'`` (start, ms) and ``'tnd'``
        (end, ms, or None for "to the end"); when given, only that part of
        ``wav_in`` is analysed
    """
    audio, sr = librosa.load(wav_in, sr=None)
    target_audio, target_sr = librosa.load(svc_out, sr=None)
    if paras is not None:
        # Convert milliseconds to samples with the rate wav_in was actually
        # loaded at, instead of assuming 16 kHz.
        st = int(paras['tst'] * sr / 1000)
        en = len(audio)
        if paras['tnd'] is not None:
            en = min(en, int(paras['tnd'] * sr / 1000))
        audio = audio[st:en]
    slice_sil = silce_silence(sr)
    chunks = slice_sil.cut(audio)
    target_audio1 = slice_sil.set_silence(chunks, sr, target_audio, target_sr)
    soundfile.write(svc_out, target_audio1, target_sr)
    return

File Metadata

Mime Type
text/x-diff
Expires
Sun, Jan 12, 08:34 (1 d, 15 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1346278
Default Alt Text
(10 KB)

Event Timeline