Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F4880340
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
View Options
diff --git a/AIMeiSheng/meisheng_svc_final.py b/AIMeiSheng/meisheng_svc_final.py
index 9a5d94f..89c57f5 100644
--- a/AIMeiSheng/meisheng_svc_final.py
+++ b/AIMeiSheng/meisheng_svc_final.py
@@ -1,227 +1,227 @@
import os
import sys
sys.path.append(os.path.dirname(__file__))
import time
import shutil
import glob
import hashlib
import librosa
import soundfile
import gradio as gr
import pandas as pd
import numpy as np
from AIMeiSheng.RawNet3.infererence_fang_meisheng import get_embed, get_embed_model
from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main, load_hubert, get_vc, get_rmvpe
from gender_classify import load_gender_model
from AIMeiSheng.docker_demo.common import gs_svc_model_path, gs_embed_model_path, gs_rmvpe_model_path, gs_err_code_target_silence
from slicex.slice_set_silence import del_noise
gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer"  ## external mixing executable
tmp_workspace_name = "batch_test_ocean_fi"  # workspace name
song_folder = "./data_meisheng/"  ## song folder
gs_work_dir = f"./data_meisheng/{tmp_workspace_name}"  # workspace path
pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth"  ## model checkpoint file
cur_dir = os.path.abspath(os.path.dirname(__file__))
abs_path = os.path.join(cur_dir, song_folder, tmp_workspace_name) + '/'
# Lazily-initialized RMVPE pitch extractor; populated by load_model() or
# on first use inside pyin_process_single_rmvpe().
f0_method = None
def mix(in_path, acc_path, dst_path):
    """Transcode the SVC vocal to 44.1 kHz stereo, then blend it with the
    accompaniment through the external simple_mixer binary.

    Returns -1 when the transcode produced no output file; otherwise None.
    """
    svc_442_file = in_path + "_442.wav"
    t_begin = time.time()
    os.system("ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(in_path, svc_442_file))
    if not os.path.exists(svc_442_file):
        return -1
    print("transcode,{},sp={}".format(in_path, time.time() - t_begin))
    # mix vocal with accompaniment
    t_begin = time.time()
    os.system("{} {} {} {} 1".format(gs_simple_mixer_path, svc_442_file, acc_path, dst_path))
    print("mixer,{},sp={}".format(in_path, time.time() - t_begin))
def load_model():
    """Preload the embedding, hubert, svc and RMVPE models.

    Side effect: stores the RMVPE pitch extractor in the module-level
    ``f0_method``. Returns ``(embed_model, hubert_model)``.
    """
    global f0_method
    embed = get_embed_model(gs_embed_model_path)
    hubert = load_hubert()
    get_vc(gs_svc_model_path)
    f0_method = get_rmvpe(gs_rmvpe_model_path)
    print("model preload finish!!!")
    return embed, hubert
def meisheng_init():
    """Load every model needed for one run: embedding + hubert (via
    load_model, which also primes f0_method) and the gender classifier.
    Returns ``(embed_model, hubert_model, gender_model)``.
    """
    return (*load_model(), load_gender_model())
def _mean_valid_pitch(pitch_extractor, samples):
    """Mean F0 over plausible voiced frames (50 Hz < f0 < 600 Hz).

    May return NaN (with a numpy warning) when no frame survives the
    filter; callers of pyin_process_single_rmvpe already guard with
    np.isnan.
    """
    f0 = pitch_extractor.infer_from_audio(samples, thred=0.03)
    f0 = f0[f0 < 600]
    return np.mean(f0[f0 > 50])


def pyin_process_single_rmvpe(input_file):
    """Estimate the mean pitch (Hz) of an audio file with RMVPE.

    For long clips (> lim_s seconds) only the head and tail segments are
    analyzed; if their pitches disagree by more than 55 Hz the lower one
    is used, otherwise they are averaged.

    :param input_file: path to any audio file librosa can read
    :return: mean pitch in Hz (may be NaN for silent/unvoiced input)
    """
    global f0_method
    if f0_method is None:
        # lazy init when load_model() was not called first
        f0_method = get_rmvpe()
    rate = 16000  # analysis sample rate (was 44100 in an earlier version)
    y, sr = librosa.load(input_file, sr=rate)
    len_s = len(y) / sr
    lim_s = 15  # segment length in seconds (was 10)
    if len_s > lim_s:
        mean_pitch1 = _mean_valid_pitch(f0_method, y[:sr * lim_s])
        mean_pitch2 = _mean_valid_pitch(f0_method, y[-sr * lim_s:])
        if abs(mean_pitch1 - mean_pitch2) > 55:
            # head/tail disagree strongly: trust the lower estimate
            mean_pitch_cur = min(mean_pitch1, mean_pitch2)
        else:
            mean_pitch_cur = (mean_pitch1 + mean_pitch2) / 2
    else:
        mean_pitch_cur = _mean_valid_pitch(f0_method, y)
    return mean_pitch_cur
def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras):
    """Run one singing-voice-conversion pass.

    Estimates the target's pitch, extracts its timbre embedding into
    embed_npy, converts song_wav, then mutes noise in silent regions of
    the result. Returns 0 on success or gs_err_code_target_silence when
    the target audio is silent/unvoiced.
    """
    # pitch of the target voice
    f0up_key = pyin_process_single_rmvpe(target_wav)
    if np.isnan(f0up_key) or f0up_key < 40:  # silent / unvoiced target
        return gs_err_code_target_silence
    # timbre embedding of the target voice
    get_embed(target_wav, embed_npy, embed_md)
    print("svc main start...")
    svc_main(song_wav, svc_out_path, embed_npy, f0up_key, hubert_md, paras)
    print("svc main finished!!")
    # paras carries tst/tnd trim info so del_noise aligns with the clip
    del_noise(song_wav, svc_out_path, paras)
    print("del noise in silence")
    return 0
def process_svc_online(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
    """Online entry point: paths are used exactly as given.

    The embedding .npy path is derived by swapping the 4-character audio
    extension of target_wav for '.npy'.
    """
    embed_npy = target_wav[:-4] + '.npy'
    return meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras)
def process_svc(song_wav, target_wav, svc_out_path, embed_md, hubert_md, paras):
    """Batch entry point: remap target/output into the workspace directory.

    song_wav is used as-is; target_wav and svc_out_path are reduced to
    their basenames and re-rooted under abs_path (the batch workspace).
    Returns the error code from meisheng_svc.
    """
    # The original code also computed basename(song_wav) into an unused
    # local; that dead variable is removed here.
    target_wav = abs_path + os.path.basename(target_wav)
    svc_out_path = abs_path + os.path.basename(svc_out_path)
    embed_npy = target_wav[:-4] + '.npy'  # embedding .npy storage path
    return meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, embed_md, hubert_md, paras)
def get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras):
    """Full SVC pipeline for one song.

    :param target_yinse_wav: path to the target-timbre sample
    :param song_name: song identifier under song_folder/<gender>/
    :param embed_model: preloaded embedding model
    :param hubert_model: preloaded hubert model
    :param paras: extra parameters; paras['gender'] selects the song dir
    :return: path of the raw svc output wav
    """
    # reset the temporary workspace
    if os.path.exists(gs_work_dir):
        shell_cmd = f"rm -rf {gs_work_dir}/*"
        os.system(shell_cmd)
    else:
        os.makedirs(gs_work_dir)
    gender = paras['gender']  # picks which song variant to convert
    # copy the target timbre sample into the workspace
    copied_target = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav))
    shutil.copy(target_yinse_wav, copied_target)
    target_yinse_wav = copied_target
    # vocal / accompaniment locations (path layout may need adjusting)
    song_wav = os.path.join("{}{}/{}/vocal321.wav".format(song_folder, gender, song_name))
    inf_acc_path = os.path.join("{}{}/{}/acc.wav".format(song_folder, gender, song_name))
    svc_out_path = os.path.join(gs_work_dir, "svc.wav")  # svc result name
    print("inputMsg:", song_wav, target_yinse_wav, svc_out_path)
    # run the conversion
    t_begin = time.time()
    print("start inference...")
    score = process_svc(song_wav, target_yinse_wav, svc_out_path, embed_model, hubert_model, paras)
    print("svc finished!!")
    print("time cost = {}".format(time.time() - t_begin))
    print("out path name {} ".format(svc_out_path))
    # add reverberation via the external effect tool
    print("add reverbration...")
    svc_out_path_effect = svc_out_path[:-4] + '_effect.wav'
    shell_cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}"
    print("cmd :", shell_cmd)
    os.system(shell_cmd)
    # merge the effected vocal with the accompaniment
    print("add acc...")
    out_path = svc_out_path_effect[:-4] + '_music.wav'
    mix(svc_out_path_effect, inf_acc_path, out_path)
    print("time cost = {}".format(time.time() - t_begin))
    print("out path name {} ".format(out_path))
    return svc_out_path
def meisheng_func(target_yinse_wav, song_name, paras):
    """End-to-end driver: load models, detect the target's gender, then
    run the SVC pipeline with paras['gender'] filled in."""
    # model initialization
    embed_model, hubert_model, gender_model = meisheng_init()
    # gender prediction on the target timbre sample
    gender, female_rate, is_pure = gender_model.process(target_yinse_wav)
    print('=====================')
    print("gender:{}, female_rate:{},is_pure:{}".format(gender, female_rate, is_pure))
    if gender == 0:
        gender = 'female'
    elif gender == 1:
        gender = 'male'
    else:
        # ambiguous label: fall back to the female-probability score
        gender = 'female' if female_rate > 0.5 else 'male'
    print("modified gender:{} ".format(gender))
    print('=====================')
    # run the main pipeline (time-related paras are in ms)
    paras['gender'] = gender
    get_svc(target_yinse_wav, song_name, embed_model, hubert_model, paras)
if __name__ == '__main__':
    # target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav"  # needs full path
    target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a"
    song_name = "lost_stars"  # song identifier
    paras = {'gender': None, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None}
    # paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0}  # clip-level svc test
    meisheng_func(target_yinse_wav, song_name, paras)
diff --git a/AIMeiSheng/slicex/slice_set_silence.py b/AIMeiSheng/slicex/slice_set_silence.py
index f1b51b6..d906be9 100644
--- a/AIMeiSheng/slicex/slice_set_silence.py
+++ b/AIMeiSheng/slicex/slice_set_silence.py
@@ -1,59 +1,65 @@
# -*- coding: utf-8 -*-
import librosa # Optional. Use any library you like to read audio files.
import soundfile # Optional. Use any library you like to write audio files.
from slicex.slicer_torch import Slicer
class silce_silence():
    """Mute the SVC output wherever the reference vocal was detected as
    silence by the torch Slicer."""

    def __init__(self, sr):
        # Slicer tuned for vocals: -40 dB threshold, segments >= 5 s,
        # silence gaps >= 300 ms, 10 ms hop, keep at most 500 ms of silence.
        self.slicer = Slicer(
            sr=sr,
            threshold=-40,
            min_length=5000,
            min_interval=300,
            hop_size=10,
            max_sil_kept=500
        )

    def set_silence(self, chunks, sr, target_audio, target_sr):
        """Zero the samples of target_audio that correspond to silent
        chunks of the sliced source.

        :param chunks: slice result of the song wav (dict of chunk metadata)
        :param sr: sample rate of the sliced source
        :param target_audio: svc output samples (modified in place)
        :param target_sr: sample rate of the svc output
        :return: the (mutated) target_audio
        """
        for meta in chunks.values():
            bounds = meta["split_time"].split(",")
            if bounds[0] != bounds[1] and meta["slice"] == True:  # silent chunk
                # rescale sample indices from source rate to target rate
                lo = int(int(bounds[0]) * target_sr / sr)
                hi = min(int(int(bounds[1]) * target_sr / sr), len(target_audio))
                target_audio[lo:hi] = 0
        return target_audio

    def cut(self, audio):
        """Slice the audio and return the chunk metadata as a dict."""
        return dict(self.slicer.slice(audio))
def del_noise(wav_in, svc_out, paras=None):
    """Silence the regions of the SVC output where the source vocal is
    silent, rewriting svc_out in place.

    :param wav_in: source vocal wav, loaded at its native sample rate
    :param svc_out: svc result wav; overwritten with the muted version
    :param paras: optional dict with 'tst'/'tnd' start/end times in ms;
                  when given, the source is trimmed to [tst, tnd) before
                  slicing so its timeline matches the svc clip
    """
    audio, sr = librosa.load(wav_in, sr=None)
    target_audio, target_sr = librosa.load(svc_out, sr=None)
    if paras is not None:  # idiom fix: compare to None with `is`, not `!=`
        # NOTE(review): the ms->sample conversion hard-codes a 16 kHz rate
        # (16000/1000) even though sr is loaded above — confirm upstream
        # always feeds 16 kHz vocals here, otherwise use sr instead.
        start = int(paras['tst'] * 16000 / 1000)
        end = len(audio)
        if paras['tnd'] is not None:
            end = min(end, int(paras['tnd'] * 16000 / 1000))
        audio = audio[start:end]
    slice_sil = silce_silence(sr)
    chunks = slice_sil.cut(audio)
    target_audio1 = slice_sil.set_silence(chunks, sr, target_audio, target_sr)
    soundfile.write(svc_out, target_audio1, target_sr)
    return
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sun, Jan 12, 08:34 (1 d, 15 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1346278
Default Alt Text
(10 KB)
Attached To
R350 av_svc
Event Timeline
Log In to Comment