Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F4987437
app.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
10 KB
Subscribers
None
app.py
View Options
import
os
import
torch
# os.system("wget -P cvec/ https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt")
import
gradio
as
gr
import
librosa
import
numpy
as
np
import
logging
from
fairseq
import
checkpoint_utils
from
vc_infer_pipeline
import
VC
import
traceback
from
config
import
Config
from
lib.infer_pack.models
import
(
SynthesizerTrnMs256NSFsid
,
SynthesizerTrnMs256NSFsid_nono
,
SynthesizerTrnMs768NSFsid
,
SynthesizerTrnMs768NSFsid_nono
,
)
from
i18n
import
I18nAuto
# Raise the threshold of chatty third-party loggers so that only warnings
# and errors from them reach the console.
for _noisy_logger in ("numba", "markdown_it", "urllib3", "matplotlib"):
    logging.getLogger(_noisy_logger).setLevel(logging.WARNING)
# Project-local helpers: the translation callable and the runtime config.
# Their construction side effects are defined outside this file.
i18n = I18nAuto()
i18n.print()
config = Config()

# Directory layout, given as relative paths.
weight_root = "weights"
weight_uvr5_root = "uvr5_weights"
index_root = "logs"

# Feature-extraction model; stays None until load_hubert() populates it.
hubert_model = None

# Selectable voice checkpoints: every "*.pth" entry directly in weight_root.
names = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]

# Selectable index files: every "*.index" below index_root whose file name
# does not contain "trained".
index_paths = []
for folder, _subdirs, filenames in os.walk(index_root, topdown=False):
    index_paths.extend(
        "%s/%s" % (folder, filename)
        for filename in filenames
        if filename.endswith(".index") and "trained" not in filename
    )
def _build_synthesizer(checkpoint):
    """Instantiate the synthesizer class that matches a checkpoint.

    The class is chosen from the checkpoint's "version" entry ("v1" or "v2",
    defaulting to "v1") and its "f0" flag (defaulting to 1). The f0 variants
    additionally receive ``is_half=config.is_half``.

    Args:
        checkpoint: mapping with a "config" sequence and optional "f0" /
            "version" entries.

    Returns:
        A ``(model, version)`` tuple.

    Raises:
        ValueError: if the version is neither "v1" nor "v2".
    """
    if_f0 = checkpoint.get("f0", 1)
    model_version = checkpoint.get("version", "v1")
    if model_version == "v1":
        f0_cls = SynthesizerTrnMs256NSFsid
        plain_cls = SynthesizerTrnMs256NSFsid_nono
    elif model_version == "v2":
        f0_cls = SynthesizerTrnMs768NSFsid
        plain_cls = SynthesizerTrnMs768NSFsid_nono
    else:
        # Previously an unknown version fell through silently and the caller
        # kept working with whatever model happened to be loaded before.
        raise ValueError("Unsupported model version: %r" % (model_version,))
    if if_f0 == 1:
        model = f0_cls(*checkpoint["config"], is_half=config.is_half)
    else:
        model = plain_cls(*checkpoint["config"])
    return model, model_version


def get_vc(sid):
    """Load the voice model selected in the UI, or unload the current one.

    Args:
        sid: checkpoint file name inside ``weight_root``. An empty value
            ("", [] or None) means "no model selected" and releases the
            currently loaded models.

    Returns:
        A Gradio update dict for the speaker-id slider: hidden when no model
        is selected, otherwise visible with ``maximum`` set to the model's
        speaker count.

    Raises:
        ValueError: if the checkpoint declares an unsupported version.

    Side effects:
        Rebinds the module globals ``n_spk``, ``tgt_sr``, ``net_g``, ``vc``,
        ``cpt``, ``version`` and, when unloading, ``hubert_model``.
    """
    global n_spk, tgt_sr, net_g, vc, cpt, version, hubert_model
    # Some of these globals are only created by the first successful load, so
    # read them through the module dict instead of risking a NameError.
    state = globals()

    if not sid:
        # The change event also fires while nothing is loaded; only clean up
        # when switching from "model loaded" to "no model".
        if state.get("hubert_model") is not None:
            print("clean_empty_cache")
            stale_cpt = state.get("cpt")
            # Rebinding to None drops the references and, unlike `del`,
            # leaves every name defined for later readers.
            hubert_model = net_g = n_spk = vc = tgt_sr = cpt = None
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
            # Without the detour below the cleanup is not thorough: rebuild
            # the network once from the old checkpoint and drop it again.
            if stale_cpt is not None:
                scratch_model, version = _build_synthesizer(stale_cpt)
                del scratch_model, stale_cpt
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
        return {"visible": False, "__type__": "update"}

    person = "%s/%s" % (weight_root, sid)
    print("loading %s" % person)
    # NOTE: torch.load unpickles the file; only load trusted checkpoints.
    checkpoint = torch.load(person, map_location="cpu")
    sample_rate = checkpoint["config"][-1]
    # Overwrite the speaker-count slot with the real embedding table size.
    checkpoint["config"][-3] = checkpoint["weight"]["emb_g.weight"].shape[0]

    model, model_version = _build_synthesizer(checkpoint)
    # enc_q is removed before the weights are loaded; strict=False tolerates
    # the resulting key mismatch. Presumably it is not needed for inference
    # -- TODO confirm.
    del model.enc_q
    print(model.load_state_dict(checkpoint["weight"], strict=False))
    model.eval().to(config.device)
    model = model.half() if config.is_half else model.float()
    converter = VC(sample_rate, config)

    # Publish the new state only after everything succeeded, so a failed load
    # cannot leave the globals describing two different models.
    cpt = checkpoint
    tgt_sr = sample_rate
    version = model_version
    net_g = model
    vc = converter
    n_spk = checkpoint["config"][-3]
    return {"visible": True, "maximum": n_spk, "__type__": "update"}
def load_hubert():
    """Load "hubert_base.pt" into the module-level ``hubert_model``.

    The first model of the loaded ensemble is moved to ``config.device``,
    converted to half or full precision according to ``config.is_half`` and
    switched to eval mode.
    """
    global hubert_model
    ensemble, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        ["hubert_base.pt"],
        suffix="",
    )
    hubert_model = ensemble[0]
    hubert_model = hubert_model.to(config.device)
    hubert_model = hubert_model.half() if config.is_half else hubert_model.float()
    hubert_model.eval()
def vc_single(
    sid,
    input_audio_path,
    f0_up_key,
    f0_file,
    f0_method,
    file_index,
    file_index2,
    index_rate,
    filter_radius,
    resample_sr,
    rms_mix_rate,
    protect,
):
    """Convert one uploaded audio clip with the currently loaded voice model.

    Args:
        sid: speaker id forwarded to ``vc.pipeline``.
        input_audio_path: ``(sample_rate, samples)`` pair from the audio
            widget, or None when nothing was uploaded. Samples are divided by
            32768.0, i.e. assumed to be int16 -- TODO confirm.
        f0_up_key: pitch shift; converted with ``int()``.
        f0_file: optional F0 curve file, forwarded to the pipeline.
        f0_method: pitch extraction method name, forwarded to the pipeline.
        file_index: manually entered index path; takes precedence when set.
        file_index2: index path from the dropdown; used when ``file_index``
            is empty.
        index_rate, filter_radius, rms_mix_rate, protect: forwarded to the
            pipeline unchanged.
        resample_sr: output sample rate; only honoured when >= 16000 and
            different from the model's rate.

    Returns:
        ``(message, audio)``. ``audio`` is ``(sample_rate, samples)`` on
        success, None when the request was rejected up front, and
        ``(None, None)`` when the conversion raised (``message`` then holds
        the traceback).
    """
    if input_audio_path is None:
        return "You need to upload an audio", None
    # The model globals are only created by a successful get_vc() call; look
    # them up defensively so a missing model yields a message, not a NameError.
    state = globals()
    if state.get("net_g") is None or state.get("cpt") is None:
        return "You need to select a model first", None
    f0_up_key = int(f0_up_key)
    try:
        audio = input_audio_path[1] / 32768.0
        if len(audio.shape) == 2:
            # Average over the last axis to collapse channels to mono.
            audio = np.mean(audio, -1)
        audio = librosa.resample(
            audio, orig_sr=input_audio_path[0], target_sr=16000
        )
        # Scale down only when the peak would exceed 0.95 of full scale.
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
        times = [0, 0, 0]
        if hubert_model is None:
            load_hubert()
        if_f0 = cpt.get("f0", 1)
        if file_index:
            # Guard against user typos: strip stray quotes/whitespace and
            # swap "trained" for "added" automatically.
            file_index = (
                file_index.strip(" ")
                .strip('"')
                .strip("\n")
                .strip('"')
                .strip(" ")
                .replace("trained", "added")
            )
        else:
            # An unset dropdown yields None, which os.path.exists() rejects
            # with TypeError; normalise it to the empty string.
            file_index = file_index2 or ""
        audio_opt = vc.pipeline(
            hubert_model,
            net_g,
            sid,
            audio,
            input_audio_path,
            times,
            f0_up_key,
            f0_method,
            file_index,
            index_rate,
            if_f0,
            filter_radius,
            tgt_sr,
            resample_sr,
            rms_mix_rate,
            version,
            protect,
            f0_file=f0_file,
        )
        # Keep the output rate local: writing it back to the global tgt_sr
        # would hand the wrong model rate to every later conversion.
        if resample_sr >= 16000 and tgt_sr != resample_sr:
            output_sr = resample_sr
        else:
            output_sr = tgt_sr
        index_info = (
            "Using index:%s." % file_index
            if os.path.exists(file_index)
            else "Index not used."
        )
        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
            index_info,
            times[0],
            times[1],
            times[2],
        ), (output_sr, audio_opt)
    except Exception:
        # Best-effort UI callback: report the traceback instead of crashing.
        info = traceback.format_exc()
        print(info)
        return info, (None, None)
# Gradio front end: one tab holding the model picker, the conversion
# parameters and the output widgets.
# NOTE(review): the container nesting below was reconstructed from a
# flattened listing -- confirm the extent of each gr.Column scope.
with gr.Blocks() as app:
    with gr.Tabs():
        with gr.TabItem("在线demo"):
            gr.Markdown(
                value="""
                RVC 在线demo
                """
            )
            sid = gr.Dropdown(label=i18n("推理音色"), choices=sorted(names))
            with gr.Column():
                # Hidden until get_vc() reports a loaded model.
                spk_item = gr.Slider(
                    minimum=0,
                    maximum=2333,
                    step=1,
                    label=i18n("请选择说话人id"),
                    value=0,
                    visible=False,
                    interactive=True,
                )
            # Selecting a model loads it and updates the speaker slider.
            sid.change(fn=get_vc, inputs=[sid], outputs=[spk_item])
            gr.Markdown(
                value=i18n(
                    "男转女推荐+12key, 女转男推荐-12key, 如果音域爆炸导致音色失真也可以自己调整到合适音域. "
                )
            )
            vc_input3 = gr.Audio(label="上传音频(长度小于90秒)")
            vc_transform0 = gr.Number(
                label=i18n("变调(整数, 半音数量, 升八度12降八度-12)"),
                value=0,
            )
            f0method0 = gr.Radio(
                label=i18n(
                    "选择音高提取算法,输入歌声可用pm提速,harvest低音好但巨慢无比,crepe效果好但吃GPU"
                ),
                choices=["pm", "harvest", "crepe", "rmvpe"],
                value="pm",
                interactive=True,
            )
            filter_radius0 = gr.Slider(
                minimum=0,
                maximum=7,
                label=i18n(
                    ">=3则使用对harvest音高识别的结果使用中值滤波,数值为滤波半径,使用可以削弱哑音"
                ),
                value=3,
                step=1,
                interactive=True,
            )
            with gr.Column():
                # Manual index path: created hidden and read-only, so the
                # dropdown is the effective source.
                file_index1 = gr.Textbox(
                    label=i18n("特征检索库文件路径,为空则使用下拉的选择结果"),
                    value="",
                    interactive=False,
                    visible=False,
                )
                file_index2 = gr.Dropdown(
                    label=i18n("自动检测index路径,下拉式选择(dropdown)"),
                    choices=sorted(index_paths),
                    interactive=True,
                )
            index_rate1 = gr.Slider(
                minimum=0,
                maximum=1,
                label=i18n("检索特征占比"),
                value=0.88,
                interactive=True,
            )
            resample_sr0 = gr.Slider(
                minimum=0,
                maximum=48000,
                label=i18n("后处理重采样至最终采样率,0为不进行重采样"),
                value=0,
                step=1,
                interactive=True,
            )
            rms_mix_rate0 = gr.Slider(
                minimum=0,
                maximum=1,
                label=i18n("输入源音量包络替换输出音量包络融合比例,越靠近1越使用输出包络"),
                value=1,
                interactive=True,
            )
            protect0 = gr.Slider(
                minimum=0,
                maximum=0.5,
                label=i18n(
                    "保护清辅音和呼吸声,防止电音撕裂等artifact,拉满0.5不开启,调低加大保护力度但可能降低索引效果"
                ),
                value=0.33,
                step=0.01,
                interactive=True,
            )
            f0_file = gr.File(
                label=i18n("F0曲线文件, 可选, 一行一个音高, 代替默认F0及升降调")
            )
            but0 = gr.Button(i18n("转换"), variant="primary")
            vc_output1 = gr.Textbox(label=i18n("输出信息"))
            vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
            # Order must match the positional parameters of vc_single().
            conversion_inputs = [
                spk_item,
                vc_input3,
                vc_transform0,
                f0_file,
                f0method0,
                file_index1,
                file_index2,
                index_rate1,
                filter_radius0,
                resample_sr0,
                rms_mix_rate0,
                protect0,
            ]
            but0.click(
                fn=vc_single,
                inputs=conversion_inputs,
                outputs=[vc_output1, vc_output2],
            )

app.launch()
File Metadata
Details
Attached
Mime Type
text/x-python
Expires
Sat, Apr 5, 05:25 (29 m, 14 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1418642
Default Alt Text
app.py (10 KB)
Attached To
R350 av_svc
Event Timeline
Log In to Comment