Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F4880359
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
14 KB
Subscribers
None
View Options
diff --git a/AIMeiSheng/docker_demo/Dockerfile b/AIMeiSheng/docker_demo/Dockerfile
index 8a6fc25..94fb28a 100644
--- a/AIMeiSheng/docker_demo/Dockerfile
+++ b/AIMeiSheng/docker_demo/Dockerfile
@@ -1,28 +1,29 @@
# 系统版本 CUDA Version 11.8.0
# NAME="CentOS Linux" VERSION="7 (Core)"
# FROM starmaker.tencentcloudcr.com/starmaker/av/av:1.1
# 基础镜像, python3.9,cuda118,centos7,外加ffmpeg
#FROM starmaker.tencentcloudcr.com/starmaker/av/av_base:1.0
FROM registry.ushow.media/av/av_base:1.0
#FROM av_base_test:1.0
RUN source /etc/profile && sed -i 's|mirrorlist=|#mirrorlist=|g' /etc/yum.repos.d/CentOS-Base.repo && sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Base.repo && yum clean all && yum install -y unzip && yum install -y libsndfile && yum install -y libsamplerate libsamplerate-devel
RUN source /etc/profile && pip3 install librosa==0.9.1 && pip3 install gradio && pip3 install torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
RUN source /etc/profile && pip3 install urllib3==1.26.15 && pip3 install coscmd && coscmd config -a AKIDoQmshFWXGitnQmrfCTYNwEExPaU6RVHm -s F9n9E2ZonWy93f04qMaYFfogHadPt62h -b log-sg-1256122840 -r ap-singapore
RUN source /etc/profile && pip3 install asteroid-filterbanks
RUN source /etc/profile && pip3 install praat-parselmouth==0.4.3
RUN source /etc/profile && pip3 install pyworld
RUN source /etc/profile && pip3 install faiss-cpu
RUN source /etc/profile && pip3 install torchcrepe
RUN source /etc/profile && pip3 install thop
RUN source /etc/profile && pip3 install ffmpeg-python
-RUN source /etc/profile && pip3 install fairseq
+RUN source /etc/profile && pip3 install pip3==24.0
+RUN source /etc/profile && pip3 install fairseq==0.12.2
RUN source /etc/profile && pip3 install redis==4.5.0
RUN source /etc/profile && pip3 install numpy==1.26.4
COPY ./ /data/code/
WORKDIR /data/code
CMD ["/bin/bash", "-c", "source /etc/profile; export PYTHONPATH=/data/code; cd /data/code/AIMeiSheng/docker_demo; python3 offline_server.py"]
#CMD ["/bin/bash", "-c", "source /etc/profile; export PYTHONPATH=/data/code; cd /data/code/AIMeiSheng/docker_demo; python3 tmp.py"]
\ No newline at end of file
diff --git a/AIMeiSheng/docker_demo/svc_online.py b/AIMeiSheng/docker_demo/svc_online.py
index f12143f..3efdb58 100644
--- a/AIMeiSheng/docker_demo/svc_online.py
+++ b/AIMeiSheng/docker_demo/svc_online.py
@@ -1,194 +1,194 @@
# -*- coding: UTF-8 -*-
"""
SVC的核心处理逻辑
"""
import os
import time
import socket
import shutil
import hashlib
from AIMeiSheng.meisheng_svc_final import load_model, process_svc_online
from AIMeiSheng.cos_similar_ui_zoom import cos_similar
from AIMeiSheng.meisheng_env_preparex import meisheng_env_prepare
from AIMeiSheng.voice_classification.online.voice_class_online_fang import VoiceClass, download_volume_balanced
from AIMeiSheng.docker_demo.common import *
import logging
hostname = socket.gethostname()
log_file_name = f"{os.path.dirname(os.path.abspath(__file__))}/av_meisheng_{hostname}.log"
# 设置logger
svc_offline_logger = logging.getLogger("svc_offline")
file_handler = logging.FileHandler(log_file_name)
file_handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S')
file_handler.setFormatter(formatter)
if gs_prod:
svc_offline_logger.addHandler(file_handler)
if os.path.exists(gs_tmp_dir):
shutil.rmtree(gs_tmp_dir)
os.makedirs(gs_model_dir, exist_ok=True)
os.makedirs(gs_resource_cache_dir, exist_ok=True)
# 预设参数
gs_gender_models_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/hub/voice_classification/models.zip"
-gs_volume_bin_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/dataset/AIMeiSheng/ebur128_tool"
+gs_volume_bin_url = "https://av-audit-sync-sg-1256122840.cos.ap-singapore.myqcloud.com/dataset/AIMeiSheng/ebur128_tool/v1/ebur128_tool"
class GSWorkerAttr:
def __init__(self, input_data):
# 取出输入资源
vocal_url = input_data["record_song_url"]
target_url = input_data["target_url"]
start = input_data["start"] # 单位是ms
end = input_data["end"] # 单位是ms
vocal_loudness = input_data["vocal_loudness"]
female_recording_url = input_data["female_recording_url"]
male_recording_url = input_data["male_recording_url"]
self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest()
self.tmp_dir = os.path.join(gs_tmp_dir, self.distinct_id)
if os.path.exists(self.tmp_dir):
shutil.rmtree(self.tmp_dir)
os.makedirs(self.tmp_dir)
self.vocal_url = vocal_url
self.target_url = target_url
ext = vocal_url.split(".")[-1]
self.vocal_path = os.path.join(self.tmp_dir, self.distinct_id + f"_in.{ext}")
self.target_wav_path = os.path.join(self.tmp_dir, self.distinct_id + "_out.wav")
self.target_wav_ad_path = os.path.join(self.tmp_dir, self.distinct_id + "_out_ad.wav")
self.target_path = os.path.join(self.tmp_dir, self.distinct_id + "_out.m4a")
self.female_svc_source_url = female_recording_url
self.male_svc_source_url = male_recording_url
ext = female_recording_url.split(".")[-1]
self.female_svc_source_path = os.path.join(gs_resource_cache_dir,
hashlib.md5(female_recording_url.encode()).hexdigest() + "." + ext)
ext = male_recording_url.split(".")[-1]
self.male_svc_source_path = os.path.join(gs_resource_cache_dir,
hashlib.md5(male_recording_url.encode()).hexdigest() + "." + ext)
self.st_tm = start
self.ed_tm = end
self.target_loudness = vocal_loudness
def log_info_name(self):
return f"d_id={self.distinct_id}, vocal_url={self.vocal_url}"
def rm_cache(self):
if os.path.exists(self.tmp_dir):
shutil.rmtree(self.tmp_dir)
def init_gender_model():
"""
下载模型
:return:
"""
dst_model_dir = os.path.join(gs_model_dir, "voice_classification")
if not os.path.exists(dst_model_dir):
dst_zip_path = os.path.join(gs_model_dir, "models.zip")
if not download2disk(gs_gender_models_url, dst_zip_path):
svc_offline_logger.fatal(f"download gender_model err={gs_gender_models_url}")
cmd = f"cd {gs_model_dir}; unzip {dst_zip_path}; mv models voice_classification; rm -f {dst_zip_path}"
os.system(cmd)
if not os.path.exists(dst_model_dir):
svc_offline_logger.fatal(f"unzip {dst_zip_path} err")
music_voice_pure_model = os.path.join(dst_model_dir, "voice_005_rec_v5.pth")
music_voice_no_pure_model = os.path.join(dst_model_dir, "voice_10_v5.pth")
gender_pure_model = os.path.join(dst_model_dir, "gender_8k_ratev5_v6_adam.pth")
gender_no_pure_model = os.path.join(dst_model_dir, "gender_8k_v6_adam.pth")
vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model)
return vc
def init_svc_model():
meisheng_env_prepare(logging, gs_model_dir)
embed_model, hubert_model = load_model()
cs_sim = cos_similar()
- return embed_model, hubert_model,cs_sim
+ return embed_model, hubert_model, cs_sim
def download_volume_adjustment():
"""
下载音量调整工具
:return:
"""
volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
if not os.path.exists(volume_bin_path):
if not download2disk(gs_volume_bin_url, volume_bin_path):
svc_offline_logger.fatal(f"download volume_bin err={gs_volume_bin_url}")
os.system(f"chmod +x {volume_bin_path}")
def volume_adjustment(wav_path, target_loudness, out_path):
"""
音量调整
:param wav_path:
:param target_loudness:
:param out_path:
:return:
"""
volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
cmd = f"{volume_bin_path} {wav_path} {target_loudness} {out_path}"
os.system(cmd)
class SVCOnline:
def __init__(self):
st = time.time()
self.gender_model = init_gender_model()
self.embed_model, self.hubert_model, self.cs_sim = init_svc_model()
download_volume_adjustment()
download_volume_balanced()
svc_offline_logger.info(f"svc init finished, sp = {time.time() - st}")
def gender_process(self, worker_attr):
st = time.time()
gender, female_rate, is_pure = self.gender_model.process(worker_attr.vocal_path)
svc_offline_logger.info(
f"{worker_attr.vocal_url}, gender={gender}, female_rate={female_rate}, is_pure={is_pure}, "
f"gender_process sp = {time.time() - st}")
if gender == 0:
gender = 'female'
elif gender == 1:
gender = 'male'
elif female_rate == None:
gender = 'male'
return gender, gs_err_code_gender_classify
elif female_rate > 0.5:
gender = 'female'
else:
gender = 'male'
svc_offline_logger.info(f"{worker_attr.vocal_url}, modified gender={gender}")
# err = gs_err_code_success
# if female_rate == -1:
# err = gs_err_code_target_silence
return gender, gs_err_code_success
def process(self, worker_attr):
gender, err = self.gender_process(worker_attr)
if err != gs_err_code_success:
return gender, err
song_path = worker_attr.female_svc_source_path
if gender == "male":
song_path = worker_attr.male_svc_source_path
params = {'gender': gender, 'tst': worker_attr.st_tm, "tnd": worker_attr.ed_tm, 'delay': 0, 'song_path': None}
st = time.time()
err_code = process_svc_online(song_path, worker_attr.vocal_path, worker_attr.target_wav_path, self.embed_model,
self.hubert_model, self.cs_sim, params)
svc_offline_logger.info(f"{worker_attr.vocal_url}, err_code={err_code} process svc sp = {time.time() - st}")
return gender, err_code
diff --git a/tools/ebur128_tool/ebur128_tool.cpp b/tools/ebur128_tool/ebur128_tool.cpp
index c3d171c..ca42875 100644
--- a/tools/ebur128_tool/ebur128_tool.cpp
+++ b/tools/ebur128_tool/ebur128_tool.cpp
@@ -1,107 +1,118 @@
//
// Created by Administrator on 2024/7/8.
//
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include "alimiter.h"
#include "ebur128.h"
#include "WaveFile.h"
#define PROC_LEN 1024
/**
* 获取增益
* @param nChannel
* @param nSampleRate
* @param pData
* @param nLength
* @param gain
* @return
*/
int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness)
{
ebur128_state *st = NULL;
st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I);
if (NULL == st)
{
return -1;
}
int nPos = 0;
int nTmpLength = 0;
int nRet;
while (nPos < nLength)
{
nTmpLength = PROC_LEN;
if (nLength - nPos < PROC_LEN)
{
nTmpLength = nLength - nPos;
}
nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel);
if (nRet != 0)
{
return -2;
}
nPos += nTmpLength;
}
gated_loudness = -1;
ebur128_loudness_global(st, &gated_loudness);
ebur128_destroy(&st);
return 0;
}
int main(int argc, char* argv[]) {
if (argc < 4)
{
printf("input error! example: ./main input_wav target_loudness dst_wav\n");
return -1;
}
std::string vocal_path = argv[1];
double target_loudness = atof(argv[2]);
std::string out_vocal_path = argv[3];
// 读取数据
CWaveFile vocal_wav = CWaveFile(vocal_path.c_str(), false);
if (!vocal_wav.GetStatus())
{
printf("%s not ok!\n", vocal_path.c_str());
return -2;
}
int vocal_buf_len = vocal_wav.GetChannels() * vocal_wav.GetTotalFrames();
float *vocal_buf = new float[vocal_buf_len];
short *short_vocal_buf = new short[vocal_buf_len];
vocal_wav.ReadFrameAsfloat(vocal_buf, vocal_wav.GetTotalFrames());
for(int i = 0; i < vocal_wav.GetTotalFrames() * vocal_wav.GetChannels(); i++)
{
short_vocal_buf[i] = float(vocal_buf[i]) * 32767.f;
}
double vocal_gated_loudness = 0;
ebur128_whole(vocal_wav.GetChannels(), vocal_wav.GetSampleRate(), short_vocal_buf,
vocal_wav.GetTotalFrames() * vocal_wav.GetChannels(), vocal_gated_loudness);
+ if (std::isnan(vocal_gated_loudness))
+ {
+ printf("vocal_gated_loudness is nan\n");
+ vocal_gated_loudness = target_loudness;
+ }
float db = (target_loudness - vocal_gated_loudness) / 20.f;
float ebur128_rate = pow(10, db);
-
+ if (ebur128_rate > 100) {
+ printf("ebur128_rate=%f bigger than 100\n", ebur128_rate);
+ ebur128_rate = 100;
+ } else if (ebur128_rate < 0.01) {
+ printf("ebur128_rate=%f little than 100\n", ebur128_rate);
+ ebur128_rate = 0.01;
+ }
printf("vocal_gated_loudness = %f, db = %f, gain = %f\n", vocal_gated_loudness, db, ebur128_rate);
SUPERSOUND::Alimiter limiter;
limiter.SetParam(vocal_wav.GetSampleRate(), vocal_wav.GetChannels());
for (int i = 0; i < vocal_buf_len; i++)
{
float out = vocal_buf[i] * ebur128_rate;
limiter.Filter(&out, &out, 1);
vocal_buf[i] = out;
}
CWaveFile out_wav = CWaveFile(out_vocal_path.c_str(), true);
out_wav.SetChannels(vocal_wav.GetChannels());
out_wav.SetSampleRate(vocal_wav.GetSampleRate());
out_wav.SetSampleFormat(SF_IEEE_FLOAT);
out_wav.SetupDone();
out_wav.WriteFrame(vocal_buf, vocal_wav.GetTotalFrames());
delete[] vocal_buf;
delete[] short_vocal_buf;
return 0;
}
\ No newline at end of file
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sun, Jan 12, 08:35 (1 d, 11 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1347221
Default Alt Text
(14 KB)
Attached To
R350 av_svc
Event Timeline
Log In to Comment