Page MenuHomePhabricator

No OneTemporary

diff --git a/AutoCoverTool/online/fade_out.py b/AutoCoverTool/online/fade_out.py
new file mode 100644
index 0000000..14a436c
--- /dev/null
+++ b/AutoCoverTool/online/fade_out.py
@@ -0,0 +1,48 @@
+# 淡出效果
+import os
+import sys
+import json
+import time
+
+
def exec_cmd_and_result(cmd):
    """Run *cmd* through a shell and return its captured stdout as a string."""
    with os.popen(cmd) as pipe:
        return pipe.read()
+
+
def get_d(audio_path):
    """Return the duration of *audio_path* in seconds via ffprobe, or -1.

    -1 is returned when ffprobe is missing, emits no/invalid JSON, or the
    JSON lacks a format.duration field.  The original crashed with a
    JSONDecodeError on empty ffprobe output instead of returning -1.
    """
    cmd = "/usr/local/bin/ffprobe -v quiet -print_format json -show_format -show_streams {}".format(audio_path)
    with os.popen(cmd) as pipe:
        raw = pipe.read()
    try:
        data = json.loads(raw)
    except ValueError:
        # ffprobe absent or file unreadable -> empty/garbage stdout
        return -1
    if 'format' in data and 'duration' in data['format']:
        return float(data["format"]["duration"])
    return -1
+
+
def fade_out(in_file, out_file, d):
    """Trim the last 0.2 s off *in_file* and apply a 1-second audio fade-out.

    :param in_file: source media file (audio stream is copied, video dropped)
    :param out_file: destination file
    :param d: source duration in seconds (as reported by get_d)
    :raises ValueError: if *d* is too short to trim 0.2 s and fade 1 s

    Fixes vs. original: both ffmpeg invocations now use the same binary path
    (the first used bare "ffmpeg", the second "/usr/local/bin/ffmpeg"), the
    trimmed duration is computed once, and short inputs no longer produce
    negative -t / afade start times.
    """
    tmp_file = in_file + "_tmp.mp4"
    trimmed = d - 0.2  # drop the trailing 0.2 s tail
    if trimmed <= 1.0:
        # not enough material for the 1 s fade; fail loudly rather than
        # hand ffmpeg negative timestamps
        raise ValueError("duration too short for fade_out: {}".format(d))
    cmd = "/usr/local/bin/ffmpeg -i {} -vn -acodec copy -ss 00:00:00 -t {} -y {}".format(in_file, trimmed, tmp_file)
    os.system(cmd)
    # fade starts 1 s before the trimmed end
    cmd = "/usr/local/bin/ffmpeg -i {} -filter_complex afade=t=out:st={}:d=1 -y {}".format(tmp_file, trimmed - 1,
                                                                                           out_file)
    print(cmd)
    os.system(cmd)
    os.unlink(tmp_file)
+
+
def process(input_file, out_file):
    """Measure *input_file*'s duration, write a faded copy to *out_file*,
    and print the elapsed wall time."""
    started = time.time()
    duration = get_d(input_file)
    fade_out(input_file, out_file, duration)
    print("sp={}".format(time.time() - started))
+
+
if __name__ == '__main__':
    # Prefer CLI arguments (they were commented out in favor of hard-coded
    # dev paths); keep the dev paths as a fallback so local runs still work.
    if len(sys.argv) >= 3:
        input_file = sys.argv[1]
        out_file = sys.argv[2]
    else:
        input_file = "/Users/yangjianli/tmp/ttt/123/611752105030647512/mix_-2_0.mp4"
        out_file = "/Users/yangjianli/tmp/ttt/123/611752105030647512/mix_-2_01.mp4"
    process(input_file, out_file)
diff --git a/AutoCoverTool/online/tone_shift_one.py b/AutoCoverTool/online/tone_shift_one.py
index 232516d..55b21ff 100644
--- a/AutoCoverTool/online/tone_shift_one.py
+++ b/AutoCoverTool/online/tone_shift_one.py
@@ -1,369 +1,369 @@
"""
变调的方式做处理
1. 下载
2. 分离
3. 针对于人声变调+2,伴奏+1
4. 合成
"""
import os
import json
import shutil
import librosa
import logging
import numpy as np
import multiprocessing as mp
from ref.music_remover.separate_interface import SeparateInterface
from online.inference_worker import upload_file2cos, gs_state_use, gs_state_finish, gs_state_default
from online.common import *
from ref.online.voice_class_online import VoiceClass
# All worker activity goes to a fixed log file at INFO and above.
logging.basicConfig(filename='/tmp/tone_shift_one.log', level=logging.INFO)
# External native tools: the pitch shifter and the two-track mixer.
gs_tone_shift_exe = "/data/gpu_env_common/res/av_svc/bin/tone_shift_exe"
gs_simple_mixer_path = "/data/gpu_env_common/res/av_svc/bin/simple_mixer"
# Error codes returned throughout this module; process_worker stores the
# negated code in svc_queue_table.state on failure.
gs_err_code_success = 0
gs_err_code_tone_shift = 1
gs_err_code_mix = 2
gs_err_code_transcode = 3
gs_err_code_upload = 4
gs_err_code_download = 5
gs_err_code_trans_to_mp3 = 6
gs_err_code_separate = 7
gs_err_code_duration_too_long = 8
gs_err_code_duration_no_vocal = 9
gs_err_code_duration_err = 10
gs_err_code_transcode_acc = 11
gs_err_code_upload_acc = 12
gs_err_code_download_acc = 13
gs_err_code_download_vocal = 14
gs_err_code_transcode_acc_v1 = 15
gs_err_code_transcode_vocal_v1 = 16
gs_err_code_silence_no_data = 17
gs_err_code_silence_no_process = 18
def post_process_err_callback(msg):
    """Error callback for the multiprocessing pool: report to stdout."""
    print("ERROR|post_process|task_error_callback:", msg)
def exec_cmd(cmd):
    """Run *cmd* through a shell and return its captured stdout."""
    with os.popen(cmd) as pipe:
        return pipe.read()
def get_d(audio_path):
    """Return the duration of *audio_path* in seconds via ffprobe, or -1.

    -1 covers every failure: ffprobe missing, unreadable file, empty or
    invalid JSON, or JSON without format.duration.  The original raised a
    JSONDecodeError when ffprobe produced no output.
    """
    cmd = "ffprobe -v quiet -print_format json -show_format -show_streams {}".format(audio_path)
    with os.popen(cmd) as pipe:
        raw = pipe.read()
    try:
        data = json.loads(raw)
    except ValueError:
        # ffprobe absent or produced garbage -> treat as unknown duration
        return -1
    if 'format' in data and 'duration' in data['format']:
        return float(data["format"]["duration"])
    return -1
def get_mean_power(audio_path):
    """Mean absolute amplitude of *audio_path*, loaded mono at 44.1 kHz."""
    target_sr = 44100
    samples, target_sr = librosa.load(audio_path, sr=target_sr, mono=True)
    return np.mean(np.abs(samples))
def tone_shift_one(in_file, dst_file, pitch):
    """Run the external tone-shift binary; True iff *dst_file* was produced."""
    shell_cmd = "{} {} {} {}".format(gs_tone_shift_exe, in_file, dst_file, pitch)
    os.system(shell_cmd)
    return os.path.exists(dst_file)
def mix(cid, vocal_path, acc_path, tp):
    """Pitch-shift vocal/accompaniment, mix, transcode to mp4, upload to COS.

    :param cid: song id, used in the COS key
    :param vocal_path: separated vocal wav
    :param acc_path: separated accompaniment wav
    :param tp: 1 -> vocal +2 semitones, otherwise vocal -2; acc never shifted
    :return: (err_code, cos_key, shifted_vocal_path, tp); key/path are None
             on failure.
    """
    if tp == 1:
        vocal_pitch = 2
        acc_pitch = 0
    else:
        vocal_pitch = -2
        acc_pitch = 0
    vocal_path_2 = vocal_path.replace(".wav", "_{}.wav".format(vocal_pitch))
    acc_path_2 = acc_path.replace(".wav", "_{}.wav".format(acc_pitch))
    err = tone_shift_one(vocal_path, vocal_path_2, vocal_pitch)
    if not err:
        return gs_err_code_tone_shift, None, None, tp
    err = tone_shift_one(acc_path, acc_path_2, acc_pitch)
    if not err:
        return gs_err_code_tone_shift, None, None, tp
    base_dir = os.path.dirname(vocal_path)
    mix_path = "{}/mix_{}_{}.wav".format(base_dir, vocal_pitch, acc_pitch)
    cmd = "{} {} {} {}".format(gs_simple_mixer_path, vocal_path_2, acc_path_2, mix_path)
    print("exec_cmd={}".format(cmd))
    os.system(cmd)
    if not os.path.exists(mix_path):
        return gs_err_code_mix, None, None, tp
    # Transcode the mixed wav to an aac/mp4 container.
    mix_path_mp3 = mix_path.replace(".wav", ".mp4")
    cmd = "ffmpeg -i {} -b:a 128k -c:a aac -ar 44100 -ac 2 -y {} -loglevel fatal".format(mix_path, mix_path_mp3)
    os.system(cmd)
    if not os.path.exists(mix_path_mp3):
        return gs_err_code_transcode, None, None, tp
    # Upload to COS.
    mix_name = os.path.basename(mix_path_mp3)
    key = "av_res/svc_res_tone_shift/{}/{}".format(str(cid), mix_name)
    if not upload_file2cos(key, mix_path_mp3):
        # BUG FIX: this was a 3-tuple; callers (process_one) unpack four
        # values, so the upload-failure path raised ValueError instead of
        # reporting gs_err_code_upload.
        return gs_err_code_upload, None, None, tp
    return gs_err_code_success, key, vocal_path_2, tp
class ToneShift:
    """Queue worker: pulls songs from svc_queue_table, separates vocal and
    accompaniment, produces +2/-2 semitone mixes, classifies vocal gender,
    and writes result URLs back to the DB."""

    def __init__(self):
        self.separate_inst = SeparateInterface()
        model_path = "/data/gpu_env_common/res/av_svc/models"
        music_voice_pure_model = os.path.join(model_path, "voice_005_rec_v5.pth")
        music_voice_no_pure_model = os.path.join(model_path, "voice_10_v5.pth")
        gender_pure_model = os.path.join(model_path, "gender_8k_ratev5_v6_adam.pth")
        gender_no_pure_model = os.path.join(model_path, "gender_8k_v6_adam.pth")
        self.voice_class = VoiceClass(music_voice_pure_model, music_voice_no_pure_model,
                                      gender_pure_model, gender_no_pure_model)

    def update_state(self, song_id, state):
        """Persist *state* for *song_id* in svc_queue_table."""
        # NOTE(review): SQL built via string interpolation; ids come from our
        # own DB so exposure is limited, but parameterize if the API allows.
        sql = "update svc_queue_table set state={},update_time={} where song_id = {}". \
            format(state, int(time.time()), song_id)
        banned_user_map['db'] = "av_db"
        update_db(sql, banned_user_map)

    def get_url_by_id(self, song_id):
        """Look up (song_id, url) for one song; (None, None) when absent."""
        sql = "select song_id, url from svc_queue_table where song_id={}".format(song_id)
        banned_user_map["db"] = "av_db"
        rows = get_data_by_mysql(sql)
        if not rows:
            return None, None
        return str(rows[0][0]), rows[0][1]

    def get_one_data_logic(self):
        """Fetch one queued song, trying song_src 5, then 4, then 3."""
        for song_src in (5, 4, 3):
            song_id, song_url = self.get_one_data(song_src=song_src)
            if song_id is not None:
                return song_id, song_url
        return None, None

    def get_one_data(self, song_src=3):
        """Pop the oldest state-0 row for *song_src* and mark it in-use."""
        sql = "select song_id, url from svc_queue_table where state = 0 and song_src={} order by create_time asc limit 1".format(
            song_src)
        banned_user_map["db"] = "av_db"
        rows = get_data_by_mysql(sql, banned_user_map)
        if not rows:
            return None, None
        song_id, song_url = rows[0]
        if song_id != "":
            self.update_state(song_id, gs_state_use)
        return str(song_id), song_url

    def pre_process(self, work_dir, song_url):
        """Download the source audio into *work_dir* and transcode it to a
        44.1 kHz stereo wav (src.wav). Returns an error code."""
        if "?sign=" in song_url:
            # signed URLs are transient; treat as a download failure
            return gs_err_code_download
        ext = str(song_url).split(".")[-1]
        dst_file = "{}/src_origin.{}".format(work_dir, ext)
        cmd = "wget {} -O {}".format(song_url, dst_file)
        os.system(cmd)
        if not os.path.exists(dst_file):
            return gs_err_code_download
        duration = get_d(dst_file)
        if duration < 0:
            return gs_err_code_duration_err
        print("Duration:", dst_file, duration)
        if duration > 20 * 60:
            return gs_err_code_duration_too_long
        dst_mp3_file = "{}/src.wav".format(work_dir)
        cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} ".format(dst_file, dst_mp3_file)
        os.system(cmd)
        if not os.path.exists(dst_mp3_file):
            return gs_err_code_trans_to_mp3
        return gs_err_code_success

    def upload_acc(self, cid, acc_path):
        """Transcode the accompaniment to m4a and upload it to COS.

        Returns (err_code, cos_key) with key None on failure."""
        mix_path_aac = acc_path.replace(".wav", ".m4a")
        cmd = "ffmpeg -i {} -b:a 128k -c:a aac -ar 44100 -ac 2 -y {} -loglevel fatal".format(acc_path, mix_path_aac)
        os.system(cmd)
        if not os.path.exists(mix_path_aac):
            return gs_err_code_transcode_acc, None
        mix_name = os.path.basename(mix_path_aac)
        key = "av_res/svc_res_tone_shift/{}/{}".format(str(cid), mix_name)
        if not upload_file2cos(key, mix_path_aac):
            return gs_err_code_upload_acc, None
        return gs_err_code_success, key

    def async_mix(self, cid, vocal_path, acc_path):
        """Run mix() for tp=1 and tp=2 in two parallel worker processes."""
        pool = mp.Pool(processes=2)
        pending = []
        for tp in range(1, 3):
            task = pool.apply_async(mix, args=(cid, vocal_path, acc_path, tp),
                                    error_callback=post_process_err_callback)
            pending.append(task)
        pool.close()
        pool.join()
        return [task.get(timeout=10 * 60) for task in pending]

    def process_one(self, cid, work_dir):
        """Separate src.wav, verify vocal energy, and build both mixes.

        :return: (err_code, [mix1_key, mix2_key, gender1, gender2]) — the
                 list is empty on failure."""
        src_mp3 = os.path.join(work_dir, "src.wav")
        vocal_path = os.path.join(work_dir, "vocal.wav")
        acc_path = os.path.join(work_dir, "acc.wav")
        if not (os.path.exists(vocal_path) and os.path.exists(acc_path)):
            if not self.separate_inst.process(cid, src_mp3, vocal_path, acc_path):
                return gs_err_code_separate, []
            if not os.path.exists(vocal_path) or not os.path.exists(acc_path):
                return gs_err_code_separate, []
        # Treat the track as instrumental when mean vocal energy is tiny.
        # Empirical thresholds: no-vocal samples [0.0056, 0.0003]; smallest
        # with-vocal samples seen so far [0.046, 0.049].
        print("power:{},{}".format(cid, get_mean_power(vocal_path)))
        if get_mean_power(vocal_path) < 0.02:
            return gs_err_code_duration_no_vocal, []
        rets = self.async_mix(cid, vocal_path, acc_path)
        out_mix_mp3 = ["", ""]
        out_vocal_path = ["", ""]
        for err, mix_mp3, shifted_vocal, tp in rets:
            if err != gs_err_code_success:
                return err, []
            out_mix_mp3[tp - 1] = mix_mp3
            out_vocal_path[tp - 1] = shifted_vocal
        out_gender = []
        for shifted_vocal in out_vocal_path:
            gender, female_rate = self.voice_class.process_one(shifted_vocal)
            # Gender remap: the classifier emits 0=female, 1=male, 2=unknown;
            # downstream expects 1=male, 2=female, 3=unknown.
            mmap = [2, 1, 3]
            gender = mmap[gender]
            out_gender.append(str(gender))
        # [mix1, mix2, gender1, gender2]
        real_msg = [out_mix_mp3[0], out_mix_mp3[1], out_gender[0], out_gender[1]]
        return gs_err_code_success, real_msg

    def download_and_transcode(self, url, local_path, local_path_wav):
        """wget *url* to *local_path*, then transcode to 44.1 kHz stereo wav.

        Returns 0 on success, -1 on download failure, -2 on transcode failure."""
        cmd = "wget {} -O {}".format(url, local_path)
        os.system(cmd)
        if not os.path.exists(local_path):
            return -1
        cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {}".format(local_path, local_path_wav)
        os.system(cmd)
        if not os.path.exists(local_path_wav):
            return -2
        return 0

    def get_data_from_mysql(self, cid, work_dir):
        """If the silence DB already holds separated stems for *cid*,
        download both (acc.wav / vocal.wav) into *work_dir*.

        Returns an error code; gs_err_code_silence_no_process means the row
        exists but the stems are not ready."""
        sql = "select starmaker_songid,task_url,complete_url,voice_url from starmaker_musicbook.silence where starmaker_songid={} order by task_id desc limit 1".format(
            cid)
        rows = get_data_by_mysql(sql, banned_user_map)
        if len(rows) == 0:
            return gs_err_code_silence_no_data
        song_id, task_url, complete_url, voice_url = rows[0]
        if complete_url != "" and voice_url != "":
            # Both accompaniment and vocal are available; fetch each in turn.
            ext = str(complete_url).split(".")[-1]
            acc_dst_file = os.path.join(work_dir, "acc.{}".format(ext))
            acc_wav_dst_file = os.path.join(work_dir, "acc.wav")
            err = self.download_and_transcode(complete_url, acc_dst_file, acc_wav_dst_file)
            os.unlink(acc_dst_file)
            if err == -1:
                return gs_err_code_download_acc
            if err == -2:
                return gs_err_code_transcode_acc_v1
            ext = str(voice_url).split(".")[-1]
            vocal_dst_file = os.path.join(work_dir, "vocal.{}".format(ext))
            vocal_wav_dst_file = os.path.join(work_dir, "vocal.wav")
            err = self.download_and_transcode(voice_url, vocal_dst_file, vocal_wav_dst_file)
            os.unlink(vocal_dst_file)
            if err == -1:
                return gs_err_code_download_vocal
            if err == -2:
                return gs_err_code_transcode_vocal_v1
            return gs_err_code_success
        return gs_err_code_silence_no_process

    def process_worker(self):
        """Main loop: poll the queue, process each song, record the result."""
        logging.info("start process_worker .....")
        base_dir = "/tmp/tone_shift_one"
        if not os.path.exists(base_dir):
            os.makedirs(base_dir)
        while True:
            worker_st = time.time()
            cid, song_url = self.get_one_data_logic()
            if cid is None:
                time.sleep(5)
                logging.info("get one data is None ...")
                continue
            work_dir = os.path.join(base_dir, str(cid))
            if os.path.exists(work_dir):
                shutil.rmtree(work_dir)
            os.makedirs(work_dir)
            # Reuse separated stems from the silence DB when available;
            # otherwise start over with a fresh download.
            err = self.get_data_from_mysql(cid, work_dir)
            if err != gs_err_code_success:
                shutil.rmtree(work_dir)
                os.makedirs(work_dir)
                err = self.pre_process(work_dir, song_url)
                if err != gs_err_code_success:
                    self.update_state(str(cid), -err)
                    continue
            st = time.time()
            err, data = self.process_one(str(cid), work_dir)
            logging.info("process_finish,{},{}".format(cid, time.time() - st))
            if err == gs_err_code_success and len(data) != 0:
                sql = "update svc_queue_table set state={},update_time={},svc_url=\"{}\" where song_id = {}". \
                    format(gs_state_finish, int(time.time()), ",".join(data), str(cid))
                banned_user_map['db'] = "av_db"
                update_db(sql, banned_user_map)
            else:
                self.update_state(str(cid), -err)
            shutil.rmtree(work_dir)
            logging.info("process_finish,{},{}".format(cid, time.time() - worker_st))
if __name__ == '__main__':
    worker = ToneShift()
    worker.process_worker()
diff --git a/AutoCoverTool/ref/tools/mixer/tone_shift.cpp b/AutoCoverTool/ref/tools/mixer/tone_shift.cpp
index afbefab..e083869 100644
--- a/AutoCoverTool/ref/tools/mixer/tone_shift.cpp
+++ b/AutoCoverTool/ref/tools/mixer/tone_shift.cpp
@@ -1,250 +1,263 @@
//
// Created by yangjianli on 2019-09-09.
//
/**
* 输入一个音频和伴奏自动进行混合
* gated_loudness 当前音量
* gain 预期增益
*/
#include "iostream"
#include "WaveFile.h"
#include "math.h"
#include "ebur128.h"
#include "AudioMixer.h"
#include "alimiter.h"
#include "waves/inc/WaveFile.h"
#include "CAudioEffectsChainApi.h"
#include "string"
#include "ae_server/CAeServer.h"
#include <cstdio>
#include <chrono>
#include <iostream>
#include <cstdlib>
#include <sys/time.h>
#include "denoise/webrtc/include/WebrtcDenoise.h"
#define PROC_LEN 1024
#define DEFAULT_BASELINE_DB (float)-14.57f
// Convert 16-bit PCM samples to normalized floats in [-1, 1).
// Returns 0 (always succeeds).
int short2float(short *pInBuf, int nLen, float *pOutBuf)
{
    for (int idx = 0; idx < nLen; ++idx)
    {
        pOutBuf[idx] = pInBuf[idx] * 1.0 / 32768;
    }
    return 0;
}
// Convert normalized floats back to 16-bit PCM, clamping to the valid
// range. BUG FIX: the original cast overflowed for inputs >= 1.0
// (1.0f * 32768 = 32768 does not fit in a short; the int->short narrowing
// was implementation-defined wraparound).
// Returns 0 (always succeeds).
int float2short(float *pInBuf, int nLen, short *pOutBuf)
{
    for (int i = 0; i < nLen; i++)
    {
        int v = int(pInBuf[i] * 32768);
        if (v > 32767) v = 32767;
        if (v < -32768) v = -32768;
        pOutBuf[i] = short(v);
    }
    return 0;
}
/**
* 获取增益
* @param nChannel
* @param nSampleRate
* @param pData
* @param nLength
* @param gain
* @return
*/
/**
 * Measure EBU R128 integrated loudness and derive the linear gain needed
 * to reach DEFAULT_BASELINE_DB.
 * @param nChannel        channel count of pData
 * @param nSampleRate     sample rate in Hz
 * @param pData           interleaved 16-bit samples
 * @param nLength         total sample count (all channels)
 * @param gated_loudness  [out] measured integrated loudness
 * @param gain            [out] linear gain toward the baseline
 * @return 0 on success, -1 on init failure, -2 on feed failure
 */
int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness, double &gain)
{
    printf("ebur128_init start .. %d\n", nLength);
    ebur128_state *st = NULL;
    st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I);
    if (NULL == st)
    {
        return -1;
    }
    int nPos = 0;
    int nTmpLength = 0;
    int nRet;
    printf("process start ..\n");
    while (nPos < nLength)
    {
        nTmpLength = PROC_LEN;
        if (nLength - nPos < PROC_LEN)
        {
            nTmpLength = nLength - nPos;
        }
        nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel);
        if (nRet != 0)
        {
            // BUG FIX: the early return used to leak the ebur128 state.
            ebur128_destroy(&st);
            return -2;
        }
        nPos += nTmpLength;
    }
    printf("process ok..\n");
    gated_loudness = -1;
    ebur128_loudness_global(st, &gated_loudness);
    float db = (DEFAULT_BASELINE_DB - gated_loudness) / 20.f;
    gain = pow(10, db);
    printf("gated_loudness = %f db = %f gain = %f\n", gated_loudness, db, gain);
    ebur128_destroy(&st);
    return 0;
}
/**
* 混合音频和伴奏
* @param pVocalIn
* @param pAccIn
* @param nLength
* @param gainVocal
* @param gainAcc
* @param pOutBuf
* @return
*/
/**
 * Mix vocal and accompaniment with the given gains, running the IM effect
 * chain on the vocal first.
 * @param pVocalIn     vocal samples (modified in place by the effect chain)
 * @param pAccIn       accompaniment samples
 * @param nLength      total sample count per track
 * @param gainVocal    vocal gain (mapped to mixer volume 0..~50)
 * @param gainAcc      accompaniment gain (same mapping)
 * @param pOutBuf      [out] mixed output, nLength samples
 * @param nSampleRate  sample rate in Hz
 * @param nChannel     channel count
 * @param nDelay       accompaniment delay passed to the mixer
 * @param effect_file  IM effect preset path
 * @return 0 (always succeeds)
 *
 * Cleanup vs. original: removed the fTmp scratch buffer that was allocated
 * and freed but never used, plus the redundant nPos/nStep re-assignments.
 */
int mix(float *pVocalIn, float *pAccIn, int nLength, double gainVocal, double gainAcc, float *pOutBuf,
        int nSampleRate, int nChannel, int nDelay, std::string effect_file)
{
    CAudioMixer *cAudioMixer = new CAudioMixer();
    cAudioMixer->init(nSampleRate, nChannel);
    cAudioMixer->set_acc_delay(nDelay);
    cAudioMixer->set_vocal_volume(int(gainVocal * 50));
    cAudioMixer->set_acc_volume(int(gainAcc * 50));
    cAudioMixer->reset();
    int nPos = 0;
    int nStep = 1024;
    CAeServer cAeServer;
    cAeServer.init(nSampleRate, nChannel, nStep / nChannel);
    AE_PARAMS_IM_EFFECT im_params = {
        .effect_path = effect_file,
    };
    cAeServer.set_params(AE_TYPE_IM_EFFECT, (void *) &im_params);
    while (nPos < nLength)
    {
        if (nLength - nPos < nStep)
        {
            nStep = nLength - nPos;  // final partial chunk
        }
        // in-place effect on the vocal, then mix the two tracks
        cAeServer.process(pVocalIn + nPos, pVocalIn + nPos, nStep);
        cAudioMixer->process(pVocalIn + nPos, pAccIn + nPos, pOutBuf + nPos, nStep);
        nPos += nStep;
    }
    cAeServer.uninit();
    delete cAudioMixer;
    return 0;
}
// Run WebRTC noise suppression in place over 16-bit samples.
// A linear fade-in over the first 512*nChannel samples masks the
// denoiser's warm-up transient; a trailing partial block is left
// unprocessed (deliberate in the original).
// BUG FIX: the fade-in loop wrote pTmp[0..nStep) unconditionally, which
// runs past the buffer when nLength < 512*nChannel; the ramp length is
// now clamped to nLength.
// Returns 0 (always succeeds).
int denoise_webrtc(short *pInBuf, int nLength, int nChannel, int nSampleRate)
{
    CWebrtcDenoise cWebrtcDenoise;
    cWebrtcDenoise.init(nSampleRate, nChannel);
    float *pTmp = new float[nLength];
    for (int i = 0; i < nLength; i++)
    {
        pTmp[i] = pInBuf[i] * 1.0 / 32768;
    }
    cWebrtcDenoise.set_level(kHigh);
    const int nStep = 512 * nChannel;
    const int nFadeLen = nStep < nLength ? nStep : nLength;
    for (int i = 0; i < nFadeLen; i++)
    {
        pTmp[i] = pTmp[i] * i * 1.0 / nStep;
    }
    // process whole blocks only; skip the final partial one
    for (int i = 0; i + nStep <= nLength; i += nStep)
    {
        cWebrtcDenoise.process(pTmp + i, nStep);
    }
    for (int i = 0; i < nLength; i++)
    {
        pInBuf[i] = short(pTmp[i] * 32768);
    }
    delete[] pTmp;
    return 0;
}
// Ratio of reference-signal energy to input-signal energy, computed over
// the common prefix of the two buffers.
// NOTE(review): returns +inf/NaN if the input prefix is all zeros — confirm
// callers tolerate that.
double calc_power_rate(float *in_data, int32_t in_len, float *ref_data, int32_t ref_len)
{
    double in_power = 0;
    double ref_power = 0;
    const int32_t common_len = (in_len < ref_len) ? in_len : ref_len;
    for (int32_t idx = 0; idx < common_len; ++idx)
    {
        in_power += (in_data[idx]) * (in_data[idx]);
        ref_power += (ref_data[idx]) * (ref_data[idx]);
    }
    return ref_power / in_power;
}
int main(int argc, char *argv[])
{
if (argc != 4)
{
printf("input error! example: ./main vocal_path dst_path pitch!\n");
return -1;
}
std::string vocal_path = argv[1];
std::string dst_path = argv[2];
float pitch = strtod(argv[3], NULL);
// 读取人声
CWaveFile *oWaveFile = new CWaveFile(vocal_path.c_str(), false);
float *pVocalBuf = new float[oWaveFile->GetTotalFrames() * oWaveFile->GetChannels()];
oWaveFile->ReadFrameAsfloat(pVocalBuf, oWaveFile->GetTotalFrames());
int nStep = 1024;
int nLength = oWaveFile->GetTotalFrames() * oWaveFile->GetChannels();
CAeServer cAeServer;
cAeServer.init(oWaveFile->GetSampleRate(), oWaveFile->GetChannels(), nStep / oWaveFile->GetChannels());
AEToneShiftParam ae_param;
ae_param.max_shift = 12;
ae_param.min_shift = -12;
ae_param.tone_shift = pitch;
cAeServer.set_params(AE_TYPE_TONE_SHIFT, &ae_param);
int nPos = 0;
while (nPos < nLength)
{
if (nLength - nPos < nStep)
{
nStep = nLength - nPos;
}
cAeServer.process(pVocalBuf + nPos, pVocalBuf + nPos, nStep);
nPos += nStep;
}
// 剔除84ms延迟
int latency_pos = int(cAeServer.get_latency_ms() * oWaveFile->GetSampleRate() / 1000.0) * oWaveFile->GetChannels();
printf("latency_pos=%d\n", latency_pos);
cAeServer.uninit();
//写入文件
printf("write2file nLength:%d path:%s!\n", oWaveFile->GetTotalFrames() * oWaveFile->GetChannels(),
dst_path.c_str());
+ // 对结尾做一帧的平滑
+ int end_pos = oWaveFile->GetTotalFrames() * oWaveFile->GetChannels();
+ int fade_out = 1024 * oWaveFile->GetChannels();
+ int st_pos = end_pos - fade_out;
+ for(int i = 0; i < fade_out; i+=oWaveFile->GetChannels())
+ {
+ float rate =1 - i * 1.f / fade_out;
+ for(int j = 0; j < oWaveFile->GetChannels(); j++)
+ {
+ pVocalBuf[st_pos +i + j] = pVocalBuf[st_pos + i + j] * rate;
+ }
+ }
+
CWaveFile out_wav = CWaveFile(dst_path.c_str(), true);
out_wav.SetChannels(oWaveFile->GetChannels());
out_wav.SetSampleRate(oWaveFile->GetSampleRate());
out_wav.SetSampleFormat(SF_IEEE_FLOAT);
out_wav.SetupDone();
- out_wav.WriteFrame(pVocalBuf+latency_pos, oWaveFile->GetTotalFrames());
+ out_wav.WriteFrame(pVocalBuf+latency_pos, oWaveFile->GetTotalFrames() - latency_pos/oWaveFile->GetChannels());
delete oWaveFile;
delete[] pVocalBuf;
return 0;
}
\ No newline at end of file

File Metadata

Mime Type
text/x-diff
Expires
Sun, Jan 12, 08:31 (1 d, 10 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1347175
Default Alt Text
(23 KB)

Event Timeline