No OneTemporary
Actions

Size

23 KB

Subscribers

None

View Options

	diff --git a/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp b/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp
	index 036d772..78ca63a 100644
	--- a/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp
	+++ b/AutoCoverTool/ref/tools/mixer/simple_mixer.cpp
	@@ -1,238 +1,264 @@
	//
	// Created by yangjianli on 2019-09-09.
	//
	/**
	* Automatically mix an input vocal track with an accompaniment track.
	* gated_loudness: the measured (current) loudness
	* gain: the expected gain
	*/
	#include "iostream"
	#include "WaveFile.h"
	#include "math.h"
	#include "ebur128.h"
	#include "AudioMixer.h"
	#include "alimiter.h"
	#include "waves/inc/WaveFile.h"
	#include "CAudioEffectsChainApi.h"
	#include "string"
	#include "ae_server/CAeServer.h"
	#include <cstdio>
	#include <chrono>
	#include <iostream>
	#include <cstdlib>
	#include <sys/time.h>
	#include "denoise/webrtc/include/WebrtcDenoise.h"

	#define PROC_LEN 1024
	#define DEFAULT_BASELINE_DB (float)-14.57f

	/**
	 * Convert 16-bit PCM samples to floats by dividing each sample by 32768.
	 * @param pInBuf  input samples, at least nLen entries
	 * @param nLen    number of samples to convert; nothing is written when <= 0
	 * @param pOutBuf output buffer, at least nLen entries
	 * @return always 0
	 */
	int short2float(const short *pInBuf, int nLen, float *pOutBuf)
	{
	    for (int i = 0; i < nLen; i++)
	    {
	        // Scale in double precision, then narrow explicitly to float.
	        pOutBuf[i] = static_cast<float>(pInBuf[i] * 1.0 / 32768);
	    }
	    return 0;
	}

	/**
	 * Convert float samples to 16-bit PCM by scaling with 32768 and truncating
	 * toward zero. Values outside the representable range are saturated to
	 * [-32768, 32767] instead of wrapping around (e.g. +1.0 becomes 32767).
	 * @param pInBuf  input samples, at least nLen entries
	 * @param nLen    number of samples to convert; nothing is written when <= 0
	 * @param pOutBuf output buffer, at least nLen entries
	 * @return always 0
	 */
	int float2short(const float *pInBuf, int nLen, short *pOutBuf)
	{
	    for (int i = 0; i < nLen; i++)
	    {
	        float fScaled = pInBuf[i] * 32768;
	        // Saturate in the float domain so the integer conversion below is
	        // always in range.
	        if (fScaled > 32767.0f)
	        {
	            fScaled = 32767.0f;
	        }
	        else if (fScaled < -32768.0f)
	        {
	            fScaled = -32768.0f;
	        }
	        pOutBuf[i] = static_cast<short>(static_cast<int>(fScaled));
	    }
	    return 0;
	}

	/**
	* Measure the loudness of a whole buffer with libebur128 (EBUR128_MODE_I)
	* and derive the linear gain that would bring it to DEFAULT_BASELINE_DB.
	* @param nChannel channel count passed to ebur128_init
	* @param nSampleRate sample rate passed to ebur128_init
	* @param pData sample buffer; fed in chunks of at most PROC_LEN samples
	* @param nLength number of samples in pData (divided by nChannel to get frames)
	* @param gated_loudness [out] loudness reported by ebur128_loudness_global
	* @param gain [out] 10 ^ ((DEFAULT_BASELINE_DB - gated_loudness) / 20)
	* @return 0 on success, -1 if ebur128_init fails, -2 if adding frames fails
	*/
	-int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness, double &gain)
	+int ebur128_whole(int nChannel, int nSampleRate, float *pData, const int nLength, double &gated_loudness, double &gain)
	{
	printf("ebur128_init start .. %d\n", nLength);
	ebur128_state *st = NULL;
	st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I);
	if (NULL == st)
	{
	return -1;
	}
	int nPos = 0;
	int nTmpLength = 0;
	int nRet;
	printf("process start ..\n");
	// Feed the analyser chunk by chunk; the last chunk may be shorter than PROC_LEN.
	while (nPos < nLength)
	{
	nTmpLength = PROC_LEN;
	if (nLength - nPos < PROC_LEN)
	{
	nTmpLength = nLength - nPos;
	}
	- nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel);
	+ nRet = ebur128_add_frames_float(st, pData + nPos, nTmpLength / nChannel);
	if (nRet != 0)
	{
	// NOTE(review): st is not passed to ebur128_destroy on this path, so it leaks.
	return -2;
	}
	nPos += nTmpLength;
	}
	printf("process ok..\n");
	// Placeholder value in case ebur128_loudness_global does not write a result.
	gated_loudness = -1;
	ebur128_loudness_global(st, &gated_loudness);
	// Difference to the baseline in dB, converted to a linear amplitude factor.
	float db = (DEFAULT_BASELINE_DB - gated_loudness) / 20.f;
	gain = pow(10, db);
	printf("gated_loudness = %f db = %f gain = %f\n", gated_loudness, db, gain);
	ebur128_destroy(&st);
	return 0;
	}


	/**
	* Mix a vocal buffer with an accompaniment buffer through CAudioMixer.
	* @param pVocalIn vocal samples, at least nLength entries
	* @param pAccIn accompaniment samples, at least nLength entries
	* @param nLength number of samples to mix from each input
	* @param gainVocal vocal gain; the mixer volume is set to int(gainVocal * 50)
	* @param gainAcc accompaniment gain; the mixer volume is set to int(gainAcc * 50)
	* @param pOutBuf output buffer, at least nLength entries
	* @param nSampleRate sample rate handed to CAudioMixer::init
	* @param nChannel channel count handed to CAudioMixer::init
	* @param nDelay accompaniment delay handed to CAudioMixer::set_acc_delay
	* @return always 0
	*/
	int mix(float *pVocalIn, float *pAccIn, int nLength, double gainVocal, double gainAcc, float *pOutBuf,
	        int nSampleRate, int nChannel, int nDelay)
	{
	    CAudioMixer *cAudioMixer = new CAudioMixer();
	    cAudioMixer->init(nSampleRate, nChannel);
	    cAudioMixer->set_acc_delay(nDelay);
	    cAudioMixer->set_vocal_volume(int(gainVocal * 50));
	    cAudioMixer->set_acc_volume(int(gainAcc * 50));
	    cAudioMixer->reset();

	    // Process in chunks of up to 1024 samples; the last chunk may be shorter.
	    const int nMaxStep = 1024;
	    int nPos = 0;
	    while (nPos < nLength)
	    {
	        const int nRemain = nLength - nPos;
	        const int nStep = nRemain < nMaxStep ? nRemain : nMaxStep;
	        cAudioMixer->process(pVocalIn + nPos, pAccIn + nPos, pOutBuf + nPos, nStep);
	        nPos += nStep;
	    }

	    delete cAudioMixer;
	    return 0;
	}

	/**
	 * Denoise a 16-bit PCM buffer in place with CWebrtcDenoise at level kHigh.
	 *
	 * The samples are converted to float, the first 512 * nChannel samples get a
	 * linear fade-in, the buffer is processed in whole blocks of 512 * nChannel
	 * samples, and the result is written back as shorts. A trailing partial
	 * block is not passed to the denoiser.
	 *
	 * @param pInBuf      samples to denoise in place
	 * @param nLength     number of samples in pInBuf
	 * @param nChannel    channel count handed to CWebrtcDenoise::init
	 * @param nSampleRate sample rate handed to CWebrtcDenoise::init
	 * @return 0 on success (including an empty buffer), -1 on invalid arguments
	 */
	int denoise_webrtc(short *pInBuf, int nLength, int nChannel, int nSampleRate)
	{
	    // A non-positive channel count would make the block stride 0 and the
	    // processing loop would never terminate.
	    if (pInBuf == NULL || nLength < 0 || nChannel <= 0)
	    {
	        return -1;
	    }
	    if (nLength == 0)
	    {
	        return 0;
	    }

	    CWebrtcDenoise cWebrtcDenoise;
	    cWebrtcDenoise.init(nSampleRate, nChannel);
	    float *pTmp = new float[nLength];
	    for (int i = 0; i < nLength; i++)
	    {
	        pTmp[i] = pInBuf[i] * 1.0 / 32768;
	    }
	    cWebrtcDenoise.set_level(kHigh);
	    const int nStep = 512 * nChannel;

	    // Linear fade-in over the first block, bounded by the buffer length so
	    // short inputs are not written out of bounds.
	    const int nFade = nStep < nLength ? nStep : nLength;
	    for (int i = 0; i < nFade; i++)
	    {
	        pTmp[i] = pTmp[i] * i * 1.0 / nStep;
	    }

	    // Only whole blocks are denoised.
	    for (int i = 0; nLength - i >= nStep; i += nStep)
	    {
	        cWebrtcDenoise.process(pTmp + i, nStep);
	    }

	    for (int i = 0; i < nLength; i++)
	    {
	        // Saturate before converting back so out-of-range values do not wrap.
	        float fScaled = pTmp[i] * 32768;
	        if (fScaled > 32767.0f)
	        {
	            fScaled = 32767.0f;
	        }
	        else if (fScaled < -32768.0f)
	        {
	            fScaled = -32768.0f;
	        }
	        pInBuf[i] = short(fScaled);
	    }
	    delete[] pTmp;
	    return 0;
	}

	/**
	 * Ratio of the reference signal's energy to the input signal's energy,
	 * computed over the first min(in_len, ref_len) samples of each buffer.
	 * @param in_data  input samples
	 * @param in_len   number of samples in in_data
	 * @param ref_data reference samples
	 * @param ref_len  number of samples in ref_data
	 * @return ref_power / in_power; no guard against in_power == 0, so the
	 *         result is inf or NaN for a silent or empty input
	 */
	double calc_power_rate(const float *in_data, int32_t in_len, const float *ref_data, int32_t ref_len)
	{
	    double in_power = 0;
	    double ref_power = 0;
	    const int32_t min_len = in_len > ref_len ? ref_len : in_len;
	    for (int32_t i = 0; i < min_len; i++)
	    {
	        in_power += in_data[i] * in_data[i];
	        ref_power += ref_data[i] * ref_data[i];
	    }
	    return ref_power / in_power;
	}


	// Entry point: reads a vocal WAV and an accompaniment WAV, optionally
	// matches the vocal loudness to the accompaniment, mixes them and writes
	// the result as a float WAV.
	// Usage: ./main vocal_path acc_path mix_path [use_ada_loudness 0/1]
	int main(int argc, char *argv[])
	{
	- if (argc != 4)
	+ if (argc < 4)
	{
	- printf("input error! example: ./main vocal_path acc_path mix_path\n");
	+ printf("input error! example: ./main vocal_path acc_path mix_path use_ada_loudness[0/1][可选]\n");
	return -1;
	}
	+
	std::string sVocal = argv[1];
	std::string sAcc = argv[2];
	std::string sMix = argv[3];

	+ int ada_loudness = 0;
	+ if (argc > 4)
	+ {
	+ ada_loudness = atoi(argv[4]); // whether to adapt the loudness
	+ }
	+
	// Read the vocal track
	CWaveFile *oWaveFile = new CWaveFile(sVocal.c_str(), false);
	float *pfVocalBuf = new float[oWaveFile->GetTotalFrames() * oWaveFile->GetChannels()];
	oWaveFile->ReadFrameAsfloat(pfVocalBuf, oWaveFile->GetTotalFrames());

	// Read the accompaniment track
	CWaveFile *oWaveFile1 = new CWaveFile(sAcc.c_str(), false);
	float *pfAccBuf = new float[oWaveFile1->GetTotalFrames() * oWaveFile1->GetChannels()];
	oWaveFile1->ReadFrameAsfloat(pfAccBuf, oWaveFile1->GetTotalFrames());

	if (oWaveFile->GetChannels() != oWaveFile1->GetChannels())
	{
	printf("channel not equal!\n");
	return -1;
	}

	// Mix the vocal and the accompaniment over the shorter of the two lengths
	printf("mix wav:%s and acc:%s! %d,%d\n", sVocal.c_str(), sAcc.c_str(), oWaveFile->GetTotalFrames(), oWaveFile1->GetTotalFrames());
	int nOutLen = oWaveFile->GetTotalFrames() < oWaveFile1->GetTotalFrames() ? oWaveFile->GetTotalFrames()
	: oWaveFile1->GetTotalFrames();
	printf("XXXXXXX, %d,%d\n", nOutLen, oWaveFile->GetChannels());
	// From here on nOutLen counts samples (frames * channels)
	nOutLen = nOutLen * oWaveFile->GetChannels();

	float *pOutBuf = new float[nOutLen];

	- mix(pfVocalBuf, pfAccBuf, nOutLen, 1.0, 1.0, pOutBuf, oWaveFile->GetSampleRate(), oWaveFile->GetChannels(), 0);
	+
	+ // Leave the accompaniment untouched; stretch the vocal to 1.5x the accompaniment
	+ double vocal_gain = 1.0;
	+ if (ada_loudness == 1)
	+ {
	+ // Vocal loudness and volume gain
	+ double vocal_gated_loudness = 0;
	+ ebur128_whole(oWaveFile->GetChannels(), oWaveFile->GetSampleRate(), pfVocalBuf, nOutLen, vocal_gated_loudness, vocal_gain);
	+
	+ // Accompaniment loudness and volume gain
	+ double acc_gated_loudness = 0;
	+ double acc_gain = 1.0;
	+ ebur128_whole(oWaveFile->GetChannels(), oWaveFile->GetSampleRate(), pfAccBuf, nOutLen, acc_gated_loudness, acc_gain);
	+
	+ // Leave the accompaniment untouched; pull the vocal up to 1.5x the accompaniment loudness
	+ float db = (acc_gated_loudness - vocal_gated_loudness) / 20.f;
	+ vocal_gain = pow(10, db) * 1.5;
	+ }
	+
	+ mix(pfVocalBuf, pfAccBuf, nOutLen, vocal_gain, 1.0, pOutBuf, oWaveFile->GetSampleRate(), oWaveFile->GetChannels(), 0);

	// Write the result to file
	printf("write2file nLength:%d path:%s!\n", nOutLen, sMix.c_str());
	CWaveFile *oWaveFile2 = new CWaveFile(sMix.c_str(), true);
	oWaveFile2->SetSampleFormat(SF_IEEE_FLOAT);
	oWaveFile2->SetSampleRate(oWaveFile->GetSampleRate());
	oWaveFile2->SetChannels(oWaveFile->GetChannels());
	oWaveFile2->SetupDone();
	oWaveFile2->WriteFrame(pOutBuf, nOutLen / oWaveFile->GetChannels());

	delete oWaveFile;
	delete oWaveFile1;
	delete oWaveFile2;

	delete[] pfVocalBuf;
	delete[] pfAccBuf;
	delete[] pOutBuf;
	return 0;
	}
	\ No newline at end of file
	diff --git a/AutoCoverTool/script/update_model_gender.py b/AutoCoverTool/script/update_model_gender.py
	new file mode 100644
	index 0000000..c9160f1
	--- /dev/null
	+++ b/AutoCoverTool/script/update_model_gender.py
	@@ -0,0 +1,198 @@
	+from online.common import update_db, get_data_by_mysql
	+
	+
	+def update_gender(user_id, gender):
	+ """
	+ Look the user up in av_db.av_svc_model and update the gender only while
	+ the stored gender is still 3 [unknown].
	+ The update is issued only when exactly one matching row is found.
	+ :param user_id: user id, interpolated as a quoted string into the SQL
	+ :param gender: new gender value, interpolated unquoted into the SQL
	+ :return: None
	+ """
	+ sql = "select * from av_db.av_svc_model where user_id=\"{}\" and gender=3".format(user_id)
	+ data = get_data_by_mysql(sql)
	+ if len(data) == 1:
	+ sql = "update av_db.av_svc_model set gender={} where user_id=\"{}\"".format(gender, user_id)
	+ update_db(sql)
	+
	+
	+if __name__ == '__main__':
	+ arr = [
	+ ["10133099162839896", 2],
	+ ["10133099162997509", 2],
	+ ["10133099163727028", 1],
	+ ["10133099163890661", 2],
	+ ["10133099163991355", 2],
	+ ["10133099164311744", 1],
	+ ["10133099164313669", 1],
	+ ["10133099165386135", 1],
	+ ["10133099166041782", 1],
	+ ["10133099166050735", 1],
	+ ["10133099166238022", 2],
	+ ["10133099166605472", 2],
	+ ["10133099166892845", 1],
	+ ["10133099166898301", 2],
	+ ["10133099167125366", 1],
	+ ["10133099167394822", 2],
	+ ["10133099167940583", 2],
	+ ["10133099168376799", 1],
	+ ["10133099168924385", 2],
	+ ["10133099169324630", 1],
	+ ["10133099169381678", 2],
	+ ["10133099169957610", 2],
	+ ["10133099169975944", 1],
	+ ["10133099170492806", 1],
	+ ["10133099170892510", 1],
	+ ["10133099171013390", 1],
	+ ["10133099171081854", 2],
	+ ["10133099171087756", 1],
	+ ["10133099171192036", 1],
	+ ["10133099171607206", 2],
	+ ["10133099171754668", 2],
	+ ["10133099172086640", 2],
	+ ["10133099172138002", 2],
	+ ["10133099172207062", 1],
	+ ["10133099172269180", 1],
	+ ["10133099172339368", 1],
	+ ["10414574138359736", 2],
	+ ["10414574138369704", 2],
	+ ["10414574138530136", 1],
	+ ["10414574139102564", 2],
	+ ["10414574139967984", 1],
	+ ["10414574140258122", 1],
	+ ["10414574140405046", 1],
	+ ["10414574140676612", 1],
	+ ["10414574140780266", 2],
	+ ["10414574142812606", 2],
	+ ["10414574143134746", 1],
	+ ["10414574143604234", 1],
	+ ["10414574143906306", 1],
	+ ["10414574144526110", 1],
	+ ["10414574144707118", 1],
	+ ["10414574145823464", 2],
	+ ["10414574145904464", 1],
	+ ["10414574146080322", 2],
	+ ["10414574146420792", 2],
	+ ["10414574146884926", 1],
	+ ["10414574147339012", 1],
	+ ["10414574147372254", 2],
	+ ["10414574147425002", 2],
	+ ["10414574147597736", 2],
	+ ["10414574147647706", 1],
	+ ["10414574147658166", 1],
	+ ["10414574147828554", 1],
	+ ["10414574148014424", 1],
	+ ["10414574148247626", 2],
	+ ["10414574148624370", 2],
	+ ["10414574148669184", 1],
	+ ["10414574148692388", 1],
	+ ["10414574148859406", 1],
	+ ["10414574149000590", 1],
	+ ["10414574149067094", 1],
	+ ["10414574149143568", 2],
	+ ["10414574149221618", 1],
	+ ["10414574149303702", 2],
	+ ["10696049115833380", 1],
	+ ["10696049115944594", 1],
	+ ["10696049115987498", 1],
	+ ["10696049116130908", 2],
	+ ["10696049116285936", 2],
	+ ["10696049117044138", 2],
	+ ["10696049117276112", 1],
	+ ["10696049117685892", 2],
	+ ["10696049119207544", 1],
	+ ["10696049119659788", 1],
	+ ["10696049120426324", 2],
	+ ["10696049120919532", 1],
	+ ["10696049121183928", 1],
	+ ["10696049121338248", 1],
	+ ["10696049121406512", 1],
	+ ["10696049121502826", 2],
	+ ["10696049123071172", 1],
	+ ["10696049123219186", 1],
	+ ["10696049123447868", 1],
	+ ["10696049123506368", 2],
	+ ["10696049123660154", 2],
	+ ["10696049123805538", 1],
	+ ["10696049124073344", 2],
	+ ["10696049124110520", 1],
	+ ["10696049124182084", 2],
	+ ["10696049124450100", 1],
	+ ["10696049124595430", 2],
	+ ["10696049124833978", 2],
	+ ["10696049125084058", 2],
	+ ["10696049125481092", 2],
	+ ["10696049125584584", 1],
	+ ["10696049125798928", 2],
	+ ["10696049125820940", 1],
	+ ["10696049125864268", 2],
	+ ["10696049125885128", 1],
	+ ["10696049125972416", 1],
	+ ["10696049125997808", 1],
	+ ["10696049125999636", 2],
	+ ["10977524091895906", 2],
	+ ["10977524092611108", 1],
	+ ["10977524092703694", 2],
	+ ["10977524092737576", 2],
	+ ["10977524092926748", 1],
	+ ["10977524093350560", 2],
	+ ["10977524093613618", 1],
	+ ["10977524094859474", 1],
	+ ["10977524096635844", 1],
	+ ["10977524096695280", 2],
	+ ["10977524096819198", 1],
	+ ["10977524096995342", 2],
	+ ["10977524098416100", 1],
	+ ["10977524098804908", 1],
	+ ["10977524099612646", 2],
	+ ["10977524100174518", 1],
	+ ["10977524100978492", 1],
	+ ["10977524101050108", 1],
	+ ["10977524101220516", 1],
	+ ["10977524101243434", 2],
	+ ["10977524101575638", 2],
	+ ["10977524101593280", 1],
	+ ["10977524101680844", 1],
	+ ["10977524102313334", 1],
	+ ["10977524102348346", 1],
	+ ["10977524102432628", 1],
	+ ["10977524102444474", 2],
	+ ["10977524102525738", 2],
	+ ["10977524102533320", 1],
	+ ["10977524102598012", 1],
	+ ["10977524102674590", 2],
	+ ["10977524102678972", 2],
	+ ["10977524102679572", 2],
	+ ["1125899906849269", 1],
	+ ["1125899908853925", 1],
	+ ["1125899908854526", 1],
	+ ["1125899908904395", 1],
	+ ["1125899909347935", 1],
	+ ["1125899909790502", 1],
	+ ["1125899910057693", 2],
	+ ["1125899910105120", 1],
	+ ["1125899910461551", 1],
	+ ["1125899910516883", 1],
	+ ["1125899910808376", 2],
	+ ["1125899910826302", 2],
	+ ["1125899910943438", 1],
	+ ["1125899911011477", 1],
	+ ["1125899911821662", 2],
	+ ["1125899911962207", 1],
	+ ["1125899912327206", 2],
	+ ["1125899912442110", 2],
	+ ["1125899912511535", 1],
	+ ["1125899912520616", 2],
	+ ["1125899912538184", 1],
	+ ["1125899912584668", 1],
	+ ["1125899912859360", 1],
	+ ["1125899912895306", 2],
	+ ["1125899912929958", 1],
	+ ["1125899912987231", 1],
	+ ["1125899913281334", 1],
	+ ["1125899913294939", 2],
	+ ["3635049378", 1],
	+ ["8725724286358130", 2]
	+ ]
	+
	+ for aa in arr:
	+ user_id, gender = aa
	+ update_gender(user_id, gender)
	+ # exit(-1)
	diff --git a/AutoCoverTool/svc_inference/svc_inference_one.py b/AutoCoverTool/svc_inference/svc_inference_one.py
	index ecb8e8c..b0f7157 100644
	--- a/AutoCoverTool/svc_inference/svc_inference_one.py
	+++ b/AutoCoverTool/svc_inference/svc_inference_one.py
	@@ -1,215 +1,215 @@
	"""
	SVC推理逻辑
	conda activate auto_song_cover_t4
	export PYTHONPATH=$PWD:$PWD/ref/music_remover/demucs:$PWD/ref/so_vits_svc:$PWD/ref/split_dirty_frame:$PWD/ref/online
	"""
	import os
	import json
	import shutil
	from ref.so_vits_svc.inference_main import *
	from ref.speaker_feature_extractor.sf_extractor_interface import SFExtractorInterface

	gs_draw_volume_exe = "/data/gpu_env_common/bin/draw_volume"
	gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer"

	gs_svci_success = 0
	gs_svci_data_params_check_model_path = 1
	gs_svci_data_params_check_vocal_path = 2
	gs_svci_data_params_check_acc_path = 3
	gs_svci_data_params_check_video_path = 4
	gs_svci_data_prepare_transcode_media = 5
	gs_svci_data_inference = 6
	gs_svci_svc_trans_442 = 7
	gs_svci_svc_volume = 8
	gs_svci_svc_mix = 9
	gs_svci_svc_mix_gen = 10
	gs_svci_svc_mix_audio_video = 11


	class SVCInferenceOne:
	def __init__(self):
	"""
	Initialise the intermediate file paths to None (they are filled in by
	data_prepare), locate config.json next to this file and create the
	speaker-feature extractor.
	"""
	self.vocal_32_wav_path = None
	self.vocal_wav_path = None
	self.acc_wav_path = None
	self.config = os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.json")
	self.spk_emb_inst = SFExtractorInterface()

	def mix(self, work_dir, svc_file, vocal_file, acc_file, mix_path):
	"""
	Transcode the converted vocal, adjust its volume against the original
	vocal, mix it with the accompaniment and encode the result.
	:param work_dir: working directory; a "cache" sub-directory is recreated inside it
	:param svc_file: converted vocal produced by inference
	:param vocal_file: original vocal, passed to the draw_volume tool
	:param acc_file: accompaniment passed to the simple_mixer tool
	:param mix_path: destination of the final encoded mix
	:return: gs_svci_success, or the gs_svci_* code of the step whose output file is missing
	"""
	cache_dir = os.path.join(work_dir, "cache")
	if os.path.exists(cache_dir):
	shutil.rmtree(cache_dir)
	os.makedirs(cache_dir)

	# Transcode the SVC output to 44.1 kHz, 2 channels
	svc_442_file = os.path.join(cache_dir, "442.wav")
	st = time.time()
	cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(svc_file, svc_442_file)
	os.system(cmd)
	if not os.path.exists(svc_442_file):
	return gs_svci_svc_trans_442
	logging.info("transcode,{},sp={}".format(svc_file, time.time() - st))

	# After transcoding, stretch the volume once more to guarantee the loudness
	st = time.time()
	volume_path = os.path.join(cache_dir, "volume.wav")
	cmd = "{} {} {} {}".format(gs_draw_volume_exe, svc_442_file, vocal_file, volume_path)
	os.system(cmd)
	if not os.path.exists(volume_path):
	print("{} ERROR draw volume".format(volume_path))
	return gs_svci_svc_volume
	logging.info("draw_volume2,{},sp={}".format(svc_file, time.time() - st))

	# Mix
	st = time.time()
	mix_wav_path = os.path.join(cache_dir, "mix.wav")
	- cmd = "{} {} {} {}".format(gs_simple_mixer_path, volume_path, acc_file, mix_wav_path)
	+ cmd = "{} {} {} {} 1".format(gs_simple_mixer_path, volume_path, acc_file, mix_wav_path)
	os.system(cmd)
	if not os.path.exists(mix_wav_path):
	return gs_svci_svc_mix
	logging.info("mixer,{},sp={}".format(svc_file, time.time() - st))

	# Encode to m4a
	st = time.time()
	cmd = "ffmpeg -i {} -ab 128k -y {} -loglevel fatal".format(mix_wav_path, mix_path)
	print(cmd)
	os.system(cmd)
	if not os.path.exists(mix_path):
	return gs_svci_svc_mix
	logging.info("encode,{},sp={}".format(svc_file, time.time() - st))
	return gs_svci_success

	def params_check(self, model_path, vocal_path, acc_path, video_path):
	"""
	Check that every input path exists, in the order model, vocal,
	accompaniment, video.
	:return: gs_svci_success, or the gs_svci_data_params_check_* code of the first missing path
	"""
	if not os.path.exists(model_path):
	print("model_path={} is null".format(model_path))
	return gs_svci_data_params_check_model_path
	if not os.path.exists(vocal_path):
	print("vocal_path={} is null".format(vocal_path))
	return gs_svci_data_params_check_vocal_path
	if not os.path.exists(acc_path):
	print("acc_path={} is null".format(acc_path))
	return gs_svci_data_params_check_acc_path
	if not os.path.exists(video_path):
	print("video_path={} is null".format(video_path))
	return gs_svci_data_params_check_video_path
	return gs_svci_success

	def data_prepare(self, work_dir, vocal_path, acc_path):
	"""
	Transcode the inputs with ffmpeg into work_dir: the vocal to
	44.1 kHz / 2 channels and to 32 kHz / 1 channel, the accompaniment to
	44.1 kHz / 2 channels. The resulting paths are stored on the instance.
	:return: True when the 32 kHz vocal and the accompaniment WAV both exist
	"""
	self.vocal_32_wav_path = os.path.join(work_dir, "vocal_32.wav")
	self.vocal_wav_path = os.path.join(work_dir, "vocal.wav")
	self.acc_wav_path = os.path.join(work_dir, "acc.wav")
	cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {}".format(vocal_path, self.vocal_wav_path)
	os.system(cmd)

	cmd = "ffmpeg -i {} -ar 32000 -ac 1 -y {}".format(vocal_path, self.vocal_32_wav_path)
	os.system(cmd)
	cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {}".format(acc_path, self.acc_wav_path)
	os.system(cmd)
	# NOTE(review): self.vocal_wav_path is not checked here although mix() uses it later.
	return os.path.exists(self.vocal_32_wav_path) and os.path.exists(self.acc_wav_path)

	def process_logic(self, work_dir, model_path, vocal_path, acc_path, video_path, out_path):
	    """
	    Run the whole pipeline: transcode the inputs, run SVC inference, mix the
	    converted vocal with the accompaniment, mux the mix with the video and
	    extract the speaker embedding of the converted vocal.

	    :param work_dir: working directory for all intermediate files
	    :param model_path: model checkpoint passed to inf()
	    :param vocal_path: input vocal media
	    :param acc_path: input accompaniment media
	    :param video_path: input video whose video stream is copied to the output
	    :param out_path: destination of the final video
	    :return: always a tuple (err, emb); emb is [] whenever err != gs_svci_success
	    """
	    # 1. Transcode the vocal and the accompaniment first
	    st = time.time()
	    if not self.data_prepare(work_dir, vocal_path, acc_path):
	        print("transcode vocal={} or acc={} err!\n".format(vocal_path, acc_path))
	        # Return a tuple like every other exit: the caller unpacks "err, emb".
	        return gs_svci_data_prepare_transcode_media, []
	    print("transcode vocal and acc sp={}".format(time.time() - st))

	    # 2. Run the inference
	    # raw_audio_path, dst_path
	    st = time.time()
	    svc_file = os.path.join(work_dir, "trans_vocal.wav")
	    inf(model_path, self.config, self.vocal_32_wav_path, svc_file, 'prod')
	    if not os.path.exists(svc_file):
	        print("inference err vocal_path={}, model_path={}".format(vocal_path, model_path))
	        return gs_svci_data_inference, []
	    print("inf sp={}".format(time.time() - st))

	    # 3. Generate the mixed work
	    st = time.time()
	    mix_tmp_path = os.path.join(work_dir, "mix.wav")
	    err = self.mix(work_dir, svc_file, self.vocal_wav_path, self.acc_wav_path, mix_tmp_path)
	    if err != gs_svci_success:
	        return err, []
	    if not os.path.exists(mix_tmp_path):
	        return gs_svci_svc_mix_gen, []
	    print("mix sp={}".format(time.time() - st))

	    st = time.time()
	    # 4. Encode the audio and merge it with the video
	    cmd = "ffmpeg -i {} -i {} -acodec aac -strict -2 -b:a 128k -vcodec copy -shortest -af apad -y {}".format(
	        video_path,
	        mix_tmp_path,
	        out_path)
	    os.system(cmd)
	    if not os.path.exists(out_path):
	        print("mix audio_video err={}".format(video_path, mix_tmp_path))
	        return gs_svci_svc_mix_audio_video, []
	    print("mix audio and video sp={}".format(time.time() - st))

	    # 5. Extract the speaker embedding
	    st = time.time()
	    emb = self.spk_emb_inst.process(svc_file)
	    print("get emb sp={}".format(time.time() - st))
	    return gs_svci_success, emb

	def process(self, work_dir, model_path, vocal_path, acc_path, video_path, out_path):
	"""
	Validate the input paths, recreate work_dir from scratch, run
	process_logic and remove work_dir again afterwards.
	:return: (err, emb) as returned by process_logic, or (err, []) when the path check fails
	"""
	err = self.params_check(model_path, vocal_path, acc_path, video_path)
	if err != gs_svci_success:
	return err, []

	# Any existing content of work_dir is deleted before the run.
	if os.path.exists(work_dir):
	shutil.rmtree(work_dir)
	os.makedirs(work_dir)
	st = time.time()
	err, emb = self.process_logic(work_dir, model_path, vocal_path, acc_path, video_path, out_path)
	print("process_logic sp={}".format(time.time() - st))
	shutil.rmtree(work_dir)
	return err, emb


	def test():
	"""
	Manual smoke test: runs the full pipeline on hard-coded sample files and
	prints the returned error code and embedding.
	"""
	svc_inst = SVCInferenceOne()
	b_dir = "/data/rsync/jianli.yang/AutoCoverTool/data/test_svc_inference_one/"
	w_dir = os.path.join(b_dir, "rg_input")
	in_m4a = os.path.join(b_dir, "rg_input.m4a")
	in_acc_m4a = os.path.join(b_dir, "acc.m4a")
	in_video = os.path.join(b_dir, "rg.mp4")
	out_video = os.path.join(b_dir, "rg_input_out.mp4")
	m_path = "/data/rsync/jianli.yang/AutoCoverTool/data/train_users/jianli/logs/32k/G_2000.pth"
	err, emb = svc_inst.process(w_dir, m_path, in_m4a, in_acc_m4a, in_video, out_video)
	print(err)
	print(emb)


	if __name__ == '__main__':
	    # Command-line entry point: runs the pipeline and writes the speaker
	    # embedding as JSON to emb_path.
	    if len(sys.argv) != 8:
	        print("input err!")
	        print(
	            "example: work_dir[临时工作目录，内部会自动清除数据] model_path in_media_path in_acc_media_path in_video_path out_video_path emb_path")
	        exit(-1)

	    w_dir = sys.argv[1]
	    m_path = sys.argv[2]
	    in_m4a = sys.argv[3]
	    in_acc_m4a = sys.argv[4]
	    in_video = sys.argv[5]
	    out_video = sys.argv[6]
	    emb_path = sys.argv[7]
	    svc_inst = SVCInferenceOne()
	    err, emb = svc_inst.process(w_dir, m_path, in_m4a, in_acc_m4a, in_video, out_video)
	    if err != gs_svci_success:
	        # On failure emb is a plain list without .tolist(); report the error
	        # code and exit with it instead of crashing with AttributeError.
	        print("svc inference err={}".format(err))
	        exit(err)
	    with open(emb_path, "w") as f:
	        f.write(json.dumps({"emb": list(emb.tolist())}))

	# print(err)
	# print(emb)

File Metadata

Mime Type: text/x-diff
Expires: Sun, Jan 12, 08:32 (1 d, 15 h)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 1347182
Default Alt Text: (23 KB)

No OneTemporaryActions

View Options

File Metadata

Event Timeline

No OneTemporary
Actions