diff --git a/AIMeiSheng/RawNet3/infererence_fang_meisheng.py b/AIMeiSheng/RawNet3/infererence_fang_meisheng.py index 471f92a..5612582 100644 --- a/AIMeiSheng/RawNet3/infererence_fang_meisheng.py +++ b/AIMeiSheng/RawNet3/infererence_fang_meisheng.py @@ -1,269 +1,270 @@ import argparse import itertools import os import sys from typing import Dict import numpy as np import soundfile as sf import torch import torch.nn.functional as F from tqdm import tqdm from models.RawNet3 import RawNet3 from models.RawNetBasicBlock import Bottle2neck from utils import tuneThresholdfromScore, ComputeErrorRates, ComputeMinDcf #model_directory = '/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3' #sys.path.append(os.path.abspath(model_directory)) -def get_embed_model(): +def get_embed_model(model_path): model = RawNet3( Bottle2neck, model_scale=8, context=True, summed=True, encoder_type="ECA", nOut=256, out_bn=False, sinc_stride=10, log_sinc=True, norm_sinc="mean", grad_mult=1, ) model.load_state_dict( torch.load( - "/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3/models/weights/model.pt", + model_path, + # "/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3/models/weights/model.pt", map_location=lambda storage, loc: storage, )["model"] ) model.eval() return model def main(args: Dict, model=None) -> None: if model == None: model = RawNet3( Bottle2neck, model_scale=8, context=True, summed=True, encoder_type="ECA", nOut=256, out_bn=False, sinc_stride=10, log_sinc=True, norm_sinc="mean", grad_mult=1, ) model.load_state_dict( torch.load( "./models/weights/model.pt", map_location=lambda storage, loc: storage, )["model"] ) model.eval() # gpu = False gpu = True if torch.cuda.is_available() else False #print("RawNet3 initialised & weights loaded!") if torch.cuda.is_available(): #print("Cuda available, conducting inference on GPU") model = model.to("cuda") gpu = True if args.inference_utterance: output = extract_speaker_embd( model, fn=args.input, n_samples=48000, 
n_segments=args.n_segments, gpu=gpu, ).mean(0) #print("embead shape:", output.size()) np.save(args.out_dir, output.detach().cpu().numpy()) return if args.vox1_o_benchmark: with open("../../trials/cleaned_test_list.txt", "r") as f: trials = f.readlines() ## Get a list of unique file names files = list(itertools.chain(*[x.strip().split()[-2:] for x in trials])) setfiles = list(set(files)) setfiles.sort() embd_dic = {} for f in tqdm(setfiles): embd_dic[f] = extract_speaker_embd( model, os.path.join(args.DB_dir, f), n_samples=64000, gpu=gpu ) labels, scores = [], [] for line in trials: data = line.split() ref_feat = F.normalize(embd_dic[data[1]], p=2, dim=1) com_feat = F.normalize(embd_dic[data[2]], p=2, dim=1) if gpu: ref_feat = ref_feat.cuda() com_feat = com_feat.cuda() dist = ( torch.cdist( ref_feat.reshape((args.n_segments, -1)), com_feat.reshape((args.n_segments, -1)), ) .detach() .cpu() .numpy() ) score = -1.0 * np.mean(dist) labels.append(int(data[0])) scores.append(score) result = tuneThresholdfromScore(scores, labels, [1, 0.1]) fnrs, fprs, thresholds = ComputeErrorRates(scores, labels) p_target, c_miss, c_fa = 0.05, 1, 1 mindcf, _ = ComputeMinDcf( fnrs, fprs, thresholds, p_target, c_miss, c_fa ) print( "Vox1-O benchmark Finished. EER: %2.4f, minDCF:%.5f" % (result[1], mindcf) ) import librosa def extract_speaker_embd( model, fn: str, n_samples: int, n_segments: int = 10, gpu: bool = False ) -> np.ndarray: #audio, sample_rate = sf.read(fn) audio, sample_rate = librosa.load(fn,sr=16000) ##fang add if len(audio.shape) > 1: raise ValueError( f"RawNet3 supports mono input only. Input data has a shape of {audio.shape}." ) if sample_rate != 16000: raise ValueError( f"RawNet3 supports 16k sampling rate only. Input data's sampling rate is {sample_rate}." 
) if ( len(audio) < n_samples ): # RawNet3 was trained using utterances of 3 seconds shortage = n_samples - len(audio) + 1 audio = np.pad(audio, (0, shortage), "wrap") audios = [] startframe = np.linspace(0, len(audio) - n_samples, num=n_segments) for asf in startframe: audios.append(audio[int(asf) : int(asf) + n_samples]) audios = torch.from_numpy(np.stack(audios, axis=0).astype(np.float32)) if gpu: audios = audios.to("cuda") with torch.no_grad(): output = model(audios) return output def get_embed(target_wav, embed_npy, model=None): parser = argparse.ArgumentParser(description="RawNet3 inference") parser.add_argument( "--inference_utterance", default=True, action="store_true" ) parser.add_argument( "--input", type=str, default="", help="Input file to extract embedding. Required when 'inference_utterance' is True", ) parser.add_argument( "--vox1_o_benchmark", default=False, action="store_true" ) parser.add_argument( "--DB_dir", type=str, default="", help="Directory for VoxCeleb1. Required when 'vox1_o_benchmark' is True", ) parser.add_argument("--out_dir", type=str, default="./out.npy") parser.add_argument( "--n_segments", type=int, default=10, help="number of segments to make using each utterance", ) args = parser.parse_args() args.input = target_wav args.out_dir = embed_npy assert args.inference_utterance or args.vox1_o_benchmark if args.inference_utterance: assert args.input != "" if args.vox1_o_benchmark: assert args.DB_dir != "" #sys.exit(main(args,model)) main(args,model) if __name__ == "__main__": parser = argparse.ArgumentParser(description="RawNet3 inference") parser.add_argument( "--inference_utterance", default=False, action="store_true" ) parser.add_argument( "--input", type=str, default="", help="Input file to extract embedding. Required when 'inference_utterance' is True", ) parser.add_argument( "--vox1_o_benchmark", default=False, action="store_true" ) parser.add_argument( "--DB_dir", type=str, default="", help="Directory for VoxCeleb1. 
Required when 'vox1_o_benchmark' is True", ) parser.add_argument("--out_dir", type=str, default="./out.npy") parser.add_argument( "--n_segments", type=int, default=10, help="number of segments to make using each utterance", ) args = parser.parse_args() assert args.inference_utterance or args.vox1_o_benchmark if args.inference_utterance: assert args.input != "" if args.vox1_o_benchmark: assert args.DB_dir != "" sys.exit(main(args)) diff --git a/AIMeiSheng/docker_demo/.requirements.txt.swp b/AIMeiSheng/docker_demo/.requirements.txt.swp deleted file mode 100644 index 1adaec3..0000000 Binary files a/AIMeiSheng/docker_demo/.requirements.txt.swp and /dev/null differ diff --git a/AIMeiSheng/docker_demo/common.py b/AIMeiSheng/docker_demo/common.py new file mode 100644 index 0000000..6a31932 --- /dev/null +++ b/AIMeiSheng/docker_demo/common.py @@ -0,0 +1,52 @@ +import os +import time +import logging +import urllib, urllib.request + + +def download2disk(url, dst_path): + st = time.time() + urllib.request.urlretrieve(url, dst_path) + print(f"download {url} -> {dst_path} sp = {time.time() - st}") + return os.path.exists(dst_path) + + +def exec_cmd(cmd): + # gs_logger.info(cmd) + print(cmd) + ret = os.system(cmd) + if ret != 0: + return False + return True + + +def exec_cmd_and_result(cmd): + r = os.popen(cmd) + text = r.read() + r.close() + return text + + +def upload_file2cos(key, file_path, region='ap-singapore', bucket_name='av-audit-sync-sg-1256122840'): + """ + 将文件上传到cos + :param key: 桶上的具体地址 + :param file_path: 本地文件地址 + :param region: 区域 + :param bucket_name: 桶地址 + :return: + """ + gs_coscmd = "coscmd" + gs_coscmd_conf = "~/.cos.conf" + + cmd = "{} -c {} -r {} -b {} upload {} {}".format(gs_coscmd, gs_coscmd_conf, region, bucket_name, file_path, key) + if exec_cmd(cmd): + cmd = "{} -c {} -r {} -b {} info {}".format(gs_coscmd, gs_coscmd_conf, region, bucket_name, key) \ + + "| grep Content-Length |awk \'{print $2}\'" + res_str = exec_cmd_and_result(cmd) + 
logging.info("{},res={}".format(key, res_str)) + size = float(res_str) + if size > 0: + return True + return False + return False diff --git a/AIMeiSheng/docker_demo/http_server.py b/AIMeiSheng/docker_demo/http_server.py new file mode 100644 index 0000000..23ac0ba --- /dev/null +++ b/AIMeiSheng/docker_demo/http_server.py @@ -0,0 +1,128 @@ +# -*- coding: UTF-8 -*- + +""" +SVC处理逻辑 +1. 根据跟定的vocal_url 判别男女 +2. 根据男女信息选择适合的男女url +3. 模型推理 +""" + +import gc +import os +import shutil +import sys +import time +import logging +import hashlib +import numpy as np +import multiprocessing as mp +from multiprocessing import Pool +from flask import Flask, jsonify, request, abort +from common import download2disk, exec_cmd, upload_file2cos +from svc_online import GSWorkerAttr, SVCOnline, volume_adjustment + +# 全局设置 +import socket + +hostname = socket.gethostname() +log_file_name = f"av_svc_{hostname}.log" +logging.basicConfig(filename=log_file_name, format='%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S', + level=logging.INFO) + +# errcode +gs_err_code_success = 0 +gs_err_code_download_vocal = 100 +gs_err_code_download_svc_url = 101 +gs_err_code_svc_process = 102 +gs_err_code_transcode = 103 +gs_err_code_volume_adjust = 104 +gs_err_code_upload = 105 + +sys.path.append(os.path.dirname(__file__)) +sys.path.append(os.path.join(os.path.dirname(__file__), "../")) + +app = Flask(__name__) + + +def download_data(worker_attr): + vocal_path = os.path.join(worker_attr.tmp_dir, worker_attr.distinct_id) + if os.path.exists(vocal_path): + os.remove(vocal_path) + + st = time.time() + if not download2disk(worker_attr.vocal_url, worker_attr.vocal_path): + return gs_err_code_download_vocal + logging.info(f"download vocal_url={worker_attr.vocal_url} sp = {time.time() - st}") + + # download svc_source_url + if not os.path.exists(worker_attr.female_svc_source_path): + st = time.time() + if not download2disk(worker_attr.female_svc_source_url, worker_attr.female_svc_source_path): + 
return gs_err_code_download_svc_url + logging.info(f"download female_url={worker_attr.female_svc_source_url} sp = {time.time() - st}") + + # download svc_source_url + if not os.path.exists(worker_attr.male_svc_source_path): + st = time.time() + if not download2disk(worker_attr.male_svc_source_url, worker_attr.male_svc_source_path): + return gs_err_code_download_svc_url + logging.info(f"download male_url={worker_attr.male_svc_source_url} sp = {time.time() - st}") + return gs_err_code_success + + +def transcode(wav_path, dst_path): + st = time.time() + cmd = f"ffmpeg -i {wav_path} -ar 44100 -ac 2 -b:a 64k -y {dst_path} -loglevel fatal" + exec_cmd(cmd) + logging.info(f"transcode cmd={cmd}, sp = {time.time() - st}") + return os.path.exists(dst_path) + + +gs_svc_online = None + + +def process_one(input_data): + logging.info(f"start input={input_data} start prepare data ...") + worker_attr = GSWorkerAttr(input_data) + err = download_data(worker_attr) + if err != gs_err_code_success: + return err, None + + # process audio + global gs_svc_online + if gs_svc_online is None: + gs_svc_online = SVCOnline() + gs_svc_online.process(worker_attr) + if not os.path.exists(worker_attr.target_wav_path): + return gs_err_code_svc_process, None + + # 音量拉伸到指定响度 + volume_adjustment(worker_attr.target_wav_path, worker_attr.target_loudness, worker_attr.target_wav_ad_path) + if not os.path.exists(worker_attr.target_wav_ad_path): + return gs_err_code_volume_adjust, None + + # transcode + if not transcode(worker_attr.target_wav_path, worker_attr.target_path): + return gs_err_code_transcode, None + + # upload + st = time.time() + if upload_file2cos(worker_attr.target_url, worker_attr.target_path): + return gs_err_code_upload, None + logging.info(f"audio_url={worker_attr.vocal_url} upload {worker_attr.target_url} sp = {time.time() - st}") + return gs_err_code_success, worker_attr.target_path + + +@app.route("/ai_meisheng", methods=["POST"]) +def get_song_res(): + data = request.json + st = 
time.time() + logging.info(f"ai_meisheng:in:{data}") + ret, url = process_one(data) + all_ret_msg = jsonify({"out_url": url, "ret": ret}) + logging.info(f"ai_meisheng:out:{data}-{all_ret_msg}, sp={time.time() - st}") + return all_ret_msg + + +if __name__ == "__main__": + app.run(host='0.0.0.0', port=5000, threaded=False) diff --git a/AIMeiSheng/docker_demo/main.py b/AIMeiSheng/docker_demo/main.py deleted file mode 100644 index 094c2fc..0000000 --- a/AIMeiSheng/docker_demo/main.py +++ /dev/null @@ -1,12 +0,0 @@ -import gradio as gr - -def greet(name): - return "Hello " + name + "!!" - -demo = gr.Interface(fn=greet, inputs="text", outputs="text") - -if __name__ == "__main__": - demo.launch(server_name="0.0.0.0") - # 注意:gradio启动项目后默认地址为127.0.0.1;使用docker部署需要将地址修改为0.0.0.0,否则会导致地址访问错误 - # 默认端口为7860,如需更改可在launch()中设置server_port=7000 -~ diff --git a/AIMeiSheng/docker_demo/svc_online.py b/AIMeiSheng/docker_demo/svc_online.py new file mode 100644 index 0000000..f952346 --- /dev/null +++ b/AIMeiSheng/docker_demo/svc_online.py @@ -0,0 +1,162 @@ +# -*- coding: UTF-8 -*- +""" +SVC的核心处理逻辑 +""" +import os +import shutil +import hashlib +import time + +from AIMeiSheng.meisheng_svc_final import get_svc, process_svc +from AIMeiSheng.voice_classification.online.voice_class_online_fang import VoiceClass +from AIMeiSheng.RawNet3.infererence_fang_meisheng import get_embed, get_embed_model +from AIMeiSheng.myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main, load_hubert, get_vc, get_rmvpe + +from AIMeiSheng.docker_demo.common import * + +gs_resource_cache_dir = "/tmp/gs_svc_resource_cache" +gs_tmp_dir = "/tmp/gs_svc_tmp" +gs_model_dir = "/tmp/models" + +if os.path.exists(gs_tmp_dir): + shutil.rmtree(gs_tmp_dir) +os.makedirs(gs_model_dir, exist_ok=True) + +# 预设参数 +gs_gender_models_url = "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/models.zip" +gs_svc_emb_url = "" +gs_svc_model_url = "" +gs_volume_bin_url = 
"https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/dataset/AIMeiSheng/ebur128_tool" + + +class GSWorkerAttr: + def __init__(self, input_data): + vocal_url = input_data["vocal_url"] + female_svc_source_url = input_data["female_svc_url"] + male_svc_source_url = input_data["male_svc_url"] + st_tm = input_data["st_tm"] # 单位是s + ed_tm = input_data["ed_tm"] # 单位是s + + self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest() + self.vocal_url = vocal_url + self.target_url = input_data["target_url"] + + ext = vocal_url.split(".")[-1] + self.vocal_path = os.path.join(gs_tmp_dir, self.distinct_id + f"_in.{ext}") + self.target_wav_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.wav") + self.target_wav_ad_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out_ad.wav") + self.target_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.m4a") + + self.female_svc_source_url = female_svc_source_url + self.male_svc_source_url = male_svc_source_url + + ext = female_svc_source_url.split(".")[-1] + self.female_svc_source_path = hashlib.md5(female_svc_source_url.encode()).hexdigest() + "." + ext + ext = male_svc_source_url.split(".")[-1] + self.male_svc_source_path = hashlib.md5(male_svc_source_url.encode()).hexdigest() + "." 
+ ext + self.st_tm = st_tm + self.ed_tm = ed_tm + self.target_loudness = input_data["target_loudness"] + + self.tmp_dir = os.path.join(gs_tmp_dir, self.distinct_id) + if os.path.exists(self.tmp_dir): + shutil.rmtree(self.tmp_dir) + os.makedirs(self.tmp_dir) + + def __del__(self): + if os.path.exists(self.tmp_dir): + shutil.rmtree(self.tmp_dir) + + +def init_gender_model(): + """ + 下载模型 + :return: + """ + dst_model_dir = os.path.join(gs_model_dir, "voice_classification") + if not os.path.exists(dst_model_dir): + dst_zip_path = os.path.join(gs_model_dir, "models.zip") + if not download2disk(gs_gender_models_url, dst_zip_path): + logging.fatal(f"download gender_model err={gs_gender_models_url}") + cmd = f"cd {gs_model_dir}; unzip {dst_zip_path}; mv models voice_classification; rm -f {dst_zip_path}" + os.system(cmd) + if not os.path.exists(dst_model_dir): + logging.fatal(f"unzip {dst_zip_path} err") + + music_voice_pure_model = os.path.join(dst_model_dir, "voice_005_rec_v5.pth") + music_voice_no_pure_model = os.path.join(dst_model_dir, "voice_10_v5.pth") + gender_pure_model = os.path.join(dst_model_dir, "gender_8k_ratev5_v6_adam.pth") + gender_no_pure_model = os.path.join(dst_model_dir, "gender_8k_v6_adam.pth") + vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model) + return vc + + +def init_svc_model(): + emb_model_path = os.path.join(gs_model_dir, "RawNet3_weights.pt") + if not os.path.exists(emb_model_path): + if not download2disk(gs_svc_emb_url, emb_model_path): + logging.fatal(f"download svc_emb_model err={gs_svc_emb_url}") + embed_model = get_embed_model(emb_model_path) + hubert_model = load_hubert() + + svc_filename = gs_svc_model_url.split("/")[-1] + svc_model_path = os.path.join(gs_model_dir, svc_filename) + if not os.path.exists(svc_model_path): + if not download2disk(gs_svc_model_url, svc_model_path): + logging.fatal(f"download svc_model err={gs_svc_model_url}") + + # 此处内部会生成全局模型 + get_vc(svc_model_path) 
+ return embed_model, hubert_model + + +def volume_adjustment(wav_path, target_loudness, out_path): + """ + 音量调整 + :param wav_path: + :param target_loudness: + :param out_path: + :return: + """ + volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool") + if not os.path.exists(volume_bin_path): + if not download2disk(gs_volume_bin_url, volume_bin_path): + logging.fatal(f"download volume_bin err={gs_volume_bin_url}") + cmd = f"{volume_bin_path} {wav_path} {target_loudness} {out_path}" + os.system(cmd) + + +class SVCOnline: + + def __init__(self): + st = time.time() + self.gender_model = init_gender_model() + self.embed_model, self.hubert_model = init_svc_model() + logging.info(f"svc init finished, sp = {time.time() - st}") + + def gender_process(self, worker_attr): + st = time.time() + gender, female_rate, is_pure = self.gender_model.process(worker_attr.vocal_path) + logging.info( + f"{worker_attr.vocal_url}, gender={gender}, female_rate={female_rate}, is_pure={is_pure}, " + f"gender_process sp = {time.time() - st}") + if gender == 0: + gender = 'female' + elif gender == 1: + gender = 'male' + elif female_rate > 0.5: + gender = 'female' + else: + gender = 'male' + logging.info(f"{worker_attr.vocal_url}, modified gender={gender}") + return gender + + def process(self, worker_attr): + gender = self.gender_process(worker_attr) + song_path = worker_attr.female_svc_source_path + if gender == "male": + song_path = worker_attr.male_svc_source_path + params = {'gender': gender, 'tst': worker_attr.st_ms, "tnd": worker_attr.ed_tm, 'delay': 0, 'song_path': None} + st = time.time() + similar = process_svc(song_path, worker_attr.vocal_path, worker_attr.target_wav_path, params) + logging.info(f"{worker_attr.vocal_url}, similar={similar} process svc sp = {time.time() - st}") diff --git a/AIMeiSheng/meisheng_svc_final.py b/AIMeiSheng/meisheng_svc_final.py index 6359fb9..e5a6b3f 100644 --- a/AIMeiSheng/meisheng_svc_final.py +++ b/AIMeiSheng/meisheng_svc_final.py @@ -1,212 +1,215 
@@ import os,sys import time import shutil import glob import hashlib import librosa import soundfile import gradio as gr import pandas as pd import numpy as np sys.path.append('./RawNet3/') from infererence_fang_meisheng import get_embed, get_embed_model from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main,load_hubert, get_vc, get_rmvpe from gender_classify import load_gender_model gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer" ##混音执行文件 tmp_workspace_name = "batch_test_ocean_fi"#工作空间名 song_folder = "./data_meisheng/" ##song folder gs_work_dir = f"./data_meisheng/{tmp_workspace_name}" #工作空间路径 pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth" ##模型文件 cur_dir = os.path.abspath(os.path.dirname(__file__)) -abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/' - +abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/' +f0_method = None def mix(in_path, acc_path, dst_path): # svc转码到442 svc_442_file = in_path + "_442.wav" st = time.time() cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(in_path, svc_442_file) os.system(cmd) if not os.path.exists(svc_442_file): return -1 print("transcode,{},sp={}".format(in_path, time.time() - st)) # 混合 st = time.time() cmd = "{} {} {} {} 1".format(gs_simple_mixer_path, svc_442_file, acc_path, dst_path) os.system(cmd) print("mixer,{},sp={}".format(in_path, time.time() - st)) def load_model(): global f0_method embed_model = get_embed_model() hubert_model = load_hubert() get_vc(pth_model_path) f0_method = get_rmvpe() print("model preload finish!!!") return embed_model, hubert_model#,svc_model embed_model, hubert_model = load_model() ##提前加载模型 gender_model = load_gender_model() def pyin_process_single_rmvpe(input_file): global f0_method + if f0_method is None: + f0_method = get_rmvpe() + rate = 16000 #44100 # 读取音频文件 y, sr = librosa.load(input_file, sr=rate) len_s = len(y)/sr lim_s = 15 #10 if(len_s > lim_s): y1 = 
y[:sr*lim_s] y2 = y[-sr*lim_s:] f0 = f0_method.infer_from_audio(y1, thred=0.03) f0 = f0[f0 < 600] valid_f0 = f0[f0 > 50] mean_pitch1 = np.mean(valid_f0) f0 = f0_method.infer_from_audio(y2, thred=0.03) f0 = f0[f0 < 600] valid_f0 = f0[f0 > 50] mean_pitch2 = np.mean(valid_f0) if abs(mean_pitch1 - mean_pitch2) > 55: mean_pitch_cur = min(mean_pitch1, mean_pitch2) else: mean_pitch_cur = (mean_pitch1 + mean_pitch2) / 2 else: f0 = f0_method.infer_from_audio(y, thred=0.03) f0 = f0[f0 < 600] valid_f0 = f0[f0 > 50] mean_pitch_cur = np.mean(valid_f0) return mean_pitch_cur def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras): ##计算pitch f0up_key = pyin_process_single_rmvpe(target_wav) ## get embed get_embed(target_wav, embed_npy, embed_model) print("svc main start...") svc_main(song_wav,svc_out_path,pth_model_path,embed_npy,f0up_key,hubert_model,paras) print("svc main finished!!") return 0 def process_svc(song_wav, target_wav, svc_out_path,paras): song_wav1, target_wav, svc_out_path = os.path.basename(song_wav), os.path.basename( target_wav), os.path.basename(svc_out_path) #绝对路径 song_wav, target_wav, svc_out_path = song_wav, abs_path + target_wav, abs_path + svc_out_path embed_npy = target_wav[:-4] + '.npy' ##embd npy存储位置 similar = meisheng_svc(song_wav,target_wav,svc_out_path,embed_npy,paras) return similar def get_svc(target_yinse_wav, song_name, paras): ''' :param target_yinse_wav: 目标音色 :param song_name: 歌曲名字 ;param paras: 其他参数 :return: svc路径名 ''' ##清空工作空间临时路径 if os.path.exists(gs_work_dir): #shutil.rmtree(gs_work_dir) cmd = f"rm -rf {gs_work_dir}/*" os.system(cmd) else: os.makedirs(gs_work_dir) gender = paras['gender']##为了确定歌曲 ##目标音色读取 f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav)) #print("dir :", f_dst,"target_yinse_wav:",target_yinse_wav) #shutil.move(target_yinse_wav, f_dst) ##放在工作目录 shutil.copy(target_yinse_wav, f_dst) target_yinse_wav = f_dst ##歌曲/伴奏 读取(路径需要修改) song_wav = os.path.join("{}{}/{}/vocal321.wav".format(song_folder, 
gender, song_name)) # 歌曲vocal inf_acc_path = os.path.join("{}{}/{}/acc.wav".format(song_folder, gender, song_name)) #song_wav = './xusong_long.wav' svc_out_path = os.path.join(gs_work_dir, "svc.wav") ###svc结果名字 print("inputMsg:", song_wav, target_yinse_wav, svc_out_path) ## svc process st = time.time() print("start inference...") similar = process_svc(song_wav, target_yinse_wav, svc_out_path,paras) print("svc finished!!") print("time cost = {}".format(time.time() - st)) print("out path name {} ".format(svc_out_path)) #''' ##加混响 print("add reverbration...") svc_out_path_effect = svc_out_path[:-4] + '_effect.wav' cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}" print("cmd :", cmd) os.system(cmd) # # 人声伴奏合并 print("add acc...") out_path = svc_out_path_effect[:-4] + '_music.wav' mix(svc_out_path_effect, inf_acc_path, out_path) print("time cost = {}".format(time.time() - st)) print("out path name {} ".format(out_path)) #''' return svc_out_path if __name__=='__main__': ###gender predict target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a" gender, female_rate, is_pure = gender_model.process(target_yinse_wav) print('=====================') print("gender:{}, female_rate:{},is_pure:{}".format(gender,female_rate,is_pure)) if gender == 0: gender = 'female' elif gender == 1: gender = 'male' elif female_rate > 0.5: gender = 'female' else: gender = 'male' print("modified gender:{} ".format(gender)) print('=====================') ###接口函数 ''' target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav" #需要完整路径 song_name = "drivers_license" #"Levitating" ##路径会自动添加(要更改) paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None} ##单位都是ms #paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0} ###片段svc测试 #''' #''' #target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a" song_name = "lost_stars" #paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None} paras = 
{'gender': gender, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None } get_svc(target_yinse_wav, song_name, paras) #''' diff --git a/tools/ebur128_tool/CMakeLists.txt b/tools/ebur128_tool/CMakeLists.txt new file mode 100644 index 0000000..3017d49 --- /dev/null +++ b/tools/ebur128_tool/CMakeLists.txt @@ -0,0 +1,19 @@ +cmake_minimum_required(VERSION 2.8) +project(ebur128_tool) + +set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib) + +include_directories(../ref/alimter/inc) +include_directories(../ref/waves/inc) +include_directories(../ref/ebur128/inc) + +add_subdirectory("../ref/alimter" ${PROJECT_SOURCE_DIR}/ref/alimter) +add_subdirectory("../ref/waves" ${PROJECT_SOURCE_DIR}/ref/waves) +add_subdirectory("../ref/ebur128" ${PROJECT_SOURCE_DIR}/ref/ebur128) + +add_executable(ebur128_tool ebur128_tool.cpp) + +target_link_libraries(ebur128_tool + ${LIBRARY_OUTPUT_PATH}/libalimiter.a + ${LIBRARY_OUTPUT_PATH}/libwaves.a + ${LIBRARY_OUTPUT_PATH}/libebur128.a) \ No newline at end of file diff --git a/tools/ebur128_tool/ebur128_tool.cpp b/tools/ebur128_tool/ebur128_tool.cpp new file mode 100644 index 0000000..c3d171c --- /dev/null +++ b/tools/ebur128_tool/ebur128_tool.cpp @@ -0,0 +1,107 @@ +// +// Created by Administrator on 2024/7/8. 
+// +#include +#include +#include +#include + +#include "alimiter.h" +#include "ebur128.h" +#include "WaveFile.h" + +#define PROC_LEN 1024 +/** + * 获取增益 + * @param nChannel + * @param nSampleRate + * @param pData + * @param nLength + * @param gain + * @return + */ +int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness) +{ + ebur128_state *st = NULL; + st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I); + if (NULL == st) + { + return -1; + } + int nPos = 0; + int nTmpLength = 0; + int nRet; + while (nPos < nLength) + { + nTmpLength = PROC_LEN; + if (nLength - nPos < PROC_LEN) + { + nTmpLength = nLength - nPos; + } + nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel); + if (nRet != 0) + { + return -2; + } + nPos += nTmpLength; + } + gated_loudness = -1; + ebur128_loudness_global(st, &gated_loudness); + ebur128_destroy(&st); + return 0; +} + +int main(int argc, char* argv[]) { + if (argc < 4) + { + printf("input error! 
example: ./main input_wav target_loudness dst_wav\n"); + return -1; + } + + std::string vocal_path = argv[1]; + double target_loudness = atof(argv[2]); + std::string out_vocal_path = argv[3]; + + // 读取数据 + CWaveFile vocal_wav = CWaveFile(vocal_path.c_str(), false); + if (!vocal_wav.GetStatus()) + { + printf("%s not ok!\n", vocal_path.c_str()); + return -2; + } + int vocal_buf_len = vocal_wav.GetChannels() * vocal_wav.GetTotalFrames(); + float *vocal_buf = new float[vocal_buf_len]; + short *short_vocal_buf = new short[vocal_buf_len]; + vocal_wav.ReadFrameAsfloat(vocal_buf, vocal_wav.GetTotalFrames()); + for(int i = 0; i < vocal_wav.GetTotalFrames() * vocal_wav.GetChannels(); i++) + { + short_vocal_buf[i] = float(vocal_buf[i]) * 32767.f; + } + + double vocal_gated_loudness = 0; + ebur128_whole(vocal_wav.GetChannels(), vocal_wav.GetSampleRate(), short_vocal_buf, + vocal_wav.GetTotalFrames() * vocal_wav.GetChannels(), vocal_gated_loudness); + float db = (target_loudness - vocal_gated_loudness) / 20.f; + float ebur128_rate = pow(10, db); + + printf("vocal_gated_loudness = %f, db = %f, gain = %f\n", vocal_gated_loudness, db, ebur128_rate); + SUPERSOUND::Alimiter limiter; + limiter.SetParam(vocal_wav.GetSampleRate(), vocal_wav.GetChannels()); + for (int i = 0; i < vocal_buf_len; i++) + { + float out = vocal_buf[i] * ebur128_rate; + limiter.Filter(&out, &out, 1); + vocal_buf[i] = out; + } + + CWaveFile out_wav = CWaveFile(out_vocal_path.c_str(), true); + out_wav.SetChannels(vocal_wav.GetChannels()); + out_wav.SetSampleRate(vocal_wav.GetSampleRate()); + out_wav.SetSampleFormat(SF_IEEE_FLOAT); + out_wav.SetupDone(); + out_wav.WriteFrame(vocal_buf, vocal_wav.GetTotalFrames()); + + delete[] vocal_buf; + delete[] short_vocal_buf; + return 0; +} \ No newline at end of file diff --git a/tools/ref/alimter/CMakeLists.txt b/tools/ref/alimter/CMakeLists.txt new file mode 100644 index 0000000..9748c4d --- /dev/null +++ b/tools/ref/alimter/CMakeLists.txt @@ -0,0 +1,3 @@ 
+include_directories(inc) +AUX_SOURCE_DIRECTORY(src DIR_ALIMTER_SRCS) +add_library(alimiter ${DIR_ALIMTER_SRCS}) \ No newline at end of file diff --git a/tools/ref/alimter/inc/alimiter.h b/tools/ref/alimter/inc/alimiter.h new file mode 100644 index 0000000..8022d39 --- /dev/null +++ b/tools/ref/alimter/inc/alimiter.h @@ -0,0 +1,99 @@ + +/*************************************************************************** +* email : yijiangyang@tencent.com * +***************************************************************************/ + +//+ ----------------------------------------------------+ +//+ _oo0oo_ + +//+ o8888888o + +//+ 88" . "88 + +//+ (| -_- |) + +//+ 0\ = /0 + +//+ ___/`---'\___ + +//+ .' \\| |// '. + +//+ / \\||| : |||// \ + +//+ / _||||| -:- |||||- \ + +//+ | | \\\ - /// | | + +//+ | \_| ''\---/'' |_/ | + +//+ \ .-\__ '-' ___/-. / + +//+ ___'. .' /--.--\ `. .'___ + +//+ ."" '< `.___\_<|>_/___.' >' "". + +//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | + +//+ \ \ `_. \_ __\ /__ _/ .-` / / + +//+ =====`-.____`.___ \_____/___.-`___.-'===== + +//+ `=---=' + +//+ + +//+ + +//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +//+ + +//+ 佛祖保佑 永无BUG + +//+ ----------------------------------------------------+ + +//实现 FFMPEG 中的限制器,这个压限器对频谱友好,但是压得比较厉害 + +#ifndef __ALIMITER_H__ +#define __ALIMITER_H__ + +#include +#define ERROR_SUPERSOUND_SUCCESS 0 +#define ERROR_SUPERSOUND_PARAM -1 +#define ERROR_SUPERSOUND_MEMORY -2 +typedef struct AudioLimiterContext +{ + float limit; + float attack; + float release; + float att; + float level_in; + float level_out; + int32_t auto_release; + int32_t auto_level; + float asc; + int32_t asc_c; + int32_t asc_pos; + float asc_coeff; + + float *buffer; + int32_t buffer_size; + int32_t buffer_max_size; + int32_t pos; + int32_t *nextpos; + float *nextdelta; + + float delta; + int32_t nextiter; + int32_t nextlen; + int32_t asc_changed; +}AudioLimiterContext; + +namespace SUPERSOUND +{ + + +class Alimiter +{ +public: + Alimiter(); + ~Alimiter(); + 
+public: + void Flush(); + int32_t GetLatecy(); + int32_t SetParam(int32_t fs, int32_t channels); + void Filter(float * input, float * output, int32_t num); + +private: + void Uninit(); + int32_t config_input(); + float get_rdelta(AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc); + +private: + AudioLimiterContext m_alimiterCtx; + int m_nChannels; + int m_nFs; +}; + + +} + +#endif /* __ALIMITER_H__ */ \ No newline at end of file diff --git a/tools/ref/alimter/src/alimiter.cpp b/tools/ref/alimter/src/alimiter.cpp new file mode 100644 index 0000000..abbd622 --- /dev/null +++ b/tools/ref/alimter/src/alimiter.cpp @@ -0,0 +1,306 @@ + +#include "alimiter.h" +#include +#include +#include +#include + +#define MAX(a,b) (((a) > (b)) ? (a) : (b)) +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#define MIDDLE(x, y, z) ((x)<(y)?((y)<(z)?(y):(x)<(z)?(z):(x)):((y)>(z)?(y):(x)>(z)?(z):(x))) +#define SAFE_DELETE_PTR(ptr) \ +{ \ + if(ptr) \ + { \ + delete [] ptr; \ + ptr = NULL; \ + } \ +} + +namespace SUPERSOUND +{ + + +Alimiter::Alimiter() +{ + memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx)); + + m_nChannels = 0; + m_nFs = 0; + + Flush(); +} + +Alimiter::~Alimiter() +{ + Uninit(); +} + +void Alimiter::Flush() +{ + float * buffer = m_alimiterCtx.buffer; + float * nextdelta = m_alimiterCtx.nextdelta; + int32_t * nextpos = m_alimiterCtx.nextpos; + int32_t buffer_max_size = m_alimiterCtx.buffer_max_size; + int32_t buffer_size = m_alimiterCtx.buffer_size; + + if(buffer) + memset(buffer, 0, sizeof(float) * buffer_max_size); + if(nextdelta) + memset(nextdelta, 0, sizeof(float) * buffer_max_size); + if(nextpos) + memset(nextpos, -1, sizeof(float) * buffer_max_size); + + memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx)); + + m_alimiterCtx.level_in = 1; + m_alimiterCtx.level_out = 32000 / 32768.0; + m_alimiterCtx.limit = 1; + m_alimiterCtx.attack = 5; + m_alimiterCtx.release = 50; + m_alimiterCtx.auto_release = 0; + m_alimiterCtx.asc_coeff = 
0.5;
+    m_alimiterCtx.auto_level = 1;
+
+    m_alimiterCtx.attack /= 1000;
+    m_alimiterCtx.release /= 1000;
+    m_alimiterCtx.att = 1;
+    m_alimiterCtx.asc_pos = -1;
+    m_alimiterCtx.asc_coeff = pow(0.5f, m_alimiterCtx.asc_coeff - 0.5f) * 2 * -1;
+
+    m_alimiterCtx.buffer = buffer;
+    m_alimiterCtx.nextdelta = nextdelta;
+    m_alimiterCtx.nextpos = nextpos;
+    m_alimiterCtx.buffer_max_size = buffer_max_size;
+    m_alimiterCtx.buffer_size = buffer_size;
+}
+
+// Returns the configured attack-buffer length divided by the channel count,
+// i.e. the buffer length per channel.
+// Returns 0 while the limiter is unconfigured (m_nChannels is 0 until a
+// successful SetParam), instead of dividing by zero.
+int32_t Alimiter::GetLatecy()
+{
+    if(m_nChannels <= 0)
+        return 0;
+
+    return m_alimiterCtx.buffer_size / m_nChannels;
+}
+
+// Configures the limiter for the given sample rate and channel count.
+//
+// fs       - sample rate; must be > 0.
+// channels - number of interleaved channels; must be > 0.
+//
+// Returns ERROR_SUPERSOUND_SUCCESS when the configuration is applied (or is
+// already in effect), ERROR_SUPERSOUND_PARAM for non-positive arguments, or
+// whatever error config_input() reports.
+int32_t Alimiter::SetParam( int32_t fs, int32_t channels )
+{
+    // Non-positive values would make Filter() divide / take a modulo by zero.
+    if((fs <= 0) || (channels <= 0))
+        return ERROR_SUPERSOUND_PARAM;
+
+    if((fs == m_nFs) && (channels == m_nChannels))
+        return ERROR_SUPERSOUND_SUCCESS;
+
+    // config_input() reads m_nFs / m_nChannels, so they are set before the call.
+    m_nChannels = channels;
+    m_nFs = fs;
+
+    int32_t ret = config_input();
+    if(ret != ERROR_SUPERSOUND_SUCCESS)
+    {
+        // Forget the parameters on failure: otherwise a retry with the same
+        // values would hit the early-out above and report success although
+        // the buffers were never (fully) allocated.
+        m_nChannels = 0;
+        m_nFs = 0;
+    }
+
+    return ret;
+}
+
+void Alimiter::Filter( float * input, float * output, int32_t num )
+{
+    num = num / m_nChannels;
+    int channels = m_nChannels;
+    int buffer_size = m_alimiterCtx.buffer_size;
+    float * buffer = m_alimiterCtx.buffer;
+    float release = m_alimiterCtx.release;
+    float limit = m_alimiterCtx.limit;
+    float * nextdelta = m_alimiterCtx.nextdelta;
+    float level = m_alimiterCtx.auto_level ?
1 / limit : 1; + float level_out = m_alimiterCtx.level_out; + float level_in = m_alimiterCtx.level_in; + int *nextpos = m_alimiterCtx.nextpos; + + float * buf; + float * dst; + float * src; + int n, c, i; + AudioLimiterContext * s = &m_alimiterCtx; + + dst = output; + src = input; + + for (n = 0; n < num; n++) { + float peak = 0; + + for (c = 0; c < channels; c++) { + float sample = src[c] * level_in; + + buffer[s->pos + c] = sample; + peak = MAX(peak, fabs(sample)); + } + + if (s->auto_release && peak > limit) { + s->asc += peak; + s->asc_c++; + } + + if (peak > limit) { + float patt = MIN(limit / peak, 1); + float rdelta = get_rdelta(s, release, m_nFs, + peak, limit, patt, 0); + float delta = (limit / peak - s->att) / buffer_size * channels; + int found = 0; + + if (delta < s->delta) { + s->delta = delta; + nextpos[0] = s->pos; + nextpos[1] = -1; + nextdelta[0] = rdelta; + s->nextlen = 1; + s->nextiter= 0; + } else { + for (i = s->nextiter; i < s->nextiter + s->nextlen; i++) { + int j = i % buffer_size; + float ppeak, pdelta; + + ppeak = fabs(buffer[nextpos[j]]) > fabs(buffer[nextpos[j] + 1]) ? 
+ fabs(buffer[nextpos[j]]) : fabs(buffer[nextpos[j] + 1]); + pdelta = (limit / peak - limit / ppeak) / (((buffer_size - nextpos[j] + s->pos) % buffer_size) / channels); + if (pdelta < nextdelta[j]) { + nextdelta[j] = pdelta; + found = 1; + break; + } + } + if (found) { + s->nextlen = i - s->nextiter + 1; + nextpos[(s->nextiter + s->nextlen) % buffer_size] = s->pos; + nextdelta[(s->nextiter + s->nextlen) % buffer_size] = rdelta; + nextpos[(s->nextiter + s->nextlen + 1) % buffer_size] = -1; + s->nextlen++; + } + } + } + + buf = &s->buffer[(s->pos + channels) % buffer_size]; + peak = 0; + for (c = 0; c < channels; c++) { + float sample = buf[c]; + + peak = MAX(peak, fabs(sample)); + } + + if (s->pos == s->asc_pos && !s->asc_changed) + s->asc_pos = -1; + + if (s->auto_release && s->asc_pos == -1 && peak > limit) { + s->asc -= peak; + s->asc_c--; + } + + s->att += s->delta; + + for (c = 0; c < channels; c++) + dst[c] = buf[c] * s->att; + + if ((s->pos + channels) % buffer_size == nextpos[s->nextiter]) { + if (s->auto_release) { + s->delta = get_rdelta(s, release, m_nFs, + peak, limit, s->att, 1); + if (s->nextlen > 1) { + int pnextpos = nextpos[(s->nextiter + 1) % buffer_size]; + float ppeak = fabs(buffer[pnextpos]) > fabs(buffer[pnextpos + 1]) ? + fabs(buffer[pnextpos]) : + fabs(buffer[pnextpos + 1]); + float pdelta = (limit / ppeak - s->att) / + (((buffer_size + pnextpos - + ((s->pos + channels) % buffer_size)) % + buffer_size) / channels); + if (pdelta < s->delta) + s->delta = pdelta; + } + } else { + s->delta = nextdelta[s->nextiter]; + s->att = limit / peak; + } + + s->nextlen -= 1; + nextpos[s->nextiter] = -1; + s->nextiter = (s->nextiter + 1) % buffer_size; + } + + if (s->att > 1.) { + s->att = 1.; + s->delta = 0.; + s->nextiter = 0; + s->nextlen = 0; + nextpos[0] = -1; + } + + if (s->att <= 0.) 
{ + s->att = 0.000001f; + s->delta = (1 - s->att) / (m_nFs * release); + } + + if (s->att != 1 && (1 - s->att) < 0.000001f) + s->att = 1; + + if (s->delta != 0 && fabs(s->delta) < 0.000001f) + s->delta = 0; + + for (c = 0; c < channels; c++) + dst[c] = MIDDLE(dst[c], -limit, limit) * level * level_out; + + s->pos = (s->pos + channels) % buffer_size; + src += channels; + dst += channels; + } +} + +void Alimiter::Uninit() +{ + SAFE_DELETE_PTR(m_alimiterCtx.buffer); + SAFE_DELETE_PTR(m_alimiterCtx.nextdelta); + SAFE_DELETE_PTR(m_alimiterCtx.nextpos); +} + +int32_t Alimiter::config_input() +{ + int obuffer_size = int(m_nFs * m_nChannels * 100 / 1000. + m_nChannels); + if(obuffer_size < m_nChannels) + return ERROR_SUPERSOUND_PARAM; + + if(obuffer_size > m_alimiterCtx.buffer_max_size) + { + SAFE_DELETE_PTR(m_alimiterCtx.buffer); + m_alimiterCtx.buffer = new(std::nothrow) float[obuffer_size]; + if(m_alimiterCtx.buffer == NULL) + return ERROR_SUPERSOUND_MEMORY; + memset(m_alimiterCtx.buffer, 0, sizeof(float) * obuffer_size); + + SAFE_DELETE_PTR(m_alimiterCtx.nextdelta); + m_alimiterCtx.nextdelta = new(std::nothrow) float[obuffer_size]; + if(m_alimiterCtx.nextdelta == NULL) + return ERROR_SUPERSOUND_MEMORY; + memset(m_alimiterCtx.nextdelta, 0, sizeof(float) * obuffer_size); + + SAFE_DELETE_PTR(m_alimiterCtx.nextpos); + m_alimiterCtx.nextpos = new(std::nothrow) int32_t[obuffer_size]; + if(m_alimiterCtx.nextpos == NULL) + return ERROR_SUPERSOUND_MEMORY; + memset(m_alimiterCtx.nextpos, -1, obuffer_size*sizeof(int32_t)); + + m_alimiterCtx.buffer_max_size = obuffer_size; + } + + m_alimiterCtx.buffer_size = int(m_nFs * m_alimiterCtx.attack * m_nChannels); + m_alimiterCtx.buffer_size -= m_alimiterCtx.buffer_size % m_nChannels; + + return ERROR_SUPERSOUND_SUCCESS; +} + +float Alimiter::get_rdelta( AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc ) +{ + float rdelta = (1 - patt) / (sample_rate * release); + + if (asc && 
s->auto_release && s->asc_c > 0) { + float a_att = limit / (s->asc_coeff * s->asc) * (float)s->asc_c; + + if (a_att > patt) { + float delta = MAX((a_att - patt) / (sample_rate * release), rdelta / 10); + + if (delta < rdelta) + rdelta = delta; + } + } + + return rdelta; +} + + +} \ No newline at end of file diff --git a/tools/ref/ebur128/CMakeLists.txt b/tools/ref/ebur128/CMakeLists.txt new file mode 100644 index 0000000..18a5a86 --- /dev/null +++ b/tools/ref/ebur128/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(inc) +AUX_SOURCE_DIRECTORY(src DIR_EBUR128_SRCS) +add_library(ebur128 ${DIR_EBUR128_SRCS}) \ No newline at end of file diff --git a/tools/ref/ebur128/inc/ebur128.h b/tools/ref/ebur128/inc/ebur128.h new file mode 100644 index 0000000..faa66c6 --- /dev/null +++ b/tools/ref/ebur128/inc/ebur128.h @@ -0,0 +1,425 @@ +/* See COPYING file for copyright and license details. */ + +#ifndef EBUR128_H_ +#define EBUR128_H_ + +/** \file ebur128.h + * \brief libebur128 - a library for loudness measurement according to + * the EBU R128 standard. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define EBUR128_VERSION_MAJOR 1 +#define EBUR128_VERSION_MINOR 2 +#define EBUR128_VERSION_PATCH 4 + +#include /* for size_t */ + +/** \enum channel + * Use these values when setting the channel map with ebur128_set_channel(). 
+ * See definitions in ITU R-REC-BS 1770-4
+ */
+enum channel {
+  EBUR128_UNUSED = 0,     /**< unused channel (for example LFE channel) */
+  EBUR128_LEFT = 1,
+  EBUR128_Mp030 = 1,      /**< itu M+030 */
+  EBUR128_RIGHT = 2,
+  EBUR128_Mm030 = 2,      /**< itu M-030 */
+  EBUR128_CENTER = 3,
+  EBUR128_Mp000 = 3,      /**< itu M+000 */
+  EBUR128_LEFT_SURROUND = 4,
+  EBUR128_Mp110 = 4,      /**< itu M+110 */
+  EBUR128_RIGHT_SURROUND = 5,
+  EBUR128_Mm110 = 5,      /**< itu M-110 */
+  EBUR128_DUAL_MONO,      /**< a channel that is counted twice */
+  EBUR128_MpSC,           /**< itu M+SC */
+  EBUR128_MmSC,           /**< itu M-SC */
+  EBUR128_Mp060,          /**< itu M+060 */
+  EBUR128_Mm060,          /**< itu M-060 */
+  EBUR128_Mp090,          /**< itu M+090 */
+  EBUR128_Mm090,          /**< itu M-090 */
+  EBUR128_Mp135,          /**< itu M+135 */
+  EBUR128_Mm135,          /**< itu M-135 */
+  EBUR128_Mp180,          /**< itu M+180 */
+  EBUR128_Up000,          /**< itu U+000 */
+  EBUR128_Up030,          /**< itu U+030 */
+  EBUR128_Um030,          /**< itu U-030 */
+  EBUR128_Up045,          /**< itu U+045 */
+  EBUR128_Um045,          /**< itu U-045 */
+  EBUR128_Up090,          /**< itu U+090 */
+  EBUR128_Um090,          /**< itu U-090 */
+  EBUR128_Up110,          /**< itu U+110 */
+  EBUR128_Um110,          /**< itu U-110 */
+  EBUR128_Up135,          /**< itu U+135 */
+  EBUR128_Um135,          /**< itu U-135 */
+  EBUR128_Up180,          /**< itu U+180 */
+  EBUR128_Tp000,          /**< itu T+000 */
+  EBUR128_Bp000,          /**< itu B+000 */
+  EBUR128_Bp045,          /**< itu B+045 */
+  EBUR128_Bm045           /**< itu B-045 */
+};
+
+/** \enum error
+ *  Error return values.
+ */
+enum error {
+  EBUR128_SUCCESS = 0,                  /**< call succeeded */
+  EBUR128_ERROR_NOMEM,                  /**< memory allocation error */
+  EBUR128_ERROR_INVALID_MODE,           /**< required mode not set, or requested window larger than the current window */
+  EBUR128_ERROR_INVALID_CHANNEL_INDEX,  /**< invalid channel index */
+  EBUR128_ERROR_NO_CHANGE               /**< requested parameters equal the current ones; nothing was changed */
+};
+
+/** \enum mode
+ *  Use these values in ebur128_init (or'ed). Try to use the lowest possible
+ *  modes that suit your needs, as performance will be better.
+ */ +enum mode { + /** can call ebur128_loudness_momentary */ + EBUR128_MODE_M = (1 << 0), + /** can call ebur128_loudness_shortterm */ + EBUR128_MODE_S = (1 << 1) | EBUR128_MODE_M, + /** can call ebur128_loudness_global_* and ebur128_relative_threshold */ + EBUR128_MODE_I = (1 << 2) | EBUR128_MODE_M, + /** can call ebur128_loudness_range */ + EBUR128_MODE_LRA = (1 << 3) | EBUR128_MODE_S, + /** can call ebur128_sample_peak */ + EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | EBUR128_MODE_M, + /** can call ebur128_true_peak */ + EBUR128_MODE_TRUE_PEAK = (1 << 5) | EBUR128_MODE_M + | EBUR128_MODE_SAMPLE_PEAK, + /** uses histogram algorithm to calculate loudness */ + EBUR128_MODE_HISTOGRAM = (1 << 6) +}; + +/** forward declaration of ebur128_state_internal */ +struct ebur128_state_internal; + +/** \brief Contains information about the state of a loudness measurement. + * + * You should not need to modify this struct directly. + */ +typedef struct { + int mode; /**< The current mode. */ + unsigned int channels; /**< The number of channels. */ + unsigned long samplerate; /**< The sample rate. */ + struct ebur128_state_internal* d; /**< Internal state. */ +} ebur128_state; + +/** \brief Get library version number. Do not pass null pointers here. + * + * @param major major version number of library + * @param minor minor version number of library + * @param patch patch version number of library + */ +void ebur128_get_version(int* major, int* minor, int* patch); + +/** \brief Initialize library state. + * + * @param channels the number of channels. + * @param samplerate the sample rate. + * @param mode see the mode enum for possible values. + * @return an initialized library state, or NULL on error. + */ +ebur128_state* ebur128_init(unsigned int channels, + unsigned long samplerate, + int mode); + +/** \brief Destroy library state. + * + * @param st pointer to a library state. + */ +void ebur128_destroy(ebur128_state** st); + +/** \brief Set channel type. 
+ * + * The default is: + * - 0 -> EBUR128_LEFT + * - 1 -> EBUR128_RIGHT + * - 2 -> EBUR128_CENTER + * - 3 -> EBUR128_UNUSED + * - 4 -> EBUR128_LEFT_SURROUND + * - 5 -> EBUR128_RIGHT_SURROUND + * + * @param st library state. + * @param channel_number zero based channel index. + * @param value channel type from the "channel" enum. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_set_channel(ebur128_state* st, + unsigned int channel_number, + int value); + +/** \brief Change library parameters. + * + * Note that the channel map will be reset when setting a different number of + * channels. The current unfinished block will be lost. + * + * @param st library state. + * @param channels new number of channels. + * @param samplerate new sample rate. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be + * invalid and must be destroyed. + * - EBUR128_ERROR_NO_CHANGE if channels and sample rate were not changed. + */ +int ebur128_change_parameters(ebur128_state* st, + unsigned int channels, + unsigned long samplerate); + +/** \brief Set the maximum window duration. + * + * Set the maximum duration that will be used for ebur128_window_loudness(). + * Note that this destroys the current content of the audio buffer. + * + * @param st library state. + * @param window duration of the window in ms. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be + * invalid and must be destroyed. + * - EBUR128_ERROR_NO_CHANGE if window duration not changed. + */ +int ebur128_set_max_window(ebur128_state* st, unsigned long window); + +/** \brief Set the maximum history. + * + * Set the maximum history that will be stored for loudness integration. + * More history provides more accurate results, but requires more resources. 
+ * + * Applies to ebur128_loudness_range() and ebur128_loudness_global() when + * EBUR128_MODE_HISTOGRAM is not set. + * + * Default is ULONG_MAX (at least ~50 days). + * Minimum is 3000ms for EBUR128_MODE_LRA and 400ms for EBUR128_MODE_M. + * + * @param st library state. + * @param history duration of history in ms. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NO_CHANGE if history not changed. + */ +int ebur128_set_max_history(ebur128_state* st, unsigned long history); + +/** \brief Add frames to be processed. + * + * @param st library state. + * @param src array of source frames. Channels must be interleaved. + * @param frames number of frames. Not number of samples! + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM on memory allocation error. + */ +int ebur128_add_frames_short(ebur128_state* st, + const short* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_int(ebur128_state* st, + const int* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_float(ebur128_state* st, + const float* src, + size_t frames); +/** \brief See \ref ebur128_add_frames_short */ +int ebur128_add_frames_double(ebur128_state* st, + const double* src, + size_t frames); + +/** \brief Get global integrated loudness in LUFS. + * + * @param st library state. + * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set. + */ +int ebur128_loudness_global(ebur128_state* st, double* out); +/** \brief Get global integrated loudness in LUFS across multiple instances. + * + * @param sts array of library states. + * @param size length of sts + * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. 
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set. + */ +int ebur128_loudness_global_multiple(ebur128_state** sts, + size_t size, + double* out); + +/** \brief Get momentary loudness (last 400ms) in LUFS. + * + * @param st library state. + * @param out momentary loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + */ +int ebur128_loudness_momentary(ebur128_state* st, double* out); +/** \brief Get short-term loudness (last 3s) in LUFS. + * + * @param st library state. + * @param out short-term loudness in LUFS. -HUGE_VAL if result is negative + * infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_S" has not been set. + */ +int ebur128_loudness_shortterm(ebur128_state* st, double* out); + +/** \brief Get loudness of the specified window in LUFS. + * + * window must not be larger than the current window set in st. + * The current window can be changed by calling ebur128_set_max_window(). + * + * @param st library state. + * @param window window in ms to calculate loudness. + * @param out loudness in LUFS. -HUGE_VAL if result is negative infinity. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if window larger than current window in st. + */ +int ebur128_loudness_window(ebur128_state* st, + unsigned long window, + double* out); + +/** \brief Get loudness range (LRA) of programme in LU. + * + * Calculates loudness range according to EBU 3342. + * + * @param st library state. + * @param out loudness range (LRA) in LU. Will not be changed in case of + * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be + * returned in this case. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM in case of memory allocation error. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set. 
+ */ +int ebur128_loudness_range(ebur128_state* st, double* out); +/** \brief Get loudness range (LRA) in LU across multiple instances. + * + * Calculates loudness range according to EBU 3342. + * + * @param sts array of library states. + * @param size length of sts + * @param out loudness range (LRA) in LU. Will not be changed in case of + * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be + * returned in this case. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_NOMEM in case of memory allocation error. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set. + */ +int ebur128_loudness_range_multiple(ebur128_state** sts, + size_t size, + double* out); + +/** \brief Get maximum sample peak from all frames that have been processed. + * + * The equation to convert to dBFS is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum sample peak in float format (1.0 is 0 dBFS) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum sample peak from the last call to add_frames(). + * + * The equation to convert to dBFS is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum sample peak in float format (1.0 is 0 dBFS) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_prev_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum true peak from all frames that have been processed. 
+ * + * Uses an implementation defined algorithm to calculate the true peak. Do not + * try to compare resulting values across different versions of the library, + * as the algorithm may change. + * + * The current implementation uses a custom polyphase FIR interpolator to + * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for + * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz. + * + * The equation to convert to dBTP is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum true peak in float format (1.0 is 0 dBTP) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get maximum true peak from the last call to add_frames(). + * + * Uses an implementation defined algorithm to calculate the true peak. Do not + * try to compare resulting values across different versions of the library, + * as the algorithm may change. + * + * The current implementation uses a custom polyphase FIR interpolator to + * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for + * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz. + * + * The equation to convert to dBTP is: 20 * log10(out) + * + * @param st library state + * @param channel_number channel to analyse + * @param out maximum true peak in float format (1.0 is 0 dBTP) + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not + * been set. + * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index. + */ +int ebur128_prev_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out); + +/** \brief Get relative threshold in LUFS. 
+ * + * @param st library state + * @param out relative threshold in LUFS. + * @return + * - EBUR128_SUCCESS on success. + * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not + * been set. + */ +int ebur128_relative_threshold(ebur128_state* st, double* out); +#ifdef __cplusplus +} +#endif + +#endif /* EBUR128_H_ */ diff --git a/tools/ref/ebur128/src/ebur128.c b/tools/ref/ebur128/src/ebur128.c new file mode 100644 index 0000000..6c10f1e --- /dev/null +++ b/tools/ref/ebur128/src/ebur128.c @@ -0,0 +1,1333 @@ +/* See COPYING file for copyright and license details. */ + +#include "ebur128.h" + +#include +#include +#include /* You may have to define _USE_MATH_DEFINES if you use MSVC */ +#include +#include + +/* This can be replaced by any BSD-like queue implementation. */ +#include + +#define CHECK_ERROR(condition, errorcode, goto_point) \ + if ((condition)) { \ + errcode = (errorcode); \ + goto goto_point; \ + } + +STAILQ_HEAD(ebur128_double_queue, ebur128_dq_entry); +struct ebur128_dq_entry { + double z; + STAILQ_ENTRY(ebur128_dq_entry) entries; +}; + +#define ALMOST_ZERO 0.000001 + +typedef struct { /* Data structure for polyphase FIR interpolator */ + unsigned int factor; /* Interpolation factor of the interpolator */ + unsigned int taps; /* Taps (prefer odd to increase zero coeffs) */ + unsigned int channels; /* Number of channels */ + unsigned int delay; /* Size of delay buffer */ + struct { + unsigned int count; /* Number of coefficients in this subfilter */ + unsigned int* index; /* Delay index of corresponding filter coeff */ + double* coeff; /* List of subfilter coefficients */ + }* filter; /* List of subfilters (one for each factor) */ + float** z; /* List of delay buffers (one for each channel) */ + unsigned int zi; /* Current delay buffer index */ +} interpolator; + +struct ebur128_state_internal { + /** Filtered audio data (used as ring buffer). */ + double* audio_data; + /** Size of audio_data array. 
*/ + size_t audio_data_frames; + /** Current index for audio_data. */ + size_t audio_data_index; + /** How many frames are needed for a gating block. Will correspond to 400ms + * of audio at initialization, and 100ms after the first block (75% overlap + * as specified in the 2011 revision of BS1770). */ + unsigned long needed_frames; + /** The channel map. Has as many elements as there are channels. */ + int* channel_map; + /** How many samples fit in 100ms (rounded). */ + unsigned long samples_in_100ms; + /** BS.1770 filter coefficients (nominator). */ + double b[5]; + /** BS.1770 filter coefficients (denominator). */ + double a[5]; + /** BS.1770 filter state. */ + double v[5][5]; + /** Linked list of block energies. */ + struct ebur128_double_queue block_list; + unsigned long block_list_max; + unsigned long block_list_size; + /** Linked list of 3s-block energies, used to calculate LRA. */ + struct ebur128_double_queue short_term_block_list; + unsigned long st_block_list_max; + unsigned long st_block_list_size; + int use_histogram; + unsigned long *block_energy_histogram; + unsigned long *short_term_block_energy_histogram; + /** Keeps track of when a new short term block is needed. */ + size_t short_term_frame_counter; + /** Maximum sample peak, one per channel */ + double* sample_peak; + double* prev_sample_peak; + /** Maximum true peak, one per channel */ + double* true_peak; + double* prev_true_peak; + interpolator* interp; + float* resampler_buffer_input; + size_t resampler_buffer_input_frames; + float* resampler_buffer_output; + size_t resampler_buffer_output_frames; + /** The maximum window duration in ms. 
*/
+  unsigned long window;
+  unsigned long history;
+};
+
+static double relative_gate = -10.0;
+
+/* Those will be calculated when initializing the library */
+static double relative_gate_factor;
+static double minus_twenty_decibels;
+static double histogram_energies[1000];
+static double histogram_energy_boundaries[1001];
+
+static void interp_destroy(interpolator* interp);
+
+/* Allocate a polyphase FIR interpolator and compute its windowed-sinc
+ * coefficients.
+ *
+ * taps:     total number of filter taps.
+ * factor:   interpolation factor (one subfilter is built per factor step).
+ * channels: number of channels (one delay buffer is allocated per channel).
+ *
+ * Returns the new interpolator, or NULL if any allocation fails; in that case
+ * everything allocated so far is released. Release a successful result with
+ * interp_destroy(). */
+static interpolator* interp_create(unsigned int taps, unsigned int factor, unsigned int channels) {
+  interpolator* interp = calloc(1, sizeof(interpolator));
+  unsigned int j = 0;
+
+  if (!interp) {
+    return NULL;
+  }
+
+  interp->taps = taps;
+  interp->factor = factor;
+  interp->channels = channels;
+  interp->delay = (interp->taps + interp->factor - 1) / interp->factor;
+
+  /* Initialize the filter memory
+   * One subfilter per interpolation factor. */
+  interp->filter = calloc(interp->factor, sizeof(*interp->filter));
+  if (!interp->filter) {
+    goto fail;
+  }
+  for (j = 0; j < interp->factor; j++) {
+    interp->filter[j].index = calloc(interp->delay, sizeof(unsigned int));
+    interp->filter[j].coeff = calloc(interp->delay, sizeof(double));
+    if (!interp->filter[j].index || !interp->filter[j].coeff) {
+      goto fail;
+    }
+  }
+  /* One delay buffer per channel. */
+  interp->z = calloc(interp->channels, sizeof(float*));
+  if (!interp->z) {
+    goto fail;
+  }
+  for (j = 0; j < interp->channels; j++) {
+    interp->z[j] = calloc( interp->delay, sizeof(float) );
+    if (!interp->z[j]) {
+      goto fail;
+    }
+  }
+
+  /* Calculate the filter coefficients */
+  for (j = 0; j < interp->taps; j++) {
+    /* Calculate sinc */
+    double m = (double)j - (double)(interp->taps - 1) / 2.0;
+    double c = 1.0;
+    if (fabs(m) > ALMOST_ZERO) {
+      c = sin(m * M_PI / interp->factor) / (m * M_PI / interp->factor);
+    }
+    /* Apply Hanning window */
+    c *= 0.5 * (1 - cos(2 * M_PI * j / (interp->taps - 1)));
+
+    if (fabs(c) > ALMOST_ZERO) { /* Ignore any zero coeffs. */
+      /* Put the coefficient into the correct subfilter */
+      unsigned int f = j % interp->factor;
+      unsigned int t = interp->filter[f].count++;
+      interp->filter[f].coeff[t] = c;
+      interp->filter[f].index[t] = j / interp->factor;
+    }
+  }
+  return interp;
+
+fail:
+  /* The calloc'ed arrays hold NULL for every slot not yet filled, so the
+   * regular destructor can release a partially built interpolator. */
+  interp_destroy(interp);
+  return NULL;
+}
+
+/* Free an interpolator and everything it owns. Accepts NULL and objects whose
+ * filter list or delay-buffer list was never allocated. */
+static void interp_destroy(interpolator* interp) {
+  unsigned int j = 0;
+  if (!interp) {
+    return;
+  }
+  if (interp->filter) {
+    for (j = 0; j < interp->factor; j++) {
+      free(interp->filter[j].index);
+      free(interp->filter[j].coeff);
+    }
+    free(interp->filter);
+  }
+  if (interp->z) {
+    for (j = 0; j < interp->channels; j++) {
+      free(interp->z[j]);
+    }
+    free(interp->z);
+  }
+  free(interp);
+}
+
+static size_t interp_process(interpolator* interp, size_t frames, float* in, float* out) {
+  size_t frame = 0;
+  unsigned int chan = 0;
+  unsigned int f = 0;
+  unsigned int t = 0;
+  unsigned int out_stride = interp->channels * interp->factor;
+  float* outp = 0;
+  double acc = 0;
+  double c = 0;
+
+  for (frame = 0; frame < frames; frame++) {
+    for (chan = 0; chan < interp->channels; chan++) {
+      /* Add sample to delay buffer */
+      interp->z[chan][interp->zi] = *in++;
+      /* Apply coefficients */
+      outp = out + chan;
+      for (f = 0; f < interp->factor; f++) {
+        acc = 0.0;
+        for (t = 0; t < interp->filter[f].count; t++) {
+          int i = (int)interp->zi - (int)interp->filter[f].index[t];
+          if (i < 0) {
+            i += interp->delay;
+          }
+          c = interp->filter[f].coeff[t];
+          acc += interp->z[chan][i] * c;
+        }
+        *outp = (float)acc;
+        outp += interp->channels;
+      }
+    }
+    out += out_stride;
+    interp->zi++;
+    if (interp->zi == interp->delay) {
+      interp->zi = 0;
+    }
+  }
+
+  return frames * interp->factor;
+}
+
+static void ebur128_init_filter(ebur128_state* st) {
+  int i, j;
+
+  double f0 = 1681.974450955533;
+  double G  =    3.999843853973347;
+  double Q  =    0.7071752369554196;
+
+  double K  = tan(M_PI * f0 / (double) st->samplerate);
+  double Vh = pow(10.0, G / 20.0);
+  double Vb = pow(Vh, 0.4996667741545416);
+
+  double pb[3] = {0.0,  0.0, 0.0};
+  double pa[3] = {1.0,  0.0, 0.0};
+  double rb[3] = {1.0,
-2.0, 1.0}; + double ra[3] = {1.0, 0.0, 0.0}; + + double a0 = 1.0 + K / Q + K * K ; + pb[0] = (Vh + Vb * K / Q + K * K) / a0; + pb[1] = 2.0 * (K * K - Vh) / a0; + pb[2] = (Vh - Vb * K / Q + K * K) / a0; + pa[1] = 2.0 * (K * K - 1.0) / a0; + pa[2] = (1.0 - K / Q + K * K) / a0; + + /* fprintf(stderr, "%.14f %.14f %.14f %.14f %.14f\n", + b1[0], b1[1], b1[2], a1[1], a1[2]); */ + + f0 = 38.13547087602444; + Q = 0.5003270373238773; + K = tan(M_PI * f0 / (double) st->samplerate); + + ra[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K); + ra[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K); + + /* fprintf(stderr, "%.14f %.14f\n", a2[1], a2[2]); */ + + st->d->b[0] = pb[0] * rb[0]; + st->d->b[1] = pb[0] * rb[1] + pb[1] * rb[0]; + st->d->b[2] = pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0]; + st->d->b[3] = pb[1] * rb[2] + pb[2] * rb[1]; + st->d->b[4] = pb[2] * rb[2]; + + st->d->a[0] = pa[0] * ra[0]; + st->d->a[1] = pa[0] * ra[1] + pa[1] * ra[0]; + st->d->a[2] = pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0]; + st->d->a[3] = pa[1] * ra[2] + pa[2] * ra[1]; + st->d->a[4] = pa[2] * ra[2]; + + for (i = 0; i < 5; ++i) { + for (j = 0; j < 5; ++j) { + st->d->v[i][j] = 0.0; + } + } +} + +static int ebur128_init_channel_map(ebur128_state* st) { + size_t i; + st->d->channel_map = (int*) malloc(st->channels * sizeof(int)); + if (!st->d->channel_map) { + return EBUR128_ERROR_NOMEM; + } + if (st->channels == 4) { + st->d->channel_map[0] = EBUR128_LEFT; + st->d->channel_map[1] = EBUR128_RIGHT; + st->d->channel_map[2] = EBUR128_LEFT_SURROUND; + st->d->channel_map[3] = EBUR128_RIGHT_SURROUND; + } else if (st->channels == 5) { + st->d->channel_map[0] = EBUR128_LEFT; + st->d->channel_map[1] = EBUR128_RIGHT; + st->d->channel_map[2] = EBUR128_CENTER; + st->d->channel_map[3] = EBUR128_LEFT_SURROUND; + st->d->channel_map[4] = EBUR128_RIGHT_SURROUND; + } else { + for (i = 0; i < st->channels; ++i) { + switch (i) { + case 0: st->d->channel_map[i] = EBUR128_LEFT; break; + case 1: 
st->d->channel_map[i] = EBUR128_RIGHT;          break;
+        case 2:  st->d->channel_map[i] = EBUR128_CENTER;         break;
+        case 3:  st->d->channel_map[i] = EBUR128_UNUSED;         break;
+        case 4:  st->d->channel_map[i] = EBUR128_LEFT_SURROUND;  break;
+        case 5:  st->d->channel_map[i] = EBUR128_RIGHT_SURROUND; break;
+        default: st->d->channel_map[i] = EBUR128_UNUSED;         break;
+      }
+    }
+  }
+  return EBUR128_SUCCESS;
+}
+
+/* Set up the true-peak resampler for st->samplerate.
+ *
+ * Below 96000 Hz a 4x interpolator is created, below 192000 Hz a 2x one;
+ * otherwise no interpolator or buffers are allocated.
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM if an allocation fails.
+ * On failure st->d->interp and both resampler buffers are left NULL and
+ * nothing allocated here remains owned by the state. */
+static int ebur128_init_resampler(ebur128_state* st) {
+  int errcode = EBUR128_SUCCESS;
+
+  /* Start from a defined state so every exit path leaves valid pointers. */
+  st->d->resampler_buffer_input = NULL;
+  st->d->resampler_buffer_output = NULL;
+  st->d->interp = NULL;
+
+  if (st->samplerate < 96000) {
+    st->d->interp = interp_create(49, 4, st->channels);
+    CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit)
+  } else if (st->samplerate < 192000) {
+    st->d->interp = interp_create(49, 2, st->channels);
+    CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit)
+  } else {
+    /* No oversampling at this rate: nothing to allocate. */
+    goto exit;
+  }
+
+  st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4;
+  st->d->resampler_buffer_input = malloc(st->d->resampler_buffer_input_frames *
+                                      st->channels *
+                                      sizeof(float));
+  CHECK_ERROR(!st->d->resampler_buffer_input, EBUR128_ERROR_NOMEM, free_interp)
+
+  st->d->resampler_buffer_output_frames =
+                                    st->d->resampler_buffer_input_frames *
+                                    st->d->interp->factor;
+  st->d->resampler_buffer_output = malloc
+                                      (st->d->resampler_buffer_output_frames *
+                                       st->channels *
+                                       sizeof(float));
+  CHECK_ERROR(!st->d->resampler_buffer_output, EBUR128_ERROR_NOMEM, free_input)
+
+  return errcode;
+
+  /* Cleanup runs in reverse order of acquisition: a failed output buffer
+   * releases the input buffer and then falls through to release the
+   * interpolator as well. */
+free_input:
+  free(st->d->resampler_buffer_input);
+  st->d->resampler_buffer_input = NULL;
+free_interp:
+  interp_destroy(st->d->interp);
+  st->d->interp = NULL;
+exit:
+  return errcode;
+}
+
+static void ebur128_destroy_resampler(ebur128_state* st) {
+  free(st->d->resampler_buffer_input);
+  st->d->resampler_buffer_input = NULL;
+  free(st->d->resampler_buffer_output);
+  st->d->resampler_buffer_output = NULL;
+  interp_destroy(st->d->interp);
+  st->d->interp = NULL;
+}
+
+void
ebur128_get_version(int* major, int* minor, int* patch) { + *major = EBUR128_VERSION_MAJOR; + *minor = EBUR128_VERSION_MINOR; + *patch = EBUR128_VERSION_PATCH; +} + +ebur128_state* ebur128_init(unsigned int channels, + unsigned long samplerate, + int mode) { + int result; + int errcode; + ebur128_state* st; + unsigned int i; + size_t j; + + if (channels == 0 || samplerate < 5) { + return NULL; + } + + st = (ebur128_state*) malloc(sizeof(ebur128_state)); + CHECK_ERROR(!st, 0, exit) + st->d = (struct ebur128_state_internal*) + malloc(sizeof(struct ebur128_state_internal)); + CHECK_ERROR(!st->d, 0, free_state) + st->channels = channels; + errcode = ebur128_init_channel_map(st); + CHECK_ERROR(errcode, 0, free_internal) + + st->d->sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map) + st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_sample_peak, 0, free_sample_peak) + st->d->true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->true_peak, 0, free_prev_sample_peak) + st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_true_peak, 0, free_true_peak) + for (i = 0; i < channels; ++i) { + st->d->sample_peak[i] = 0.0; + st->d->prev_sample_peak[i] = 0.0; + st->d->true_peak[i] = 0.0; + st->d->prev_true_peak[i] = 0.0; + } + + st->d->use_histogram = mode & EBUR128_MODE_HISTOGRAM ? 
1 : 0; + st->d->history = ULONG_MAX; + st->samplerate = samplerate; + st->d->samples_in_100ms = (st->samplerate + 5) / 10; + st->mode = mode; + if ((mode & EBUR128_MODE_S) == EBUR128_MODE_S) { + st->d->window = 3000; + } else if ((mode & EBUR128_MODE_M) == EBUR128_MODE_M) { + st->d->window = 400; + } else { + goto free_prev_true_peak; + } + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, 0, free_true_peak) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + ebur128_init_filter(st); + + if (st->d->use_histogram) { + st->d->block_energy_histogram = malloc(1000 * sizeof(unsigned long)); + CHECK_ERROR(!st->d->block_energy_histogram, 0, free_audio_data) + for (i = 0; i < 1000; ++i) { + st->d->block_energy_histogram[i] = 0; + } + } else { + st->d->block_energy_histogram = NULL; + } + if (st->d->use_histogram) { + st->d->short_term_block_energy_histogram = malloc(1000 * sizeof(unsigned long)); + CHECK_ERROR(!st->d->short_term_block_energy_histogram, 0, free_block_energy_histogram) + for (i = 0; i < 1000; ++i) { + st->d->short_term_block_energy_histogram[i] = 0; + } + } else { + st->d->short_term_block_energy_histogram = NULL; + } + STAILQ_INIT(&st->d->block_list); + st->d->block_list_size = 0; + st->d->block_list_max = st->d->history / 100; + STAILQ_INIT(&st->d->short_term_block_list); + st->d->st_block_list_size = 0; + st->d->st_block_list_max = st->d->history / 3000; + st->d->short_term_frame_counter = 0; + + result = ebur128_init_resampler(st); + CHECK_ERROR(result, 0, free_short_term_block_energy_histogram) + + /* the 
first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + + /* initialize static constants */ + relative_gate_factor = pow(10.0, relative_gate / 10.0); + minus_twenty_decibels = pow(10.0, -20.0 / 10.0); + histogram_energy_boundaries[0] = pow(10.0, (-70.0 + 0.691) / 10.0); + if (st->d->use_histogram) { + for (i = 0; i < 1000; ++i) { + histogram_energies[i] = pow(10.0, ((double) i / 10.0 - 69.95 + 0.691) / 10.0); + } + for (i = 1; i < 1001; ++i) { + histogram_energy_boundaries[i] = pow(10.0, ((double) i / 10.0 - 70.0 + 0.691) / 10.0); + } + } + + return st; + +free_short_term_block_energy_histogram: + free(st->d->short_term_block_energy_histogram); +free_block_energy_histogram: + free(st->d->block_energy_histogram); +free_audio_data: + free(st->d->audio_data); +free_prev_true_peak: + free(st->d->prev_true_peak); +free_true_peak: + free(st->d->true_peak); +free_prev_sample_peak: + free(st->d->prev_sample_peak); +free_sample_peak: + free(st->d->sample_peak); +free_channel_map: + free(st->d->channel_map); +free_internal: + free(st->d); +free_state: + free(st); +exit: + return NULL; +} + +void ebur128_destroy(ebur128_state** st) { + struct ebur128_dq_entry* entry; + free((*st)->d->block_energy_histogram); + free((*st)->d->short_term_block_energy_histogram); + free((*st)->d->audio_data); + free((*st)->d->channel_map); + free((*st)->d->sample_peak); + free((*st)->d->prev_sample_peak); + free((*st)->d->true_peak); + free((*st)->d->prev_true_peak); + while (!STAILQ_EMPTY(&(*st)->d->block_list)) { + entry = STAILQ_FIRST(&(*st)->d->block_list); + STAILQ_REMOVE_HEAD(&(*st)->d->block_list, entries); + free(entry); + } + while (!STAILQ_EMPTY(&(*st)->d->short_term_block_list)) { + entry = STAILQ_FIRST(&(*st)->d->short_term_block_list); + STAILQ_REMOVE_HEAD(&(*st)->d->short_term_block_list, entries); + free(entry); + } + ebur128_destroy_resampler(*st); + 
free((*st)->d); + free(*st); + *st = NULL; +} + +static void ebur128_check_true_peak(ebur128_state* st, size_t frames) { + size_t c, i, frames_out; + + frames_out = interp_process(st->d->interp, frames, + st->d->resampler_buffer_input, + st->d->resampler_buffer_output); + + for (i = 0; i < frames_out; ++i) { + for (c = 0; c < st->channels; ++c) { + float val = st->d->resampler_buffer_output[i * st->channels + c]; + + if (val > st->d->prev_true_peak[c]) { + st->d->prev_true_peak[c] = val; + } else if (-val > st->d->prev_true_peak[c]) { + st->d->prev_true_peak[c] = -val; + } + } + } +} + +#ifdef __SSE2_MATH__ +#include +#define TURN_ON_FTZ \ + unsigned int mxcsr = _mm_getcsr(); \ + _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON); +#define TURN_OFF_FTZ _mm_setcsr(mxcsr); +#define FLUSH_MANUALLY +#else +#warning "manual FTZ is being used, please enable SSE2 (-msse2 -mfpmath=sse)" +#define TURN_ON_FTZ +#define TURN_OFF_FTZ +#define FLUSH_MANUALLY \ + st->d->v[ci][4] = fabs(st->d->v[ci][4]) < DBL_MIN ? 0.0 : st->d->v[ci][4]; \ + st->d->v[ci][3] = fabs(st->d->v[ci][3]) < DBL_MIN ? 0.0 : st->d->v[ci][3]; \ + st->d->v[ci][2] = fabs(st->d->v[ci][2]) < DBL_MIN ? 0.0 : st->d->v[ci][2]; \ + st->d->v[ci][1] = fabs(st->d->v[ci][1]) < DBL_MIN ? 0.0 : st->d->v[ci][1]; +#endif + +#define EBUR128_FILTER(type, min_scale, max_scale) \ +static void ebur128_filter_##type(ebur128_state* st, const type* src, \ + size_t frames) { \ + static double scaling_factor = \ + -((double) (min_scale)) > (double) (max_scale) ? 
\ + -((double) (min_scale)) : (double) (max_scale); \ + double* audio_data = st->d->audio_data + st->d->audio_data_index; \ + size_t i, c; \ + \ + TURN_ON_FTZ \ + \ + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK) { \ + for (c = 0; c < st->channels; ++c) { \ + double max = 0.0; \ + for (i = 0; i < frames; ++i) { \ + if (src[i * st->channels + c] > max) { \ + max = src[i * st->channels + c]; \ + } else if (-src[i * st->channels + c] > max) { \ + max = -1.0 * src[i * st->channels + c]; \ + } \ + } \ + max /= scaling_factor; \ + if (max > st->d->prev_sample_peak[c]) st->d->prev_sample_peak[c] = max; \ + } \ + } \ + if ((st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK && \ + st->d->interp) { \ + for (c = 0; c < st->channels; ++c) { \ + for (i = 0; i < frames; ++i) { \ + st->d->resampler_buffer_input[i * st->channels + c] = \ + (float) (src[i * st->channels + c] / scaling_factor); \ + } \ + } \ + ebur128_check_true_peak(st, frames); \ + } \ + for (c = 0; c < st->channels; ++c) { \ + int ci = st->d->channel_map[c] - 1; \ + if (ci < 0) continue; \ + else if (ci == EBUR128_DUAL_MONO - 1) ci = 0; /*dual mono */ \ + for (i = 0; i < frames; ++i) { \ + st->d->v[ci][0] = (double) (src[i * st->channels + c] / scaling_factor) \ + - st->d->a[1] * st->d->v[ci][1] \ + - st->d->a[2] * st->d->v[ci][2] \ + - st->d->a[3] * st->d->v[ci][3] \ + - st->d->a[4] * st->d->v[ci][4]; \ + audio_data[i * st->channels + c] = \ + st->d->b[0] * st->d->v[ci][0] \ + + st->d->b[1] * st->d->v[ci][1] \ + + st->d->b[2] * st->d->v[ci][2] \ + + st->d->b[3] * st->d->v[ci][3] \ + + st->d->b[4] * st->d->v[ci][4]; \ + st->d->v[ci][4] = st->d->v[ci][3]; \ + st->d->v[ci][3] = st->d->v[ci][2]; \ + st->d->v[ci][2] = st->d->v[ci][1]; \ + st->d->v[ci][1] = st->d->v[ci][0]; \ + } \ + FLUSH_MANUALLY \ + } \ + TURN_OFF_FTZ \ +} +EBUR128_FILTER(short, SHRT_MIN, SHRT_MAX) +EBUR128_FILTER(int, INT_MIN, INT_MAX) +EBUR128_FILTER(float, -1.0f, 1.0f) +EBUR128_FILTER(double, -1.0, 1.0) + 
+static double ebur128_energy_to_loudness(double energy) { + return 10 * (log(energy) / log(10.0)) - 0.691; +} + +static size_t find_histogram_index(double energy) { + size_t index_min = 0; + size_t index_max = 1000; + size_t index_mid; + + do { + index_mid = (index_min + index_max) / 2; + if (energy >= histogram_energy_boundaries[index_mid]) { + index_min = index_mid; + } else { + index_max = index_mid; + } + } while (index_max - index_min != 1); + + return index_min; +} + +static int ebur128_calc_gating_block(ebur128_state* st, size_t frames_per_block, + double* optional_output) { + size_t i, c; + double sum = 0.0; + double channel_sum; + for (c = 0; c < st->channels; ++c) { + if (st->d->channel_map[c] == EBUR128_UNUSED) { + continue; + } + channel_sum = 0.0; + if (st->d->audio_data_index < frames_per_block * st->channels) { + for (i = 0; i < st->d->audio_data_index / st->channels; ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + for (i = st->d->audio_data_frames - + (frames_per_block - + st->d->audio_data_index / st->channels); + i < st->d->audio_data_frames; ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + } else { + for (i = st->d->audio_data_index / st->channels - frames_per_block; + i < st->d->audio_data_index / st->channels; + ++i) { + channel_sum += st->d->audio_data[i * st->channels + c] * + st->d->audio_data[i * st->channels + c]; + } + } + if (st->d->channel_map[c] == EBUR128_Mp110 || + st->d->channel_map[c] == EBUR128_Mm110 || + st->d->channel_map[c] == EBUR128_Mp060 || + st->d->channel_map[c] == EBUR128_Mm060 || + st->d->channel_map[c] == EBUR128_Mp090 || + st->d->channel_map[c] == EBUR128_Mm090) { + channel_sum *= 1.41; + } else if (st->d->channel_map[c] == EBUR128_DUAL_MONO) { + channel_sum *= 2.0; + } + sum += channel_sum; + } + sum /= (double) frames_per_block; + if (optional_output) { + *optional_output = sum; + 
return EBUR128_SUCCESS; + } else if (sum >= histogram_energy_boundaries[0]) { + if (st->d->use_histogram) { + ++st->d->block_energy_histogram[find_histogram_index(sum)]; + } else { + struct ebur128_dq_entry* block; + if (st->d->block_list_size == st->d->block_list_max) { + block = STAILQ_FIRST(&st->d->block_list); + STAILQ_REMOVE_HEAD(&st->d->block_list, entries); + } else { + block = (struct ebur128_dq_entry*) malloc(sizeof(struct ebur128_dq_entry)); + if (!block) { + return EBUR128_ERROR_NOMEM; + } + st->d->block_list_size++; + } + block->z = sum; + STAILQ_INSERT_TAIL(&st->d->block_list, block, entries); + } + return EBUR128_SUCCESS; + } else { + return EBUR128_SUCCESS; + } +} + +int ebur128_set_channel(ebur128_state* st, + unsigned int channel_number, + int value) { + if (channel_number >= st->channels) { + return 1; + } + if (value == EBUR128_DUAL_MONO && + (st->channels != 1 || channel_number != 0)) { + fprintf(stderr, "EBUR128_DUAL_MONO only works with mono files!\n"); + return 1; + } + st->d->channel_map[channel_number] = value; + return 0; +} + +int ebur128_change_parameters(ebur128_state* st, + unsigned int channels, + unsigned long samplerate) { + int errcode = EBUR128_SUCCESS; + size_t j; + + if (channels == 0 || samplerate < 5) { + return EBUR128_ERROR_NOMEM; + } + + if (channels == st->channels && + samplerate == st->samplerate) { + return EBUR128_ERROR_NO_CHANGE; + } + + free(st->d->audio_data); + st->d->audio_data = NULL; + + if (channels != st->channels) { + unsigned int i; + + free(st->d->channel_map); st->d->channel_map = NULL; + free(st->d->sample_peak); st->d->sample_peak = NULL; + free(st->d->prev_sample_peak); st->d->prev_sample_peak = NULL; + free(st->d->true_peak); st->d->true_peak = NULL; + free(st->d->prev_true_peak); st->d->prev_true_peak = NULL; + st->channels = channels; + + errcode = ebur128_init_channel_map(st); + CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit) + + st->d->sample_peak = (double*) malloc(channels * sizeof(double)); + 
CHECK_ERROR(!st->d->sample_peak, EBUR128_ERROR_NOMEM, exit) + st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_sample_peak, EBUR128_ERROR_NOMEM, exit) + st->d->true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->true_peak, EBUR128_ERROR_NOMEM, exit) + st->d->prev_true_peak = (double*) malloc(channels * sizeof(double)); + CHECK_ERROR(!st->d->prev_true_peak, EBUR128_ERROR_NOMEM, exit) + for (i = 0; i < channels; ++i) { + st->d->sample_peak[i] = 0.0; + st->d->prev_sample_peak[i] = 0.0; + st->d->true_peak[i] = 0.0; + st->d->prev_true_peak[i] = 0.0; + } + } + if (samplerate != st->samplerate) { + st->samplerate = samplerate; + st->d->samples_in_100ms = (st->samplerate + 5) / 10; + ebur128_init_filter(st); + } + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + ebur128_destroy_resampler(st); + errcode = ebur128_init_resampler(st); + CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit) + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + /* reset short term frame counter */ + st->d->short_term_frame_counter = 0; + +exit: + return errcode; +} + +int ebur128_set_max_window(ebur128_state* st, unsigned long window) +{ + int errcode = EBUR128_SUCCESS; + size_t j; + + if ((st->mode & EBUR128_MODE_S) == EBUR128_MODE_S && window < 3000) { + window = 3000; 
+ } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && window < 400) { + window = 400; + } + if (window == st->d->window) { + return EBUR128_ERROR_NO_CHANGE; + } + + st->d->window = window; + free(st->d->audio_data); + st->d->audio_data = NULL; + st->d->audio_data_frames = st->samplerate * st->d->window / 1000; + if (st->d->audio_data_frames % st->d->samples_in_100ms) { + /* round up to multiple of samples_in_100ms */ + st->d->audio_data_frames = st->d->audio_data_frames + + st->d->samples_in_100ms + - (st->d->audio_data_frames % st->d->samples_in_100ms); + } + st->d->audio_data = (double*) malloc(st->d->audio_data_frames * + st->channels * + sizeof(double)); + CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit) + for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) { + st->d->audio_data[j] = 0.0; + } + + /* the first block needs 400ms of audio data */ + st->d->needed_frames = st->d->samples_in_100ms * 4; + /* start at the beginning of the buffer */ + st->d->audio_data_index = 0; + /* reset short term frame counter */ + st->d->short_term_frame_counter = 0; + +exit: + return errcode; +} + +int ebur128_set_max_history(ebur128_state* st, unsigned long history) +{ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA && history < 3000) { + history = 3000; + } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && history < 400) { + history = 400; + } + if (history == st->d->history) { + return EBUR128_ERROR_NO_CHANGE; + } + st->d->history = history; + st->d->block_list_max = st->d->history / 100; + st->d->st_block_list_max = st->d->history / 3000; + while (st->d->block_list_size > st->d->block_list_max) { + struct ebur128_dq_entry* block = STAILQ_FIRST(&st->d->block_list); + STAILQ_REMOVE_HEAD(&st->d->block_list, entries); + free(block); + st->d->block_list_size--; + } + while (st->d->st_block_list_size > st->d->st_block_list_max) { + struct ebur128_dq_entry* block = STAILQ_FIRST(&st->d->short_term_block_list); + 
STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); + free(block); + st->d->st_block_list_size--; + } + return EBUR128_SUCCESS; +} + +static int ebur128_energy_shortterm(ebur128_state* st, double* out); +#define EBUR128_ADD_FRAMES(type) \ +int ebur128_add_frames_##type(ebur128_state* st, \ + const type* src, size_t frames) { \ + size_t src_index = 0; \ + unsigned int c = 0; \ + for (c = 0; c < st->channels; c++) { \ + st->d->prev_sample_peak[c] = 0.0; \ + st->d->prev_true_peak[c] = 0.0; \ + } \ + while (frames > 0) { \ + if (frames >= st->d->needed_frames) { \ + ebur128_filter_##type(st, src + src_index, st->d->needed_frames); \ + src_index += st->d->needed_frames * st->channels; \ + frames -= st->d->needed_frames; \ + st->d->audio_data_index += st->d->needed_frames * st->channels; \ + /* calculate the new gating block */ \ + if ((st->mode & EBUR128_MODE_I) == EBUR128_MODE_I) { \ + if (ebur128_calc_gating_block(st, st->d->samples_in_100ms * 4, NULL)) {\ + return EBUR128_ERROR_NOMEM; \ + } \ + } \ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \ + st->d->short_term_frame_counter += st->d->needed_frames; \ + if (st->d->short_term_frame_counter == st->d->samples_in_100ms * 30) { \ + struct ebur128_dq_entry* block; \ + double st_energy; \ + if (ebur128_energy_shortterm(st, &st_energy) == EBUR128_SUCCESS && \ + st_energy >= histogram_energy_boundaries[0]) { \ + if (st->d->use_histogram) { \ + ++st->d->short_term_block_energy_histogram[ \ + find_histogram_index(st_energy)];\ + } else { \ + if (st->d->st_block_list_size == st->d->st_block_list_max) { \ + block = STAILQ_FIRST(&st->d->short_term_block_list); \ + STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); \ + } else { \ + block = (struct ebur128_dq_entry*) \ + malloc(sizeof(struct ebur128_dq_entry)); \ + if (!block) return EBUR128_ERROR_NOMEM; \ + st->d->st_block_list_size++; \ + } \ + block->z = st_energy; \ + STAILQ_INSERT_TAIL(&st->d->short_term_block_list, \ + block, entries); \ + } \ 
+ } \ + st->d->short_term_frame_counter = st->d->samples_in_100ms * 20; \ + } \ + } \ + /* 100ms are needed for all blocks besides the first one */ \ + st->d->needed_frames = st->d->samples_in_100ms; \ + /* reset audio_data_index when buffer full */ \ + if (st->d->audio_data_index == st->d->audio_data_frames * st->channels) {\ + st->d->audio_data_index = 0; \ + } \ + } else { \ + ebur128_filter_##type(st, src + src_index, frames); \ + st->d->audio_data_index += frames * st->channels; \ + if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \ + st->d->short_term_frame_counter += frames; \ + } \ + st->d->needed_frames -= frames; \ + frames = 0; \ + } \ + } \ + for (c = 0; c < st->channels; c++) { \ + if (st->d->prev_sample_peak[c] > st->d->sample_peak[c]) { \ + st->d->sample_peak[c] = st->d->prev_sample_peak[c]; \ + } \ + if (st->d->prev_true_peak[c] > st->d->true_peak[c]) { \ + st->d->true_peak[c] = st->d->prev_true_peak[c]; \ + } \ + } \ + return EBUR128_SUCCESS; \ +} +EBUR128_ADD_FRAMES(short) +EBUR128_ADD_FRAMES(int) +EBUR128_ADD_FRAMES(float) +EBUR128_ADD_FRAMES(double) + +static int ebur128_calc_relative_threshold(ebur128_state* st, + size_t* above_thresh_counter, + double* relative_threshold) { + struct ebur128_dq_entry* it; + size_t i; + + if (st->d->use_histogram) { + for (i = 0; i < 1000; ++i) { + *relative_threshold += st->d->block_energy_histogram[i] * + histogram_energies[i]; + *above_thresh_counter += st->d->block_energy_histogram[i]; + } + } else { + STAILQ_FOREACH(it, &st->d->block_list, entries) { + ++*above_thresh_counter; + *relative_threshold += it->z; + } + } + + return EBUR128_SUCCESS; +} + +static int ebur128_gated_loudness(ebur128_state** sts, size_t size, + double* out) { + struct ebur128_dq_entry* it; + double gated_loudness = 0.0; + double relative_threshold = 0.0; + size_t above_thresh_counter = 0; + size_t i, j, start_index; + + for (i = 0; i < size; i++) { + if (sts[i] && (sts[i]->mode & EBUR128_MODE_I) != EBUR128_MODE_I) { + return 
EBUR128_ERROR_INVALID_MODE; + } + } + + for (i = 0; i < size; i++) { + if (!sts[i]) { + continue; + } + ebur128_calc_relative_threshold(sts[i], &above_thresh_counter, &relative_threshold); + } + if (!above_thresh_counter) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + + relative_threshold /= (double)above_thresh_counter; + relative_threshold *= relative_gate_factor; + + above_thresh_counter = 0; + if (relative_threshold < histogram_energy_boundaries[0]) { + start_index = 0; + } else { + start_index = find_histogram_index(relative_threshold); + if (relative_threshold > histogram_energies[start_index]) { + ++start_index; + } + } + for (i = 0; i < size; i++) { + if (!sts[i]) { + continue; + } + if (sts[i]->d->use_histogram) { + for (j = start_index; j < 1000; ++j) { + gated_loudness += sts[i]->d->block_energy_histogram[j] * + histogram_energies[j]; + above_thresh_counter += sts[i]->d->block_energy_histogram[j]; + } + } else { + STAILQ_FOREACH(it, &sts[i]->d->block_list, entries) { + if (it->z >= relative_threshold) { + ++above_thresh_counter; + gated_loudness += it->z; + } + } + } + } + if (!above_thresh_counter) { + *out = -HUGE_VAL; + return EBUR128_SUCCESS; + } + gated_loudness /= (double) above_thresh_counter; + *out = ebur128_energy_to_loudness(gated_loudness); + return EBUR128_SUCCESS; +} + +int ebur128_relative_threshold(ebur128_state* st, double* out) { + double relative_threshold = 0.0; + size_t above_thresh_counter = 0; + + if ((st->mode & EBUR128_MODE_I) != EBUR128_MODE_I) { + return EBUR128_ERROR_INVALID_MODE; + } + + ebur128_calc_relative_threshold(st, &above_thresh_counter, &relative_threshold); + + if (!above_thresh_counter) { + *out = -70.0; + return EBUR128_SUCCESS; + } + + relative_threshold /= (double)above_thresh_counter; + relative_threshold *= relative_gate_factor; + + *out = ebur128_energy_to_loudness(relative_threshold); + return EBUR128_SUCCESS; +} + +int ebur128_loudness_global(ebur128_state* st, double* out) { + return 
ebur128_gated_loudness(&st, 1, out);
+}
+
+/* Integrated (gated) loudness over several states; thin wrapper around
+ * ebur128_gated_loudness(). */
+int ebur128_loudness_global_multiple(ebur128_state** sts, size_t size,
+                                     double* out) {
+  return ebur128_gated_loudness(sts, size, out);
+}
+
+/* Write the mean energy of the most recent interval_frames frames to *out.
+ * Returns EBUR128_ERROR_INVALID_MODE if the interval is longer than the
+ * audio buffer, EBUR128_SUCCESS otherwise. */
+static int ebur128_energy_in_interval(ebur128_state* st,
+                                      size_t interval_frames,
+                                      double* out) {
+  if (interval_frames > st->d->audio_data_frames) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  ebur128_calc_gating_block(st, interval_frames, out);
+  return EBUR128_SUCCESS;
+}
+
+/* Energy of the last 3 s (30 blocks of 100 ms). */
+static int ebur128_energy_shortterm(ebur128_state* st, double* out) {
+  return ebur128_energy_in_interval(st, st->d->samples_in_100ms * 30, out);
+}
+
+/* Momentary loudness: last 400 ms. *out is -HUGE_VAL when the energy is not
+ * positive. Returns the error of the energy query, else EBUR128_SUCCESS. */
+int ebur128_loudness_momentary(ebur128_state* st, double* out) {
+  double energy;
+  int error = ebur128_energy_in_interval(st, st->d->samples_in_100ms * 4,
+                                         &energy);
+  if (error) {
+    return error;
+  } else if (energy <= 0.0) {
+    *out = -HUGE_VAL;
+    return EBUR128_SUCCESS;
+  }
+  *out = ebur128_energy_to_loudness(energy);
+  return EBUR128_SUCCESS;
+}
+
+/* Short-term loudness: last 3 s. *out is -HUGE_VAL when the energy is not
+ * positive. Returns the error of the energy query, else EBUR128_SUCCESS. */
+int ebur128_loudness_shortterm(ebur128_state* st, double* out) {
+  double energy;
+  int error = ebur128_energy_shortterm(st, &energy);
+  if (error) {
+    return error;
+  } else if (energy <= 0.0) {
+    *out = -HUGE_VAL;
+    return EBUR128_SUCCESS;
+  }
+  *out = ebur128_energy_to_loudness(energy);
+  return EBUR128_SUCCESS;
+}
+
+/* Loudness of the last `window` milliseconds.
+ *
+ * Returns EBUR128_ERROR_INVALID_MODE when the requested window does not fit
+ * into the audio buffer, which now includes windows so large that
+ * samplerate * window cannot be represented in an unsigned long.
+ * *out is -HUGE_VAL when the energy is not positive. */
+int ebur128_loudness_window(ebur128_state* st,
+                            unsigned long window,
+                            double* out) {
+  double energy;
+  size_t interval_frames;
+  int error;
+
+  /* Without this guard the product below can wrap, and the wrapped (small)
+   * frame count could pass the buffer-size check and yield the loudness of
+   * the wrong interval instead of an error. */
+  if (st->samplerate > 0 && window > ULONG_MAX / st->samplerate) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  interval_frames = st->samplerate * window / 1000;
+  error = ebur128_energy_in_interval(st, interval_frames, &energy);
+  if (error) {
+    return error;
+  } else if (energy <= 0.0) {
+    *out = -HUGE_VAL;
+    return EBUR128_SUCCESS;
+  }
+  *out = ebur128_energy_to_loudness(energy);
+  return EBUR128_SUCCESS;
+}
+
+/* qsort() comparator for doubles: negative, zero or positive for
+ * *p1 < *p2, *p1 == *p2, *p1 > *p2. */
+static int ebur128_double_cmp(const void *p1, const void *p2) {
+  const double* d1 = (const double*) p1;
+  const double* d2 = (const double*) p2;
+  return (*d1 > *d2) - (*d1 < *d2);
+}
+
+/* EBU - TECH 3342 */
+int ebur128_loudness_range_multiple(ebur128_state** sts, size_t 
size, + double* out) { + size_t i, j; + struct ebur128_dq_entry* it; + double* stl_vector; + size_t stl_size; + double* stl_relgated; + size_t stl_relgated_size; + double stl_power, stl_integrated; + /* High and low percentile energy */ + double h_en, l_en; + int use_histogram = 0; + + for (i = 0; i < size; ++i) { + if (sts[i]) { + if ((sts[i]->mode & EBUR128_MODE_LRA) != EBUR128_MODE_LRA) { + return EBUR128_ERROR_INVALID_MODE; + } + if (i == 0 && sts[i]->mode & EBUR128_MODE_HISTOGRAM) { + use_histogram = 1; + } else if (use_histogram != !!(sts[i]->mode & EBUR128_MODE_HISTOGRAM)) { + return EBUR128_ERROR_INVALID_MODE; + } + } + } + + if (use_histogram) { + unsigned long hist[1000] = { 0 }; + size_t percentile_low, percentile_high; + size_t index; + + stl_size = 0; + stl_power = 0.0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + for (j = 0; j < 1000; ++j) { + hist[j] += sts[i]->d->short_term_block_energy_histogram[j]; + stl_size += sts[i]->d->short_term_block_energy_histogram[j]; + stl_power += sts[i]->d->short_term_block_energy_histogram[j] + * histogram_energies[j]; + } + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + + stl_power /= stl_size; + stl_integrated = minus_twenty_decibels * stl_power; + + if (stl_integrated < histogram_energy_boundaries[0]) { + index = 0; + } else { + index = find_histogram_index(stl_integrated); + if (stl_integrated > histogram_energies[index]) { + ++index; + } + } + stl_size = 0; + for (j = index; j < 1000; ++j) { + stl_size += hist[j]; + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + + percentile_low = (size_t) ((stl_size - 1) * 0.1 + 0.5); + percentile_high = (size_t) ((stl_size - 1) * 0.95 + 0.5); + + stl_size = 0; + j = index; + while (stl_size <= percentile_low) { + stl_size += hist[j++]; + } + l_en = histogram_energies[j - 1]; + while (stl_size <= percentile_high) { + stl_size += hist[j++]; + } + h_en = histogram_energies[j - 1]; + *out = 
ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en); + return EBUR128_SUCCESS; + + } else { + stl_size = 0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) { + ++stl_size; + } + } + if (!stl_size) { + *out = 0.0; + return EBUR128_SUCCESS; + } + stl_vector = (double*) malloc(stl_size * sizeof(double)); + if (!stl_vector) { + return EBUR128_ERROR_NOMEM; + } + + j = 0; + for (i = 0; i < size; ++i) { + if (!sts[i]) { + continue; + } + STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) { + stl_vector[j] = it->z; + ++j; + } + } + qsort(stl_vector, stl_size, sizeof(double), ebur128_double_cmp); + stl_power = 0.0; + for (i = 0; i < stl_size; ++i) { + stl_power += stl_vector[i]; + } + stl_power /= (double) stl_size; + stl_integrated = minus_twenty_decibels * stl_power; + + stl_relgated = stl_vector; + stl_relgated_size = stl_size; + while (stl_relgated_size > 0 && *stl_relgated < stl_integrated) { + ++stl_relgated; + --stl_relgated_size; + } + + if (stl_relgated_size) { + h_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.95 + 0.5)]; + l_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.1 + 0.5)]; + free(stl_vector); + *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en); + return EBUR128_SUCCESS; + } else { + free(stl_vector); + *out = 0.0; + return EBUR128_SUCCESS; + } + } +} + +int ebur128_loudness_range(ebur128_state* st, double* out) { + return ebur128_loudness_range_multiple(&st, 1, out); +} + +int ebur128_sample_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) != EBUR128_MODE_SAMPLE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_prev_sample_peak(ebur128_state* st, + unsigned int 
channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) != EBUR128_MODE_SAMPLE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->prev_sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->true_peak[channel_number] > st->d->sample_peak[channel_number] + ? st->d->true_peak[channel_number] + : st->d->sample_peak[channel_number]; + return EBUR128_SUCCESS; +} + +int ebur128_prev_true_peak(ebur128_state* st, + unsigned int channel_number, + double* out) { + if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) { + return EBUR128_ERROR_INVALID_MODE; + } else if (channel_number >= st->channels) { + return EBUR128_ERROR_INVALID_CHANNEL_INDEX; + } + *out = st->d->prev_true_peak[channel_number] + > st->d->prev_sample_peak[channel_number] + ? 
st->d->prev_true_peak[channel_number] + : st->d->prev_sample_peak[channel_number]; + return EBUR128_SUCCESS; +} \ No newline at end of file diff --git a/tools/ref/waves/CMakeLists.txt b/tools/ref/waves/CMakeLists.txt new file mode 100644 index 0000000..3045b00 --- /dev/null +++ b/tools/ref/waves/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(inc) +AUX_SOURCE_DIRECTORY(src DIR_WAVES_SRCS) +add_library(waves ${DIR_WAVES_SRCS}) \ No newline at end of file diff --git a/tools/ref/waves/inc/ExtraMono.h b/tools/ref/waves/inc/ExtraMono.h new file mode 100644 index 0000000..280fab0 --- /dev/null +++ b/tools/ref/waves/inc/ExtraMono.h @@ -0,0 +1,230 @@ + +#include +#include + +#define SIZE_LONG 4 +#define SIZE_SHORT 2 + +#define SIZE_FLAG 4 +#define FMT_TAG 0x0001 + +#define BITS_PER_BYTE 8 + +#ifndef AFS_CMPL_MAX_WAV +#define AFS_CMPL_MAX_WAV 15360000 // 时长16分(960*16000) +#endif + +//+---------------------------------------------------------------------------+ +//+ 从文件中读取一个32位数据 +//+---------------------------------------------------------------------------+ +unsigned long fa_read_u32(FILE* fp) +{ + unsigned long cx; + unsigned char temp[SIZE_LONG]; + + fread(temp, sizeof(unsigned char), SIZE_LONG, fp); + cx = (unsigned long)temp[0]; + cx |= (unsigned long)temp[1] << 8; + cx |= (unsigned long)temp[2] << 16; + cx |= (unsigned long)temp[3] << 24; + return cx; +} + +//+---------------------------------------------------------------------------+ +//+ 从文件中读取一个16位数据 +//+---------------------------------------------------------------------------+ +unsigned short fa_read_u16(FILE *fp) +{ + unsigned short cx; + unsigned char temp[SIZE_SHORT]; + + fread(temp, sizeof(unsigned char), SIZE_SHORT, fp); + cx = temp[0] | (temp[1] * 256); + return cx; +} + +int GetWaveHeadLen(const char* pszFile,unsigned short &channels, int &nPos, int& nLength) +{ + //+---------------------------------------------------------------------------+ + //+ 读取WAVE的头信息 + 
//+---------------------------------------------------------------------------+ + unsigned char temp[SIZE_FLAG]; + unsigned short bits_per_sample; + unsigned long x_size; + unsigned long n_skip; + + unsigned short format; + //unsigned short channels; + unsigned long sample_rate; + unsigned short block_align; + unsigned long data_size; + int nCnt = 0; + + /* 读取通用信息 */ + FILE* pWavFile = fopen(pszFile, "rb"); + if ( pWavFile == NULL ) + { + printf("Input file can not be opened!\n"); + return -1; + } + + fseek(pWavFile, 0, SEEK_END ); + nLength = ftell(pWavFile); + fseek(pWavFile, 0, SEEK_SET ); + + // 判断资源标识为"RIFF" + fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile); + if ( memcmp(temp, "RIFF", (size_t)SIZE_FLAG) != 0 ) + { + fprintf(stderr, "Resource flag is not RIFF!\n"); + fclose(pWavFile); + + return -1; + } + nCnt += SIZE_FLAG; + + fseek(pWavFile, SIZE_LONG, SEEK_CUR); + nCnt += SIZE_LONG; + + // 判断文件标识为"WAVE" + fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile); + if ( memcmp(temp, "WAVE", (size_t)SIZE_FLAG) != 0 ) + { + fprintf(stderr, "File flag is not WAVE\n"); + fclose(pWavFile); + + return -1; + } + nCnt += SIZE_FLAG; + + // 判断格式标识为"fmt " + fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile); + if ( memcmp(temp, "fmt ", (size_t)SIZE_FLAG) != 0 ) + { + fprintf(stderr, "Format flag is not FMT!\n"); + fclose(pWavFile); + + return -1; + } + nCnt += SIZE_FLAG; + + x_size = fa_read_u32(pWavFile); + nCnt += SIZE_LONG; + + // 判断编码格式为0x0001 + format = fa_read_u16(pWavFile); + nCnt += SIZE_SHORT; + if ( format != FMT_TAG ) + { + fprintf(stderr, "Encoding format is not 0x0001!\n"); + fclose(pWavFile); + + return -1; + } + + // 读取声道数目和采样频率 + channels = fa_read_u16(pWavFile); + sample_rate = fa_read_u32(pWavFile); + + fseek(pWavFile, SIZE_LONG, SEEK_CUR); + + // 读取对齐单位和样本位数 + block_align = fa_read_u16(pWavFile); + bits_per_sample = fa_read_u16(pWavFile); + + /* 读取特殊信息 */ + x_size -= (4*SIZE_SHORT + 2*SIZE_LONG); + if ( x_size != 0 ) + { + fseek(pWavFile, 
x_size, SEEK_CUR); + } + + // 读取数据大小 + fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile); + while ( memcmp(temp, "data", SIZE_FLAG) != 0 ) + { + n_skip = fa_read_u32(pWavFile); + fseek(pWavFile, n_skip, SEEK_CUR); + + fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile); + } + + data_size = fa_read_u32(pWavFile); + fclose(pWavFile); + + //+---------------------------------------------------------------------------+ + //+ 返回WAVE的头长度 + //+---------------------------------------------------------------------------+ + nPos = nCnt; + int nHeadLength = nLength - data_size; + return nHeadLength; +} + +bool ExtraMono(const std::string &sInput, const std::string &sOutput) +{ + FILE *pFile = fopen(sInput.c_str(), "rb"); + if ( NULL == pFile ) + { + printf("Fopen Error %s", sInput.c_str()); + return false; + } + + FILE *pFile2 = fopen(sOutput.c_str(), "wb"); + if ( NULL == pFile2 ) + { + printf("Fopen2 Error %s", sOutput.c_str()); + return false; + } + + short *pBuf = new short[AFS_CMPL_MAX_WAV]; + int nLen = 0; + + nLen = fread(pBuf, sizeof(short), AFS_CMPL_MAX_WAV, pFile); + if ( nLen <= 0 ) + { + perror("Fread Error!"); + return false; + } + + unsigned short channels=0; + int nPos; + int nLength; + int nHeadByte = GetWaveHeadLen(sInput.c_str(),channels, nPos, nLength); + int nHeadShort = nHeadByte/2; + + if (channels==1) + { + fwrite(pBuf + nHeadShort, sizeof(short), nLen - nHeadShort, pFile2); + } + else + { + short *pBuf2 = new short[AFS_CMPL_MAX_WAV]; + memcpy( pBuf2, pBuf, nHeadShort*sizeof(short)); + pBuf2[nPos] = 1; + + unsigned char tmp[2]; + memcpy(tmp, &pBuf2[nPos], 2); + + pBuf2[nPos] = static_cast(tmp[0] | tmp[1]*256); + + short *pWav = pBuf + nHeadShort; + nLen -= nHeadShort; + + int halfnlen=nLen/2; + for (int i=0;i<=halfnlen;i++ ) + { + pBuf2[nHeadShort+i] = *(pWav+i*2); + } + fwrite(pBuf2, sizeof(short), nLen+nHeadShort, pFile2); + + delete []pBuf; + delete []pBuf2; + pBuf = NULL; + pBuf2 = NULL; + } + + + fclose(pFile); + fclose(pFile2); + return 
true; +} diff --git a/tools/ref/waves/inc/WaveFile.h b/tools/ref/waves/inc/WaveFile.h new file mode 100644 index 0000000..8b57806 --- /dev/null +++ b/tools/ref/waves/inc/WaveFile.h @@ -0,0 +1,74 @@ +#ifndef WAVE_FILE_H +#define WAVE_FILE_H + +#include +#include + + +typedef enum SAMPLE_FORMAT +{ + SF_U8 = 8, + SF_S16 = 16, + SF_S24 = 24, + SF_S32 = 32, + SF_IEEE_FLOAT = 0x100 + 32, + SF_IEEE_DOUBLE = 0x100 + 64, + SF_MAX, +} SAMPLE_FORMAT; + +/* 主处理对象 **/ +class CWaveFile +{ +public: + /* 构造传入文件及 是读还是写 **/ + CWaveFile(const char* Filename, bool Write); + virtual ~CWaveFile(); + +public: + int GetChannels(); + int GetSampleRate(); + double GetDuration(); // in second + uint32_t GetChannelMask(); + void SetChannels(int Channels); + void SetSampleRate(int SampleRate); + void SetSampleFormat(SAMPLE_FORMAT Format); + void SetChannelMask(uint32_t Mask); + void Stat(); + void SetupDone(); + bool ReadFrameAsS16(short* FrameSamples, int Frames = 1); + bool ReadFrameAsDouble(double* FrameSamples, int Frames = 1); + bool ReadFrameAsfloat(float* FrameSamples, int Frames = 1); + void WriteRaw(void* Raw, int Size); + void WriteFrame(uint8_t* FrameSamples, int Frames = 1); + void WriteFrame(short* FrameSamples, int Frames = 1); + void WriteFrame(int32_t* FrameSamples, int Frames = 1); + void WriteFrameS24(int32_t* FrameSamples, int Frames = 1); + void WriteFrame(double* FrameSamples, int Frames = 1); + void WriteFrame(float* FrameSamples, int Frames=1); + void Seek(int FramePos, int Where = SEEK_SET); + bool GetStatus(); + SAMPLE_FORMAT GetFormat(); + int GetTotalFrames(); + int GetFramesRead(); + + +protected: + FILE* File; + int Channels; /* 通道数 **/ + int SampleRate; /* 采样率 **/ + SAMPLE_FORMAT Format; /* 采样精度 **/ + int SampleSize; // Measured in Bits + unsigned int FrameStartPos; /* 音频数据的起始位置 **/ + unsigned long TotalFrames; /* 总帧数,如果16bit,则一个short为一帧 **/ + unsigned long FramesRead; + double Duration; /* 时长 **/ + + bool ReadOnly; /* 是度还是写 **/ + + uint32_t ChannelMask; + + bool 
m_bOK; /* 文件是否已经被打开 **/ +}; + + +#endif \ No newline at end of file diff --git a/tools/ref/waves/src/WaveFile.cpp b/tools/ref/waves/src/WaveFile.cpp new file mode 100644 index 0000000..83b83d7 --- /dev/null +++ b/tools/ref/waves/src/WaveFile.cpp @@ -0,0 +1,824 @@ + +#include +#include +#include +#include + +#if WIN32 +#else +#include +#endif + +#include "WaveFile.h" + +#define SPEAKER_FRONT_LEFT 0x1 +#define SPEAKER_FRONT_RIGHT 0x2 +#define SPEAKER_FRONT_CENTER 0x4 +#define SPEAKER_LOW_FREQUENCY 0x8 +#define SPEAKER_BACK_LEFT 0x10 +#define SPEAKER_BACK_RIGHT 0x20 +#define SPEAKER_FRONT_LEFT_OF_CENTER 0x40 +#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80 +#define SPEAKER_BACK_CENTER 0x100 +#define SPEAKER_SIDE_LEFT 0x200 +#define SPEAKER_SIDE_RIGHT 0x400 +#define SPEAKER_TOP_CENTER 0x800 +#define SPEAKER_TOP_FRONT_LEFT 0x1000 +#define SPEAKER_TOP_FRONT_CENTER 0x2000 +#define SPEAKER_TOP_FRONT_RIGHT 0x4000 +#define SPEAKER_TOP_BACK_LEFT 0x8000 +#define SPEAKER_TOP_BACK_CENTER 0x10000 +#define SPEAKER_TOP_BACK_RIGHT 0x20000 +#define SPEAKER_RESERVED 0x80000000 + + +#define SPEAKER_REAR_CENTER_SURROUND SPEAKER_BACK_CENTER + +#define DCA_MONO 0 +#define DCA_CHANNEL 1 +#define DCA_STEREO 2 +#define DCA_STEREO_SUMDIFF 3 +#define DCA_STEREO_TOTAL 4 +#define DCA_3F 5 +#define DCA_2F1R 6 +#define DCA_3F1R 7 +#define DCA_2F2R 8 +#define DCA_3F2R 9 +#define DCA_4F2R 10 + +#define DCA_DOLBY 101 /* FIXME */ + +#define DCA_CHANNEL_MAX DCA_3F2R /* We don't handle anything above that */ +#define DCA_CHANNEL_BITS 6 +#define DCA_CHANNEL_MASK 0x3F + +#define DCA_LFE 0x80 +#define DCA_ADJUST_LEVEL 0x100 + +#define WAVE_FORMAT_PCM 0x0001 +#define WAVE_FORMAT_IEEE_FLOAT 0x0003 +#define WAVE_FORMAT_EXTENSIBLE 0xFFFE + +static uint8_t wav_header[] = { + 'R', 'I', 'F', 'F', 0xfc, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E', + 'f', 'm', 't', ' ', 16, 0, 0, 0, + WAVE_FORMAT_PCM, WAVE_FORMAT_PCM >> 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, + 'd', 'a', 't', 'a', 0xd8, 0xff, 0xff, 0xff +}; + +static 
uint8_t wavmulti_header[] = { + 'R', 'I', 'F', 'F', 0xf0, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E', + 'f', 'm', 't', ' ', 40, 0, 0, 0, + (uint8_t)(WAVE_FORMAT_EXTENSIBLE & 0xFF), WAVE_FORMAT_EXTENSIBLE >> 8, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 22, 0, + 0, 0, 0, 0, 0, 0, + WAVE_FORMAT_IEEE_FLOAT, WAVE_FORMAT_IEEE_FLOAT >> 8, + 0, 0, 0, 0, 0x10, 0x00, 0x80, 0, 0, 0xaa, 0, 0x38, 0x9b, 0x71, + 'd', 'a', 't', 'a', 0xb4, 0xff, 0xff, 0xff +}; + +static void store4 (uint8_t * buf, int value) +{ + buf[0] = value; + buf[1] = value >> 8; + buf[2] = value >> 16; + buf[3] = value >> 24; +} + +static void store2 (uint8_t * buf, int value) +{ + buf[0] = value; + buf[1] = value >> 8; +} + + +static uint32_t find_chunk(FILE * file, const uint8_t chunk_id[4]) +{ + uint8_t buffer[8]; + while (1) { + size_t chunksize; + size_t s = fread(buffer, 1, 8, file); + if (s < 8) + return 0; + chunksize = (uint32_t)buffer[4] | ((uint32_t)buffer[5] << 8) | + ((uint32_t)buffer[6] << 16) | ((uint32_t)buffer[7] << 24); + if (!memcmp(buffer, chunk_id, 4)) + return chunksize; + fseek(file, chunksize, SEEK_CUR); + } +} + + +CWaveFile::CWaveFile(const char* Filename, bool Write) + : Duration(0), ReadOnly(false), m_bOK(false) +{ + Channels = 0; + + /* 打开文件 **/ + File = fopen(Filename, Write ? 
"wb":"rb"); + if ( !File ) + return; + + /* 设置写文件初始参数 **/ + if ( Write ) + { + SampleRate = 44100; + Channels = 2; + Format = SF_S16; + SampleSize = 16; + ChannelMask = 0; + m_bOK = true; + return; + } + + ReadOnly = true; + + size_t s; + uint8_t buffer[8]; + uint8_t *fmt = NULL; + uint32_t v; + uint32_t avg_bps; + uint32_t block_align; + unsigned short FormatType; + unsigned short SampleType; + + static const uint8_t riff[4] = { 'R', 'I', 'F', 'F' }; + static const uint8_t wave[4] = { 'W', 'A', 'V', 'E' }; + static const uint8_t fmt_[4] = { 'f', 'm', 't', ' ' }; + static const uint8_t data[4] = { 'd', 'a', 't', 'a' }; + + /* 前四个字节为 riff **/ + s = fread(buffer, 1, 8, File); + if (s < 8) + goto err2; + + if (memcmp(buffer, riff, 4)) + goto err2; + + /* 8~12为wave **/ + /* TODO: check size (in buffer[4..8]) */ + s = fread(buffer, 1, 4, File); + if (s < 4) + goto err2; + + if (memcmp(buffer, wave, 4)) + goto err2; + + s = find_chunk(File, fmt_); + if ( s != 16 && s != 18 && s != 40 ) + goto err2; + + fmt = (uint8_t*)malloc(s); + if (!fmt) + goto err2; + + if (fread(fmt, 1, s, File) != s) + goto err3; + + /* wFormatTag */ + v = (uint32_t)fmt[0] | ((uint32_t)fmt[1] << 8); + if (v != WAVE_FORMAT_PCM && v != WAVE_FORMAT_IEEE_FLOAT && v != WAVE_FORMAT_EXTENSIBLE) + goto err3; + + FormatType = v; + + if (s == 40 && 0xfffe == v) + { + // fmt begins at 0x14 of the wave file + v = *(unsigned short*)&fmt[0x2C - 0x14]; + } + + SampleType = v; + + /* wChannels */ + v = (uint32_t)fmt[2] | ((uint32_t)fmt[3] << 8); + + Channels = v; + + if (v < 1 || v > 32) + goto err3; + + /* dwSamplesPerSec */ + SampleRate = (uint32_t)fmt[4] | ((uint32_t)fmt[5] << 8) | + ((uint32_t)fmt[6] << 16) | ((uint32_t)fmt[7] << 24); + + /* dwAvgBytesPerSec */ + avg_bps = (uint32_t)fmt[8] | ((uint32_t)fmt[9] << 8) | + ((uint32_t)fmt[10] << 16) | ((uint32_t)fmt[11] << 24); + + /* wBlockAlign */ + block_align = (uint32_t)fmt[12] | ((uint32_t)fmt[13] << 8); + + /* wBitsPerSample */ + SampleSize = 
(uint32_t)fmt[14] | ((uint32_t)fmt[15] << 8); + if (SampleSize != 8 && SampleSize != 16 && SampleSize != 32 && SampleSize != 24 && SampleSize != 64) + goto err3; + + switch (SampleSize) + { + case 8: + Format = SF_U8; + break; + case 16: + Format = SF_S16; + break; + case 24: + Format = SF_S24; + break; + case 32: + { + if (SampleType == WAVE_FORMAT_IEEE_FLOAT) + Format = SF_IEEE_FLOAT; + else + Format = SF_S32; + + } + break; + case 64: + if (SampleType != WAVE_FORMAT_IEEE_FLOAT) + goto err3; + Format = SF_IEEE_DOUBLE; + break; + } + + + // Handle 24-bit samples individually +#if 0 + if (SampleSize == 24 && Channels <= 2) + { + int ba24 = Channels * (SampleSize / 8); // Align to 4x + + ba24 = (ba24 + 3) / 4 * 4; + + if (block_align != ba24) + goto err3; + } + else +#endif + { + if (block_align != Channels * (SampleSize / 8)) + goto err3; + } + + if (avg_bps != block_align * SampleRate) + goto err3; + + v = find_chunk(File, data); + + if (v == 0 || v % block_align != 0) + goto err3; + + TotalFrames = v / block_align; + + FramesRead = 0; + + if (FormatType == WAVE_FORMAT_EXTENSIBLE) + { + ChannelMask = *(unsigned int*)(&fmt[0x14]); + } + else + { + ChannelMask = 0; + } + + FrameStartPos = ftell(File); + + free(fmt); + m_bOK = true; + return; + +err3: + free(fmt); +err2: + fclose(File); + + File = NULL; +} + +bool CWaveFile::GetStatus() +{ + return m_bOK; +} + +SAMPLE_FORMAT CWaveFile::GetFormat() +{ + return Format; +} + +int CWaveFile::GetTotalFrames() +{ + return TotalFrames; +} + +int CWaveFile::GetFramesRead() +{ + return FramesRead; +} + +CWaveFile::~CWaveFile() +{ + if (File != NULL) + { + if (!ReadOnly) + { + unsigned int Size = ftell(File) - FrameStartPos;// 44; + + fseek(File, FrameStartPos - 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + + Size += FrameStartPos - 8; + + fseek(File, 4, SEEK_SET); + fwrite(&Size, 4, 1, File); + } + + fclose(File); + } +} + +int CWaveFile::GetSampleRate() +{ + return SampleRate; +} + +void CWaveFile::SetSampleRate(int 
SampleRate) +{ + this->SampleRate = SampleRate; +} + +void CWaveFile::SetupDone() +{ + unsigned char Header[68]; + + fseek(File, 0, SEEK_SET); + + SampleSize = Format & 0xFF; + + if (ChannelMask) + { + memcpy(Header, wavmulti_header, sizeof(wavmulti_header)); + + if (Format < SF_IEEE_FLOAT) + { + // store2(Header + 20, WAVE_FORMAT_PCM); + store2(Header + 44, WAVE_FORMAT_PCM); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + store2(Header + 38, SampleSize / 8 * 8); + store4(Header + 40, ChannelMask); + + fwrite(Header, sizeof(wavmulti_header), 1, File); + } + else + { + memcpy(Header, wav_header, sizeof(wav_header)); + + if (Format >= SF_IEEE_FLOAT) + { + store2(Header + 20, WAVE_FORMAT_IEEE_FLOAT); + } + + store2(Header + 22, Channels); + store4(Header + 24, SampleRate); + store4(Header + 28, SampleSize / 8 * SampleRate * Channels); + store2(Header + 32, SampleSize / 8 * Channels); + store2(Header + 34, SampleSize / 8 * 8); + + fwrite(Header, sizeof(wav_header), 1, File); + } + + + FrameStartPos = ftell(File); +} + + +void CWaveFile::Seek(int FramePos, int Where) +{ + // Ignoring Where + + fseek(File, FrameStartPos + FramePos * Channels* (SampleSize / 8), Where); + + FramesRead = FramePos; + +} + +int CWaveFile::GetChannels() +{ + return Channels; +} + +void CWaveFile::SetChannels(int Channels) +{ + this->Channels = Channels; +} + +void CWaveFile::SetSampleFormat(SAMPLE_FORMAT Format) +{ + this->Format = Format; +} + +uint32_t CWaveFile::GetChannelMask() +{ + return ChannelMask; +} + +void CWaveFile::SetChannelMask(uint32_t Mask) +{ + ChannelMask = Mask; +} + +bool CWaveFile::ReadFrameAsS16(short* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; 
frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) << 8; + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + return Frames == fread(FrameSamples, sizeof(FrameSamples[0])*Channels, Frames, File); + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 8); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + unsigned int DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = (short)(unsigned short)(DirectSample >> 16); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + double DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (short)(DirectSamples[ch + frame*Channels] * 32768); + } + } + return true; + } + return false; + } + } + return false; +} + +bool CWaveFile::ReadFrameAsfloat(float* FrameSamples, int 
Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + if(fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + +// float DirectSamples[32]; +// +// if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) +// { +// for (int frame = 0; frame 
< Frames; frame++) +// { +// for (int ch = 0; ch < Channels; ch++) +// { +// FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); +// } +// } +// return true; +// } +// return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +bool CWaveFile::ReadFrameAsDouble(double* FrameSamples, int Frames) +{ + if (FramesRead >= TotalFrames) + return false; + + FramesRead += Frames; + + switch (Format) + { + case SF_U8: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 1, 1, File)) + { + FrameSamples[ch + frame*Channels] = (DirectSample - 128) / 128.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S16: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + short DirectSample = 0; + if (1 == fread(&DirectSample, 2, 1, File)) + { + FrameSamples[ch + frame*Channels] = DirectSample / 32768.0; // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S24: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 3, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample << 8))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_S32: + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + uint32_t DirectSample = 0; + if (1 == fread(&DirectSample, 4, 1, File)) + { + FrameSamples[ch + frame*Channels] = ((int32_t)((uint32_t)(DirectSample 
))) / + (double)(((uint32_t)(1 << 31))); // (short)(DirectSample * 32767.0 / ((1 << 24) - 1)); + } + else + { + return false; + } + } + } + return true; + } + case SF_IEEE_FLOAT: + { + float DirectSamples[32]; + + if (Frames == fread(DirectSamples, sizeof(DirectSamples[0]) * Channels, Frames, File)) + { + for (int frame = 0; frame < Frames; frame++) + { + for (int ch = 0; ch < Channels; ch++) + { + FrameSamples[ch + frame*Channels] = (double)(DirectSamples[ch + frame*Channels]); + } + } + return true; + } + return false; + } + case SF_IEEE_DOUBLE: + { + if (Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File)) + { + return true; + } + return false; + } + } + return false; +} + +void CWaveFile::WriteRaw(void* Raw, int Size) +{ + fwrite(Raw, Size, 1, File); +} + + +void CWaveFile::WriteFrame(uint8_t* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrame(short* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrame(int32_t* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrameS24(int32_t* FrameSamples, int Frames) +{ + for (int c = 0; c < Channels; c++) + { + fwrite(&FrameSamples[c], 3, 1, File); + } +} + +void CWaveFile::WriteFrame(double* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + +void CWaveFile::WriteFrame(float* FrameSamples, int Frames) +{ + fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File); +} + + +double CWaveFile::GetDuration() +{ + return Duration; +}