Page MenuHomePhabricator

No OneTemporary

diff --git a/AIMeiSheng/RawNet3/infererence_fang_meisheng.py b/AIMeiSheng/RawNet3/infererence_fang_meisheng.py
index 471f92a..5612582 100644
--- a/AIMeiSheng/RawNet3/infererence_fang_meisheng.py
+++ b/AIMeiSheng/RawNet3/infererence_fang_meisheng.py
@@ -1,269 +1,270 @@
import argparse
import itertools
import os
import sys
from typing import Dict
import numpy as np
import soundfile as sf
import torch
import torch.nn.functional as F
from tqdm import tqdm
from models.RawNet3 import RawNet3
from models.RawNetBasicBlock import Bottle2neck
from utils import tuneThresholdfromScore, ComputeErrorRates, ComputeMinDcf
#model_directory = '/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3'
#sys.path.append(os.path.abspath(model_directory))
def get_embed_model(
    model_path="/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3/models/weights/model.pt",
):
    """Build a RawNet3 speaker-embedding network and load its weights.

    Args:
        model_path: Checkpoint file; the loaded object must hold the weights
            under the ``"model"`` key. The default is the location that was
            hard-coded before this parameter existed, so callers that still
            invoke ``get_embed_model()`` without arguments keep working.

    Returns:
        The RawNet3 model switched to evaluation mode.
    """
    model = RawNet3(
        Bottle2neck,
        model_scale=8,
        context=True,
        summed=True,
        encoder_type="ECA",
        nOut=256,
        out_bn=False,
        sinc_stride=10,
        log_sinc=True,
        norm_sinc="mean",
        grad_mult=1,
    )
    # map_location returns the storage unchanged, so no device remapping
    # happens while the checkpoint is deserialised.
    checkpoint = torch.load(
        model_path,
        map_location=lambda storage, loc: storage,
    )
    model.load_state_dict(checkpoint["model"])
    model.eval()
    return model
def main(args: Dict, model=None) -> None:
    """Run RawNet3 inference: one-utterance embedding or the Vox1-O benchmark.

    Args:
        args: Parsed options. Reads ``inference_utterance``, ``input``,
            ``out_dir``, ``n_segments``, ``vox1_o_benchmark`` and ``DB_dir``.
        model: Optional preloaded RawNet3 instance. When ``None`` a model is
            built here and its weights are read from
            ``./models/weights/model.pt``.
    """
    if model is None:
        model = RawNet3(
            Bottle2neck,
            model_scale=8,
            context=True,
            summed=True,
            encoder_type="ECA",
            nOut=256,
            out_bn=False,
            sinc_stride=10,
            log_sinc=True,
            norm_sinc="mean",
            grad_mult=1,
        )
        model.load_state_dict(
            torch.load(
                "./models/weights/model.pt",
                map_location=lambda storage, loc: storage,
            )["model"]
        )
    model.eval()

    gpu = torch.cuda.is_available()
    if gpu:
        model = model.to("cuda")

    if args.inference_utterance:
        # Average the per-segment embeddings into one vector and save it.
        output = extract_speaker_embd(
            model,
            fn=args.input,
            n_samples=48000,
            n_segments=args.n_segments,
            gpu=gpu,
        ).mean(0)
        np.save(args.out_dir, output.detach().cpu().numpy())
        return

    if args.vox1_o_benchmark:
        with open("../../trials/cleaned_test_list.txt", "r") as f:
            trials = f.readlines()

        # Unique file names referenced by the trial list (last two columns).
        files = list(itertools.chain(*[x.strip().split()[-2:] for x in trials]))
        setfiles = sorted(set(files))

        embd_dic = {}
        for f in tqdm(setfiles):
            # Extract exactly args.n_segments segments so the reshape below
            # matches (the default of 10 was used before, regardless of args).
            embd_dic[f] = extract_speaker_embd(
                model,
                os.path.join(args.DB_dir, f),
                n_samples=64000,
                n_segments=args.n_segments,
                gpu=gpu,
            )

        labels, scores = [], []
        for line in trials:
            data = line.split()
            ref_feat = F.normalize(embd_dic[data[1]], p=2, dim=1)
            com_feat = F.normalize(embd_dic[data[2]], p=2, dim=1)
            if gpu:
                ref_feat = ref_feat.cuda()
                com_feat = com_feat.cuda()
            dist = (
                torch.cdist(
                    ref_feat.reshape((args.n_segments, -1)),
                    com_feat.reshape((args.n_segments, -1)),
                )
                .detach()
                .cpu()
                .numpy()
            )
            # Smaller distance means more similar, hence the negation.
            score = -1.0 * np.mean(dist)
            labels.append(int(data[0]))
            scores.append(score)

        result = tuneThresholdfromScore(scores, labels, [1, 0.1])
        fnrs, fprs, thresholds = ComputeErrorRates(scores, labels)
        p_target, c_miss, c_fa = 0.05, 1, 1
        mindcf, _ = ComputeMinDcf(
            fnrs, fprs, thresholds, p_target, c_miss, c_fa
        )
        print(
            "Vox1-O benchmark Finished. EER: %2.4f, minDCF:%.5f"
            % (result[1], mindcf)
        )
import librosa
def extract_speaker_embd(
    model, fn: str, n_samples: int, n_segments: int = 10, gpu: bool = False
) -> torch.Tensor:
    """Compute RawNet3 embeddings for evenly spaced segments of one file.

    Args:
        model: Callable network applied to a ``(n_segments, n_samples)``
            float32 batch.
        fn: Path of the audio file; it is loaded with ``librosa`` at 16 kHz.
        n_samples: Length of every segment in samples.
        n_segments: Number of segments taken between the start and the last
            position where a full segment still fits.
        gpu: Move the batch to ``"cuda"`` before the forward pass.

    Returns:
        The model output for the batch (a tensor, not a NumPy array).

    Raises:
        ValueError: If the audio is not mono, not 16 kHz, or has no samples.
    """
    audio, sample_rate = librosa.load(fn, sr=16000)
    if len(audio.shape) > 1:
        raise ValueError(
            f"RawNet3 supports mono input only. Input data has a shape of {audio.shape}."
        )
    if sample_rate != 16000:
        raise ValueError(
            f"RawNet3 supports 16k sampling rate only. Input data's sampling rate is {sample_rate}."
        )
    if len(audio) == 0:
        # Wrap-padding below cannot extend an empty signal.
        raise ValueError(f"Input audio {fn} contains no samples.")

    if len(audio) < n_samples:
        # Too short for one segment: repeat the signal until it is long enough.
        shortage = n_samples - len(audio) + 1
        audio = np.pad(audio, (0, shortage), "wrap")

    starts = np.linspace(0, len(audio) - n_samples, num=n_segments)
    segments = [audio[int(s): int(s) + n_samples] for s in starts]
    audios = torch.from_numpy(np.stack(segments, axis=0).astype(np.float32))
    if gpu:
        audios = audios.to("cuda")
    with torch.no_grad():
        output = model(audios)
    return output
def get_embed(target_wav, embed_npy, model=None):
    """Extract the speaker embedding of ``target_wav`` and save it as ``.npy``.

    The option set mirrors the command-line interface of this module so that
    ``main`` can be reused unchanged.

    Args:
        target_wav: Audio file to embed.
        embed_npy: Destination path handed to ``np.save`` by ``main``.
        model: Optional preloaded RawNet3 model forwarded to ``main``.

    Raises:
        ValueError: If ``target_wav`` is empty.
    """
    parser = argparse.ArgumentParser(description="RawNet3 inference")
    parser.add_argument(
        "--inference_utterance", default=True, action="store_true"
    )
    parser.add_argument(
        "--input",
        type=str,
        default="",
        help="Input file to extract embedding. Required when 'inference_utterance' is True",
    )
    parser.add_argument(
        "--vox1_o_benchmark", default=False, action="store_true"
    )
    parser.add_argument(
        "--DB_dir",
        type=str,
        default="",
        help="Directory for VoxCeleb1. Required when 'vox1_o_benchmark' is True",
    )
    parser.add_argument("--out_dir", type=str, default="./out.npy")
    parser.add_argument(
        "--n_segments",
        type=int,
        default=10,
        help="number of segments to make using each utterance",
    )
    # This function is called as a library routine, so sys.argv belongs to the
    # host program. parse_known_args ignores options this parser does not
    # define instead of terminating the process.
    args, _unknown = parser.parse_known_args()
    args.input = target_wav
    args.out_dir = embed_npy

    if not (args.inference_utterance or args.vox1_o_benchmark):
        raise ValueError("either inference_utterance or vox1_o_benchmark must be set")
    if args.inference_utterance and args.input == "":
        raise ValueError("target_wav must not be empty")
    if args.vox1_o_benchmark and args.DB_dir == "":
        raise ValueError("DB_dir is required when vox1_o_benchmark is set")
    main(args, model)
if __name__ == "__main__":
    # Command-line entry point: embed one utterance or run the Vox1-O benchmark.
    parser = argparse.ArgumentParser(description="RawNet3 inference")
    parser.add_argument(
        "--inference_utterance", default=False, action="store_true"
    )
    parser.add_argument(
        "--input",
        type=str,
        default="",
        help="Input file to extract embedding. Required when 'inference_utterance' is True",
    )
    parser.add_argument(
        "--vox1_o_benchmark", default=False, action="store_true"
    )
    parser.add_argument(
        "--DB_dir",
        type=str,
        default="",
        help="Directory for VoxCeleb1. Required when 'vox1_o_benchmark' is True",
    )
    parser.add_argument("--out_dir", type=str, default="./out.npy")
    parser.add_argument(
        "--n_segments",
        type=int,
        default=10,
        help="number of segments to make using each utterance",
    )
    args = parser.parse_args()

    # parser.error prints usage and exits with status 2; unlike assert it is
    # not stripped when Python runs with -O.
    if not (args.inference_utterance or args.vox1_o_benchmark):
        parser.error("one of --inference_utterance or --vox1_o_benchmark is required")
    if args.inference_utterance and args.input == "":
        parser.error("--input is required with --inference_utterance")
    if args.vox1_o_benchmark and args.DB_dir == "":
        parser.error("--DB_dir is required with --vox1_o_benchmark")
    sys.exit(main(args))
diff --git a/AIMeiSheng/docker_demo/.requirements.txt.swp b/AIMeiSheng/docker_demo/.requirements.txt.swp
deleted file mode 100644
index 1adaec3..0000000
Binary files a/AIMeiSheng/docker_demo/.requirements.txt.swp and /dev/null differ
diff --git a/AIMeiSheng/docker_demo/common.py b/AIMeiSheng/docker_demo/common.py
new file mode 100644
index 0000000..6a31932
--- /dev/null
+++ b/AIMeiSheng/docker_demo/common.py
@@ -0,0 +1,52 @@
+import os
+import time
+import logging
+import urllib, urllib.request
+
+
def download2disk(url, dst_path):
    """
    Download ``url`` to ``dst_path``.
    :param url: source URL
    :param dst_path: local destination file
    :return: True when the download finished and ``dst_path`` exists,
        False when the URL is malformed or the transfer failed
    """
    st = time.time()
    try:
        urllib.request.urlretrieve(url, dst_path)
    except (OSError, ValueError) as err:
        # URLError/HTTPError derive from OSError; ValueError is raised for
        # malformed URLs such as an empty string. Callers test the boolean
        # result, so report failure instead of propagating.
        print(f"download {url} -> {dst_path} failed: {err}")
        return False
    print(f"download {url} -> {dst_path} sp = {time.time() - st}")
    return os.path.exists(dst_path)
+
+
def exec_cmd(cmd):
    """
    Echo ``cmd`` and run it through the shell.
    :param cmd: shell command line
    :return: True when ``os.system`` reports status 0, otherwise False
    """
    print(cmd)
    status = os.system(cmd)
    return status == 0
+
+
def exec_cmd_and_result(cmd):
    """
    Run ``cmd`` through the shell and capture its standard output.
    :param cmd: shell command line
    :return: everything the command wrote to stdout, as text
    """
    # The context manager closes the pipe even if read() raises.
    with os.popen(cmd) as pipe:
        return pipe.read()
+
+
def upload_file2cos(key, file_path, region='ap-singapore', bucket_name='av-audit-sync-sg-1256122840'):
    """
    Upload a file to COS and confirm the stored object is non-empty.
    :param key: object key inside the bucket
    :param file_path: local file to upload
    :param region: COS region
    :param bucket_name: bucket name
    :return: True when the upload command succeeds and the reported
        Content-Length is greater than zero, otherwise False
    """
    import shlex  # local import: only this function builds shell arguments

    gs_coscmd = "coscmd"
    gs_coscmd_conf = "~/.cos.conf"  # left unquoted so the shell expands "~"

    # key/file_path can originate from request data; quote every argument.
    base = "{} -c {} -r {} -b {}".format(
        gs_coscmd, gs_coscmd_conf, shlex.quote(region), shlex.quote(bucket_name))
    cmd = "{} upload {} {}".format(base, shlex.quote(file_path), shlex.quote(key))
    if not exec_cmd(cmd):
        return False

    cmd = "{} info {}".format(base, shlex.quote(key)) \
          + "| grep Content-Length |awk \'{print $2}\'"
    res_str = exec_cmd_and_result(cmd)
    logging.info("{},res={}".format(key, res_str))
    try:
        size = float(res_str)
    except ValueError:
        # Empty or non-numeric output: the object could not be verified.
        return False
    return size > 0
diff --git a/AIMeiSheng/docker_demo/http_server.py b/AIMeiSheng/docker_demo/http_server.py
new file mode 100644
index 0000000..23ac0ba
--- /dev/null
+++ b/AIMeiSheng/docker_demo/http_server.py
@@ -0,0 +1,128 @@
+# -*- coding: UTF-8 -*-
+
+"""
+SVC处理逻辑
+1. 根据跟定的vocal_url 判别男女
+2. 根据男女信息选择适合的男女url
+3. 模型推理
+"""
+
+import gc
+import os
+import shutil
+import sys
+import time
+import logging
+import hashlib
+import numpy as np
+import multiprocessing as mp
+from multiprocessing import Pool
+from flask import Flask, jsonify, request, abort
+from common import download2disk, exec_cmd, upload_file2cos
+from svc_online import GSWorkerAttr, SVCOnline, volume_adjustment
+
+# 全局设置
+import socket
+
# Per-host log file so several containers can share one log directory.
hostname = socket.gethostname()
log_file_name = f"av_svc_{hostname}.log"
# %H (24-hour clock) instead of %I: a 12-hour value without AM/PM makes
# morning and afternoon entries indistinguishable.
logging.basicConfig(
    filename=log_file_name,
    format='%(asctime)s %(levelname)s %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)

# Error codes returned in the "ret" field of the HTTP response.
gs_err_code_success = 0
gs_err_code_download_vocal = 100
gs_err_code_download_svc_url = 101
gs_err_code_svc_process = 102
gs_err_code_transcode = 103
gs_err_code_volume_adjust = 104
gs_err_code_upload = 105

# NOTE(review): these entries are appended after the project imports at the
# top of the file have already run, so they cannot influence those imports.
sys.path.append(os.path.dirname(__file__))
sys.path.append(os.path.join(os.path.dirname(__file__), "../"))

app = Flask(__name__)
+
+
def download_data(worker_attr):
    """
    Fetch the user vocal and both SVC source tracks for one request.
    :param worker_attr: object exposing ``vocal_url``/``vocal_path`` and the
        ``female_``/``male_svc_source_url``/``_path`` attributes
    :return: ``gs_err_code_success`` or the code of the download that failed
    """
    # Remove a stale copy at the path that is actually downloaded to.
    if os.path.exists(worker_attr.vocal_path):
        os.remove(worker_attr.vocal_path)

    st = time.time()
    if not download2disk(worker_attr.vocal_url, worker_attr.vocal_path):
        return gs_err_code_download_vocal
    logging.info(f"download vocal_url={worker_attr.vocal_url} sp = {time.time() - st}")

    # Source tracks are reused across requests: download only when missing.
    sources = (
        ("female_url", worker_attr.female_svc_source_url, worker_attr.female_svc_source_path),
        ("male_url", worker_attr.male_svc_source_url, worker_attr.male_svc_source_path),
    )
    for label, url, path in sources:
        if os.path.exists(path):
            continue
        st = time.time()
        if not download2disk(url, path):
            return gs_err_code_download_svc_url
        logging.info(f"download {label}={url} sp = {time.time() - st}")
    return gs_err_code_success
+
+
def transcode(wav_path, dst_path):
    """
    Convert ``wav_path`` to 44.1 kHz stereo at 64 kbit/s with ffmpeg.
    :param wav_path: input audio file
    :param dst_path: output file; overwritten if present
    :return: True when ffmpeg exits with status 0 and ``dst_path`` exists
    """
    st = time.time()
    cmd = f"ffmpeg -i {wav_path} -ar 44100 -ac 2 -b:a 64k -y {dst_path} -loglevel fatal"
    ok = exec_cmd(cmd)
    logging.info(f"transcode cmd={cmd}, sp = {time.time() - st}")
    # Checking the exit status too prevents a leftover file from an earlier
    # run being reported as a successful transcode.
    return ok and os.path.exists(dst_path)
+
+
gs_svc_online = None


def process_one(input_data):
    """
    Run the whole pipeline for one request: download, SVC, loudness
    adjustment, transcode, upload.
    :param input_data: request payload handed to ``GSWorkerAttr``
    :return: ``(err_code, path)``; ``path`` is None unless ``err_code`` is
        ``gs_err_code_success``
    """
    logging.info(f"start input={input_data} start prepare data ...")
    worker_attr = GSWorkerAttr(input_data)
    err = download_data(worker_attr)
    if err != gs_err_code_success:
        return err, None

    # process audio; the models are created lazily on the first request
    global gs_svc_online
    if gs_svc_online is None:
        gs_svc_online = SVCOnline()
    gs_svc_online.process(worker_attr)
    if not os.path.exists(worker_attr.target_wav_path):
        return gs_err_code_svc_process, None

    # stretch the volume to the requested loudness
    volume_adjustment(worker_attr.target_wav_path, worker_attr.target_loudness, worker_attr.target_wav_ad_path)
    if not os.path.exists(worker_attr.target_wav_ad_path):
        return gs_err_code_volume_adjust, None

    # transcode the loudness-adjusted file; transcoding the raw SVC output
    # would discard the adjustment made above
    if not transcode(worker_attr.target_wav_ad_path, worker_attr.target_path):
        return gs_err_code_transcode, None

    # upload: a False result from upload_file2cos means the upload failed
    st = time.time()
    if not upload_file2cos(worker_attr.target_url, worker_attr.target_path):
        return gs_err_code_upload, None
    logging.info(f"audio_url={worker_attr.vocal_url} upload {worker_attr.target_url} sp = {time.time() - st}")
    # NOTE(review): this is the local file path; the HTTP handler publishes it
    # as "out_url" -- confirm whether target_url was intended.
    return gs_err_code_success, worker_attr.target_path
+
+
@app.route("/ai_meisheng", methods=["POST"])
def get_song_res():
    """
    HTTP entry point: run one SVC job described by the JSON request body.
    :return: JSON object with ``ret`` (error code) and ``out_url``
    """
    # silent=True yields None for a missing or invalid JSON body, which is
    # rejected here instead of failing later inside the pipeline.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        abort(400)
    st = time.time()
    logging.info(f"ai_meisheng:in:{data}")
    ret, url = process_one(data)
    result = {"out_url": url, "ret": ret}
    # Log the payload itself; formatting the Response object only prints its
    # status line.
    logging.info(f"ai_meisheng:out:{data}-{result}, sp={time.time() - st}")
    return jsonify(result)
+
+
if __name__ == "__main__":
    # Listen on every interface, port 5000, with Flask threading disabled.
    app.run(
        host='0.0.0.0',
        port=5000,
        threaded=False,
    )
diff --git a/AIMeiSheng/docker_demo/main.py b/AIMeiSheng/docker_demo/main.py
deleted file mode 100644
index 094c2fc..0000000
--- a/AIMeiSheng/docker_demo/main.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import gradio as gr
-
-def greet(name):
- return "Hello " + name + "!!"
-
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-
-if __name__ == "__main__":
- demo.launch(server_name="0.0.0.0")
- # 注意:gradio启动项目后默认地址为127.0.0.1;使用docker部署需要将地址修改为0.0.0.0,否则会导致地址访问错误
- # 默认端口为7860,如需更改可在launch()中设置server_port=7000
-~
diff --git a/AIMeiSheng/docker_demo/svc_online.py b/AIMeiSheng/docker_demo/svc_online.py
new file mode 100644
index 0000000..f952346
--- /dev/null
+++ b/AIMeiSheng/docker_demo/svc_online.py
@@ -0,0 +1,162 @@
+# -*- coding: UTF-8 -*-
+"""
+SVC的核心处理逻辑
+"""
+import os
+import shutil
+import hashlib
+import time
+
+from AIMeiSheng.meisheng_svc_final import get_svc, process_svc
+from AIMeiSheng.voice_classification.online.voice_class_online_fang import VoiceClass
+from AIMeiSheng.RawNet3.infererence_fang_meisheng import get_embed, get_embed_model
+from AIMeiSheng.myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main, load_hubert, get_vc, get_rmvpe
+
+from AIMeiSheng.docker_demo.common import *
+
# Working locations used by the service.
gs_resource_cache_dir = "/tmp/gs_svc_resource_cache"
gs_tmp_dir = "/tmp/gs_svc_tmp"
gs_model_dir = "/tmp/models"

# Importing this module wipes the scratch directory left by a previous run and
# makes sure the model directory exists.
if os.path.exists(gs_tmp_dir):
    shutil.rmtree(gs_tmp_dir)
os.makedirs(gs_model_dir, exist_ok=True)

# Preset parameters
gs_gender_models_url = "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/models.zip"
# NOTE(review): the two SVC URLs below are empty; init_svc_model downloads from them.
gs_svc_emb_url = ""
gs_svc_model_url = ""
gs_volume_bin_url = "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/dataset/AIMeiSheng/ebur128_tool"
+
+
class GSWorkerAttr:
    """
    Paths and parameters of one SVC request.

    Per-request files live under ``gs_tmp_dir`` and are named after the MD5
    of the vocal URL; the female/male source tracks are stored under
    ``gs_resource_cache_dir``, named after the MD5 of their own URL, so they
    can be reused by later requests.
    """

    @staticmethod
    def _url_ext(url):
        """Return the extension of the URL's path component without the dot."""
        from urllib.parse import urlparse  # local import: only needed here

        # Looking only at the path keeps query strings and host-name dots out
        # of the extension (``url.split(".")[-1]`` would include them).
        return os.path.splitext(urlparse(url).path)[1].lstrip(".")

    def __init__(self, input_data):
        """
        :param input_data: dict with the keys ``vocal_url``,
            ``female_svc_url``, ``male_svc_url``, ``st_tm``, ``ed_tm``
            (both in seconds), ``target_url`` and ``target_loudness``
        :raises KeyError: when a required key is missing
        """
        vocal_url = input_data["vocal_url"]
        female_svc_source_url = input_data["female_svc_url"]
        male_svc_source_url = input_data["male_svc_url"]

        self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest()
        self.vocal_url = vocal_url
        self.target_url = input_data["target_url"]

        ext = self._url_ext(vocal_url)
        self.vocal_path = os.path.join(gs_tmp_dir, self.distinct_id + f"_in.{ext}")
        self.target_wav_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.wav")
        self.target_wav_ad_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out_ad.wav")
        self.target_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.m4a")

        self.female_svc_source_url = female_svc_source_url
        self.male_svc_source_url = male_svc_source_url

        # Keep cached sources in the dedicated cache directory instead of
        # whatever the current working directory happens to be.
        os.makedirs(gs_resource_cache_dir, exist_ok=True)
        self.female_svc_source_path = os.path.join(
            gs_resource_cache_dir,
            hashlib.md5(female_svc_source_url.encode()).hexdigest() + "." + self._url_ext(female_svc_source_url))
        self.male_svc_source_path = os.path.join(
            gs_resource_cache_dir,
            hashlib.md5(male_svc_source_url.encode()).hexdigest() + "." + self._url_ext(male_svc_source_url))

        self.st_tm = input_data["st_tm"]  # seconds
        self.ed_tm = input_data["ed_tm"]  # seconds
        self.target_loudness = input_data["target_loudness"]

        self.tmp_dir = os.path.join(gs_tmp_dir, self.distinct_id)
        if os.path.exists(self.tmp_dir):
            shutil.rmtree(self.tmp_dir)
        os.makedirs(self.tmp_dir)

    def __del__(self):
        # __init__ may have raised before tmp_dir was assigned.
        tmp_dir = getattr(self, "tmp_dir", None)
        if tmp_dir and os.path.exists(tmp_dir):
            shutil.rmtree(tmp_dir, ignore_errors=True)
+
+
def init_gender_model():
    """
    Download the voice-classification models if needed and build the
    classifier.
    :return: ``VoiceClass`` built from the four model files
    :raises RuntimeError: when the archive cannot be downloaded or unpacked
    """
    dst_model_dir = os.path.join(gs_model_dir, "voice_classification")
    if not os.path.exists(dst_model_dir):
        dst_zip_path = os.path.join(gs_model_dir, "models.zip")
        if not download2disk(gs_gender_models_url, dst_zip_path):
            # logging.fatal only logs; stop here instead of unpacking nothing.
            logging.fatal(f"download gender_model err={gs_gender_models_url}")
            raise RuntimeError(f"download gender_model err={gs_gender_models_url}")
        cmd = f"cd {gs_model_dir}; unzip {dst_zip_path}; mv models voice_classification; rm -f {dst_zip_path}"
        os.system(cmd)
        if not os.path.exists(dst_model_dir):
            logging.fatal(f"unzip {dst_zip_path} err")
            raise RuntimeError(f"unzip {dst_zip_path} err")

    music_voice_pure_model = os.path.join(dst_model_dir, "voice_005_rec_v5.pth")
    music_voice_no_pure_model = os.path.join(dst_model_dir, "voice_10_v5.pth")
    gender_pure_model = os.path.join(dst_model_dir, "gender_8k_ratev5_v6_adam.pth")
    gender_no_pure_model = os.path.join(dst_model_dir, "gender_8k_v6_adam.pth")
    vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model)
    return vc
+
+
def init_svc_model():
    """
    Make sure the embedding and SVC checkpoints are on disk, then load them.
    :return: ``(embed_model, hubert_model)``
    :raises RuntimeError: when a checkpoint URL is unset or a download fails
    """
    emb_model_path = os.path.join(gs_model_dir, "RawNet3_weights.pt")
    if not os.path.exists(emb_model_path):
        if not gs_svc_emb_url or not download2disk(gs_svc_emb_url, emb_model_path):
            # logging.fatal only logs; loading a missing file would fail later
            # with a less helpful error.
            logging.fatal(f"download svc_emb_model err={gs_svc_emb_url}")
            raise RuntimeError(f"download svc_emb_model err={gs_svc_emb_url}")
    embed_model = get_embed_model(emb_model_path)
    hubert_model = load_hubert()

    # An empty URL has no file name; joining "" would point at the model
    # directory itself, which always exists and would skip the download.
    svc_filename = gs_svc_model_url.split("/")[-1]
    if not svc_filename:
        logging.fatal(f"download svc_model err={gs_svc_model_url}")
        raise RuntimeError(f"download svc_model err={gs_svc_model_url}")
    svc_model_path = os.path.join(gs_model_dir, svc_filename)
    if not os.path.exists(svc_model_path):
        if not download2disk(gs_svc_model_url, svc_model_path):
            logging.fatal(f"download svc_model err={gs_svc_model_url}")
            raise RuntimeError(f"download svc_model err={gs_svc_model_url}")

    # get_vc creates its model as a global inside its own module
    get_vc(svc_model_path)
    return embed_model, hubert_model
+
+
def volume_adjustment(wav_path, target_loudness, out_path):
    """
    Adjust the loudness of a wav file with the ``ebur128_tool`` binary.
    :param wav_path: input wav
    :param target_loudness: loudness passed to the tool as its second argument
    :param out_path: output wav written by the tool
    :return: None; callers check whether ``out_path`` exists
    """
    volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
    if not os.path.exists(volume_bin_path):
        if not download2disk(gs_volume_bin_url, volume_bin_path):
            logging.fatal(f"download volume_bin err={gs_volume_bin_url}")
            return
    # A freshly downloaded file has no execute permission.
    if not os.access(volume_bin_path, os.X_OK):
        os.chmod(volume_bin_path, 0o755)
    cmd = f"{volume_bin_path} {wav_path} {target_loudness} {out_path}"
    os.system(cmd)
+
+
class SVCOnline:
    """Holds the loaded models and runs gender detection plus SVC per request."""

    def __init__(self):
        st = time.time()
        self.gender_model = init_gender_model()
        self.embed_model, self.hubert_model = init_svc_model()
        logging.info(f"svc init finished, sp = {time.time() - st}")

    def gender_process(self, worker_attr):
        """
        Classify the vocal as 'female' or 'male'.
        :param worker_attr: object exposing ``vocal_path`` and ``vocal_url``
        :return: 'female' or 'male'
        """
        st = time.time()
        gender, female_rate, is_pure = self.gender_model.process(worker_attr.vocal_path)
        logging.info(
            f"{worker_attr.vocal_url}, gender={gender}, female_rate={female_rate}, is_pure={is_pure}, "
            f"gender_process sp = {time.time() - st}")
        if gender == 0:
            gender = 'female'
        elif gender == 1:
            gender = 'male'
        elif female_rate > 0.5:
            # any other label: decide from the female rate
            gender = 'female'
        else:
            gender = 'male'
        logging.info(f"{worker_attr.vocal_url}, modified gender={gender}")
        return gender

    def process(self, worker_attr):
        """
        Run SVC using the source track that matches the detected gender.
        :param worker_attr: request attributes (paths, ``st_tm``, ``ed_tm``)
        :return: the value returned by ``process_svc``
        """
        gender = self.gender_process(worker_attr)
        song_path = worker_attr.female_svc_source_path
        if gender == "male":
            song_path = worker_attr.male_svc_source_path
        # The attribute is st_tm; the previous st_ms does not exist.
        # NOTE(review): st_tm/ed_tm are documented as seconds -- confirm the
        # unit process_svc expects for 'tst'/'tnd'.
        params = {'gender': gender, 'tst': worker_attr.st_tm, "tnd": worker_attr.ed_tm, 'delay': 0, 'song_path': None}
        st = time.time()
        similar = process_svc(song_path, worker_attr.vocal_path, worker_attr.target_wav_path, params)
        logging.info(f"{worker_attr.vocal_url}, similar={similar} process svc sp = {time.time() - st}")
        return similar
diff --git a/AIMeiSheng/meisheng_svc_final.py b/AIMeiSheng/meisheng_svc_final.py
index 6359fb9..e5a6b3f 100644
--- a/AIMeiSheng/meisheng_svc_final.py
+++ b/AIMeiSheng/meisheng_svc_final.py
@@ -1,212 +1,215 @@
import os,sys
import time
import shutil
import glob
import hashlib
import librosa
import soundfile
import gradio as gr
import pandas as pd
import numpy as np
sys.path.append('./RawNet3/')
from infererence_fang_meisheng import get_embed, get_embed_model
from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main,load_hubert, get_vc, get_rmvpe
from gender_classify import load_gender_model
gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer"  # mixing executable
tmp_workspace_name = "batch_test_ocean_fi"  # workspace name
song_folder = "./data_meisheng/"  # song folder
gs_work_dir = f"./data_meisheng/{tmp_workspace_name}"  # workspace path
pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth"  # model file
cur_dir = os.path.abspath(os.path.dirname(__file__))
# Workspace path anchored at this file's directory, with a trailing slash
# because process_svc concatenates file names onto it.
abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/'
# Pitch extractor; assigned by load_model or lazily on first use.
f0_method = None
def mix(in_path, acc_path, dst_path):
    """Transcode the SVC vocal to 44.1 kHz stereo and mix it with the accompaniment.

    :param in_path: SVC vocal file
    :param acc_path: accompaniment file
    :param dst_path: mixed output file
    :return: 0 when both steps produced their output, -1 otherwise
    """
    # transcode the SVC result to 44.1 kHz / 2 channels
    svc_442_file = in_path + "_442.wav"
    st = time.time()
    cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(in_path, svc_442_file)
    os.system(cmd)
    if not os.path.exists(svc_442_file):
        return -1
    print("transcode,{},sp={}".format(in_path, time.time() - st))

    # mix
    st = time.time()
    cmd = "{} {} {} {} 1".format(gs_simple_mixer_path, svc_442_file, acc_path, dst_path)
    os.system(cmd)
    print("mixer,{},sp={}".format(in_path, time.time() - st))
    # Report the mixing step the same way as the transcode step.
    if not os.path.exists(dst_path):
        return -1
    return 0
def load_model():
    """Load the embedding, HuBERT, voice-conversion and pitch models.

    :return: ``(embed_model, hubert_model)``; the pitch extractor is stored in
        the module-level ``f0_method``
    """
    global f0_method
    # get_embed_model requires the checkpoint path; this is the location it
    # used internally before the parameter was introduced.
    embed_model = get_embed_model(
        "/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3/models/weights/model.pt"
    )
    hubert_model = load_hubert()
    get_vc(pth_model_path)
    f0_method = get_rmvpe()
    print("model preload finish!!!")
    return embed_model, hubert_model


# Models are loaded once when the module is imported.
embed_model, hubert_model = load_model()
gender_model = load_gender_model()
def _mean_voiced_pitch(samples):
    """Return the mean of the f0 values strictly between 50 and 600 (NaN if none)."""
    f0 = f0_method.infer_from_audio(samples, thred=0.03)
    f0 = f0[f0 < 600]
    return np.mean(f0[f0 > 50])


def pyin_process_single_rmvpe(input_file):
    """Estimate the mean pitch of an audio file with the RMVPE extractor.

    Files longer than 15 s are measured on their first and last 15 s only.
    If the two means differ by more than 55 the lower one is returned,
    otherwise their average.

    :param input_file: audio file, loaded at 16 kHz
    :return: mean pitch; NaN when no frame falls inside the accepted range
    """
    global f0_method
    if f0_method is None:
        f0_method = get_rmvpe()

    rate = 16000
    y, sr = librosa.load(input_file, sr=rate)
    len_s = len(y) / sr
    lim_s = 15
    if len_s <= lim_s:
        return _mean_voiced_pitch(y)

    mean_pitch1 = _mean_voiced_pitch(y[:sr * lim_s])
    mean_pitch2 = _mean_voiced_pitch(y[-sr * lim_s:])
    # One unvoiced segment must not turn the whole estimate into NaN.
    if np.isnan(mean_pitch1):
        return mean_pitch2
    if np.isnan(mean_pitch2):
        return mean_pitch1
    if abs(mean_pitch1 - mean_pitch2) > 55:
        return min(mean_pitch1, mean_pitch2)
    return (mean_pitch1 + mean_pitch2) / 2
def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras):
    """Convert ``song_wav`` towards the voice of ``target_wav``.

    :param song_wav: source vocal
    :param target_wav: recording of the target voice
    :param svc_out_path: where ``svc_main`` is told to write its result
    :param embed_npy: file that receives the target speaker embedding
    :param paras: extra options forwarded to ``svc_main``
    :return: always 0
    """
    # pitch of the target voice
    pitch_key = pyin_process_single_rmvpe(target_wav)

    # speaker embedding of the target voice, stored in embed_npy
    get_embed(target_wav, embed_npy, embed_model)

    print("svc main start...")
    svc_main(song_wav, svc_out_path, pth_model_path, embed_npy, pitch_key, hubert_model, paras)
    print("svc main finished!!")
    return 0
def process_svc(song_wav, target_wav, svc_out_path, paras):
    """Resolve the working paths and run ``meisheng_svc``.

    Relative ``target_wav``/``svc_out_path`` are reduced to their base name
    and placed under ``abs_path`` (the workspace), as before. Absolute paths
    are used unchanged so callers that keep their files elsewhere are not
    redirected into the workspace.

    :param song_wav: source vocal, passed through unchanged
    :param target_wav: recording of the target voice
    :param svc_out_path: output wav
    :param paras: extra options forwarded to ``meisheng_svc``
    :return: the value returned by ``meisheng_svc``
    """
    def _in_workspace(path):
        if os.path.isabs(path):
            return path
        return abs_path + os.path.basename(path)

    target_wav = _in_workspace(target_wav)
    svc_out_path = _in_workspace(svc_out_path)
    # embedding file next to the target wav; splitext copes with extensions
    # of any length
    embed_npy = os.path.splitext(target_wav)[0] + '.npy'
    similar = meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras)
    return similar
def get_svc(target_yinse_wav, song_name, paras):
    '''
    Run SVC for one song, then add the effect pass and mix in the accompaniment.
    :param target_yinse_wav: recording of the target timbre
    :param song_name: song name (folder below ``song_folder``/<gender>)
    :param paras: other parameters; ``paras['gender']`` selects the song folder
    :return: path of the raw SVC result
    '''
    # empty the temporary workspace but keep the directory itself
    if os.path.exists(gs_work_dir):
        # "*" skips hidden entries, like the shell glob used previously
        for entry in glob.glob(os.path.join(gs_work_dir, "*")):
            if os.path.isdir(entry) and not os.path.islink(entry):
                shutil.rmtree(entry)
            else:
                os.remove(entry)
    else:
        os.makedirs(gs_work_dir)

    gender = paras['gender']  # decides which song folder is used

    # copy the target timbre into the workspace
    f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav))
    shutil.copy(target_yinse_wav, f_dst)
    target_yinse_wav = f_dst

    # song vocal / accompaniment
    song_wav = "{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)
    inf_acc_path = "{}{}/{}/acc.wav".format(song_folder, gender, song_name)
    svc_out_path = os.path.join(gs_work_dir, "svc.wav")  # name of the svc result
    print("inputMsg:", song_wav, target_yinse_wav, svc_out_path)

    # svc process
    st = time.time()
    print("start inference...")
    similar = process_svc(song_wav, target_yinse_wav, svc_out_path, paras)
    print("svc finished!!")
    print("time cost = {}".format(time.time() - st))
    print("out path name {} ".format(svc_out_path))

    # add reverb
    print("add reverbration...")
    svc_out_path_effect = os.path.splitext(svc_out_path)[0] + '_effect.wav'
    cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}"
    print("cmd :", cmd)
    os.system(cmd)

    # merge vocal and accompaniment
    print("add acc...")
    out_path = os.path.splitext(svc_out_path_effect)[0] + '_music.wav'
    mix(svc_out_path_effect, inf_acc_path, out_path)
    print("time cost = {}".format(time.time() - st))
    print("out path name {} ".format(out_path))
    return svc_out_path
if __name__=='__main__':
    # Demo run: detect the gender of the target timbre, then convert one song.
    target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a"
    gender, female_rate, is_pure = gender_model.process(target_yinse_wav)
    print('=====================')
    print("gender:{}, female_rate:{},is_pure:{}".format(gender,female_rate,is_pure))
    if gender in (0, 1):
        gender = 'female' if gender == 0 else 'male'
    else:
        # any other label: decide from the female rate
        gender = 'female' if female_rate > 0.5 else 'male'
    print("modified gender:{} ".format(gender))
    print('=====================')

    # interface call
    song_name = "lost_stars"
    paras = {'gender': gender, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None }
    get_svc(target_yinse_wav, song_name, paras)
diff --git a/tools/ebur128_tool/CMakeLists.txt b/tools/ebur128_tool/CMakeLists.txt
new file mode 100644
index 0000000..3017d49
--- /dev/null
+++ b/tools/ebur128_tool/CMakeLists.txt
@@ -0,0 +1,19 @@
cmake_minimum_required(VERSION 2.8)
project(ebur128_tool)

# Static libraries built by the subdirectories are collected here.
set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)

include_directories(../ref/alimter/inc)
include_directories(../ref/waves/inc)
include_directories(../ref/ebur128/inc)

# The sources live outside this tree, so a binary directory must be given.
add_subdirectory("../ref/alimter" ${PROJECT_SOURCE_DIR}/ref/alimter)
add_subdirectory("../ref/waves" ${PROJECT_SOURCE_DIR}/ref/waves)
add_subdirectory("../ref/ebur128" ${PROJECT_SOURCE_DIR}/ref/ebur128)

add_executable(ebur128_tool ebur128_tool.cpp)

# Link against the targets rather than the archive files: target names give
# CMake the build-order dependency, so the libraries exist before the
# executable is linked (archive paths do not).
# NOTE(review): "waves" and "ebur128" are assumed to be the target names
# behind libwaves.a / libebur128.a -- confirm in their CMakeLists.
target_link_libraries(ebur128_tool
        alimiter
        waves
        ebur128)
\ No newline at end of file
diff --git a/tools/ebur128_tool/ebur128_tool.cpp b/tools/ebur128_tool/ebur128_tool.cpp
new file mode 100644
index 0000000..c3d171c
--- /dev/null
+++ b/tools/ebur128_tool/ebur128_tool.cpp
@@ -0,0 +1,107 @@
+//
+// Created by Administrator on 2024/7/8.
+//
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+
+#include "alimiter.h"
+#include "ebur128.h"
+#include "WaveFile.h"
+
+#define PROC_LEN 1024
+/**
+ * 获取增益
+ * @param nChannel
+ * @param nSampleRate
+ * @param pData
+ * @param nLength
+ * @param gain
+ * @return
+ */
+int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness)
+{
+ ebur128_state *st = NULL;
+ st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I);
+ if (NULL == st)
+ {
+ return -1;
+ }
+ int nPos = 0;
+ int nTmpLength = 0;
+ int nRet;
+ while (nPos < nLength)
+ {
+ nTmpLength = PROC_LEN;
+ if (nLength - nPos < PROC_LEN)
+ {
+ nTmpLength = nLength - nPos;
+ }
+ nRet = ebur128_add_frames_short(st, pData + nPos, nTmpLength / nChannel);
+ if (nRet != 0)
+ {
+ return -2;
+ }
+ nPos += nTmpLength;
+ }
+ gated_loudness = -1;
+ ebur128_loudness_global(st, &gated_loudness);
+ ebur128_destroy(&st);
+ return 0;
+}
+
+int main(int argc, char* argv[]) {
+ if (argc < 4)
+ {
+ printf("input error! example: ./main input_wav target_loudness dst_wav\n");
+ return -1;
+ }
+
+ std::string vocal_path = argv[1];
+ double target_loudness = atof(argv[2]);
+ std::string out_vocal_path = argv[3];
+
+ // 读取数据
+ CWaveFile vocal_wav = CWaveFile(vocal_path.c_str(), false);
+ if (!vocal_wav.GetStatus())
+ {
+ printf("%s not ok!\n", vocal_path.c_str());
+ return -2;
+ }
+ int vocal_buf_len = vocal_wav.GetChannels() * vocal_wav.GetTotalFrames();
+ float *vocal_buf = new float[vocal_buf_len];
+ short *short_vocal_buf = new short[vocal_buf_len];
+ vocal_wav.ReadFrameAsfloat(vocal_buf, vocal_wav.GetTotalFrames());
+ for(int i = 0; i < vocal_wav.GetTotalFrames() * vocal_wav.GetChannels(); i++)
+ {
+ short_vocal_buf[i] = float(vocal_buf[i]) * 32767.f;
+ }
+
+ double vocal_gated_loudness = 0;
+ ebur128_whole(vocal_wav.GetChannels(), vocal_wav.GetSampleRate(), short_vocal_buf,
+ vocal_wav.GetTotalFrames() * vocal_wav.GetChannels(), vocal_gated_loudness);
+ float db = (target_loudness - vocal_gated_loudness) / 20.f;
+ float ebur128_rate = pow(10, db);
+
+ printf("vocal_gated_loudness = %f, db = %f, gain = %f\n", vocal_gated_loudness, db, ebur128_rate);
+ SUPERSOUND::Alimiter limiter;
+ limiter.SetParam(vocal_wav.GetSampleRate(), vocal_wav.GetChannels());
+ for (int i = 0; i < vocal_buf_len; i++)
+ {
+ float out = vocal_buf[i] * ebur128_rate;
+ limiter.Filter(&out, &out, 1);
+ vocal_buf[i] = out;
+ }
+
+ CWaveFile out_wav = CWaveFile(out_vocal_path.c_str(), true);
+ out_wav.SetChannels(vocal_wav.GetChannels());
+ out_wav.SetSampleRate(vocal_wav.GetSampleRate());
+ out_wav.SetSampleFormat(SF_IEEE_FLOAT);
+ out_wav.SetupDone();
+ out_wav.WriteFrame(vocal_buf, vocal_wav.GetTotalFrames());
+
+ delete[] vocal_buf;
+ delete[] short_vocal_buf;
+ return 0;
+}
\ No newline at end of file
diff --git a/tools/ref/alimter/CMakeLists.txt b/tools/ref/alimter/CMakeLists.txt
new file mode 100644
index 0000000..9748c4d
--- /dev/null
+++ b/tools/ref/alimter/CMakeLists.txt
@@ -0,0 +1,3 @@
# Static library "alimiter" built from every source file found in src/.
include_directories(inc)
aux_source_directory(src DIR_ALIMTER_SRCS)
add_library(alimiter ${DIR_ALIMTER_SRCS})
\ No newline at end of file
diff --git a/tools/ref/alimter/inc/alimiter.h b/tools/ref/alimter/inc/alimiter.h
new file mode 100644
index 0000000..8022d39
--- /dev/null
+++ b/tools/ref/alimter/inc/alimiter.h
@@ -0,0 +1,99 @@
+
+/***************************************************************************
+* email : yijiangyang@tencent.com *
+***************************************************************************/
+
+//+ ----------------------------------------------------+
+//+ _oo0oo_ +
+//+ o8888888o +
+//+ 88" . "88 +
+//+ (| -_- |) +
+//+ 0\ = /0 +
+//+ ___/`---'\___ +
+//+ .' \\| |// '. +
+//+ / \\||| : |||// \ +
+//+ / _||||| -:- |||||- \ +
+//+ | | \\\ - /// | | +
+//+ | \_| ''\---/'' |_/ | +
+//+ \ .-\__ '-' ___/-. / +
+//+ ___'. .' /--.--\ `. .'___ +
+//+ ."" '< `.___\_<|>_/___.' >' "". +
+//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | +
+//+ \ \ `_. \_ __\ /__ _/ .-` / / +
+//+ =====`-.____`.___ \_____/___.-`___.-'===== +
+//+ `=---=' +
+//+ +
+//+ +
+//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +
+//+ +
+//+ 佛祖保佑 永无BUG +
+//+ ----------------------------------------------------+
+
+//实现 FFMPEG 中的限制器,这个压限器对频谱友好,但是压得比较厉害
+
+#ifndef __ALIMITER_H__
+#define __ALIMITER_H__
+
+#include <stdint.h>
+#define ERROR_SUPERSOUND_SUCCESS 0
+#define ERROR_SUPERSOUND_PARAM -1
+#define ERROR_SUPERSOUND_MEMORY -2
// Working state of the limiter. The header comment above says this limiter
// follows the FFmpeg implementation; per-field notes are hedged where this
// header alone does not establish the meaning.
typedef struct AudioLimiterContext
{
    float limit;
    float attack;   // presumably attack time -- TODO confirm unit
    float release;  // presumably release time -- TODO confirm unit
    float att;
    float level_in;
    float level_out;
    int32_t auto_release;
    int32_t auto_level;
    float asc;
    int32_t asc_c;
    int32_t asc_pos;
    float asc_coeff;

    // Heap buffers owned by the limiter object.
    float *buffer;
    int32_t buffer_size;
    int32_t buffer_max_size;
    int32_t pos;
    int32_t *nextpos;
    float *nextdelta;

    float delta;
    int32_t nextiter;
    int32_t nextlen;
    int32_t asc_changed;
}AudioLimiterContext;
+
namespace SUPERSOUND
{


// Audio limiter operating on float samples.
class Alimiter
{
public:
    Alimiter();
    ~Alimiter();

public:
    // Reset the processing state.
    void Flush();
    // Returns buffer_size divided by the channel count.
    int32_t GetLatecy();
    // Configure sample rate (fs) and channel count.
    int32_t SetParam(int32_t fs, int32_t channels);
    // Process num items from input into output
    // (NOTE(review): confirm whether num counts frames or samples).
    void Filter(float * input, float * output, int32_t num);

private:
    void Uninit();
    int32_t config_input();
    float get_rdelta(AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc);

private:
    AudioLimiterContext m_alimiterCtx;
    int m_nChannels;
    int m_nFs;
};


}
+
+#endif /* __ALIMITER_H__ */
\ No newline at end of file
diff --git a/tools/ref/alimter/src/alimiter.cpp b/tools/ref/alimter/src/alimiter.cpp
new file mode 100644
index 0000000..abbd622
--- /dev/null
+++ b/tools/ref/alimter/src/alimiter.cpp
@@ -0,0 +1,306 @@
+
+#include "alimiter.h"
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+#include <new>
+
+// Larger / smaller of two values. Arguments may be evaluated more than once,
+// so do not pass expressions with side effects.
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+// Median of three values; MIDDLE(v, lo, hi) clamps v to [lo, hi] when lo <= hi.
+#define MIDDLE(x, y, z) ((x)<(y)?((y)<(z)?(y):(x)<(z)?(z):(x)):((y)>(z)?(y):(x)>(z)?(z):(x)))
+// delete[]s an array pointer if it is non-null and resets it to NULL.
+// Expands to a bare { } block, so it is not safe as the body of an
+// unbraced if/else.
+#define SAFE_DELETE_PTR(ptr) \
+{ \
+ if(ptr) \
+ { \
+ delete [] ptr; \
+ ptr = NULL; \
+ } \
+}
+
+namespace SUPERSOUND
+{
+
+
+// Builds an unconfigured limiter: no buffers, no sample rate / channel
+// count, default limiter parameters.
+Alimiter::Alimiter()
+    : m_nChannels(0)
+    , m_nFs(0)
+{
+    // Zero the context first so Flush() sees null buffer pointers and a
+    // zero capacity before it installs the defaults.
+    memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx));
+    Flush();
+}
+
+// Releases the heap arrays owned by the limiter context.
+Alimiter::~Alimiter()
+{
+ Uninit();
+}
+
+// Resets the limiter: clears the delay line and the pending-peak lists,
+// then restores the default parameters. The allocated arrays, their
+// capacity and the configured look-ahead length are preserved.
+void Alimiter::Flush()
+{
+    // Save the allocation bookkeeping; the blanket memset below wipes it.
+    float * buffer = m_alimiterCtx.buffer;
+    float * nextdelta = m_alimiterCtx.nextdelta;
+    int32_t * nextpos = m_alimiterCtx.nextpos;
+    int32_t buffer_max_size = m_alimiterCtx.buffer_max_size;
+    int32_t buffer_size = m_alimiterCtx.buffer_size;
+
+    // Size every clear from the array's own element type. The original
+    // used sizeof(float) for the int32_t array, which only happened to
+    // match in size.
+    if(buffer)
+        memset(buffer, 0, sizeof(buffer[0]) * buffer_max_size);
+    if(nextdelta)
+        memset(nextdelta, 0, sizeof(nextdelta[0]) * buffer_max_size);
+    if(nextpos)
+        memset(nextpos, -1, sizeof(nextpos[0]) * buffer_max_size); // all bytes 0xFF == -1 per entry
+
+    memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx));
+
+    // Defaults; attack and release are entered in milliseconds.
+    m_alimiterCtx.level_in = 1;
+    m_alimiterCtx.level_out = 32000 / 32768.0;
+    m_alimiterCtx.limit = 1;
+    m_alimiterCtx.attack = 5;
+    m_alimiterCtx.release = 50;
+    m_alimiterCtx.auto_release = 0;
+    m_alimiterCtx.asc_coeff = 0.5;
+    m_alimiterCtx.auto_level = 1;
+
+    // Derived values: times converted to seconds, unity gain, no ASC
+    // position, and the ASC coefficient mapped to its internal form.
+    m_alimiterCtx.attack /= 1000;
+    m_alimiterCtx.release /= 1000;
+    m_alimiterCtx.att = 1;
+    m_alimiterCtx.asc_pos = -1;
+    m_alimiterCtx.asc_coeff = pow(0.5f, m_alimiterCtx.asc_coeff - 0.5f) * 2 * -1;
+
+    // Put the allocation bookkeeping back.
+    m_alimiterCtx.buffer = buffer;
+    m_alimiterCtx.nextdelta = nextdelta;
+    m_alimiterCtx.nextpos = nextpos;
+    m_alimiterCtx.buffer_max_size = buffer_max_size;
+    m_alimiterCtx.buffer_size = buffer_size;
+}
+
+// Returns the look-ahead delay in frames (samples per channel).
+// Returns 0 while the limiter has not been configured, instead of
+// dividing by a zero channel count.
+int32_t Alimiter::GetLatecy()
+{
+    if(m_nChannels <= 0)
+        return 0;
+
+    return m_alimiterCtx.buffer_size / m_nChannels;
+}
+
+// Configures the limiter for a sample rate and channel count.
+//
+// fs        sample rate in Hz, must be > 0
+// channels  number of interleaved channels, must be > 0
+//
+// Returns ERROR_SUPERSOUND_SUCCESS, ERROR_SUPERSOUND_PARAM for unusable
+// arguments (the previous configuration is left untouched), or the error
+// reported by config_input(). After a config_input() failure the limiter
+// is marked unconfigured, so Filter() must not be called until a later
+// SetParam() succeeds.
+int32_t Alimiter::SetParam( int32_t fs, int32_t channels )
+{
+    // A zero channel count would reach a modulo-by-zero in config_input().
+    if((fs <= 0) || (channels <= 0))
+        return ERROR_SUPERSOUND_PARAM;
+
+    if((fs == m_nFs) && (channels == m_nChannels))
+        return ERROR_SUPERSOUND_SUCCESS;
+
+    m_nChannels = channels;
+    m_nFs = fs;
+
+    int32_t ret = config_input();
+    if(ret != ERROR_SUPERSOUND_SUCCESS)
+    {
+        // Forget the failed configuration; otherwise a retry with the
+        // same arguments would hit the early-out above and report
+        // success without any usable buffers.
+        m_nChannels = 0;
+        m_nFs = 0;
+    }
+
+    return ret;
+}
+
+// Limits num interleaved samples from input into output.
+//
+// Every frame is written into a ring buffer and the frame that is output is
+// the one stored buffer_size samples earlier, so the gain can start falling
+// before an over-limit peak reaches the output.
+//
+// NOTE(review): nothing here checks that the limiter was configured. A zero
+// m_nChannels or buffer_size divides by zero, and null buffers are
+// dereferenced - presumably callers must have a successful SetParam() first.
+void Alimiter::Filter( float * input, float * output, int32_t num )
+{
+ // num counts samples over all channels; convert it to frames.
+ num = num / m_nChannels;
+ int channels = m_nChannels;
+ int buffer_size = m_alimiterCtx.buffer_size;
+ float * buffer = m_alimiterCtx.buffer;
+ float release = m_alimiterCtx.release;
+ float limit = m_alimiterCtx.limit;
+ float * nextdelta = m_alimiterCtx.nextdelta;
+ // With auto_level the output is scaled by 1 / limit after clamping.
+ float level = m_alimiterCtx.auto_level ? 1 / limit : 1;
+ float level_out = m_alimiterCtx.level_out;
+ float level_in = m_alimiterCtx.level_in;
+ int *nextpos = m_alimiterCtx.nextpos;
+
+ float * buf;
+ float * dst;
+ float * src;
+ int n, c, i;
+ AudioLimiterContext * s = &m_alimiterCtx;
+
+ dst = output;
+ src = input;
+
+ for (n = 0; n < num; n++) {
+ float peak = 0;
+
+ // Store the incoming frame at the write position and find its peak.
+ for (c = 0; c < channels; c++) {
+ float sample = src[c] * level_in;
+
+ buffer[s->pos + c] = sample;
+ peak = MAX(peak, fabs(sample));
+ }
+
+ if (s->auto_release && peak > limit) {
+ s->asc += peak;
+ s->asc_c++;
+ }
+
+ if (peak > limit) {
+ // patt: gain that brings this peak down to the limit.
+ float patt = MIN(limit / peak, 1);
+ float rdelta = get_rdelta(s, release, m_nFs,
+ peak, limit, patt, 0);
+ // Slope needed to reach that gain within the look-ahead window.
+ float delta = (limit / peak - s->att) / buffer_size * channels;
+ int found = 0;
+
+ if (delta < s->delta) {
+ // Steeper than the current slope: this peak replaces all pending ones.
+ s->delta = delta;
+ nextpos[0] = s->pos;
+ nextpos[1] = -1;
+ nextdelta[0] = rdelta;
+ s->nextlen = 1;
+ s->nextiter= 0;
+ } else {
+ // Otherwise look for a pending peak whose stored slope must be lowered
+ // to reach the new peak, and queue the new peak right after it.
+ for (i = s->nextiter; i < s->nextiter + s->nextlen; i++) {
+ int j = i % buffer_size;
+ float ppeak, pdelta;
+
+ // NOTE(review): only the two samples at nextpos[j] and nextpos[j] + 1
+ // are inspected, whatever the channel count is - confirm intent.
+ ppeak = fabs(buffer[nextpos[j]]) > fabs(buffer[nextpos[j] + 1]) ?
+ fabs(buffer[nextpos[j]]) : fabs(buffer[nextpos[j] + 1]);
+ pdelta = (limit / peak - limit / ppeak) / (((buffer_size - nextpos[j] + s->pos) % buffer_size) / channels);
+ if (pdelta < nextdelta[j]) {
+ nextdelta[j] = pdelta;
+ found = 1;
+ break;
+ }
+ }
+ if (found) {
+ s->nextlen = i - s->nextiter + 1;
+ nextpos[(s->nextiter + s->nextlen) % buffer_size] = s->pos;
+ nextdelta[(s->nextiter + s->nextlen) % buffer_size] = rdelta;
+ nextpos[(s->nextiter + s->nextlen + 1) % buffer_size] = -1;
+ s->nextlen++;
+ }
+ }
+ }
+
+ // The frame after the write position is the oldest one in the ring;
+ // it is the frame emitted for this iteration.
+ buf = &s->buffer[(s->pos + channels) % buffer_size];
+ peak = 0;
+ for (c = 0; c < channels; c++) {
+ float sample = buf[c];
+
+ peak = MAX(peak, fabs(sample));
+ }
+
+ if (s->pos == s->asc_pos && !s->asc_changed)
+ s->asc_pos = -1;
+
+ if (s->auto_release && s->asc_pos == -1 && peak > limit) {
+ s->asc -= peak;
+ s->asc_c--;
+ }
+
+ // Advance the gain along the current slope and apply it.
+ s->att += s->delta;
+
+ for (c = 0; c < channels; c++)
+ dst[c] = buf[c] * s->att;
+
+ // The emitted frame is the oldest pending peak: switch to its release
+ // slope and drop it from the pending list.
+ if ((s->pos + channels) % buffer_size == nextpos[s->nextiter]) {
+ if (s->auto_release) {
+ s->delta = get_rdelta(s, release, m_nFs,
+ peak, limit, s->att, 1);
+ if (s->nextlen > 1) {
+ int pnextpos = nextpos[(s->nextiter + 1) % buffer_size];
+ float ppeak = fabs(buffer[pnextpos]) > fabs(buffer[pnextpos + 1]) ?
+ fabs(buffer[pnextpos]) :
+ fabs(buffer[pnextpos + 1]);
+ float pdelta = (limit / ppeak - s->att) /
+ (((buffer_size + pnextpos -
+ ((s->pos + channels) % buffer_size)) %
+ buffer_size) / channels);
+ if (pdelta < s->delta)
+ s->delta = pdelta;
+ }
+ } else {
+ s->delta = nextdelta[s->nextiter];
+ s->att = limit / peak;
+ }
+
+ s->nextlen -= 1;
+ nextpos[s->nextiter] = -1;
+ s->nextiter = (s->nextiter + 1) % buffer_size;
+ }
+
+ // Gain never exceeds unity; reaching it ends all pending activity.
+ if (s->att > 1.) {
+ s->att = 1.;
+ s->delta = 0.;
+ s->nextiter = 0;
+ s->nextlen = 0;
+ nextpos[0] = -1;
+ }
+
+ // Gain must stay positive; restart the release from a tiny value.
+ if (s->att <= 0.) {
+ s->att = 0.000001f;
+ s->delta = (1 - s->att) / (m_nFs * release);
+ }
+
+ // Snap values that are within 1e-6 of their resting point.
+ if (s->att != 1 && (1 - s->att) < 0.000001f)
+ s->att = 1;
+
+ if (s->delta != 0 && fabs(s->delta) < 0.000001f)
+ s->delta = 0;
+
+ // Hard clamp to the limit, then apply the output gains.
+ for (c = 0; c < channels; c++)
+ dst[c] = MIDDLE(dst[c], -limit, limit) * level * level_out;
+
+ s->pos = (s->pos + channels) % buffer_size;
+ src += channels;
+ dst += channels;
+ }
+}
+
+// Frees the three heap arrays owned by the limiter context and nulls the
+// pointers so a repeated call is harmless.
+void Alimiter::Uninit()
+{
+    // delete[] on a null pointer is a no-op, so no null checks are needed.
+    delete [] m_alimiterCtx.buffer;
+    m_alimiterCtx.buffer = NULL;
+
+    delete [] m_alimiterCtx.nextdelta;
+    m_alimiterCtx.nextdelta = NULL;
+
+    delete [] m_alimiterCtx.nextpos;
+    m_alimiterCtx.nextpos = NULL;
+}
+
+// Sizes the limiter for the current m_nFs / m_nChannels.
+//
+// The arrays are grown to hold 100 ms of interleaved audio plus one frame
+// and are never shrunk. The active look-ahead length (buffer_size) is
+// derived from the attack time and rounded down to a whole frame.
+//
+// Returns ERROR_SUPERSOUND_SUCCESS, ERROR_SUPERSOUND_PARAM when the sample
+// rate / channel count cannot give a usable buffer, or
+// ERROR_SUPERSOUND_MEMORY when an allocation fails. After a memory failure
+// all three arrays are released and both sizes are reset to 0.
+int32_t Alimiter::config_input()
+{
+    // A zero channel count would make the modulo below divide by zero.
+    if((m_nFs <= 0) || (m_nChannels <= 0))
+        return ERROR_SUPERSOUND_PARAM;
+
+    // Computed in double so fs * channels * 100 cannot overflow int.
+    double wanted = double(m_nFs) * m_nChannels * 100 / 1000. + m_nChannels;
+    if(wanted > 2147483647.0)
+        return ERROR_SUPERSOUND_PARAM;
+    int obuffer_size = int(wanted);
+    if(obuffer_size < m_nChannels)
+        return ERROR_SUPERSOUND_PARAM;
+
+    // Look-ahead length in samples, rounded down to a whole frame.
+    int look_ahead = int(m_nFs * m_alimiterCtx.attack * m_nChannels);
+    look_ahead -= look_ahead % m_nChannels;
+    // Filter() indexes modulo this length, so it needs at least one frame
+    // and must fit inside the arrays.
+    if((look_ahead < m_nChannels) || (look_ahead > obuffer_size))
+        return ERROR_SUPERSOUND_PARAM;
+
+    if(obuffer_size > m_alimiterCtx.buffer_max_size)
+    {
+        // Release everything and zero the sizes first, so a failure below
+        // cannot leave a stale capacity paired with null pointers.
+        SAFE_DELETE_PTR(m_alimiterCtx.buffer);
+        SAFE_DELETE_PTR(m_alimiterCtx.nextdelta);
+        SAFE_DELETE_PTR(m_alimiterCtx.nextpos);
+        m_alimiterCtx.buffer_max_size = 0;
+        m_alimiterCtx.buffer_size = 0;
+
+        m_alimiterCtx.buffer = new(std::nothrow) float[obuffer_size];
+        m_alimiterCtx.nextdelta = new(std::nothrow) float[obuffer_size];
+        m_alimiterCtx.nextpos = new(std::nothrow) int32_t[obuffer_size];
+        if((m_alimiterCtx.buffer == NULL) ||
+           (m_alimiterCtx.nextdelta == NULL) ||
+           (m_alimiterCtx.nextpos == NULL))
+        {
+            SAFE_DELETE_PTR(m_alimiterCtx.buffer);
+            SAFE_DELETE_PTR(m_alimiterCtx.nextdelta);
+            SAFE_DELETE_PTR(m_alimiterCtx.nextpos);
+            return ERROR_SUPERSOUND_MEMORY;
+        }
+
+        memset(m_alimiterCtx.buffer, 0, sizeof(float) * obuffer_size);
+        memset(m_alimiterCtx.nextdelta, 0, sizeof(float) * obuffer_size);
+        // All bytes 0xFF gives -1 in every entry, i.e. "no pending peak".
+        memset(m_alimiterCtx.nextpos, -1, obuffer_size*sizeof(int32_t));
+
+        m_alimiterCtx.buffer_max_size = obuffer_size;
+    }
+
+    m_alimiterCtx.buffer_size = look_ahead;
+
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+float Alimiter::get_rdelta( AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc )
+{
+ float rdelta = (1 - patt) / (sample_rate * release);
+
+ if (asc && s->auto_release && s->asc_c > 0) {
+ float a_att = limit / (s->asc_coeff * s->asc) * (float)s->asc_c;
+
+ if (a_att > patt) {
+ float delta = MAX((a_att - patt) / (sample_rate * release), rdelta / 10);
+
+ if (delta < rdelta)
+ rdelta = delta;
+ }
+ }
+
+ return rdelta;
+}
+
+
+}
\ No newline at end of file
diff --git a/tools/ref/ebur128/CMakeLists.txt b/tools/ref/ebur128/CMakeLists.txt
new file mode 100644
index 0000000..18a5a86
--- /dev/null
+++ b/tools/ref/ebur128/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_EBUR128_SRCS)
+add_library(ebur128 ${DIR_EBUR128_SRCS})
\ No newline at end of file
diff --git a/tools/ref/ebur128/inc/ebur128.h b/tools/ref/ebur128/inc/ebur128.h
new file mode 100644
index 0000000..faa66c6
--- /dev/null
+++ b/tools/ref/ebur128/inc/ebur128.h
@@ -0,0 +1,425 @@
+/* See COPYING file for copyright and license details. */
+
+#ifndef EBUR128_H_
+#define EBUR128_H_
+
+/** \file ebur128.h
+ * \brief libebur128 - a library for loudness measurement according to
+ * the EBU R128 standard.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define EBUR128_VERSION_MAJOR 1
+#define EBUR128_VERSION_MINOR 2
+#define EBUR128_VERSION_PATCH 4
+
+#include <stddef.h> /* for size_t */
+
+/** \enum channel
+ * Use these values when setting the channel map with ebur128_set_channel().
+ * See definitions in ITU R-REC-BS 1770-4
+ */
+enum channel {
+ EBUR128_UNUSED = 0, /**< unused channel (for example LFE channel) */
+ EBUR128_LEFT = 1,
+ EBUR128_Mp030 = 1, /**< itu M+030 */
+ EBUR128_RIGHT = 2,
+ EBUR128_Mm030 = 2, /**< itu M-030 */
+ EBUR128_CENTER = 3,
+ EBUR128_Mp000 = 3, /**< itu M+000 */
+ EBUR128_LEFT_SURROUND = 4,
+ EBUR128_Mp110 = 4, /**< itu M+110 */
+ EBUR128_RIGHT_SURROUND = 5,
+ EBUR128_Mm110 = 5, /**< itu M-110 */
+ EBUR128_DUAL_MONO, /**< a channel that is counted twice */
+ EBUR128_MpSC, /**< itu M+SC */
+ EBUR128_MmSC, /**< itu M-SC */
+ EBUR128_Mp060, /**< itu M+060 */
+ EBUR128_Mm060, /**< itu M-060 */
+ EBUR128_Mp090, /**< itu M+090 */
+ EBUR128_Mm090, /**< itu M-090 */
+ EBUR128_Mp135, /**< itu M+135 */
+ EBUR128_Mm135, /**< itu M-135 */
+ EBUR128_Mp180, /**< itu M+180 */
+ EBUR128_Up000, /**< itu U+000 */
+ EBUR128_Up030, /**< itu U+030 */
+ EBUR128_Um030, /**< itu U-030 */
+ EBUR128_Up045, /**< itu U+045 */
+ EBUR128_Um045, /**< itu U-045 */
+ EBUR128_Up090, /**< itu U+090 */
+ EBUR128_Um090, /**< itu U-090 */
+ EBUR128_Up110, /**< itu U+110 */
+ EBUR128_Um110, /**< itu U-110 */
+ EBUR128_Up135, /**< itu U+135 */
+ EBUR128_Um135, /**< itu U-135 */
+ EBUR128_Up180, /**< itu U+180 */
+ EBUR128_Tp000, /**< itu T+000 */
+ EBUR128_Bp000, /**< itu B+000 */
+ EBUR128_Bp045, /**< itu B+045 */
+ EBUR128_Bm045 /**< itu B-045 */
+};
+
+/** \enum error
+ * Error return values.
+ */
+enum error {
+ EBUR128_SUCCESS = 0,
+ EBUR128_ERROR_NOMEM,
+ EBUR128_ERROR_INVALID_MODE,
+ EBUR128_ERROR_INVALID_CHANNEL_INDEX,
+ EBUR128_ERROR_NO_CHANGE
+};
+
+/** \enum mode
+ * Use these values in ebur128_init (or'ed). Try to use the lowest possible
+ * modes that suit your needs, as performance will be better.
+ */
+enum mode {
+ /** can call ebur128_loudness_momentary */
+ EBUR128_MODE_M = (1 << 0),
+ /** can call ebur128_loudness_shortterm */
+ EBUR128_MODE_S = (1 << 1) | EBUR128_MODE_M,
+ /** can call ebur128_loudness_global_* and ebur128_relative_threshold */
+ EBUR128_MODE_I = (1 << 2) | EBUR128_MODE_M,
+ /** can call ebur128_loudness_range */
+ EBUR128_MODE_LRA = (1 << 3) | EBUR128_MODE_S,
+ /** can call ebur128_sample_peak */
+ EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | EBUR128_MODE_M,
+ /** can call ebur128_true_peak */
+ EBUR128_MODE_TRUE_PEAK = (1 << 5) | EBUR128_MODE_M
+ | EBUR128_MODE_SAMPLE_PEAK,
+ /** uses histogram algorithm to calculate loudness */
+ EBUR128_MODE_HISTOGRAM = (1 << 6)
+};
+
+/** forward declaration of ebur128_state_internal */
+struct ebur128_state_internal;
+
+/** \brief Contains information about the state of a loudness measurement.
+ *
+ * You should not need to modify this struct directly.
+ */
+typedef struct {
+ int mode; /**< The current mode. */
+ unsigned int channels; /**< The number of channels. */
+ unsigned long samplerate; /**< The sample rate. */
+ struct ebur128_state_internal* d; /**< Internal state. */
+} ebur128_state;
+
+/** \brief Get library version number. Do not pass null pointers here.
+ *
+ * @param major major version number of library
+ * @param minor minor version number of library
+ * @param patch patch version number of library
+ */
+void ebur128_get_version(int* major, int* minor, int* patch);
+
+/** \brief Initialize library state.
+ *
+ * @param channels the number of channels.
+ * @param samplerate the sample rate.
+ * @param mode see the mode enum for possible values.
+ * @return an initialized library state, or NULL on error.
+ */
+ebur128_state* ebur128_init(unsigned int channels,
+ unsigned long samplerate,
+ int mode);
+
+/** \brief Destroy library state.
+ *
+ * @param st pointer to a library state.
+ */
+void ebur128_destroy(ebur128_state** st);
+
+/** \brief Set channel type.
+ *
+ * The default is:
+ * - 0 -> EBUR128_LEFT
+ * - 1 -> EBUR128_RIGHT
+ * - 2 -> EBUR128_CENTER
+ * - 3 -> EBUR128_UNUSED
+ * - 4 -> EBUR128_LEFT_SURROUND
+ * - 5 -> EBUR128_RIGHT_SURROUND
+ *
+ * @param st library state.
+ * @param channel_number zero based channel index.
+ * @param value channel type from the "channel" enum.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_set_channel(ebur128_state* st,
+ unsigned int channel_number,
+ int value);
+
+/** \brief Change library parameters.
+ *
+ * Note that the channel map will be reset when setting a different number of
+ * channels. The current unfinished block will be lost.
+ *
+ * @param st library state.
+ * @param channels new number of channels.
+ * @param samplerate new sample rate.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be
+ * invalid and must be destroyed.
+ * - EBUR128_ERROR_NO_CHANGE if channels and sample rate were not changed.
+ */
+int ebur128_change_parameters(ebur128_state* st,
+ unsigned int channels,
+ unsigned long samplerate);
+
+/** \brief Set the maximum window duration.
+ *
+ * Set the maximum duration that will be used for ebur128_window_loudness().
+ * Note that this destroys the current content of the audio buffer.
+ *
+ * @param st library state.
+ * @param window duration of the window in ms.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be
+ * invalid and must be destroyed.
+ * - EBUR128_ERROR_NO_CHANGE if window duration not changed.
+ */
+int ebur128_set_max_window(ebur128_state* st, unsigned long window);
+
+/** \brief Set the maximum history.
+ *
+ * Set the maximum history that will be stored for loudness integration.
+ * More history provides more accurate results, but requires more resources.
+ *
+ * Applies to ebur128_loudness_range() and ebur128_loudness_global() when
+ * EBUR128_MODE_HISTOGRAM is not set.
+ *
+ * Default is ULONG_MAX (at least ~50 days).
+ * Minimum is 3000ms for EBUR128_MODE_LRA and 400ms for EBUR128_MODE_M.
+ *
+ * @param st library state.
+ * @param history duration of history in ms.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NO_CHANGE if history not changed.
+ */
+int ebur128_set_max_history(ebur128_state* st, unsigned long history);
+
+/** \brief Add frames to be processed.
+ *
+ * @param st library state.
+ * @param src array of source frames. Channels must be interleaved.
+ * @param frames number of frames. Not number of samples!
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM on memory allocation error.
+ */
+int ebur128_add_frames_short(ebur128_state* st,
+ const short* src,
+ size_t frames);
+/** \brief See \ref ebur128_add_frames_short */
+int ebur128_add_frames_int(ebur128_state* st,
+ const int* src,
+ size_t frames);
+/** \brief See \ref ebur128_add_frames_short */
+int ebur128_add_frames_float(ebur128_state* st,
+ const float* src,
+ size_t frames);
+/** \brief See \ref ebur128_add_frames_short */
+int ebur128_add_frames_double(ebur128_state* st,
+ const double* src,
+ size_t frames);
+
+/** \brief Get global integrated loudness in LUFS.
+ *
+ * @param st library state.
+ * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative
+ * infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set.
+ */
+int ebur128_loudness_global(ebur128_state* st, double* out);
+/** \brief Get global integrated loudness in LUFS across multiple instances.
+ *
+ * @param sts array of library states.
+ * @param size length of sts
+ * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative
+ * infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set.
+ */
+int ebur128_loudness_global_multiple(ebur128_state** sts,
+ size_t size,
+ double* out);
+
+/** \brief Get momentary loudness (last 400ms) in LUFS.
+ *
+ * @param st library state.
+ * @param out momentary loudness in LUFS. -HUGE_VAL if result is negative
+ * infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ */
+int ebur128_loudness_momentary(ebur128_state* st, double* out);
+/** \brief Get short-term loudness (last 3s) in LUFS.
+ *
+ * @param st library state.
+ * @param out short-term loudness in LUFS. -HUGE_VAL if result is negative
+ * infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_S" has not been set.
+ */
+int ebur128_loudness_shortterm(ebur128_state* st, double* out);
+
+/** \brief Get loudness of the specified window in LUFS.
+ *
+ * window must not be larger than the current window set in st.
+ * The current window can be changed by calling ebur128_set_max_window().
+ *
+ * @param st library state.
+ * @param window window in ms to calculate loudness.
+ * @param out loudness in LUFS. -HUGE_VAL if result is negative infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if window larger than current window in st.
+ */
+int ebur128_loudness_window(ebur128_state* st,
+ unsigned long window,
+ double* out);
+
+/** \brief Get loudness range (LRA) of programme in LU.
+ *
+ * Calculates loudness range according to EBU 3342.
+ *
+ * @param st library state.
+ * @param out loudness range (LRA) in LU. Will not be changed in case of
+ * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be
+ * returned in this case.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM in case of memory allocation error.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set.
+ */
+int ebur128_loudness_range(ebur128_state* st, double* out);
+/** \brief Get loudness range (LRA) in LU across multiple instances.
+ *
+ * Calculates loudness range according to EBU 3342.
+ *
+ * @param sts array of library states.
+ * @param size length of sts
+ * @param out loudness range (LRA) in LU. Will not be changed in case of
+ * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be
+ * returned in this case.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM in case of memory allocation error.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set.
+ */
+int ebur128_loudness_range_multiple(ebur128_state** sts,
+ size_t size,
+ double* out);
+
+/** \brief Get maximum sample peak from all frames that have been processed.
+ *
+ * The equation to convert to dBFS is: 20 * log10(out)
+ *
+ * @param st library state
+ * @param channel_number channel to analyse
+ * @param out maximum sample peak in float format (1.0 is 0 dBFS)
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not
+ * been set.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_sample_peak(ebur128_state* st,
+ unsigned int channel_number,
+ double* out);
+
+/** \brief Get maximum sample peak from the last call to add_frames().
+ *
+ * The equation to convert to dBFS is: 20 * log10(out)
+ *
+ * @param st library state
+ * @param channel_number channel to analyse
+ * @param out maximum sample peak in float format (1.0 is 0 dBFS)
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not
+ * been set.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_prev_sample_peak(ebur128_state* st,
+ unsigned int channel_number,
+ double* out);
+
+/** \brief Get maximum true peak from all frames that have been processed.
+ *
+ * Uses an implementation defined algorithm to calculate the true peak. Do not
+ * try to compare resulting values across different versions of the library,
+ * as the algorithm may change.
+ *
+ * The current implementation uses a custom polyphase FIR interpolator to
+ * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for
+ * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz.
+ *
+ * The equation to convert to dBTP is: 20 * log10(out)
+ *
+ * @param st library state
+ * @param channel_number channel to analyse
+ * @param out maximum true peak in float format (1.0 is 0 dBTP)
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not
+ * been set.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_true_peak(ebur128_state* st,
+ unsigned int channel_number,
+ double* out);
+
+/** \brief Get maximum true peak from the last call to add_frames().
+ *
+ * Uses an implementation defined algorithm to calculate the true peak. Do not
+ * try to compare resulting values across different versions of the library,
+ * as the algorithm may change.
+ *
+ * The current implementation uses a custom polyphase FIR interpolator to
+ * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for
+ * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz.
+ *
+ * The equation to convert to dBTP is: 20 * log10(out)
+ *
+ * @param st library state
+ * @param channel_number channel to analyse
+ * @param out maximum true peak in float format (1.0 is 0 dBTP)
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not
+ * been set.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_prev_true_peak(ebur128_state* st,
+ unsigned int channel_number,
+ double* out);
+
+/** \brief Get relative threshold in LUFS.
+ *
+ * @param st library state
+ * @param out relative threshold in LUFS.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not
+ * been set.
+ */
+int ebur128_relative_threshold(ebur128_state* st, double* out);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* EBUR128_H_ */
diff --git a/tools/ref/ebur128/src/ebur128.c b/tools/ref/ebur128/src/ebur128.c
new file mode 100644
index 0000000..6c10f1e
--- /dev/null
+++ b/tools/ref/ebur128/src/ebur128.c
@@ -0,0 +1,1333 @@
+/* See COPYING file for copyright and license details. */
+
+#include "ebur128.h"
+
+#include <float.h>
+#include <limits.h>
+#include <math.h> /* You may have to define _USE_MATH_DEFINES if you use MSVC */
+#include <stdio.h>
+#include <stdlib.h>
+
+/* This can be replaced by any BSD-like queue implementation. */
+#include <sys/queue.h>
+
+#define CHECK_ERROR(condition, errorcode, goto_point) \
+ if ((condition)) { \
+ errcode = (errorcode); \
+ goto goto_point; \
+ }
+
+STAILQ_HEAD(ebur128_double_queue, ebur128_dq_entry);
+struct ebur128_dq_entry {
+ double z;
+ STAILQ_ENTRY(ebur128_dq_entry) entries;
+};
+
+#define ALMOST_ZERO 0.000001
+
+typedef struct { /* Data structure for polyphase FIR interpolator */
+ unsigned int factor; /* Interpolation factor of the interpolator */
+ unsigned int taps; /* Taps (prefer odd to increase zero coeffs) */
+ unsigned int channels; /* Number of channels */
+ unsigned int delay; /* Size of delay buffer */
+ struct {
+ unsigned int count; /* Number of coefficients in this subfilter */
+ unsigned int* index; /* Delay index of corresponding filter coeff */
+ double* coeff; /* List of subfilter coefficients */
+ }* filter; /* List of subfilters (one for each factor) */
+ float** z; /* List of delay buffers (one for each channel) */
+ unsigned int zi; /* Current delay buffer index */
+} interpolator;
+
+struct ebur128_state_internal {
+ /** Filtered audio data (used as ring buffer). */
+ double* audio_data;
+ /** Size of audio_data array. */
+ size_t audio_data_frames;
+ /** Current index for audio_data. */
+ size_t audio_data_index;
+ /** How many frames are needed for a gating block. Will correspond to 400ms
+ * of audio at initialization, and 100ms after the first block (75% overlap
+ * as specified in the 2011 revision of BS1770). */
+ unsigned long needed_frames;
+ /** The channel map. Has as many elements as there are channels. */
+ int* channel_map;
+ /** How many samples fit in 100ms (rounded). */
+ unsigned long samples_in_100ms;
+ /** BS.1770 filter coefficients (nominator). */
+ double b[5];
+ /** BS.1770 filter coefficients (denominator). */
+ double a[5];
+ /** BS.1770 filter state. */
+ double v[5][5];
+ /** Linked list of block energies. */
+ struct ebur128_double_queue block_list;
+ unsigned long block_list_max;
+ unsigned long block_list_size;
+ /** Linked list of 3s-block energies, used to calculate LRA. */
+ struct ebur128_double_queue short_term_block_list;
+ unsigned long st_block_list_max;
+ unsigned long st_block_list_size;
+ int use_histogram;
+ unsigned long *block_energy_histogram;
+ unsigned long *short_term_block_energy_histogram;
+ /** Keeps track of when a new short term block is needed. */
+ size_t short_term_frame_counter;
+ /** Maximum sample peak, one per channel */
+ double* sample_peak;
+ double* prev_sample_peak;
+ /** Maximum true peak, one per channel */
+ double* true_peak;
+ double* prev_true_peak;
+ interpolator* interp;
+ float* resampler_buffer_input;
+ size_t resampler_buffer_input_frames;
+ float* resampler_buffer_output;
+ size_t resampler_buffer_output_frames;
+ /** The maximum window duration in ms. */
+ unsigned long window;
+ unsigned long history;
+};
+
+static double relative_gate = -10.0;
+
+/* Those will be calculated when initializing the library */
+static double relative_gate_factor;
+static double minus_twenty_decibels;
+static double histogram_energies[1000];
+static double histogram_energy_boundaries[1001];
+
+/* Create a polyphase FIR interpolator.
+ *
+ * taps     total number of FIR taps (windowed sinc)
+ * factor   interpolation factor; one subfilter is built per phase
+ * channels number of interleaved channels, one delay buffer each
+ *
+ * Returns the new interpolator, or NULL if an argument is zero or any
+ * allocation fails. Nothing is leaked on failure.
+ */
+static interpolator* interp_create(unsigned int taps, unsigned int factor, unsigned int channels) {
+  interpolator* interp;
+  unsigned int j = 0;
+
+  /* factor == 0 would divide by zero below; zero taps or channels would
+   * request zero-sized allocations. */
+  if (taps == 0 || factor == 0 || channels == 0) {
+    return NULL;
+  }
+
+  interp = calloc(1, sizeof(interpolator));
+  if (!interp) {
+    return NULL;
+  }
+
+  interp->taps = taps;
+  interp->factor = factor;
+  interp->channels = channels;
+  /* Delay line length: taps per subfilter, rounded up. */
+  interp->delay = (interp->taps + interp->factor - 1) / interp->factor;
+
+  /* Initialize the filter memory
+   * One subfilter per interpolation factor. */
+  interp->filter = calloc(interp->factor, sizeof(*interp->filter));
+  if (!interp->filter) {
+    goto fail;
+  }
+  for (j = 0; j < interp->factor; j++) {
+    interp->filter[j].index = calloc(interp->delay, sizeof(unsigned int));
+    interp->filter[j].coeff = calloc(interp->delay, sizeof(double));
+    if (!interp->filter[j].index || !interp->filter[j].coeff) {
+      goto fail;
+    }
+  }
+  /* One delay buffer per channel. */
+  interp->z = calloc(interp->channels, sizeof(float*));
+  if (!interp->z) {
+    goto fail;
+  }
+  for (j = 0; j < interp->channels; j++) {
+    interp->z[j] = calloc(interp->delay, sizeof(float));
+    if (!interp->z[j]) {
+      goto fail;
+    }
+  }
+
+  /* Calculate the filter coefficients */
+  for (j = 0; j < interp->taps; j++) {
+    /* Calculate sinc */
+    double m = (double)j - (double)(interp->taps - 1) / 2.0;
+    double c = 1.0;
+    if (fabs(m) > ALMOST_ZERO) {
+      c = sin(m * M_PI / interp->factor) / (m * M_PI / interp->factor);
+    }
+    /* Apply Hanning window */
+    c *= 0.5 * (1 - cos(2 * M_PI * j / (interp->taps - 1)));
+
+    if (fabs(c) > ALMOST_ZERO) { /* Ignore any zero coeffs. */
+      /* Put the coefficient into the correct subfilter */
+      unsigned int f = j % interp->factor;
+      unsigned int t = interp->filter[f].count++;
+      interp->filter[f].coeff[t] = c;
+      interp->filter[f].index[t] = j / interp->factor;
+    }
+  }
+  return interp;
+
+fail:
+  /* calloc zero-filled every table, so entries not yet allocated are
+   * NULL and free() ignores them. */
+  if (interp->filter) {
+    for (j = 0; j < interp->factor; j++) {
+      free(interp->filter[j].index);
+      free(interp->filter[j].coeff);
+    }
+    free(interp->filter);
+  }
+  if (interp->z) {
+    for (j = 0; j < interp->channels; j++) {
+      free(interp->z[j]);
+    }
+    free(interp->z);
+  }
+  free(interp);
+  return NULL;
+}
+
+/* Release an interpolator and every table it owns. A NULL argument is
+ * accepted and ignored. */
+static void interp_destroy(interpolator* interp) {
+  unsigned int k;
+
+  if (interp == NULL) {
+    return;
+  }
+
+  /* Per-phase coefficient and index tables, then the subfilter array. */
+  k = 0;
+  while (k < interp->factor) {
+    free(interp->filter[k].index);
+    free(interp->filter[k].coeff);
+    ++k;
+  }
+  free(interp->filter);
+
+  /* Per-channel delay lines, then the pointer table. */
+  k = 0;
+  while (k < interp->channels) {
+    free(interp->z[k]);
+    ++k;
+  }
+  free(interp->z);
+
+  free(interp);
+}
+
+/* Run interleaved input frames through the polyphase interpolator.
+ *
+ * in holds frames * channels samples. out receives frames * factor
+ * interleaved output frames. Returns the number of output frames.
+ */
+static size_t interp_process(interpolator* interp, size_t frames, float* in, float* out) {
+  const unsigned int frame_stride = interp->channels * interp->factor;
+  size_t n;
+
+  for (n = 0; n < frames; n++) {
+    unsigned int ch;
+
+    for (ch = 0; ch < interp->channels; ch++) {
+      float* dst = out + ch;
+      unsigned int phase;
+
+      /* Push the newest sample into this channel's delay line. */
+      interp->z[ch][interp->zi] = *in++;
+
+      /* One output sample per phase, interleaved by channel. */
+      for (phase = 0; phase < interp->factor; phase++) {
+        double sum = 0.0;
+        unsigned int k;
+
+        for (k = 0; k < interp->filter[phase].count; k++) {
+          /* Step back from the newest sample, wrapping in the ring. */
+          int tap = (int)interp->zi - (int)interp->filter[phase].index[k];
+          if (tap < 0) {
+            tap += interp->delay;
+          }
+          sum += interp->z[ch][tap] * interp->filter[phase].coeff[k];
+        }
+        *dst = (float)sum;
+        dst += interp->channels;
+      }
+    }
+
+    out += frame_stride;
+
+    /* Advance the shared delay-line index, wrapping at the end. */
+    interp->zi++;
+    if (interp->zi == interp->delay) {
+      interp->zi = 0;
+    }
+  }
+
+  return frames * interp->factor;
+}
+
+/* Compute the 4th-order weighting filter for st->samplerate and clear the
+ * filter state.
+ *
+ * Two biquads are designed and their polynomials multiplied together:
+ * a shelving stage (pb / pa) and a high-pass stage (rb / ra). The products
+ * are stored in st->d->b and st->d->a.
+ */
+static void ebur128_init_filter(ebur128_state* st) {
+  struct ebur128_state_internal* d = st->d;
+  int row, col;
+
+  /* Stage 1: shelving filter. */
+  double f0 = 1681.974450955533;
+  double G = 3.999843853973347;
+  double Q = 0.7071752369554196;
+
+  double K = tan(M_PI * f0 / (double) st->samplerate);
+  double Vh = pow(10.0, G / 20.0);
+  double Vb = pow(Vh, 0.4996667741545416);
+
+  double pb[3] = {0.0, 0.0, 0.0};
+  double pa[3] = {1.0, 0.0, 0.0};
+  double rb[3] = {1.0, -2.0, 1.0};
+  double ra[3] = {1.0, 0.0, 0.0};
+
+  double norm = 1.0 + K / Q + K * K;
+  pb[0] = (Vh + Vb * K / Q + K * K) / norm;
+  pb[1] = 2.0 * (K * K - Vh) / norm;
+  pb[2] = (Vh - Vb * K / Q + K * K) / norm;
+  pa[1] = 2.0 * (K * K - 1.0) / norm;
+  pa[2] = (1.0 - K / Q + K * K) / norm;
+
+  /* Stage 2: high-pass filter; its numerator rb is fixed. */
+  f0 = 38.13547087602444;
+  Q = 0.5003270373238773;
+  K = tan(M_PI * f0 / (double) st->samplerate);
+
+  norm = 1.0 + K / Q + K * K;
+  ra[1] = 2.0 * (K * K - 1.0) / norm;
+  ra[2] = (1.0 - K / Q + K * K) / norm;
+
+  /* Cascade the stages by multiplying their polynomials. */
+  d->b[0] = pb[0] * rb[0];
+  d->b[1] = pb[0] * rb[1] + pb[1] * rb[0];
+  d->b[2] = pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0];
+  d->b[3] = pb[1] * rb[2] + pb[2] * rb[1];
+  d->b[4] = pb[2] * rb[2];
+
+  d->a[0] = pa[0] * ra[0];
+  d->a[1] = pa[0] * ra[1] + pa[1] * ra[0];
+  d->a[2] = pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0];
+  d->a[3] = pa[1] * ra[2] + pa[2] * ra[1];
+  d->a[4] = pa[2] * ra[2];
+
+  /* Start from a silent filter state. */
+  for (row = 0; row < 5; ++row) {
+    for (col = 0; col < 5; ++col) {
+      d->v[row][col] = 0.0;
+    }
+  }
+}
+
+/* Allocate st->d->channel_map and fill it with the default layout for
+ * st->channels.
+ *
+ * Four channels are mapped as L, R, Ls, Rs and five as L, R, C, Ls, Rs.
+ * Any other count follows L, R, C, unused, Ls, Rs, with every further
+ * channel unused.
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM if the allocation fails.
+ */
+static int ebur128_init_channel_map(ebur128_state* st) {
+  static const int quad_layout[4] = {
+    EBUR128_LEFT, EBUR128_RIGHT,
+    EBUR128_LEFT_SURROUND, EBUR128_RIGHT_SURROUND
+  };
+  static const int five_layout[5] = {
+    EBUR128_LEFT, EBUR128_RIGHT, EBUR128_CENTER,
+    EBUR128_LEFT_SURROUND, EBUR128_RIGHT_SURROUND
+  };
+  static const int generic_layout[6] = {
+    EBUR128_LEFT, EBUR128_RIGHT, EBUR128_CENTER, EBUR128_UNUSED,
+    EBUR128_LEFT_SURROUND, EBUR128_RIGHT_SURROUND
+  };
+  const int* layout = generic_layout;
+  size_t layout_len = 6;
+  size_t ch;
+  int* map = (int*) malloc(st->channels * sizeof(int));
+
+  st->d->channel_map = map;
+  if (map == NULL) {
+    return EBUR128_ERROR_NOMEM;
+  }
+
+  if (st->channels == 4) {
+    layout = quad_layout;
+    layout_len = 4;
+  } else if (st->channels == 5) {
+    layout = five_layout;
+    layout_len = 5;
+  }
+
+  /* Channels beyond the chosen table are marked unused. */
+  for (ch = 0; ch < st->channels; ++ch) {
+    map[ch] = (ch < layout_len) ? layout[ch] : EBUR128_UNUSED;
+  }
+  return EBUR128_SUCCESS;
+}
+
+/* Set up the oversampling interpolator and its scratch buffers.
+ *
+ * Sample rates below 96 kHz are oversampled 4x, rates below 192 kHz 2x, and
+ * higher rates get no interpolator (all three pointers are set to NULL).
+ *
+ * Returns EBUR128_SUCCESS or EBUR128_ERROR_NOMEM. On failure everything
+ * allocated here is released again and the pointers are reset to NULL.
+ */
+static int ebur128_init_resampler(ebur128_state* st) {
+  int errcode = EBUR128_SUCCESS;
+
+  if (st->samplerate < 96000) {
+    st->d->interp = interp_create(49, 4, st->channels);
+    CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit)
+  } else if (st->samplerate < 192000) {
+    st->d->interp = interp_create(49, 2, st->channels);
+    CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit)
+  } else {
+    st->d->resampler_buffer_input = NULL;
+    st->d->resampler_buffer_output = NULL;
+    st->d->interp = NULL;
+    goto exit;
+  }
+
+  /* Input scratch buffer: four 100 ms blocks of frames. */
+  st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4;
+  st->d->resampler_buffer_input = malloc(st->d->resampler_buffer_input_frames *
+                                         st->channels *
+                                         sizeof(float));
+  CHECK_ERROR(!st->d->resampler_buffer_input, EBUR128_ERROR_NOMEM, free_interp)
+
+  /* Output scratch buffer: the input size times the interpolation factor. */
+  st->d->resampler_buffer_output_frames =
+                                    st->d->resampler_buffer_input_frames *
+                                    st->d->interp->factor;
+  st->d->resampler_buffer_output = malloc
+                                      (st->d->resampler_buffer_output_frames *
+                                       st->channels *
+                                       sizeof(float));
+  CHECK_ERROR(!st->d->resampler_buffer_output, EBUR128_ERROR_NOMEM, free_input)
+
+  return errcode;
+
+  /* Cleanup runs in reverse order of acquisition. free_input falls through
+   * to free_interp, so a failed output allocation releases the interpolator
+   * as well; the original label order leaked it. */
+free_input:
+  free(st->d->resampler_buffer_input);
+  st->d->resampler_buffer_input = NULL;
+free_interp:
+  interp_destroy(st->d->interp);
+  st->d->interp = NULL;
+exit:
+  return errcode;
+}
+
+/*
+ * Release the resampler scratch buffers and the interpolator, then reset the
+ * three members to NULL.
+ */
+static void ebur128_destroy_resampler(ebur128_state* st) {
+  struct ebur128_state_internal* d = st->d;
+
+  free(d->resampler_buffer_input);
+  free(d->resampler_buffer_output);
+  interp_destroy(d->interp);
+
+  d->resampler_buffer_input = NULL;
+  d->resampler_buffer_output = NULL;
+  d->interp = NULL;
+}
+
+/* Store the library version numbers in *major, *minor and *patch. */
+void ebur128_get_version(int* major, int* minor, int* patch) {
+  const int version_major = EBUR128_VERSION_MAJOR;
+  const int version_minor = EBUR128_VERSION_MINOR;
+  const int version_patch = EBUR128_VERSION_PATCH;
+
+  *major = version_major;
+  *minor = version_minor;
+  *patch = version_patch;
+}
+
+/*
+ * Allocate and initialise a loudness-measurement state.
+ *
+ * channels   - number of interleaved channels; 0 is rejected.
+ * samplerate - sample rate; values below 5 are rejected.
+ * mode       - bitmask of EBUR128_MODE_* flags. It must contain at least the
+ *              EBUR128_MODE_M bits, otherwise initialisation fails.
+ *
+ * Returns the new state, or NULL on invalid arguments or when any allocation
+ * fails; in the failure case everything allocated so far is released.
+ * A successful result is released with ebur128_destroy().
+ */
+ebur128_state* ebur128_init(unsigned int channels,
+                            unsigned long samplerate,
+                            int mode) {
+  int result;
+  int errcode;
+  ebur128_state* st;
+  unsigned int i;
+  size_t j;
+
+  if (channels == 0 || samplerate < 5) {
+    return NULL;
+  }
+
+  st = (ebur128_state*) malloc(sizeof(ebur128_state));
+  CHECK_ERROR(!st, 0, exit)
+  st->d = (struct ebur128_state_internal*)
+          malloc(sizeof(struct ebur128_state_internal));
+  CHECK_ERROR(!st->d, 0, free_state)
+  st->channels = channels;
+  errcode = ebur128_init_channel_map(st);
+  CHECK_ERROR(errcode, 0, free_internal)
+
+  /* per-channel peak trackers, all starting at 0.0 */
+  st->d->sample_peak = (double*) malloc(channels * sizeof(double));
+  CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map)
+  st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double));
+  CHECK_ERROR(!st->d->prev_sample_peak, 0, free_sample_peak)
+  st->d->true_peak = (double*) malloc(channels * sizeof(double));
+  CHECK_ERROR(!st->d->true_peak, 0, free_prev_sample_peak)
+  st->d->prev_true_peak = (double*) malloc(channels * sizeof(double));
+  CHECK_ERROR(!st->d->prev_true_peak, 0, free_true_peak)
+  for (i = 0; i < channels; ++i) {
+    st->d->sample_peak[i] = 0.0;
+    st->d->prev_sample_peak[i] = 0.0;
+    st->d->true_peak[i] = 0.0;
+    st->d->prev_true_peak[i] = 0.0;
+  }
+
+  st->d->use_histogram = mode & EBUR128_MODE_HISTOGRAM ? 1 : 0;
+  st->d->history = ULONG_MAX;
+  st->samplerate = samplerate;
+  st->d->samples_in_100ms = (st->samplerate + 5) / 10;
+  st->mode = mode;
+  if ((mode & EBUR128_MODE_S) == EBUR128_MODE_S) {
+    st->d->window = 3000;
+  } else if ((mode & EBUR128_MODE_M) == EBUR128_MODE_M) {
+    st->d->window = 400;
+  } else {
+    /* neither short-term nor momentary requested: unsupported mode */
+    goto free_prev_true_peak;
+  }
+  st->d->audio_data_frames = st->samplerate * st->d->window / 1000;
+  if (st->d->audio_data_frames % st->d->samples_in_100ms) {
+    /* round up to multiple of samples_in_100ms */
+    st->d->audio_data_frames = st->d->audio_data_frames
+                             + st->d->samples_in_100ms
+                             - (st->d->audio_data_frames % st->d->samples_in_100ms);
+  }
+  st->d->audio_data = (double*) malloc(st->d->audio_data_frames *
+                                       st->channels *
+                                       sizeof(double));
+  /* prev_true_peak is already allocated at this point, so unwinding has to
+   * start at its label; jumping to free_true_peak leaked that array. */
+  CHECK_ERROR(!st->d->audio_data, 0, free_prev_true_peak)
+  for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) {
+    st->d->audio_data[j] = 0.0;
+  }
+
+  ebur128_init_filter(st);
+
+  if (st->d->use_histogram) {
+    st->d->block_energy_histogram = malloc(1000 * sizeof(unsigned long));
+    CHECK_ERROR(!st->d->block_energy_histogram, 0, free_audio_data)
+    for (i = 0; i < 1000; ++i) {
+      st->d->block_energy_histogram[i] = 0;
+    }
+  } else {
+    st->d->block_energy_histogram = NULL;
+  }
+  if (st->d->use_histogram) {
+    st->d->short_term_block_energy_histogram = malloc(1000 * sizeof(unsigned long));
+    CHECK_ERROR(!st->d->short_term_block_energy_histogram, 0, free_block_energy_histogram)
+    for (i = 0; i < 1000; ++i) {
+      st->d->short_term_block_energy_histogram[i] = 0;
+    }
+  } else {
+    st->d->short_term_block_energy_histogram = NULL;
+  }
+  STAILQ_INIT(&st->d->block_list);
+  st->d->block_list_size = 0;
+  st->d->block_list_max = st->d->history / 100;
+  STAILQ_INIT(&st->d->short_term_block_list);
+  st->d->st_block_list_size = 0;
+  st->d->st_block_list_max = st->d->history / 3000;
+  st->d->short_term_frame_counter = 0;
+
+  result = ebur128_init_resampler(st);
+  CHECK_ERROR(result, 0, free_short_term_block_energy_histogram)
+
+  /* the first block needs 400ms of audio data */
+  st->d->needed_frames = st->d->samples_in_100ms * 4;
+  /* start at the beginning of the buffer */
+  st->d->audio_data_index = 0;
+
+  /* initialize static constants */
+  relative_gate_factor = pow(10.0, relative_gate / 10.0);
+  minus_twenty_decibels = pow(10.0, -20.0 / 10.0);
+  histogram_energy_boundaries[0] = pow(10.0, (-70.0 + 0.691) / 10.0);
+  if (st->d->use_histogram) {
+    for (i = 0; i < 1000; ++i) {
+      histogram_energies[i] = pow(10.0, ((double) i / 10.0 - 69.95 + 0.691) / 10.0);
+    }
+    for (i = 1; i < 1001; ++i) {
+      histogram_energy_boundaries[i] = pow(10.0, ((double) i / 10.0 - 70.0 + 0.691) / 10.0);
+    }
+  }
+
+  return st;
+
+  /* Error unwinding: each label releases one resource and falls through to
+   * the labels for everything acquired before it. */
+free_short_term_block_energy_histogram:
+  free(st->d->short_term_block_energy_histogram);
+free_block_energy_histogram:
+  free(st->d->block_energy_histogram);
+free_audio_data:
+  free(st->d->audio_data);
+free_prev_true_peak:
+  free(st->d->prev_true_peak);
+free_true_peak:
+  free(st->d->true_peak);
+free_prev_sample_peak:
+  free(st->d->prev_sample_peak);
+free_sample_peak:
+  free(st->d->sample_peak);
+free_channel_map:
+  free(st->d->channel_map);
+free_internal:
+  free(st->d);
+free_state:
+  free(st);
+exit:
+  return NULL;
+}
+
+/*
+ * Free a state created by ebur128_init(), including all queued block
+ * entries and the resampler, and set *st to NULL.
+ */
+void ebur128_destroy(ebur128_state** st) {
+  struct ebur128_dq_entry* entry;
+  ebur128_state* state = *st;
+  struct ebur128_state_internal* d = state->d;
+
+  free(d->block_energy_histogram);
+  free(d->short_term_block_energy_histogram);
+  free(d->audio_data);
+  free(d->channel_map);
+  free(d->sample_peak);
+  free(d->prev_sample_peak);
+  free(d->true_peak);
+  free(d->prev_true_peak);
+
+  /* drain the gating-block queue */
+  while (!STAILQ_EMPTY(&d->block_list)) {
+    entry = STAILQ_FIRST(&d->block_list);
+    STAILQ_REMOVE_HEAD(&d->block_list, entries);
+    free(entry);
+  }
+  /* drain the short-term block queue */
+  while (!STAILQ_EMPTY(&d->short_term_block_list)) {
+    entry = STAILQ_FIRST(&d->short_term_block_list);
+    STAILQ_REMOVE_HEAD(&d->short_term_block_list, entries);
+    free(entry);
+  }
+
+  ebur128_destroy_resampler(state);
+  free(d);
+  free(state);
+  *st = NULL;
+}
+
+/*
+ * Run `frames` frames from resampler_buffer_input through the interpolator
+ * and raise prev_true_peak[c] to the largest absolute output sample seen on
+ * each channel.
+ */
+static void ebur128_check_true_peak(ebur128_state* st, size_t frames) {
+  size_t frame, ch, produced;
+  size_t pos = 0;
+
+  produced = interp_process(st->d->interp, frames,
+                            st->d->resampler_buffer_input,
+                            st->d->resampler_buffer_output);
+
+  /* the output is interleaved, so a running index visits it in order */
+  for (frame = 0; frame < produced; ++frame) {
+    for (ch = 0; ch < st->channels; ++ch, ++pos) {
+      float val = st->d->resampler_buffer_output[pos];
+
+      if (val > st->d->prev_true_peak[ch]) {
+        st->d->prev_true_peak[ch] = val;
+      } else if (-val > st->d->prev_true_peak[ch]) {
+        st->d->prev_true_peak[ch] = -val;
+      }
+    }
+  }
+}
+
+/*
+ * Denormal handling for the filter state.
+ * With __SSE2_MATH__: TURN_ON_FTZ saves MXCSR in a local named `mxcsr` and
+ * sets the flush-to-zero bit, TURN_OFF_FTZ restores the saved value, and
+ * FLUSH_MANUALLY expands to nothing.
+ * Otherwise: a compile-time warning is emitted, the TURN_* macros are empty,
+ * and FLUSH_MANUALLY zeroes each of v[ci][1..4] whose magnitude is below
+ * DBL_MIN. It relies on `st` and `ci` being in scope where it is expanded.
+ */
+#ifdef __SSE2_MATH__
+#include <xmmintrin.h>
+#define TURN_ON_FTZ \
+ unsigned int mxcsr = _mm_getcsr(); \
+ _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON);
+#define TURN_OFF_FTZ _mm_setcsr(mxcsr);
+#define FLUSH_MANUALLY
+#else
+#warning "manual FTZ is being used, please enable SSE2 (-msse2 -mfpmath=sse)"
+#define TURN_ON_FTZ
+#define TURN_OFF_FTZ
+#define FLUSH_MANUALLY \
+ st->d->v[ci][4] = fabs(st->d->v[ci][4]) < DBL_MIN ? 0.0 : st->d->v[ci][4]; \
+ st->d->v[ci][3] = fabs(st->d->v[ci][3]) < DBL_MIN ? 0.0 : st->d->v[ci][3]; \
+ st->d->v[ci][2] = fabs(st->d->v[ci][2]) < DBL_MIN ? 0.0 : st->d->v[ci][2]; \
+ st->d->v[ci][1] = fabs(st->d->v[ci][1]) < DBL_MIN ? 0.0 : st->d->v[ci][1];
+#endif
+
+/*
+ * EBUR128_FILTER(type, min_scale, max_scale) defines
+ * ebur128_filter_<type>(st, src, frames), which consumes `frames` interleaved
+ * frames from src:
+ *  - samples are divided by scaling_factor, the larger of -min_scale and
+ *    max_scale;
+ *  - in EBUR128_MODE_SAMPLE_PEAK the largest absolute sample of each channel
+ *    (after scaling) raises prev_sample_peak[c];
+ *  - in EBUR128_MODE_TRUE_PEAK, when an interpolator exists, the scaled
+ *    samples are copied to resampler_buffer_input and handed to
+ *    ebur128_check_true_peak();
+ *  - every channel whose channel_map value minus one is non-negative is run
+ *    through the filter described by coefficients a[]/b[] and state v[ci][],
+ *    and the result is written to audio_data starting at audio_data_index.
+ *    A channel mapped to EBUR128_DUAL_MONO uses filter state 0.
+ * The function itself does not advance audio_data_index.
+ * It is instantiated below for short, int, float and double input.
+ */
+#define EBUR128_FILTER(type, min_scale, max_scale) \
+static void ebur128_filter_##type(ebur128_state* st, const type* src, \
+ size_t frames) { \
+ static double scaling_factor = \
+ -((double) (min_scale)) > (double) (max_scale) ? \
+ -((double) (min_scale)) : (double) (max_scale); \
+ double* audio_data = st->d->audio_data + st->d->audio_data_index; \
+ size_t i, c; \
+ \
+ TURN_ON_FTZ \
+ \
+ if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK) { \
+ for (c = 0; c < st->channels; ++c) { \
+ double max = 0.0; \
+ for (i = 0; i < frames; ++i) { \
+ if (src[i * st->channels + c] > max) { \
+ max = src[i * st->channels + c]; \
+ } else if (-src[i * st->channels + c] > max) { \
+ max = -1.0 * src[i * st->channels + c]; \
+ } \
+ } \
+ max /= scaling_factor; \
+ if (max > st->d->prev_sample_peak[c]) st->d->prev_sample_peak[c] = max; \
+ } \
+ } \
+ if ((st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK && \
+ st->d->interp) { \
+ for (c = 0; c < st->channels; ++c) { \
+ for (i = 0; i < frames; ++i) { \
+ st->d->resampler_buffer_input[i * st->channels + c] = \
+ (float) (src[i * st->channels + c] / scaling_factor); \
+ } \
+ } \
+ ebur128_check_true_peak(st, frames); \
+ } \
+ for (c = 0; c < st->channels; ++c) { \
+ int ci = st->d->channel_map[c] - 1; \
+ if (ci < 0) continue; \
+ else if (ci == EBUR128_DUAL_MONO - 1) ci = 0; /*dual mono */ \
+ for (i = 0; i < frames; ++i) { \
+ st->d->v[ci][0] = (double) (src[i * st->channels + c] / scaling_factor) \
+ - st->d->a[1] * st->d->v[ci][1] \
+ - st->d->a[2] * st->d->v[ci][2] \
+ - st->d->a[3] * st->d->v[ci][3] \
+ - st->d->a[4] * st->d->v[ci][4]; \
+ audio_data[i * st->channels + c] = \
+ st->d->b[0] * st->d->v[ci][0] \
+ + st->d->b[1] * st->d->v[ci][1] \
+ + st->d->b[2] * st->d->v[ci][2] \
+ + st->d->b[3] * st->d->v[ci][3] \
+ + st->d->b[4] * st->d->v[ci][4]; \
+ st->d->v[ci][4] = st->d->v[ci][3]; \
+ st->d->v[ci][3] = st->d->v[ci][2]; \
+ st->d->v[ci][2] = st->d->v[ci][1]; \
+ st->d->v[ci][1] = st->d->v[ci][0]; \
+ } \
+ FLUSH_MANUALLY \
+ } \
+ TURN_OFF_FTZ \
+}
+EBUR128_FILTER(short, SHRT_MIN, SHRT_MAX)
+EBUR128_FILTER(int, INT_MIN, INT_MAX)
+EBUR128_FILTER(float, -1.0f, 1.0f)
+EBUR128_FILTER(double, -1.0, 1.0)
+
+/* Convert an energy value to loudness: 10 * log10(energy) - 0.691. */
+static double ebur128_energy_to_loudness(double energy) {
+  const double log10_energy = log(energy) / log(10.0);
+
+  return 10 * log10_energy - 0.691;
+}
+
+/*
+ * Binary-search histogram_energy_boundaries[0..1000] and return the index of
+ * the bin whose lower boundary is the last one not exceeding `energy`
+ * (index 0 when energy lies below every boundary).
+ */
+static size_t find_histogram_index(double energy) {
+  size_t lo = 0;
+  size_t hi = 1000;
+
+  /* lo and hi start 1000 apart, so the loop body always runs at least once */
+  while (hi - lo != 1) {
+    const size_t mid = (lo + hi) / 2;
+
+    if (energy >= histogram_energy_boundaries[mid]) {
+      lo = mid;
+    } else {
+      hi = mid;
+    }
+  }
+
+  return lo;
+}
+
+/*
+ * Compute the mean energy of the most recent frames_per_block frames held in
+ * the circular audio_data buffer, summed over every channel that is not
+ * mapped to EBUR128_UNUSED. Channels mapped to EBUR128_Mp110, Mm110, Mp060,
+ * Mm060, Mp090 or Mm090 are weighted by 1.41 and EBUR128_DUAL_MONO by 2.0.
+ *
+ * If optional_output is non-NULL the energy is stored there and nothing else
+ * is recorded. Otherwise, when the energy reaches
+ * histogram_energy_boundaries[0], it is counted in block_energy_histogram or
+ * appended to block_list (recycling the oldest entry once block_list_max
+ * entries exist).
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM if a list entry cannot be
+ * allocated.
+ */
+static int ebur128_calc_gating_block(ebur128_state* st, size_t frames_per_block,
+ double* optional_output) {
+ size_t i, c;
+ double sum = 0.0;
+ double channel_sum;
+ for (c = 0; c < st->channels; ++c) {
+ if (st->d->channel_map[c] == EBUR128_UNUSED) {
+ continue;
+ }
+ channel_sum = 0.0;
+ /* the block wraps around the end of the buffer: sum the part at the start,
+    then the remainder taken from the end */
+ if (st->d->audio_data_index < frames_per_block * st->channels) {
+ for (i = 0; i < st->d->audio_data_index / st->channels; ++i) {
+ channel_sum += st->d->audio_data[i * st->channels + c] *
+ st->d->audio_data[i * st->channels + c];
+ }
+ for (i = st->d->audio_data_frames -
+ (frames_per_block -
+ st->d->audio_data_index / st->channels);
+ i < st->d->audio_data_frames; ++i) {
+ channel_sum += st->d->audio_data[i * st->channels + c] *
+ st->d->audio_data[i * st->channels + c];
+ }
+ } else {
+ /* the block is contiguous and ends at the current write position */
+ for (i = st->d->audio_data_index / st->channels - frames_per_block;
+ i < st->d->audio_data_index / st->channels;
+ ++i) {
+ channel_sum += st->d->audio_data[i * st->channels + c] *
+ st->d->audio_data[i * st->channels + c];
+ }
+ }
+ if (st->d->channel_map[c] == EBUR128_Mp110 ||
+ st->d->channel_map[c] == EBUR128_Mm110 ||
+ st->d->channel_map[c] == EBUR128_Mp060 ||
+ st->d->channel_map[c] == EBUR128_Mm060 ||
+ st->d->channel_map[c] == EBUR128_Mp090 ||
+ st->d->channel_map[c] == EBUR128_Mm090) {
+ channel_sum *= 1.41;
+ } else if (st->d->channel_map[c] == EBUR128_DUAL_MONO) {
+ channel_sum *= 2.0;
+ }
+ sum += channel_sum;
+ }
+ sum /= (double) frames_per_block;
+ if (optional_output) {
+ *optional_output = sum;
+ return EBUR128_SUCCESS;
+ } else if (sum >= histogram_energy_boundaries[0]) {
+ if (st->d->use_histogram) {
+ ++st->d->block_energy_histogram[find_histogram_index(sum)];
+ } else {
+ struct ebur128_dq_entry* block;
+ /* list is full: reuse the oldest entry instead of allocating */
+ if (st->d->block_list_size == st->d->block_list_max) {
+ block = STAILQ_FIRST(&st->d->block_list);
+ STAILQ_REMOVE_HEAD(&st->d->block_list, entries);
+ } else {
+ block = (struct ebur128_dq_entry*) malloc(sizeof(struct ebur128_dq_entry));
+ if (!block) {
+ return EBUR128_ERROR_NOMEM;
+ }
+ st->d->block_list_size++;
+ }
+ block->z = sum;
+ STAILQ_INSERT_TAIL(&st->d->block_list, block, entries);
+ }
+ return EBUR128_SUCCESS;
+ } else {
+ /* energy below the lowest boundary: the block is discarded */
+ return EBUR128_SUCCESS;
+ }
+}
+
+/*
+ * Assign `value` to channel `channel_number` in the channel map.
+ * Returns 1 if the channel index is out of range, or if EBUR128_DUAL_MONO is
+ * requested for anything other than channel 0 of a one-channel state (a
+ * message is printed to stderr in that case); returns 0 on success.
+ */
+int ebur128_set_channel(ebur128_state* st,
+                        unsigned int channel_number,
+                        int value) {
+  const int is_single_mono_channel =
+      (st->channels == 1 && channel_number == 0);
+
+  if (!(channel_number < st->channels)) {
+    return 1;
+  }
+  if (value == EBUR128_DUAL_MONO && !is_single_mono_channel) {
+    fprintf(stderr, "EBUR128_DUAL_MONO only works with mono files!\n");
+    return 1;
+  }
+
+  st->d->channel_map[channel_number] = value;
+  return 0;
+}
+
+/*
+ * Reconfigure an existing state for a new channel count and/or sample rate.
+ * The audio buffer is reallocated and zeroed; the channel map and the
+ * per-channel peak arrays are rebuilt when the channel count changes; the
+ * filter is re-initialised when the sample rate changes; the resampler is
+ * always recreated. Frame counters are reset so measurement starts over with
+ * a full first block.
+ *
+ * Returns EBUR128_ERROR_NO_CHANGE when both values already match,
+ * EBUR128_ERROR_NOMEM for channels == 0 or samplerate < 5, and otherwise the
+ * value left in errcode (EBUR128_SUCCESS unless a CHECK_ERROR fired;
+ * CHECK_ERROR is defined elsewhere and appears to set errcode and jump).
+ * NOTE(review): after a failed allocation the state is left partially torn
+ * down with the freed members set to NULL - confirm how callers recover.
+ */
+int ebur128_change_parameters(ebur128_state* st,
+ unsigned int channels,
+ unsigned long samplerate) {
+ int errcode = EBUR128_SUCCESS;
+ size_t j;
+
+ if (channels == 0 || samplerate < 5) {
+ return EBUR128_ERROR_NOMEM;
+ }
+
+ if (channels == st->channels &&
+ samplerate == st->samplerate) {
+ return EBUR128_ERROR_NO_CHANGE;
+ }
+
+ free(st->d->audio_data);
+ st->d->audio_data = NULL;
+
+ if (channels != st->channels) {
+ unsigned int i;
+
+ /* drop everything sized by the old channel count before rebuilding it */
+ free(st->d->channel_map); st->d->channel_map = NULL;
+ free(st->d->sample_peak); st->d->sample_peak = NULL;
+ free(st->d->prev_sample_peak); st->d->prev_sample_peak = NULL;
+ free(st->d->true_peak); st->d->true_peak = NULL;
+ free(st->d->prev_true_peak); st->d->prev_true_peak = NULL;
+ st->channels = channels;
+
+ errcode = ebur128_init_channel_map(st);
+ CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit)
+
+ st->d->sample_peak = (double*) malloc(channels * sizeof(double));
+ CHECK_ERROR(!st->d->sample_peak, EBUR128_ERROR_NOMEM, exit)
+ st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double));
+ CHECK_ERROR(!st->d->prev_sample_peak, EBUR128_ERROR_NOMEM, exit)
+ st->d->true_peak = (double*) malloc(channels * sizeof(double));
+ CHECK_ERROR(!st->d->true_peak, EBUR128_ERROR_NOMEM, exit)
+ st->d->prev_true_peak = (double*) malloc(channels * sizeof(double));
+ CHECK_ERROR(!st->d->prev_true_peak, EBUR128_ERROR_NOMEM, exit)
+ for (i = 0; i < channels; ++i) {
+ st->d->sample_peak[i] = 0.0;
+ st->d->prev_sample_peak[i] = 0.0;
+ st->d->true_peak[i] = 0.0;
+ st->d->prev_true_peak[i] = 0.0;
+ }
+ }
+ if (samplerate != st->samplerate) {
+ st->samplerate = samplerate;
+ st->d->samples_in_100ms = (st->samplerate + 5) / 10;
+ ebur128_init_filter(st);
+ }
+ st->d->audio_data_frames = st->samplerate * st->d->window / 1000;
+ if (st->d->audio_data_frames % st->d->samples_in_100ms) {
+ /* round up to multiple of samples_in_100ms */
+ st->d->audio_data_frames = st->d->audio_data_frames
+ + st->d->samples_in_100ms
+ - (st->d->audio_data_frames % st->d->samples_in_100ms);
+ }
+ st->d->audio_data = (double*) malloc(st->d->audio_data_frames *
+ st->channels *
+ sizeof(double));
+ CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit)
+ for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) {
+ st->d->audio_data[j] = 0.0;
+ }
+
+ ebur128_destroy_resampler(st);
+ errcode = ebur128_init_resampler(st);
+ CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit)
+
+ /* the first block needs 400ms of audio data */
+ st->d->needed_frames = st->d->samples_in_100ms * 4;
+ /* start at the beginning of the buffer */
+ st->d->audio_data_index = 0;
+ /* reset short term frame counter */
+ st->d->short_term_frame_counter = 0;
+
+exit:
+ return errcode;
+}
+
+/*
+ * Change the analysis window length. The requested value is raised to 3000
+ * in short-term mode (or to 400 in momentary mode) when it is smaller.
+ * The audio buffer is reallocated and zeroed for the new window and the
+ * frame counters are reset.
+ *
+ * Returns EBUR128_ERROR_NO_CHANGE if the effective window equals the current
+ * one; otherwise the value left in errcode (EBUR128_SUCCESS unless
+ * CHECK_ERROR fired on a failed allocation).
+ */
+int ebur128_set_max_window(ebur128_state* st, unsigned long window)
+{
+  int errcode = EBUR128_SUCCESS;
+  size_t k;
+
+  /* enforce the minimum window demanded by the active mode */
+  if ((st->mode & EBUR128_MODE_S) == EBUR128_MODE_S) {
+    if (window < 3000) {
+      window = 3000;
+    }
+  } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M) {
+    if (window < 400) {
+      window = 400;
+    }
+  }
+  if (st->d->window == window) {
+    return EBUR128_ERROR_NO_CHANGE;
+  }
+
+  st->d->window = window;
+
+  free(st->d->audio_data);
+  st->d->audio_data = NULL;
+
+  st->d->audio_data_frames = st->samplerate * st->d->window / 1000;
+  if (st->d->audio_data_frames % st->d->samples_in_100ms) {
+    /* pad up to the next multiple of samples_in_100ms */
+    st->d->audio_data_frames +=
+        st->d->samples_in_100ms
+        - (st->d->audio_data_frames % st->d->samples_in_100ms);
+  }
+
+  st->d->audio_data = (double*) malloc(st->d->audio_data_frames *
+                                       st->channels *
+                                       sizeof(double));
+  CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit)
+  for (k = 0; k < st->d->audio_data_frames * st->channels; ++k) {
+    st->d->audio_data[k] = 0.0;
+  }
+
+  /* restart measurement: empty buffer, no short-term frames counted, and a
+   * full 4 * samples_in_100ms frames needed before the first block */
+  st->d->audio_data_index = 0;
+  st->d->short_term_frame_counter = 0;
+  st->d->needed_frames = st->d->samples_in_100ms * 4;
+
+exit:
+  return errcode;
+}
+
+/*
+ * Limit how much block history is retained. The requested value is raised
+ * to 3000 in loudness-range mode (or to 400 in momentary mode) when it is
+ * smaller. Both block lists are trimmed from the head until they fit the
+ * new limits.
+ *
+ * Returns EBUR128_ERROR_NO_CHANGE if the effective value equals the current
+ * one, else EBUR128_SUCCESS.
+ */
+int ebur128_set_max_history(ebur128_state* st, unsigned long history)
+{
+  struct ebur128_dq_entry* oldest;
+
+  if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) {
+    if (history < 3000) {
+      history = 3000;
+    }
+  } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M) {
+    if (history < 400) {
+      history = 400;
+    }
+  }
+  if (st->d->history == history) {
+    return EBUR128_ERROR_NO_CHANGE;
+  }
+
+  st->d->history = history;
+  st->d->block_list_max = st->d->history / 100;
+  st->d->st_block_list_max = st->d->history / 3000;
+
+  /* drop the oldest gating blocks that no longer fit */
+  for (; st->d->block_list_size > st->d->block_list_max;
+       st->d->block_list_size--) {
+    oldest = STAILQ_FIRST(&st->d->block_list);
+    STAILQ_REMOVE_HEAD(&st->d->block_list, entries);
+    free(oldest);
+  }
+  /* drop the oldest short-term blocks that no longer fit */
+  for (; st->d->st_block_list_size > st->d->st_block_list_max;
+       st->d->st_block_list_size--) {
+    oldest = STAILQ_FIRST(&st->d->short_term_block_list);
+    STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries);
+    free(oldest);
+  }
+  return EBUR128_SUCCESS;
+}
+
+/*
+ * EBUR128_ADD_FRAMES(type) defines ebur128_add_frames_<type>(st, src, frames),
+ * which feeds `frames` interleaved frames from src into the state.
+ *
+ * prev_sample_peak/prev_true_peak are reset first. The input is then passed
+ * to ebur128_filter_<type>() in chunks of needed_frames. Each time a chunk
+ * completes:
+ *  - with EBUR128_MODE_I a gating block over 4 * samples_in_100ms frames is
+ *    computed;
+ *  - with EBUR128_MODE_LRA, once short_term_frame_counter reaches
+ *    30 * samples_in_100ms, the short-term energy is recorded (histogram or
+ *    short_term_block_list) if it reaches histogram_energy_boundaries[0], and
+ *    the counter is set back to 20 * samples_in_100ms;
+ *  - needed_frames becomes samples_in_100ms and audio_data_index wraps to 0
+ *    at the end of the buffer.
+ * A remainder smaller than needed_frames is filtered and left buffered.
+ * Finally sample_peak/true_peak are raised to the maxima seen in this call.
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM if a block entry cannot be
+ * allocated. It is instantiated below for short, int, float and double.
+ */
+static int ebur128_energy_shortterm(ebur128_state* st, double* out);
+#define EBUR128_ADD_FRAMES(type) \
+int ebur128_add_frames_##type(ebur128_state* st, \
+ const type* src, size_t frames) { \
+ size_t src_index = 0; \
+ unsigned int c = 0; \
+ for (c = 0; c < st->channels; c++) { \
+ st->d->prev_sample_peak[c] = 0.0; \
+ st->d->prev_true_peak[c] = 0.0; \
+ } \
+ while (frames > 0) { \
+ if (frames >= st->d->needed_frames) { \
+ ebur128_filter_##type(st, src + src_index, st->d->needed_frames); \
+ src_index += st->d->needed_frames * st->channels; \
+ frames -= st->d->needed_frames; \
+ st->d->audio_data_index += st->d->needed_frames * st->channels; \
+ /* calculate the new gating block */ \
+ if ((st->mode & EBUR128_MODE_I) == EBUR128_MODE_I) { \
+ if (ebur128_calc_gating_block(st, st->d->samples_in_100ms * 4, NULL)) {\
+ return EBUR128_ERROR_NOMEM; \
+ } \
+ } \
+ if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \
+ st->d->short_term_frame_counter += st->d->needed_frames; \
+ if (st->d->short_term_frame_counter == st->d->samples_in_100ms * 30) { \
+ struct ebur128_dq_entry* block; \
+ double st_energy; \
+ if (ebur128_energy_shortterm(st, &st_energy) == EBUR128_SUCCESS && \
+ st_energy >= histogram_energy_boundaries[0]) { \
+ if (st->d->use_histogram) { \
+ ++st->d->short_term_block_energy_histogram[ \
+ find_histogram_index(st_energy)];\
+ } else { \
+ if (st->d->st_block_list_size == st->d->st_block_list_max) { \
+ block = STAILQ_FIRST(&st->d->short_term_block_list); \
+ STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); \
+ } else { \
+ block = (struct ebur128_dq_entry*) \
+ malloc(sizeof(struct ebur128_dq_entry)); \
+ if (!block) return EBUR128_ERROR_NOMEM; \
+ st->d->st_block_list_size++; \
+ } \
+ block->z = st_energy; \
+ STAILQ_INSERT_TAIL(&st->d->short_term_block_list, \
+ block, entries); \
+ } \
+ } \
+ st->d->short_term_frame_counter = st->d->samples_in_100ms * 20; \
+ } \
+ } \
+ /* 100ms are needed for all blocks besides the first one */ \
+ st->d->needed_frames = st->d->samples_in_100ms; \
+ /* reset audio_data_index when buffer full */ \
+ if (st->d->audio_data_index == st->d->audio_data_frames * st->channels) {\
+ st->d->audio_data_index = 0; \
+ } \
+ } else { \
+ ebur128_filter_##type(st, src + src_index, frames); \
+ st->d->audio_data_index += frames * st->channels; \
+ if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \
+ st->d->short_term_frame_counter += frames; \
+ } \
+ st->d->needed_frames -= frames; \
+ frames = 0; \
+ } \
+ } \
+ for (c = 0; c < st->channels; c++) { \
+ if (st->d->prev_sample_peak[c] > st->d->sample_peak[c]) { \
+ st->d->sample_peak[c] = st->d->prev_sample_peak[c]; \
+ } \
+ if (st->d->prev_true_peak[c] > st->d->true_peak[c]) { \
+ st->d->true_peak[c] = st->d->prev_true_peak[c]; \
+ } \
+ } \
+ return EBUR128_SUCCESS; \
+}
+EBUR128_ADD_FRAMES(short)
+EBUR128_ADD_FRAMES(int)
+EBUR128_ADD_FRAMES(float)
+EBUR128_ADD_FRAMES(double)
+
+/*
+ * Add this state's stored block energies to *relative_threshold and the
+ * number of stored blocks to *above_thresh_counter. Both outputs are
+ * accumulated into, not reset, so several states can be summed in turn.
+ * Always returns EBUR128_SUCCESS.
+ */
+static int ebur128_calc_relative_threshold(ebur128_state* st,
+                                           size_t* above_thresh_counter,
+                                           double* relative_threshold) {
+  struct ebur128_dq_entry* entry;
+  size_t bin;
+
+  if (!st->d->use_histogram) {
+    /* list mode: one entry per stored block */
+    STAILQ_FOREACH(entry, &st->d->block_list, entries) {
+      *relative_threshold += entry->z;
+      ++*above_thresh_counter;
+    }
+    return EBUR128_SUCCESS;
+  }
+
+  /* histogram mode: each bin contributes count * representative energy */
+  for (bin = 0; bin < 1000; ++bin) {
+    *above_thresh_counter += st->d->block_energy_histogram[bin];
+    *relative_threshold += st->d->block_energy_histogram[bin] *
+                           histogram_energies[bin];
+  }
+  return EBUR128_SUCCESS;
+}
+
+/*
+ * Compute the gated loudness over `size` states; NULL entries in sts are
+ * skipped. Every non-NULL state must have EBUR128_MODE_I set, otherwise
+ * EBUR128_ERROR_INVALID_MODE is returned.
+ *
+ * Pass 1 averages all stored block energies and multiplies the mean by
+ * relative_gate_factor to obtain the relative threshold. Pass 2 averages only
+ * the blocks at or above that threshold (histogram bins from start_index
+ * upward, or list entries with z >= threshold) and converts the mean to
+ * loudness. *out is -HUGE_VAL when either pass finds no blocks.
+ */
+static int ebur128_gated_loudness(ebur128_state** sts, size_t size,
+ double* out) {
+ struct ebur128_dq_entry* it;
+ double gated_loudness = 0.0;
+ double relative_threshold = 0.0;
+ size_t above_thresh_counter = 0;
+ size_t i, j, start_index;
+
+ for (i = 0; i < size; i++) {
+ if (sts[i] && (sts[i]->mode & EBUR128_MODE_I) != EBUR128_MODE_I) {
+ return EBUR128_ERROR_INVALID_MODE;
+ }
+ }
+
+ for (i = 0; i < size; i++) {
+ if (!sts[i]) {
+ continue;
+ }
+ ebur128_calc_relative_threshold(sts[i], &above_thresh_counter, &relative_threshold);
+ }
+ if (!above_thresh_counter) {
+ *out = -HUGE_VAL;
+ return EBUR128_SUCCESS;
+ }
+
+ relative_threshold /= (double)above_thresh_counter;
+ relative_threshold *= relative_gate_factor;
+
+ above_thresh_counter = 0;
+ /* first histogram bin whose representative energy is not below the
+    threshold; only used by states in histogram mode */
+ if (relative_threshold < histogram_energy_boundaries[0]) {
+ start_index = 0;
+ } else {
+ start_index = find_histogram_index(relative_threshold);
+ if (relative_threshold > histogram_energies[start_index]) {
+ ++start_index;
+ }
+ }
+ for (i = 0; i < size; i++) {
+ if (!sts[i]) {
+ continue;
+ }
+ if (sts[i]->d->use_histogram) {
+ for (j = start_index; j < 1000; ++j) {
+ gated_loudness += sts[i]->d->block_energy_histogram[j] *
+ histogram_energies[j];
+ above_thresh_counter += sts[i]->d->block_energy_histogram[j];
+ }
+ } else {
+ STAILQ_FOREACH(it, &sts[i]->d->block_list, entries) {
+ if (it->z >= relative_threshold) {
+ ++above_thresh_counter;
+ gated_loudness += it->z;
+ }
+ }
+ }
+ }
+ if (!above_thresh_counter) {
+ *out = -HUGE_VAL;
+ return EBUR128_SUCCESS;
+ }
+ gated_loudness /= (double) above_thresh_counter;
+ *out = ebur128_energy_to_loudness(gated_loudness);
+ return EBUR128_SUCCESS;
+}
+
+/*
+ * Store the current relative gating threshold, expressed as loudness, in
+ * *out. With no stored blocks *out is set to -70.0.
+ * Returns EBUR128_ERROR_INVALID_MODE unless EBUR128_MODE_I is set, else
+ * EBUR128_SUCCESS.
+ */
+int ebur128_relative_threshold(ebur128_state* st, double* out) {
+  double energy_sum = 0.0;
+  size_t block_count = 0;
+
+  if ((st->mode & EBUR128_MODE_I) != EBUR128_MODE_I) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+
+  ebur128_calc_relative_threshold(st, &block_count, &energy_sum);
+
+  if (block_count == 0) {
+    *out = -70.0;
+  } else {
+    /* mean block energy, lowered by the relative gate */
+    energy_sum /= (double)block_count;
+    energy_sum *= relative_gate_factor;
+    *out = ebur128_energy_to_loudness(energy_sum);
+  }
+  return EBUR128_SUCCESS;
+}
+
+/* Gated loudness of a single state; see ebur128_gated_loudness(). */
+int ebur128_loudness_global(ebur128_state* st, double* out) {
+  const int status = ebur128_gated_loudness(&st, 1, out);
+
+  return status;
+}
+
+/* Gated loudness across `size` states; see ebur128_gated_loudness(). */
+int ebur128_loudness_global_multiple(ebur128_state** sts, size_t size,
+                                     double* out) {
+  const int status = ebur128_gated_loudness(sts, size, out);
+
+  return status;
+}
+
+/*
+ * Store in *out the mean energy of the last interval_frames frames.
+ * Returns EBUR128_ERROR_INVALID_MODE when the interval is longer than the
+ * audio buffer, else EBUR128_SUCCESS.
+ */
+static int ebur128_energy_in_interval(ebur128_state* st,
+                                      size_t interval_frames,
+                                      double* out) {
+  if (interval_frames <= st->d->audio_data_frames) {
+    ebur128_calc_gating_block(st, interval_frames, out);
+    return EBUR128_SUCCESS;
+  }
+  return EBUR128_ERROR_INVALID_MODE;
+}
+
+/* Mean energy over the last 30 * samples_in_100ms frames. */
+static int ebur128_energy_shortterm(ebur128_state* st, double* out) {
+  const size_t short_term_frames = st->d->samples_in_100ms * 30;
+
+  return ebur128_energy_in_interval(st, short_term_frames, out);
+}
+
+/*
+ * Store the loudness of the last 4 * samples_in_100ms frames in *out
+ * (-HUGE_VAL when the energy is not positive).
+ * Returns the error from ebur128_energy_in_interval(), else EBUR128_SUCCESS.
+ */
+int ebur128_loudness_momentary(ebur128_state* st, double* out) {
+  double energy;
+  const int status = ebur128_energy_in_interval(st,
+                                                st->d->samples_in_100ms * 4,
+                                                &energy);
+
+  if (status) {
+    return status;
+  }
+  *out = (energy <= 0.0) ? -HUGE_VAL : ebur128_energy_to_loudness(energy);
+  return EBUR128_SUCCESS;
+}
+
+/*
+ * Store the short-term loudness in *out (-HUGE_VAL when the energy is not
+ * positive).
+ * Returns the error from ebur128_energy_shortterm(), else EBUR128_SUCCESS.
+ */
+int ebur128_loudness_shortterm(ebur128_state* st, double* out) {
+  double energy;
+  const int status = ebur128_energy_shortterm(st, &energy);
+
+  if (status) {
+    return status;
+  }
+  *out = (energy <= 0.0) ? -HUGE_VAL : ebur128_energy_to_loudness(energy);
+  return EBUR128_SUCCESS;
+}
+
+/*
+ * Store in *out the loudness of the last samplerate * window / 1000 frames
+ * (-HUGE_VAL when the energy is not positive).
+ * Returns the error from ebur128_energy_in_interval() - raised when the
+ * interval exceeds the audio buffer - else EBUR128_SUCCESS.
+ */
+int ebur128_loudness_window(ebur128_state* st,
+                            unsigned long window,
+                            double* out) {
+  double energy;
+  const size_t interval_frames = st->samplerate * window / 1000;
+  const int status = ebur128_energy_in_interval(st, interval_frames, &energy);
+
+  if (status) {
+    return status;
+  }
+  *out = (energy <= 0.0) ? -HUGE_VAL : ebur128_energy_to_loudness(energy);
+  return EBUR128_SUCCESS;
+}
+
+/* qsort() comparator ordering doubles ascending: returns -1, 0 or 1. */
+static int ebur128_double_cmp(const void *p1, const void *p2) {
+  const double lhs = *(const double*) p1;
+  const double rhs = *(const double*) p2;
+
+  if (lhs < rhs) {
+    return -1;
+  }
+  if (lhs > rhs) {
+    return 1;
+  }
+  return 0;
+}
+
+/* EBU - TECH 3342 */
+/*
+ * Compute the loudness range over `size` states; NULL entries are skipped.
+ * Every non-NULL state must have EBUR128_MODE_LRA set, and its
+ * EBUR128_MODE_HISTOGRAM flag must match the value taken from sts[0];
+ * otherwise EBUR128_ERROR_INVALID_MODE is returned.
+ *
+ * The stored short-term energies are gated at minus_twenty_decibels times
+ * their mean; *out is the loudness difference between the 95% and 10%
+ * positions of the remaining, ordered values. *out is 0.0 when there is no
+ * data or nothing passes the gate.
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM if the temporary vector
+ * used in list mode cannot be allocated.
+ */
+int ebur128_loudness_range_multiple(ebur128_state** sts, size_t size,
+ double* out) {
+ size_t i, j;
+ struct ebur128_dq_entry* it;
+ double* stl_vector;
+ size_t stl_size;
+ double* stl_relgated;
+ size_t stl_relgated_size;
+ double stl_power, stl_integrated;
+ /* High and low percentile energy */
+ double h_en, l_en;
+ int use_histogram = 0;
+
+ for (i = 0; i < size; ++i) {
+ if (sts[i]) {
+ if ((sts[i]->mode & EBUR128_MODE_LRA) != EBUR128_MODE_LRA) {
+ return EBUR128_ERROR_INVALID_MODE;
+ }
+ if (i == 0 && sts[i]->mode & EBUR128_MODE_HISTOGRAM) {
+ use_histogram = 1;
+ } else if (use_histogram != !!(sts[i]->mode & EBUR128_MODE_HISTOGRAM)) {
+ return EBUR128_ERROR_INVALID_MODE;
+ }
+ }
+ }
+
+ if (use_histogram) {
+ unsigned long hist[1000] = { 0 };
+ size_t percentile_low, percentile_high;
+ size_t index;
+
+ /* merge the per-state histograms and accumulate the total energy */
+ stl_size = 0;
+ stl_power = 0.0;
+ for (i = 0; i < size; ++i) {
+ if (!sts[i]) {
+ continue;
+ }
+ for (j = 0; j < 1000; ++j) {
+ hist[j] += sts[i]->d->short_term_block_energy_histogram[j];
+ stl_size += sts[i]->d->short_term_block_energy_histogram[j];
+ stl_power += sts[i]->d->short_term_block_energy_histogram[j]
+ * histogram_energies[j];
+ }
+ }
+ if (!stl_size) {
+ *out = 0.0;
+ return EBUR128_SUCCESS;
+ }
+
+ stl_power /= stl_size;
+ stl_integrated = minus_twenty_decibels * stl_power;
+
+ /* first bin that survives the relative gate */
+ if (stl_integrated < histogram_energy_boundaries[0]) {
+ index = 0;
+ } else {
+ index = find_histogram_index(stl_integrated);
+ if (stl_integrated > histogram_energies[index]) {
+ ++index;
+ }
+ }
+ stl_size = 0;
+ for (j = index; j < 1000; ++j) {
+ stl_size += hist[j];
+ }
+ if (!stl_size) {
+ *out = 0.0;
+ return EBUR128_SUCCESS;
+ }
+
+ percentile_low = (size_t) ((stl_size - 1) * 0.1 + 0.5);
+ percentile_high = (size_t) ((stl_size - 1) * 0.95 + 0.5);
+
+ /* walk the gated bins until the cumulative count passes each percentile */
+ stl_size = 0;
+ j = index;
+ while (stl_size <= percentile_low) {
+ stl_size += hist[j++];
+ }
+ l_en = histogram_energies[j - 1];
+ while (stl_size <= percentile_high) {
+ stl_size += hist[j++];
+ }
+ h_en = histogram_energies[j - 1];
+ *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en);
+ return EBUR128_SUCCESS;
+
+ } else {
+ /* list mode: count the entries, copy them into one vector and sort it */
+ stl_size = 0;
+ for (i = 0; i < size; ++i) {
+ if (!sts[i]) {
+ continue;
+ }
+ STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) {
+ ++stl_size;
+ }
+ }
+ if (!stl_size) {
+ *out = 0.0;
+ return EBUR128_SUCCESS;
+ }
+ stl_vector = (double*) malloc(stl_size * sizeof(double));
+ if (!stl_vector) {
+ return EBUR128_ERROR_NOMEM;
+ }
+
+ j = 0;
+ for (i = 0; i < size; ++i) {
+ if (!sts[i]) {
+ continue;
+ }
+ STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) {
+ stl_vector[j] = it->z;
+ ++j;
+ }
+ }
+ qsort(stl_vector, stl_size, sizeof(double), ebur128_double_cmp);
+ stl_power = 0.0;
+ for (i = 0; i < stl_size; ++i) {
+ stl_power += stl_vector[i];
+ }
+ stl_power /= (double) stl_size;
+ stl_integrated = minus_twenty_decibels * stl_power;
+
+ /* skip the sorted values that fall below the relative gate */
+ stl_relgated = stl_vector;
+ stl_relgated_size = stl_size;
+ while (stl_relgated_size > 0 && *stl_relgated < stl_integrated) {
+ ++stl_relgated;
+ --stl_relgated_size;
+ }
+
+ if (stl_relgated_size) {
+ h_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.95 + 0.5)];
+ l_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.1 + 0.5)];
+ free(stl_vector);
+ *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en);
+ return EBUR128_SUCCESS;
+ } else {
+ free(stl_vector);
+ *out = 0.0;
+ return EBUR128_SUCCESS;
+ }
+ }
+}
+
+/* Loudness range of a single state; see ebur128_loudness_range_multiple(). */
+int ebur128_loudness_range(ebur128_state* st, double* out) {
+  const int status = ebur128_loudness_range_multiple(&st, 1, out);
+
+  return status;
+}
+
+/*
+ * Store the overall sample peak of channel `channel_number` in *out.
+ * Returns EBUR128_ERROR_INVALID_MODE unless EBUR128_MODE_SAMPLE_PEAK is set,
+ * EBUR128_ERROR_INVALID_CHANNEL_INDEX for an out-of-range channel, else
+ * EBUR128_SUCCESS.
+ */
+int ebur128_sample_peak(ebur128_state* st,
+                        unsigned int channel_number,
+                        double* out) {
+  const int mode_enabled =
+      (st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK;
+
+  if (!mode_enabled) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  if (!(channel_number < st->channels)) {
+    return EBUR128_ERROR_INVALID_CHANNEL_INDEX;
+  }
+
+  *out = st->d->sample_peak[channel_number];
+  return EBUR128_SUCCESS;
+}
+
+/*
+ * Store in *out the sample peak of channel `channel_number` recorded in
+ * prev_sample_peak (reset at the start of every add_frames call).
+ * Returns EBUR128_ERROR_INVALID_MODE unless EBUR128_MODE_SAMPLE_PEAK is set,
+ * EBUR128_ERROR_INVALID_CHANNEL_INDEX for an out-of-range channel, else
+ * EBUR128_SUCCESS.
+ */
+int ebur128_prev_sample_peak(ebur128_state* st,
+                             unsigned int channel_number,
+                             double* out) {
+  const int mode_enabled =
+      (st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK;
+
+  if (!mode_enabled) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  if (!(channel_number < st->channels)) {
+    return EBUR128_ERROR_INVALID_CHANNEL_INDEX;
+  }
+
+  *out = st->d->prev_sample_peak[channel_number];
+  return EBUR128_SUCCESS;
+}
+
+/*
+ * Store in *out the larger of the overall true peak and the overall sample
+ * peak of channel `channel_number`.
+ * Returns EBUR128_ERROR_INVALID_MODE unless EBUR128_MODE_TRUE_PEAK is set,
+ * EBUR128_ERROR_INVALID_CHANNEL_INDEX for an out-of-range channel, else
+ * EBUR128_SUCCESS.
+ */
+int ebur128_true_peak(ebur128_state* st,
+                      unsigned int channel_number,
+                      double* out) {
+  const int mode_enabled =
+      (st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK;
+
+  if (!mode_enabled) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  if (!(channel_number < st->channels)) {
+    return EBUR128_ERROR_INVALID_CHANNEL_INDEX;
+  }
+
+  if (st->d->true_peak[channel_number] > st->d->sample_peak[channel_number]) {
+    *out = st->d->true_peak[channel_number];
+  } else {
+    *out = st->d->sample_peak[channel_number];
+  }
+  return EBUR128_SUCCESS;
+}
+
+/*
+ * Store in *out the larger of prev_true_peak and prev_sample_peak for
+ * channel `channel_number`.
+ * Returns EBUR128_ERROR_INVALID_MODE unless EBUR128_MODE_TRUE_PEAK is set,
+ * EBUR128_ERROR_INVALID_CHANNEL_INDEX for an out-of-range channel, else
+ * EBUR128_SUCCESS.
+ */
+int ebur128_prev_true_peak(ebur128_state* st,
+                           unsigned int channel_number,
+                           double* out) {
+  const int mode_enabled =
+      (st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK;
+
+  if (!mode_enabled) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  if (!(channel_number < st->channels)) {
+    return EBUR128_ERROR_INVALID_CHANNEL_INDEX;
+  }
+
+  if (st->d->prev_true_peak[channel_number]
+      > st->d->prev_sample_peak[channel_number]) {
+    *out = st->d->prev_true_peak[channel_number];
+  } else {
+    *out = st->d->prev_sample_peak[channel_number];
+  }
+  return EBUR128_SUCCESS;
+}
\ No newline at end of file
diff --git a/tools/ref/waves/CMakeLists.txt b/tools/ref/waves/CMakeLists.txt
new file mode 100644
index 0000000..3045b00
--- /dev/null
+++ b/tools/ref/waves/CMakeLists.txt
@@ -0,0 +1,3 @@
+# Add this directory's inc/ folder to the header search path.
+include_directories(inc)
+# Collect every source file found in src/ into DIR_WAVES_SRCS.
+AUX_SOURCE_DIRECTORY(src DIR_WAVES_SRCS)
+# Build the "waves" library target from the collected sources.
+add_library(waves ${DIR_WAVES_SRCS})
\ No newline at end of file
diff --git a/tools/ref/waves/inc/ExtraMono.h b/tools/ref/waves/inc/ExtraMono.h
new file mode 100644
index 0000000..280fab0
--- /dev/null
+++ b/tools/ref/waves/inc/ExtraMono.h
@@ -0,0 +1,230 @@
+
+#include <string>
+#include <string.h>
+
+// Byte widths of the 32-bit and 16-bit fields read from a WAVE header.
+#define SIZE_LONG 4
+#define SIZE_SHORT 2
+
+// Byte width of a four-character tag ("RIFF", "WAVE", "fmt ", "data") and the
+// only format tag value accepted by GetWaveHeadLen.
+#define SIZE_FLAG 4
+#define FMT_TAG 0x0001
+
+#define BITS_PER_BYTE 8
+
+// Capacity, in 16-bit samples, of the buffers ExtraMono allocates.
+#ifndef AFS_CMPL_MAX_WAV
+#define AFS_CMPL_MAX_WAV 15360000 // 16 minutes of audio (960 * 16000)
+#endif
+
+//+---------------------------------------------------------------------------+
+//+ Read one 32-bit little-endian unsigned value from a file.
+//+
+//+ fp - stream to read SIZE_LONG bytes from.
+//+
+//+ Returns the assembled value. Bytes that could not be read (end of file or
+//+ I/O error) are treated as 0 instead of being left indeterminate.
+//+---------------------------------------------------------------------------+
+unsigned long fa_read_u32(FILE* fp)
+{
+    unsigned char temp[SIZE_LONG];
+
+    // After a short read the tail of the buffer is indeterminate; zero it so
+    // the result is well defined.
+    const size_t got = fread(temp, sizeof(unsigned char), SIZE_LONG, fp);
+    for (size_t i = got; i < SIZE_LONG; ++i)
+    {
+        temp[i] = 0;
+    }
+
+    unsigned long cx = (unsigned long)temp[0];
+    cx |= (unsigned long)temp[1] << 8;
+    cx |= (unsigned long)temp[2] << 16;
+    cx |= (unsigned long)temp[3] << 24;
+    return cx;
+}
+
+//+---------------------------------------------------------------------------+
+//+ Read one 16-bit little-endian unsigned value from a file.
+//+
+//+ fp - stream to read SIZE_SHORT bytes from.
+//+
+//+ Returns the assembled value. Bytes that could not be read (end of file or
+//+ I/O error) are treated as 0 instead of being left indeterminate.
+//+---------------------------------------------------------------------------+
+unsigned short fa_read_u16(FILE *fp)
+{
+    unsigned char temp[SIZE_SHORT];
+
+    // After a short read the tail of the buffer is indeterminate; zero it so
+    // the result is well defined.
+    const size_t got = fread(temp, sizeof(unsigned char), SIZE_SHORT, fp);
+    for (size_t i = got; i < SIZE_SHORT; ++i)
+    {
+        temp[i] = 0;
+    }
+
+    return (unsigned short)(temp[0] | (temp[1] * 256));
+}
+
+// Parse the header of a PCM WAVE file and locate the start of its sample data.
+//
+//   pszFile  - path of the WAVE file to inspect.
+//   channels - [out] channel count read from the "fmt " chunk.
+//   nPos     - [out] byte offset of the channel-count field from the start of
+//              the file; only written on success.
+//   nLength  - [out] total file length in bytes; written once the file is open.
+//
+// Returns the header length in bytes (file length minus the size declared by
+// the "data" chunk), or -1 if the file cannot be opened, is truncated, or is
+// not a RIFF/WAVE file whose "fmt " chunk directly follows "WAVE" with format
+// tag 0x0001.
+int GetWaveHeadLen(const char* pszFile,unsigned short &channels, int &nPos, int& nLength)
+{
+    // The 16 mandatory bytes of a "fmt " chunk body.
+    const unsigned long FMT_FIXED = 4*SIZE_SHORT + 2*SIZE_LONG;
+
+    unsigned char temp[SIZE_FLAG];
+    // Size field of the "fmt " chunk followed by its 16 mandatory bytes.
+    unsigned char fmt_head[SIZE_LONG + 4*SIZE_SHORT + 2*SIZE_LONG];
+    unsigned char size_bytes[SIZE_LONG];
+    unsigned long x_size;
+    unsigned long chunk_size;
+    unsigned long data_size;
+    unsigned short format;
+    long cur;
+    int nCnt = 0;
+
+    /* Open the file and measure it. */
+    FILE* pWavFile = fopen(pszFile, "rb");
+    if ( pWavFile == NULL )
+    {
+        printf("Input file can not be opened!\n");
+        return -1;
+    }
+
+    fseek(pWavFile, 0, SEEK_END );
+    nLength = ftell(pWavFile);
+    fseek(pWavFile, 0, SEEK_SET );
+    if ( nLength < 0 )
+    {
+        fprintf(stderr, "Input file length can not be determined!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+
+    // The resource tag must be "RIFF".
+    if ( fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile) != SIZE_FLAG
+         || memcmp(temp, "RIFF", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Resource flag is not RIFF!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    // Skip the RIFF size field.
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+    nCnt += SIZE_LONG;
+
+    // The file tag must be "WAVE".
+    if ( fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile) != SIZE_FLAG
+         || memcmp(temp, "WAVE", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "File flag is not WAVE\n");
+        fclose(pWavFile);
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    // The next chunk must be "fmt ".
+    if ( fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile) != SIZE_FLAG
+         || memcmp(temp, "fmt ", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Format flag is not FMT!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    // Read the chunk size and the mandatory fields in one checked read so a
+    // truncated file is reported instead of being parsed from garbage.
+    if ( fread(fmt_head, sizeof(unsigned char), sizeof(fmt_head), pWavFile) != sizeof(fmt_head) )
+    {
+        fprintf(stderr, "Format chunk is truncated!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+
+    x_size = (unsigned long)fmt_head[0]
+           | ((unsigned long)fmt_head[1] << 8)
+           | ((unsigned long)fmt_head[2] << 16)
+           | ((unsigned long)fmt_head[3] << 24);
+    nCnt += SIZE_LONG;
+
+    // The encoding must be 0x0001.
+    format = (unsigned short)(fmt_head[4] | (fmt_head[5] << 8));
+    nCnt += SIZE_SHORT;
+    if ( format != FMT_TAG )
+    {
+        fprintf(stderr, "Encoding format is not 0x0001!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+
+    // Channel count; nCnt now equals the byte offset of this field.
+    channels = (unsigned short)(fmt_head[6] | (fmt_head[7] << 8));
+
+    /* Skip any extension bytes that follow the mandatory fields. */
+    if ( x_size < FMT_FIXED )
+    {
+        fprintf(stderr, "Format chunk is too short!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+    x_size -= FMT_FIXED;
+    if ( x_size != 0 )
+    {
+        cur = ftell(pWavFile);
+        if ( cur < 0 || x_size > (unsigned long)nLength - (unsigned long)cur )
+        {
+            fprintf(stderr, "Format chunk is truncated!\n");
+            fclose(pWavFile);
+            return -1;
+        }
+        fseek(pWavFile, (long)x_size, SEEK_CUR);
+    }
+
+    // Walk the remaining chunks until "data" is found. Every read and skip is
+    // bounded by the file length, so a file without a data chunk ends the loop.
+    for (;;)
+    {
+        if ( fread(temp, sizeof(unsigned char), SIZE_FLAG, pWavFile) != SIZE_FLAG
+             || fread(size_bytes, sizeof(unsigned char), SIZE_LONG, pWavFile) != SIZE_LONG )
+        {
+            fprintf(stderr, "Data chunk is not found!\n");
+            fclose(pWavFile);
+            return -1;
+        }
+
+        chunk_size = (unsigned long)size_bytes[0]
+                   | ((unsigned long)size_bytes[1] << 8)
+                   | ((unsigned long)size_bytes[2] << 16)
+                   | ((unsigned long)size_bytes[3] << 24);
+
+        if ( memcmp(temp, "data", SIZE_FLAG) == 0 )
+        {
+            data_size = chunk_size;
+            break;
+        }
+
+        cur = ftell(pWavFile);
+        if ( cur < 0 || chunk_size > (unsigned long)nLength - (unsigned long)cur )
+        {
+            fprintf(stderr, "Data chunk is not found!\n");
+            fclose(pWavFile);
+            return -1;
+        }
+        fseek(pWavFile, (long)chunk_size, SEEK_CUR);
+    }
+
+    fclose(pWavFile);
+
+    // A declared data size larger than the file (for example an unpatched
+    // placeholder) would make the subtraction below meaningless.
+    if ( data_size > (unsigned long)nLength )
+    {
+        fprintf(stderr, "Data size is larger than the file!\n");
+        return -1;
+    }
+
+    //+---------------------------------------------------------------------------+
+    //+ Report the WAVE header length
+    //+---------------------------------------------------------------------------+
+    nPos = nCnt;
+    return nLength - (int)data_size;
+}
+
+// Read an nBytes-wide little-endian unsigned value starting at p.
+static unsigned long ExtraMonoGetLE(const unsigned char *p, int nBytes)
+{
+    unsigned long v = 0;
+    for (int i = nBytes - 1; i >= 0; --i)
+    {
+        v = (v << 8) | p[i];
+    }
+    return v;
+}
+
+// Store the low nBytes bytes of v at p, least significant byte first.
+static void ExtraMonoPutLE(unsigned char *p, unsigned long v, int nBytes)
+{
+    for (int i = 0; i < nBytes; ++i)
+    {
+        p[i] = (unsigned char)(v & 0xFF);
+        v >>= 8;
+    }
+}
+
+// Extract the first channel of a WAVE file into sOutput.
+//
+//   sInput  - path of the source WAVE file (at most AFS_CMPL_MAX_WAV 16-bit
+//             words of it are processed).
+//   sOutput - path of the file to create.
+//
+// For multi-channel input the output is the original header, patched to
+// describe one channel, followed by the first sample of every frame. For mono
+// input only the sample data is written, without the header.
+// NOTE(review): the mono branch has always written headerless data while the
+// multi-channel branch keeps the header; confirm which one consumers expect.
+// Samples are handled as 16-bit words; the bit depth stored in the header is
+// not checked here.
+//
+// Returns true when the output was written completely, false on any open,
+// read, header or write error.
+bool ExtraMono(const std::string &sInput, const std::string &sOutput)
+{
+    FILE *pFile = fopen(sInput.c_str(), "rb");
+    if ( NULL == pFile )
+    {
+        printf("Fopen Error %s", sInput.c_str());
+        return false;
+    }
+
+    FILE *pFile2 = fopen(sOutput.c_str(), "wb");
+    if ( NULL == pFile2 )
+    {
+        printf("Fopen2 Error %s", sOutput.c_str());
+        fclose(pFile);
+        return false;
+    }
+
+    short *pBuf = new short[AFS_CMPL_MAX_WAV];
+    short *pBuf2 = NULL;
+    bool bOk = false;
+
+    const int nLen = (int)fread(pBuf, sizeof(short), AFS_CMPL_MAX_WAV, pFile);
+    if ( nLen <= 0 )
+    {
+        perror("Fread Error!");
+    }
+    else
+    {
+        unsigned short channels = 0;
+        int nPos = 0;
+        int nLength = 0;
+        const int nHeadByte = GetWaveHeadLen(sInput.c_str(), channels, nPos, nLength);
+        const int nHeadShort = nHeadByte/2;
+
+        // The fmt fields patched below span bytes nPos .. nPos+11 of the header.
+        const bool bBadHeader = nHeadByte < 0
+                             || nHeadShort > nLen
+                             || channels == 0
+                             || (channels > 1 && nPos + 12 > nHeadShort*2);
+
+        if ( bBadHeader )
+        {
+            printf("Wave header Error %s", sInput.c_str());
+        }
+        else if ( channels == 1 )
+        {
+            const size_t nData = (size_t)(nLen - nHeadShort);
+            bOk = ( fwrite(pBuf + nHeadShort, sizeof(short), nData, pFile2) == nData );
+        }
+        else
+        {
+            // Only complete frames are converted; one output sample per frame.
+            const int nFrames = (nLen - nHeadShort) / channels;
+            const int nOut = nHeadShort + nFrames;
+
+            pBuf2 = new short[nOut > 0 ? nOut : 1];
+            memcpy(pBuf2, pBuf, nHeadShort*sizeof(short));
+
+            // nPos is a BYTE offset, so patch the header through a byte pointer.
+            unsigned char *pHead = reinterpret_cast<unsigned char *>(pBuf2);
+            ExtraMonoPutLE(pHead + nPos, 1, SIZE_SHORT);                      // channel count
+            ExtraMonoPutLE(pHead + nPos + 6,
+                           ExtraMonoGetLE(pHead + nPos + 6, SIZE_LONG) / channels,
+                           SIZE_LONG);                                        // bytes per second
+            ExtraMonoPutLE(pHead + nPos + 10,
+                           ExtraMonoGetLE(pHead + nPos + 10, SIZE_SHORT) / channels,
+                           SIZE_SHORT);                                       // block align
+
+            // Fix the size fields only when the "data" tag really sits at the
+            // end of the copied header (it does not if chunks follow the data).
+            const unsigned long nDataBytes = (unsigned long)nFrames * sizeof(short);
+            if ( nHeadByte % 2 == 0
+                 && memcmp(pHead + nHeadByte - 8, "data", SIZE_FLAG) == 0 )
+            {
+                ExtraMonoPutLE(pHead + nHeadByte - 4, nDataBytes, SIZE_LONG);
+                ExtraMonoPutLE(pHead + 4, (unsigned long)nHeadByte - 8 + nDataBytes, SIZE_LONG);
+            }
+
+            const short *pWav = pBuf + nHeadShort;
+            for (int i = 0; i < nFrames; i++ )
+            {
+                pBuf2[nHeadShort + i] = pWav[i * channels];
+            }
+
+            // Write exactly what was filled in; nothing uninitialised follows.
+            bOk = ( fwrite(pBuf2, sizeof(short), nOut, pFile2) == (size_t)nOut );
+        }
+    }
+
+    delete []pBuf;
+    delete []pBuf2;
+
+    fclose(pFile);
+    fclose(pFile2);
+    return bOk;
+}
diff --git a/tools/ref/waves/inc/WaveFile.h b/tools/ref/waves/inc/WaveFile.h
new file mode 100644
index 0000000..8b57806
--- /dev/null
+++ b/tools/ref/waves/inc/WaveFile.h
@@ -0,0 +1,74 @@
+#ifndef WAVE_FILE_H
+#define WAVE_FILE_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+
+/* Sample encodings understood by CWaveFile.
+ * The low byte of each value is the sample width in bits (SetupDone derives
+ * SampleSize from Format & 0xFF); values from SF_IEEE_FLOAT upwards are
+ * treated as floating-point formats, lower values as integer PCM. */
+typedef enum SAMPLE_FORMAT
+{
+ // 8-bit samples (read as unsigned with a bias of 128).
+ SF_U8 = 8,
+ // 16-bit integer samples.
+ SF_S16 = 16,
+ // 24-bit integer samples.
+ SF_S24 = 24,
+ // 32-bit integer samples.
+ SF_S32 = 32,
+ // 32-bit floating-point samples.
+ SF_IEEE_FLOAT = 0x100 + 32,
+ // 64-bit floating-point samples.
+ SF_IEEE_DOUBLE = 0x100 + 64,
+ // One past SF_IEEE_DOUBLE; not a real format.
+ SF_MAX,
+} SAMPLE_FORMAT;
+
+/* Main object: reads or writes one WAVE file. **/
+class CWaveFile
+{
+public:
+ /* Open Filename; Write selects writing (true) or reading (false). **/
+ CWaveFile(const char* Filename, bool Write);
+ virtual ~CWaveFile();
+
+public:
+ // Stream properties.
+ int GetChannels();
+ int GetSampleRate();
+ double GetDuration(); // in second
+ uint32_t GetChannelMask();
+ // Output configuration; SetupDone() writes the header from these values.
+ void SetChannels(int Channels);
+ void SetSampleRate(int SampleRate);
+ void SetSampleFormat(SAMPLE_FORMAT Format);
+ void SetChannelMask(uint32_t Mask);
+ void Stat();
+ void SetupDone();
+ // Readers: fill FrameSamples with Frames * Channels interleaved samples
+ // converted to the requested type; return false when reading fails.
+ bool ReadFrameAsS16(short* FrameSamples, int Frames = 1);
+ bool ReadFrameAsDouble(double* FrameSamples, int Frames = 1);
+ bool ReadFrameAsfloat(float* FrameSamples, int Frames = 1);
+ // Writers: append raw bytes or interleaved frames without conversion.
+ void WriteRaw(void* Raw, int Size);
+ void WriteFrame(uint8_t* FrameSamples, int Frames = 1);
+ void WriteFrame(short* FrameSamples, int Frames = 1);
+ void WriteFrame(int32_t* FrameSamples, int Frames = 1);
+ void WriteFrameS24(int32_t* FrameSamples, int Frames = 1);
+ void WriteFrame(double* FrameSamples, int Frames = 1);
+ void WriteFrame(float* FrameSamples, int Frames=1);
+ void Seek(int FramePos, int Where = SEEK_SET);
+ // True when the constructor finished without error.
+ bool GetStatus();
+ SAMPLE_FORMAT GetFormat();
+ int GetTotalFrames();
+ int GetFramesRead();
+
+
+protected:
+ FILE* File;
+ int Channels; /* channel count **/
+ int SampleRate; /* sample rate **/
+ SAMPLE_FORMAT Format; /* sample format **/
+ int SampleSize; // Measured in Bits
+ unsigned int FrameStartPos; /* file offset where the audio data starts **/
+ unsigned long TotalFrames; /* total frame count (data chunk size / block align; one frame holds one sample per channel) **/
+ unsigned long FramesRead;
+ double Duration; /* duration **/
+
+ bool ReadOnly; /* true when opened for reading, false for writing **/
+
+ uint32_t ChannelMask;
+
+ bool m_bOK; /* whether the file was opened successfully **/
+};
+
+
+#endif
\ No newline at end of file
diff --git a/tools/ref/waves/src/WaveFile.cpp b/tools/ref/waves/src/WaveFile.cpp
new file mode 100644
index 0000000..83b83d7
--- /dev/null
+++ b/tools/ref/waves/src/WaveFile.cpp
@@ -0,0 +1,824 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <errno.h>
+
+#if WIN32
+#else
+#include <inttypes.h>
+#endif
+
+#include "WaveFile.h"
+
+// Speaker position bits. Presumably the values combined into a channel mask
+// (see SetChannelMask) - TODO confirm; none are referenced elsewhere in this file.
+#define SPEAKER_FRONT_LEFT 0x1
+#define SPEAKER_FRONT_RIGHT 0x2
+#define SPEAKER_FRONT_CENTER 0x4
+#define SPEAKER_LOW_FREQUENCY 0x8
+#define SPEAKER_BACK_LEFT 0x10
+#define SPEAKER_BACK_RIGHT 0x20
+#define SPEAKER_FRONT_LEFT_OF_CENTER 0x40
+#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80
+#define SPEAKER_BACK_CENTER 0x100
+#define SPEAKER_SIDE_LEFT 0x200
+#define SPEAKER_SIDE_RIGHT 0x400
+#define SPEAKER_TOP_CENTER 0x800
+#define SPEAKER_TOP_FRONT_LEFT 0x1000
+#define SPEAKER_TOP_FRONT_CENTER 0x2000
+#define SPEAKER_TOP_FRONT_RIGHT 0x4000
+#define SPEAKER_TOP_BACK_LEFT 0x8000
+#define SPEAKER_TOP_BACK_CENTER 0x10000
+#define SPEAKER_TOP_BACK_RIGHT 0x20000
+#define SPEAKER_RESERVED 0x80000000
+
+
+#define SPEAKER_REAR_CENTER_SURROUND SPEAKER_BACK_CENTER
+
+// DCA channel-layout constants; not referenced elsewhere in this file.
+#define DCA_MONO 0
+#define DCA_CHANNEL 1
+#define DCA_STEREO 2
+#define DCA_STEREO_SUMDIFF 3
+#define DCA_STEREO_TOTAL 4
+#define DCA_3F 5
+#define DCA_2F1R 6
+#define DCA_3F1R 7
+#define DCA_2F2R 8
+#define DCA_3F2R 9
+#define DCA_4F2R 10
+
+#define DCA_DOLBY 101 /* FIXME */
+
+#define DCA_CHANNEL_MAX DCA_3F2R /* We don't handle anything above that */
+#define DCA_CHANNEL_BITS 6
+#define DCA_CHANNEL_MASK 0x3F
+
+#define DCA_LFE 0x80
+#define DCA_ADJUST_LEVEL 0x100
+
+// Format tag values accepted for the first field of the "fmt " chunk.
+#define WAVE_FORMAT_PCM 0x0001
+#define WAVE_FORMAT_IEEE_FLOAT 0x0003
+#define WAVE_FORMAT_EXTENSIBLE 0xFFFE
+
+// Template for the 44-byte header written by SetupDone when no channel mask is set.
+// Byte layout: 0 "RIFF", 4 RIFF size (placeholder), 8 "WAVE", 12 "fmt ",
+// 16 fmt chunk size (16), 20 format tag, 22..33 channel/rate/alignment fields
+// (zero here, filled in by SetupDone), 34 bits per sample, 36 "data",
+// 40 data size (placeholder). Both size placeholders are overwritten when the
+// file is closed.
+static uint8_t wav_header[] = {
+ 'R', 'I', 'F', 'F', 0xfc, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+ 'f', 'm', 't', ' ', 16, 0, 0, 0,
+ WAVE_FORMAT_PCM, WAVE_FORMAT_PCM >> 8,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0,
+ 'd', 'a', 't', 'a', 0xd8, 0xff, 0xff, 0xff
+};
+
+// Template for the 68-byte header written by SetupDone when a channel mask is set.
+// Byte layout: 0 "RIFF", 4 RIFF size (placeholder), 8 "WAVE", 12 "fmt ",
+// 16 fmt chunk size (40), 20 format tag (WAVE_FORMAT_EXTENSIBLE),
+// 22..33 channel/rate/alignment fields (filled in by SetupDone),
+// 34 bits per sample, 36 extension size (22), 38 valid bits per sample,
+// 40 channel mask, 44 sub-format tag followed by 14 fixed bytes,
+// 60 "data", 64 data size (placeholder).
+static uint8_t wavmulti_header[] = {
+ 'R', 'I', 'F', 'F', 0xf0, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+ 'f', 'm', 't', ' ', 40, 0, 0, 0,
+ (uint8_t)(WAVE_FORMAT_EXTENSIBLE & 0xFF), WAVE_FORMAT_EXTENSIBLE >> 8,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 22, 0,
+ 0, 0, 0, 0, 0, 0,
+ WAVE_FORMAT_IEEE_FLOAT, WAVE_FORMAT_IEEE_FLOAT >> 8,
+ 0, 0, 0, 0, 0x10, 0x00, 0x80, 0, 0, 0xaa, 0, 0x38, 0x9b, 0x71,
+ 'd', 'a', 't', 'a', 0xb4, 0xff, 0xff, 0xff
+};
+
+/* Write value into buf[0..3], least significant byte first. */
+static void store4 (uint8_t * buf, int value)
+{
+    for (int byte = 0; byte < 4; byte++)
+    {
+        buf[byte] = (uint8_t)(value >> (8 * byte));
+    }
+}
+
+/* Write the low 16 bits of value into buf[0..1], least significant byte first. */
+static void store2 (uint8_t * buf, int value)
+{
+    const uint8_t low = (uint8_t)value;
+    const uint8_t high = (uint8_t)(value >> 8);
+
+    buf[0] = low;
+    buf[1] = high;
+}
+
+
+/* Scan forward from the current file position for a chunk with the given tag.
+ *
+ * Each chunk header is 8 bytes: a 4-byte tag and a 32-bit little-endian size.
+ * Chunks with a different tag are skipped by seeking over their payload.
+ *
+ * Returns the size of the matching chunk, leaving the file positioned at the
+ * start of its payload, or 0 when a complete chunk header cannot be read.
+ */
+static uint32_t find_chunk(FILE * file, const uint8_t chunk_id[4])
+{
+    uint8_t header[8];
+
+    for (;;)
+    {
+        if (fread(header, 1, 8, file) < 8)
+            return 0;
+
+        size_t payload = (uint32_t)header[4];
+        payload |= (uint32_t)header[5] << 8;
+        payload |= (uint32_t)header[6] << 16;
+        payload |= (uint32_t)header[7] << 24;
+
+        if (memcmp(header, chunk_id, 4) == 0)
+            return payload;
+
+        fseek(file, payload, SEEK_CUR);
+    }
+}
+
+
+/* Open a WAVE file.
+ *
+ * Filename - path of the file.
+ * Write    - true to create the file for writing, false to open it for reading.
+ *
+ * Writing: the object starts with 44100 Hz, 2 channels, 16-bit samples and no
+ * channel mask; no header is written until SetupDone() is called.
+ * Reading: the RIFF/WAVE header is parsed and the file is left positioned at
+ * the first byte of the data chunk.
+ *
+ * On any failure the file is closed, File is NULL and GetStatus() returns
+ * false. Every member has a defined value on every path.
+ */
+CWaveFile::CWaveFile(const char* Filename, bool Write)
+    : File(NULL), Channels(0), SampleRate(0), Format(SF_S16), SampleSize(0),
+      FrameStartPos(0), TotalFrames(0), FramesRead(0), Duration(0),
+      ReadOnly(false), ChannelMask(0), m_bOK(false)
+{
+    /* Open the file. **/
+    File = fopen(Filename, Write ? "wb":"rb");
+    if ( !File )
+        return;
+
+    /* Initial parameters for a file opened for writing. **/
+    if ( Write )
+    {
+        SampleRate = 44100;
+        Channels = 2;
+        Format = SF_S16;
+        SampleSize = 16;
+        ChannelMask = 0;
+        m_bOK = true;
+        return;
+    }
+
+    ReadOnly = true;
+
+    size_t s;
+    uint8_t buffer[8];
+    uint8_t *fmt = NULL;
+    uint32_t v;
+    uint32_t avg_bps;
+    uint32_t block_align;
+    unsigned short FormatType;
+    unsigned short SampleType;
+
+    static const uint8_t riff[4] = { 'R', 'I', 'F', 'F' };
+    static const uint8_t wave[4] = { 'W', 'A', 'V', 'E' };
+    static const uint8_t fmt_[4] = { 'f', 'm', 't', ' ' };
+    static const uint8_t data[4] = { 'd', 'a', 't', 'a' };
+
+    /* The first four bytes must be "RIFF". **/
+    s = fread(buffer, 1, 8, File);
+    if (s < 8)
+        goto err2;
+
+    if (memcmp(buffer, riff, 4))
+        goto err2;
+
+    /* Bytes 8..11 must be "WAVE". **/
+    /* TODO: check size (in buffer[4..8]) */
+    s = fread(buffer, 1, 4, File);
+    if (s < 4)
+        goto err2;
+
+    if (memcmp(buffer, wave, 4))
+        goto err2;
+
+    /* Only fmt chunks of 16, 18 or 40 bytes are accepted. */
+    s = find_chunk(File, fmt_);
+    if ( s != 16 && s != 18 && s != 40 )
+        goto err2;
+
+    fmt = (uint8_t*)malloc(s);
+    if (!fmt)
+        goto err2;
+
+    if (fread(fmt, 1, s, File) != s)
+        goto err3;
+
+    /* wFormatTag */
+    v = (uint32_t)fmt[0] | ((uint32_t)fmt[1] << 8);
+    if (v != WAVE_FORMAT_PCM && v != WAVE_FORMAT_IEEE_FLOAT && v != WAVE_FORMAT_EXTENSIBLE)
+        goto err3;
+
+    FormatType = v;
+
+    if (s == 40 && 0xfffe == v)
+    {
+        // fmt begins at 0x14 of the wave file, so file offset 0x2C is fmt[0x18]:
+        // the first two bytes of the sub-format, read byte by byte to avoid a
+        // misaligned, endian-dependent load.
+        v = (uint32_t)fmt[0x2C - 0x14] | ((uint32_t)fmt[0x2C - 0x14 + 1] << 8);
+    }
+
+    SampleType = v;
+
+    /* wChannels */
+    v = (uint32_t)fmt[2] | ((uint32_t)fmt[3] << 8);
+
+    Channels = v;
+
+    if (v < 1 || v > 32)
+        goto err3;
+
+    /* dwSamplesPerSec */
+    SampleRate = (uint32_t)fmt[4] | ((uint32_t)fmt[5] << 8) |
+        ((uint32_t)fmt[6] << 16) | ((uint32_t)fmt[7] << 24);
+
+    /* dwAvgBytesPerSec */
+    avg_bps = (uint32_t)fmt[8] | ((uint32_t)fmt[9] << 8) |
+        ((uint32_t)fmt[10] << 16) | ((uint32_t)fmt[11] << 24);
+
+    /* wBlockAlign */
+    block_align = (uint32_t)fmt[12] | ((uint32_t)fmt[13] << 8);
+
+    /* wBitsPerSample */
+    SampleSize = (uint32_t)fmt[14] | ((uint32_t)fmt[15] << 8);
+    if (SampleSize != 8 && SampleSize != 16 && SampleSize != 32 && SampleSize != 24 && SampleSize != 64)
+        goto err3;
+
+    switch (SampleSize)
+    {
+    case 8:
+        Format = SF_U8;
+        break;
+    case 16:
+        Format = SF_S16;
+        break;
+    case 24:
+        Format = SF_S24;
+        break;
+    case 32:
+        if (SampleType == WAVE_FORMAT_IEEE_FLOAT)
+            Format = SF_IEEE_FLOAT;
+        else
+            Format = SF_S32;
+        break;
+    case 64:
+        if (SampleType != WAVE_FORMAT_IEEE_FLOAT)
+            goto err3;
+        Format = SF_IEEE_DOUBLE;
+        break;
+    }
+
+
+    // Handle 24-bit samples individually
+#if 0
+    if (SampleSize == 24 && Channels <= 2)
+    {
+        int ba24 = Channels * (SampleSize / 8); // Align to 4x
+
+        ba24 = (ba24 + 3) / 4 * 4;
+
+        if (block_align != ba24)
+            goto err3;
+    }
+    else
+#endif
+    {
+        if (block_align != Channels * (SampleSize / 8))
+            goto err3;
+    }
+
+    if (avg_bps != block_align * SampleRate)
+        goto err3;
+
+    v = find_chunk(File, data);
+
+    if (v == 0 || v % block_align != 0)
+        goto err3;
+
+    TotalFrames = v / block_align;
+
+    FramesRead = 0;
+
+    // The channel mask lives at fmt[0x14..0x17], which only exists in a
+    // 40-byte fmt chunk; shorter chunks must not be indexed there.
+    if (FormatType == WAVE_FORMAT_EXTENSIBLE && s == 40)
+    {
+        ChannelMask = (uint32_t)fmt[0x14] | ((uint32_t)fmt[0x15] << 8) |
+            ((uint32_t)fmt[0x16] << 16) | ((uint32_t)fmt[0x17] << 24);
+    }
+    else
+    {
+        ChannelMask = 0;
+    }
+
+    FrameStartPos = ftell(File);
+
+    free(fmt);
+    m_bOK = true;
+    return;
+
+err3:
+    free(fmt);
+err2:
+    fclose(File);
+
+    File = NULL;
+}
+
+/* True when the constructor opened (and, for reading, parsed) the file. */
+bool CWaveFile::GetStatus()
+{
+    return m_bOK;
+}
+
+/* Sample format of the stream. */
+SAMPLE_FORMAT CWaveFile::GetFormat()
+{
+    return Format;
+}
+
+/* Number of frames in the data chunk, as computed by the constructor. */
+int CWaveFile::GetTotalFrames()
+{
+    return TotalFrames;
+}
+
+/* Running frame counter maintained by the ReadFrameAs* methods and Seek. */
+int CWaveFile::GetFramesRead()
+{
+    return FramesRead;
+}
+
+/* Close the file. For a file opened for writing, first patch the data-chunk
+ * size (4 bytes before the audio data) and the RIFF size (file offset 4). */
+CWaveFile::~CWaveFile()
+{
+    if (File != NULL)
+    {
+        // FrameStartPos stays 0 until SetupDone() has written a header; without
+        // a header there are no size fields to patch.
+        if (!ReadOnly && FrameStartPos >= 8)
+        {
+            const long End = ftell(File);
+
+            if (End >= (long)FrameStartPos)
+            {
+                unsigned int Size = (unsigned int)(End - (long)FrameStartPos);
+                uint8_t Bytes[4];
+
+                // Sizes are stored little-endian regardless of the host.
+                Bytes[0] = (uint8_t)Size;
+                Bytes[1] = (uint8_t)(Size >> 8);
+                Bytes[2] = (uint8_t)(Size >> 16);
+                Bytes[3] = (uint8_t)(Size >> 24);
+                fseek(File, FrameStartPos - 4, SEEK_SET);
+                fwrite(Bytes, 4, 1, File);
+
+                Size += FrameStartPos - 8;
+
+                Bytes[0] = (uint8_t)Size;
+                Bytes[1] = (uint8_t)(Size >> 8);
+                Bytes[2] = (uint8_t)(Size >> 16);
+                Bytes[3] = (uint8_t)(Size >> 24);
+                fseek(File, 4, SEEK_SET);
+                fwrite(Bytes, 4, 1, File);
+            }
+        }
+
+        fclose(File);
+    }
+}
+
+/* Sample rate of the stream. */
+int CWaveFile::GetSampleRate()
+{
+ return SampleRate;
+}
+
+/* Set the sample rate that SetupDone() stores in the header. */
+void CWaveFile::SetSampleRate(int SampleRate)
+{
+ this->SampleRate = SampleRate;
+}
+
+/* Write the WAVE header at the start of the file from the current settings.
+ *
+ * With a non-zero channel mask the 68-byte extensible header is used,
+ * otherwise the 44-byte plain header. SampleSize is derived from the low byte
+ * of Format, and FrameStartPos is set to the file position after the header.
+ */
+void CWaveFile::SetupDone()
+{
+    unsigned char Header[68];
+
+    const bool Extensible = (ChannelMask != 0);
+    const bool FloatSamples = (Format >= SF_IEEE_FLOAT);
+
+    const uint8_t* Template = wav_header;
+    size_t HeaderSize = sizeof(wav_header);
+    if (Extensible)
+    {
+        Template = wavmulti_header;
+        HeaderSize = sizeof(wavmulti_header);
+    }
+
+    fseek(File, 0, SEEK_SET);
+
+    SampleSize = Format & 0xFF;
+    const int BytesPerSample = SampleSize / 8;
+
+    memcpy(Header, Template, HeaderSize);
+
+    // The extensible template defaults to float samples and the plain one to
+    // PCM; switch the relevant tag when the format is the other kind.
+    if (Extensible)
+    {
+        if (!FloatSamples)
+        {
+            // store2(Header + 20, WAVE_FORMAT_PCM);
+            store2(Header + 44, WAVE_FORMAT_PCM);
+        }
+    }
+    else if (FloatSamples)
+    {
+        store2(Header + 20, WAVE_FORMAT_IEEE_FLOAT);
+    }
+
+    // Fields shared by both header layouts.
+    store2(Header + 22, Channels);
+    store4(Header + 24, SampleRate);
+    store4(Header + 28, BytesPerSample * SampleRate * Channels);
+    store2(Header + 32, BytesPerSample * Channels);
+    store2(Header + 34, BytesPerSample * 8);
+
+    if (Extensible)
+    {
+        store2(Header + 38, BytesPerSample * 8);
+        store4(Header + 40, ChannelMask);
+    }
+
+    fwrite(Header, HeaderSize, 1, File);
+
+    FrameStartPos = ftell(File);
+}
+
+
+/* Move the file position to a frame boundary.
+ *
+ * FramePos - frame offset, interpreted according to Where.
+ * Where    - SEEK_SET (default): frame index from the start of the audio data;
+ *            SEEK_CUR: relative to the current frame;
+ *            SEEK_END: relative to the end of the audio data.
+ *
+ * For files opened for reading the target is resolved to an absolute frame
+ * index and FramesRead is updated when the seek succeeds; a target before the
+ * first frame is ignored. For files opened for writing, where no frame counts
+ * are tracked, SEEK_CUR and SEEK_END are passed to fseek as relative offsets.
+ */
+void CWaveFile::Seek(int FramePos, int Where)
+{
+    // Computed in long so large frame indexes do not overflow int.
+    const long FrameBytes = (long)Channels * (SampleSize / 8);
+
+    if (!ReadOnly && Where != SEEK_SET)
+    {
+        fseek(File, (long)FramePos * FrameBytes, Where);
+        return;
+    }
+
+    long Target;
+    switch (Where)
+    {
+    case SEEK_CUR:
+        Target = (long)FramesRead + FramePos;
+        break;
+    case SEEK_END:
+        Target = (long)TotalFrames + FramePos;
+        break;
+    default:
+        Target = FramePos;
+        break;
+    }
+
+    if (Target < 0)
+        return;
+
+    if (fseek(File, (long)FrameStartPos + Target * FrameBytes, SEEK_SET) == 0)
+    {
+        FramesRead = (unsigned long)Target;
+    }
+}
+
+/* Channel count of the stream. */
+int CWaveFile::GetChannels()
+{
+ return Channels;
+}
+
+/* Set the channel count that SetupDone() stores in the header. */
+void CWaveFile::SetChannels(int Channels)
+{
+ this->Channels = Channels;
+}
+
+/* Set the sample format; SetupDone() derives SampleSize from it. */
+void CWaveFile::SetSampleFormat(SAMPLE_FORMAT Format)
+{
+ this->Format = Format;
+}
+
+/* Channel mask of the stream (0 when none is set). */
+uint32_t CWaveFile::GetChannelMask()
+{
+ return ChannelMask;
+}
+
+/* Set the channel mask; a non-zero mask makes SetupDone() write the extensible header. */
+void CWaveFile::SetChannelMask(uint32_t Mask)
+{
+ ChannelMask = Mask;
+}
+
+/* Read Frames frames and deliver them as interleaved signed 16-bit samples.
+ *
+ * FrameSamples - output buffer with room for Frames * Channels samples.
+ * Frames       - number of frames to read.
+ *
+ * Returns false when all frames were already consumed, when the file ends
+ * before the request is satisfied, or for an unknown format. FramesRead is
+ * advanced by Frames before any data is read.
+ * Floating-point input is scaled by 32768 and clamped to the 16-bit range.
+ */
+bool CWaveFile::ReadFrameAsS16(short* FrameSamples, int Frames)
+{
+    if (FramesRead >= TotalFrames)
+        return false;
+
+    FramesRead += Frames;
+
+    // Samples are interleaved, so frame/channel order equals linear order.
+    const long SampleCount = (long)Frames * Channels;
+
+    switch (Format)
+    {
+    case SF_U8:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            short DirectSample = 0;
+            if (1 != fread(&DirectSample, 1, 1, File))
+                return false;
+            FrameSamples[i] = (DirectSample - 128) * 256;
+        }
+        return true;
+    }
+    case SF_S16:
+        return (size_t)Frames == fread(FrameSamples, sizeof(FrameSamples[0])*Channels, Frames, File);
+    case SF_S24:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            unsigned int DirectSample = 0;
+            if (1 != fread(&DirectSample, 3, 1, File))
+                return false;
+            // Keep the upper 16 of the 24 bits.
+            FrameSamples[i] = (short)(unsigned short)(DirectSample >> 8);
+        }
+        return true;
+    }
+    case SF_S32:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            unsigned int DirectSample = 0;
+            if (1 != fread(&DirectSample, 4, 1, File))
+                return false;
+            // Keep the upper 16 of the 32 bits.
+            FrameSamples[i] = (short)(unsigned short)(DirectSample >> 16);
+        }
+        return true;
+    }
+    case SF_IEEE_FLOAT:
+    {
+        // Convert through a small fixed buffer, at most 32 samples per read,
+        // so any Frames * Channels is safe.
+        float Chunk[32];
+        long Done = 0;
+
+        while (Done < SampleCount)
+        {
+            size_t Want = (size_t)(SampleCount - Done);
+            if (Want > 32)
+                Want = 32;
+            if (Want != fread(Chunk, sizeof(Chunk[0]), Want, File))
+                return false;
+
+            for (size_t k = 0; k < Want; k++)
+            {
+                double Scaled = Chunk[k] * 32768.0;
+                if (Scaled > 32767.0)
+                    Scaled = 32767.0;
+                else if (Scaled < -32768.0)
+                    Scaled = -32768.0;
+                FrameSamples[Done + (long)k] = (short)Scaled;
+            }
+            Done += (long)Want;
+        }
+        return true;
+    }
+    case SF_IEEE_DOUBLE:
+    {
+        double Chunk[32];
+        long Done = 0;
+
+        while (Done < SampleCount)
+        {
+            size_t Want = (size_t)(SampleCount - Done);
+            if (Want > 32)
+                Want = 32;
+            if (Want != fread(Chunk, sizeof(Chunk[0]), Want, File))
+                return false;
+
+            for (size_t k = 0; k < Want; k++)
+            {
+                double Scaled = Chunk[k] * 32768.0;
+                if (Scaled > 32767.0)
+                    Scaled = 32767.0;
+                else if (Scaled < -32768.0)
+                    Scaled = -32768.0;
+                FrameSamples[Done + (long)k] = (short)Scaled;
+            }
+            Done += (long)Want;
+        }
+        return true;
+    }
+    }
+    return false;
+}
+
+/* Read Frames frames and deliver them as interleaved float samples.
+ *
+ * FrameSamples - output buffer with room for Frames * Channels samples.
+ * Frames       - number of frames to read.
+ *
+ * Integer input is scaled to roughly [-1, 1); float input is copied as is and
+ * double input is narrowed to float.
+ * Returns false when all frames were already consumed, when reading fails, or
+ * for an unknown format. FramesRead is advanced by Frames before any data is
+ * read.
+ */
+bool CWaveFile::ReadFrameAsfloat(float* FrameSamples, int Frames)
+{
+    if (FramesRead >= TotalFrames)
+        return false;
+
+    FramesRead += Frames;
+
+    // Samples are interleaved, so frame/channel order equals linear order.
+    const long SampleCount = (long)Frames * Channels;
+
+    switch (Format)
+    {
+    case SF_U8:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            short DirectSample = 0;
+            if (1 != fread(&DirectSample, 1, 1, File))
+                return false;
+            FrameSamples[i] = (DirectSample - 128) / 128.0;
+        }
+        return true;
+    }
+    case SF_S16:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            short DirectSample = 0;
+            if (1 != fread(&DirectSample, 2, 1, File))
+                return false;
+            FrameSamples[i] = DirectSample / 32768.0;
+        }
+        return true;
+    }
+    case SF_S24:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            uint32_t DirectSample = 0;
+            if (1 != fread(&DirectSample, 3, 1, File))
+                return false;
+            // Shift the 24-bit value to the top of a 32-bit word to recover
+            // its sign, then scale by 2^31.
+            FrameSamples[i] = ((int32_t)((uint32_t)(DirectSample << 8))) / 2147483648.0;
+        }
+        return true;
+    }
+    case SF_S32:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            uint32_t DirectSample = 0;
+            if (1 != fread(&DirectSample, 4, 1, File))
+                return false;
+            FrameSamples[i] = ((int32_t)DirectSample) / 2147483648.0;
+        }
+        return true;
+    }
+    case SF_IEEE_FLOAT:
+    {
+        // NOTE(review): unlike the other cases this reports success when at
+        // least one frame was read, even if fewer than Frames arrived -
+        // kept as is; confirm whether callers rely on it.
+        if(fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File))
+        {
+            return true;
+        }
+        return false;
+    }
+    case SF_IEEE_DOUBLE:
+    {
+        // The file stores 8-byte doubles, so they cannot be read straight
+        // into the float output; convert through a small fixed buffer.
+        double Chunk[32];
+        long Done = 0;
+
+        while (Done < SampleCount)
+        {
+            size_t Want = (size_t)(SampleCount - Done);
+            if (Want > 32)
+                Want = 32;
+            if (Want != fread(Chunk, sizeof(Chunk[0]), Want, File))
+                return false;
+
+            for (size_t k = 0; k < Want; k++)
+            {
+                FrameSamples[Done + (long)k] = (float)Chunk[k];
+            }
+            Done += (long)Want;
+        }
+        return true;
+    }
+    }
+    return false;
+}
+
+/* Read Frames frames and deliver them as interleaved double samples.
+ *
+ * FrameSamples - output buffer with room for Frames * Channels samples.
+ * Frames       - number of frames to read.
+ *
+ * Integer input is scaled to roughly [-1, 1); float input is widened and
+ * double input is copied as is.
+ * Returns false when all frames were already consumed, when the file ends
+ * before the request is satisfied, or for an unknown format. FramesRead is
+ * advanced by Frames before any data is read.
+ */
+bool CWaveFile::ReadFrameAsDouble(double* FrameSamples, int Frames)
+{
+    if (FramesRead >= TotalFrames)
+        return false;
+
+    FramesRead += Frames;
+
+    // Samples are interleaved, so frame/channel order equals linear order.
+    const long SampleCount = (long)Frames * Channels;
+
+    switch (Format)
+    {
+    case SF_U8:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            short DirectSample = 0;
+            if (1 != fread(&DirectSample, 1, 1, File))
+                return false;
+            FrameSamples[i] = (DirectSample - 128) / 128.0;
+        }
+        return true;
+    }
+    case SF_S16:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            short DirectSample = 0;
+            if (1 != fread(&DirectSample, 2, 1, File))
+                return false;
+            FrameSamples[i] = DirectSample / 32768.0;
+        }
+        return true;
+    }
+    case SF_S24:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            uint32_t DirectSample = 0;
+            if (1 != fread(&DirectSample, 3, 1, File))
+                return false;
+            // Shift the 24-bit value to the top of a 32-bit word to recover
+            // its sign, then scale by 2^31.
+            FrameSamples[i] = ((int32_t)((uint32_t)(DirectSample << 8))) / 2147483648.0;
+        }
+        return true;
+    }
+    case SF_S32:
+    {
+        for (long i = 0; i < SampleCount; i++)
+        {
+            uint32_t DirectSample = 0;
+            if (1 != fread(&DirectSample, 4, 1, File))
+                return false;
+            FrameSamples[i] = ((int32_t)DirectSample) / 2147483648.0;
+        }
+        return true;
+    }
+    case SF_IEEE_FLOAT:
+    {
+        // Convert through a small fixed buffer, at most 32 samples per read,
+        // so any Frames * Channels is safe.
+        float Chunk[32];
+        long Done = 0;
+
+        while (Done < SampleCount)
+        {
+            size_t Want = (size_t)(SampleCount - Done);
+            if (Want > 32)
+                Want = 32;
+            if (Want != fread(Chunk, sizeof(Chunk[0]), Want, File))
+                return false;
+
+            for (size_t k = 0; k < Want; k++)
+            {
+                FrameSamples[Done + (long)k] = (double)Chunk[k];
+            }
+            Done += (long)Want;
+        }
+        return true;
+    }
+    case SF_IEEE_DOUBLE:
+    {
+        if ((size_t)Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File))
+        {
+            return true;
+        }
+        return false;
+    }
+    }
+    return false;
+}
+
+/* Write Size bytes from Raw to the file, unchanged. */
+void CWaveFile::WriteRaw(void* Raw, int Size)
+{
+ fwrite(Raw, Size, 1, File);
+}
+
+
+/* Write Frames frames of 8-bit samples (Channels samples per frame), unchanged. */
+void CWaveFile::WriteFrame(uint8_t* FrameSamples, int Frames)
+{
+ fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+/* Write Frames frames of 16-bit samples (Channels samples per frame), unchanged. */
+void CWaveFile::WriteFrame(short* FrameSamples, int Frames)
+{
+ fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+/* Write Frames frames of 32-bit samples (Channels samples per frame), unchanged. */
+void CWaveFile::WriteFrame(int32_t* FrameSamples, int Frames)
+{
+ fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+/* Write frames of 24-bit samples.
+ *
+ * FrameSamples - Frames * Channels interleaved samples, one per int32_t; the
+ *                low 24 bits of each are written.
+ * Frames       - number of frames to write.
+ *
+ * Each sample is emitted as three bytes, least significant first.
+ */
+void CWaveFile::WriteFrameS24(int32_t* FrameSamples, int Frames)
+{
+    const long SampleCount = (long)Frames * Channels;
+
+    for (long i = 0; i < SampleCount; i++)
+    {
+        const uint32_t Value = (uint32_t)FrameSamples[i];
+        uint8_t Bytes[3];
+
+        Bytes[0] = (uint8_t)Value;
+        Bytes[1] = (uint8_t)(Value >> 8);
+        Bytes[2] = (uint8_t)(Value >> 16);
+        fwrite(Bytes, 1, 3, File);
+    }
+}
+
+/* Write Frames frames of double samples (Channels samples per frame), unchanged. */
+void CWaveFile::WriteFrame(double* FrameSamples, int Frames)
+{
+ fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+/* Write Frames frames of float samples (Channels samples per frame), unchanged. */
+void CWaveFile::WriteFrame(float* FrameSamples, int Frames)
+{
+ fwrite(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+}
+
+
+/* Duration of the audio in seconds.
+ *
+ * For a successfully opened input file this is the frame count divided by
+ * the sample rate. In every other case (output files, failed opens, a sample
+ * rate of 0) the stored Duration member is returned.
+ */
+double CWaveFile::GetDuration()
+{
+    if (m_bOK && ReadOnly && SampleRate > 0)
+    {
+        return (double)TotalFrames / (double)SampleRate;
+    }
+    return Duration;
+}

File Metadata

Mime Type
text/x-diff
Expires
Sun, Jan 12, 08:33 (1 d, 15 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1346411
Default Alt Text
(141 KB)

Event Timeline