Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F4880324
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
141 KB
Subscribers
None
View Options
diff --git a/AIMeiSheng/RawNet3/infererence_fang_meisheng.py b/AIMeiSheng/RawNet3/infererence_fang_meisheng.py
index 471f92a..5612582 100644
--- a/AIMeiSheng/RawNet3/infererence_fang_meisheng.py
+++ b/AIMeiSheng/RawNet3/infererence_fang_meisheng.py
@@ -1,269 +1,270 @@
import argparse
import itertools
import os
import sys
from typing import Dict
import numpy as np
import soundfile as sf
import torch
import torch.nn.functional as F
from tqdm import tqdm
from models.RawNet3 import RawNet3
from models.RawNetBasicBlock import Bottle2neck
from utils import tuneThresholdfromScore, ComputeErrorRates, ComputeMinDcf
#model_directory = '/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3'
#sys.path.append(os.path.abspath(model_directory))
-def get_embed_model():
+def get_embed_model(model_path):
model = RawNet3(
Bottle2neck,
model_scale=8,
context=True,
summed=True,
encoder_type="ECA",
nOut=256,
out_bn=False,
sinc_stride=10,
log_sinc=True,
norm_sinc="mean",
grad_mult=1,
)
model.load_state_dict(
torch.load(
- "/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3/models/weights/model.pt",
+ model_path,
+ # "/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3/models/weights/model.pt",
map_location=lambda storage, loc: storage,
)["model"]
)
model.eval()
return model
def main(args: Dict, model=None) -> None:
if model == None:
model = RawNet3(
Bottle2neck,
model_scale=8,
context=True,
summed=True,
encoder_type="ECA",
nOut=256,
out_bn=False,
sinc_stride=10,
log_sinc=True,
norm_sinc="mean",
grad_mult=1,
)
model.load_state_dict(
torch.load(
"./models/weights/model.pt",
map_location=lambda storage, loc: storage,
)["model"]
)
model.eval()
# gpu = False
gpu = True if torch.cuda.is_available() else False
#print("RawNet3 initialised & weights loaded!")
if torch.cuda.is_available():
#print("Cuda available, conducting inference on GPU")
model = model.to("cuda")
gpu = True
if args.inference_utterance:
output = extract_speaker_embd(
model,
fn=args.input,
n_samples=48000,
n_segments=args.n_segments,
gpu=gpu,
).mean(0)
#print("embead shape:", output.size())
np.save(args.out_dir, output.detach().cpu().numpy())
return
if args.vox1_o_benchmark:
with open("../../trials/cleaned_test_list.txt", "r") as f:
trials = f.readlines()
## Get a list of unique file names
files = list(itertools.chain(*[x.strip().split()[-2:] for x in trials]))
setfiles = list(set(files))
setfiles.sort()
embd_dic = {}
for f in tqdm(setfiles):
embd_dic[f] = extract_speaker_embd(
model, os.path.join(args.DB_dir, f), n_samples=64000, gpu=gpu
)
labels, scores = [], []
for line in trials:
data = line.split()
ref_feat = F.normalize(embd_dic[data[1]], p=2, dim=1)
com_feat = F.normalize(embd_dic[data[2]], p=2, dim=1)
if gpu:
ref_feat = ref_feat.cuda()
com_feat = com_feat.cuda()
dist = (
torch.cdist(
ref_feat.reshape((args.n_segments, -1)),
com_feat.reshape((args.n_segments, -1)),
)
.detach()
.cpu()
.numpy()
)
score = -1.0 * np.mean(dist)
labels.append(int(data[0]))
scores.append(score)
result = tuneThresholdfromScore(scores, labels, [1, 0.1])
fnrs, fprs, thresholds = ComputeErrorRates(scores, labels)
p_target, c_miss, c_fa = 0.05, 1, 1
mindcf, _ = ComputeMinDcf(
fnrs, fprs, thresholds, p_target, c_miss, c_fa
)
print(
"Vox1-O benchmark Finished. EER: %2.4f, minDCF:%.5f"
% (result[1], mindcf)
)
import librosa
def extract_speaker_embd(
model, fn: str, n_samples: int, n_segments: int = 10, gpu: bool = False
) -> np.ndarray:
#audio, sample_rate = sf.read(fn)
audio, sample_rate = librosa.load(fn,sr=16000) ##fang add
if len(audio.shape) > 1:
raise ValueError(
f"RawNet3 supports mono input only. Input data has a shape of {audio.shape}."
)
if sample_rate != 16000:
raise ValueError(
f"RawNet3 supports 16k sampling rate only. Input data's sampling rate is {sample_rate}."
)
if (
len(audio) < n_samples
): # RawNet3 was trained using utterances of 3 seconds
shortage = n_samples - len(audio) + 1
audio = np.pad(audio, (0, shortage), "wrap")
audios = []
startframe = np.linspace(0, len(audio) - n_samples, num=n_segments)
for asf in startframe:
audios.append(audio[int(asf) : int(asf) + n_samples])
audios = torch.from_numpy(np.stack(audios, axis=0).astype(np.float32))
if gpu:
audios = audios.to("cuda")
with torch.no_grad():
output = model(audios)
return output
def get_embed(target_wav, embed_npy, model=None):
    """Extract the speaker embedding of one utterance and save it as a .npy file.

    Builds the same argument namespace as the command-line entry point,
    with utterance inference enabled by default, and delegates to ``main``.

    :param target_wav: path of the audio file to embed.
    :param embed_npy: output path handed to ``np.save`` by ``main``.
    :param model: optional preloaded RawNet3 model; when ``None`` ``main``
        loads the weights itself.
    """
    parser = argparse.ArgumentParser(description="RawNet3 inference")
    parser.add_argument(
        "--inference_utterance", default=True, action="store_true"
    )
    parser.add_argument(
        "--input",
        type=str,
        default="",
        help="Input file to extract embedding. Required when 'inference_utterance' is True",
    )
    parser.add_argument(
        "--vox1_o_benchmark", default=False, action="store_true"
    )
    parser.add_argument(
        "--DB_dir",
        type=str,
        default="",
        help="Directory for VoxCeleb1. Required when 'vox1_o_benchmark' is True",
    )
    parser.add_argument("--out_dir", type=str, default="./out.npy")
    parser.add_argument(
        "--n_segments",
        type=int,
        default=10,
        help="number of segments to make using each utterance",
    )
    # This function is a library entry point: it runs inside whatever process
    # imported it (e.g. the HTTP server). parse_args() would abort that process
    # on any command-line flag this parser does not know, so unknown flags are
    # tolerated here while recognised ones are still honoured.
    args, _unknown = parser.parse_known_args()
    args.input = target_wav
    args.out_dir = embed_npy

    assert args.inference_utterance or args.vox1_o_benchmark
    if args.inference_utterance:
        assert args.input != ""
    if args.vox1_o_benchmark:
        assert args.DB_dir != ""

    main(args, model)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="RawNet3 inference")
parser.add_argument(
"--inference_utterance", default=False, action="store_true"
)
parser.add_argument(
"--input",
type=str,
default="",
help="Input file to extract embedding. Required when 'inference_utterance' is True",
)
parser.add_argument(
"--vox1_o_benchmark", default=False, action="store_true"
)
parser.add_argument(
"--DB_dir",
type=str,
default="",
help="Directory for VoxCeleb1. Required when 'vox1_o_benchmark' is True",
)
parser.add_argument("--out_dir", type=str, default="./out.npy")
parser.add_argument(
"--n_segments",
type=int,
default=10,
help="number of segments to make using each utterance",
)
args = parser.parse_args()
assert args.inference_utterance or args.vox1_o_benchmark
if args.inference_utterance:
assert args.input != ""
if args.vox1_o_benchmark:
assert args.DB_dir != ""
sys.exit(main(args))
diff --git a/AIMeiSheng/docker_demo/.requirements.txt.swp b/AIMeiSheng/docker_demo/.requirements.txt.swp
deleted file mode 100644
index 1adaec3..0000000
Binary files a/AIMeiSheng/docker_demo/.requirements.txt.swp and /dev/null differ
diff --git a/AIMeiSheng/docker_demo/common.py b/AIMeiSheng/docker_demo/common.py
new file mode 100644
index 0000000..6a31932
--- /dev/null
+++ b/AIMeiSheng/docker_demo/common.py
@@ -0,0 +1,52 @@
+import os
+import time
+import logging
+import urllib, urllib.request
+
+
+def download2disk(url, dst_path):
+ st = time.time()
+ urllib.request.urlretrieve(url, dst_path)
+ print(f"download {url} -> {dst_path} sp = {time.time() - st}")
+ return os.path.exists(dst_path)
+
+
+def exec_cmd(cmd):
+ # gs_logger.info(cmd)
+ print(cmd)
+ ret = os.system(cmd)
+ if ret != 0:
+ return False
+ return True
+
+
+def exec_cmd_and_result(cmd):
+ r = os.popen(cmd)
+ text = r.read()
+ r.close()
+ return text
+
+
+def upload_file2cos(key, file_path, region='ap-singapore', bucket_name='av-audit-sync-sg-1256122840'):
+ """
+ 将文件上传到cos
+ :param key: 桶上的具体地址
+ :param file_path: 本地文件地址
+ :param region: 区域
+ :param bucket_name: 桶地址
+ :return:
+ """
+ gs_coscmd = "coscmd"
+ gs_coscmd_conf = "~/.cos.conf"
+
+ cmd = "{} -c {} -r {} -b {} upload {} {}".format(gs_coscmd, gs_coscmd_conf, region, bucket_name, file_path, key)
+ if exec_cmd(cmd):
+ cmd = "{} -c {} -r {} -b {} info {}".format(gs_coscmd, gs_coscmd_conf, region, bucket_name, key) \
+ + "| grep Content-Length |awk \'{print $2}\'"
+ res_str = exec_cmd_and_result(cmd)
+ logging.info("{},res={}".format(key, res_str))
+ size = float(res_str)
+ if size > 0:
+ return True
+ return False
+ return False
diff --git a/AIMeiSheng/docker_demo/http_server.py b/AIMeiSheng/docker_demo/http_server.py
new file mode 100644
index 0000000..23ac0ba
--- /dev/null
+++ b/AIMeiSheng/docker_demo/http_server.py
@@ -0,0 +1,128 @@
+# -*- coding: UTF-8 -*-
+
+"""
+SVC处理逻辑
+1. 根据跟定的vocal_url 判别男女
+2. 根据男女信息选择适合的男女url
+3. 模型推理
+"""
+
+import gc
+import os
+import shutil
+import sys
+import time
+import logging
+import hashlib
+import numpy as np
+import multiprocessing as mp
+from multiprocessing import Pool
+from flask import Flask, jsonify, request, abort
+from common import download2disk, exec_cmd, upload_file2cos
+from svc_online import GSWorkerAttr, SVCOnline, volume_adjustment
+
+# 全局设置
+import socket
+
+hostname = socket.gethostname()
+log_file_name = f"av_svc_{hostname}.log"
+logging.basicConfig(filename=log_file_name, format='%(asctime)s %(levelname)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S',
+ level=logging.INFO)
+
+# errcode
+gs_err_code_success = 0
+gs_err_code_download_vocal = 100
+gs_err_code_download_svc_url = 101
+gs_err_code_svc_process = 102
+gs_err_code_transcode = 103
+gs_err_code_volume_adjust = 104
+gs_err_code_upload = 105
+
+sys.path.append(os.path.dirname(__file__))
+sys.path.append(os.path.join(os.path.dirname(__file__), "../"))
+
+app = Flask(__name__)
+
+
+def download_data(worker_attr):
+ vocal_path = os.path.join(worker_attr.tmp_dir, worker_attr.distinct_id)
+ if os.path.exists(vocal_path):
+ os.remove(vocal_path)
+
+ st = time.time()
+ if not download2disk(worker_attr.vocal_url, worker_attr.vocal_path):
+ return gs_err_code_download_vocal
+ logging.info(f"download vocal_url={worker_attr.vocal_url} sp = {time.time() - st}")
+
+ # download svc_source_url
+ if not os.path.exists(worker_attr.female_svc_source_path):
+ st = time.time()
+ if not download2disk(worker_attr.female_svc_source_url, worker_attr.female_svc_source_path):
+ return gs_err_code_download_svc_url
+ logging.info(f"download female_url={worker_attr.female_svc_source_url} sp = {time.time() - st}")
+
+ # download svc_source_url
+ if not os.path.exists(worker_attr.male_svc_source_path):
+ st = time.time()
+ if not download2disk(worker_attr.male_svc_source_url, worker_attr.male_svc_source_path):
+ return gs_err_code_download_svc_url
+ logging.info(f"download male_url={worker_attr.male_svc_source_url} sp = {time.time() - st}")
+ return gs_err_code_success
+
+
+def transcode(wav_path, dst_path):
+ st = time.time()
+ cmd = f"ffmpeg -i {wav_path} -ar 44100 -ac 2 -b:a 64k -y {dst_path} -loglevel fatal"
+ exec_cmd(cmd)
+ logging.info(f"transcode cmd={cmd}, sp = {time.time() - st}")
+ return os.path.exists(dst_path)
+
+
+gs_svc_online = None
+
+
+def process_one(input_data):
+ logging.info(f"start input={input_data} start prepare data ...")
+ worker_attr = GSWorkerAttr(input_data)
+ err = download_data(worker_attr)
+ if err != gs_err_code_success:
+ return err, None
+
+ # process audio
+ global gs_svc_online
+ if gs_svc_online is None:
+ gs_svc_online = SVCOnline()
+ gs_svc_online.process(worker_attr)
+ if not os.path.exists(worker_attr.target_wav_path):
+ return gs_err_code_svc_process, None
+
+ # 音量拉伸到指定响度
+ volume_adjustment(worker_attr.target_wav_path, worker_attr.target_loudness, worker_attr.target_wav_ad_path)
+ if not os.path.exists(worker_attr.target_wav_ad_path):
+ return gs_err_code_volume_adjust, None
+
+ # transcode
+ if not transcode(worker_attr.target_wav_path, worker_attr.target_path):
+ return gs_err_code_transcode, None
+
+ # upload
+ st = time.time()
+ if upload_file2cos(worker_attr.target_url, worker_attr.target_path):
+ return gs_err_code_upload, None
+ logging.info(f"audio_url={worker_attr.vocal_url} upload {worker_attr.target_url} sp = {time.time() - st}")
+ return gs_err_code_success, worker_attr.target_path
+
+
+@app.route("/ai_meisheng", methods=["POST"])
+def get_song_res():
+ data = request.json
+ st = time.time()
+ logging.info(f"ai_meisheng:in:{data}")
+ ret, url = process_one(data)
+ all_ret_msg = jsonify({"out_url": url, "ret": ret})
+ logging.info(f"ai_meisheng:out:{data}-{all_ret_msg}, sp={time.time() - st}")
+ return all_ret_msg
+
+
+if __name__ == "__main__":
+ app.run(host='0.0.0.0', port=5000, threaded=False)
diff --git a/AIMeiSheng/docker_demo/main.py b/AIMeiSheng/docker_demo/main.py
deleted file mode 100644
index 094c2fc..0000000
--- a/AIMeiSheng/docker_demo/main.py
+++ /dev/null
@@ -1,12 +0,0 @@
-import gradio as gr
-
-def greet(name):
- return "Hello " + name + "!!"
-
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-
-if __name__ == "__main__":
- demo.launch(server_name="0.0.0.0")
- # 注意:gradio启动项目后默认地址为127.0.0.1;使用docker部署需要将地址修改为0.0.0.0,否则会导致地址访问错误
- # 默认端口为7860,如需更改可在launch()中设置server_port=7000
-~
diff --git a/AIMeiSheng/docker_demo/svc_online.py b/AIMeiSheng/docker_demo/svc_online.py
new file mode 100644
index 0000000..f952346
--- /dev/null
+++ b/AIMeiSheng/docker_demo/svc_online.py
@@ -0,0 +1,162 @@
+# -*- coding: UTF-8 -*-
+"""
+SVC的核心处理逻辑
+"""
+import os
+import shutil
+import hashlib
+import time
+
+from AIMeiSheng.meisheng_svc_final import get_svc, process_svc
+from AIMeiSheng.voice_classification.online.voice_class_online_fang import VoiceClass
+from AIMeiSheng.RawNet3.infererence_fang_meisheng import get_embed, get_embed_model
+from AIMeiSheng.myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main, load_hubert, get_vc, get_rmvpe
+
+from AIMeiSheng.docker_demo.common import *
+
+gs_resource_cache_dir = "/tmp/gs_svc_resource_cache"
+gs_tmp_dir = "/tmp/gs_svc_tmp"
+gs_model_dir = "/tmp/models"
+
+if os.path.exists(gs_tmp_dir):
+ shutil.rmtree(gs_tmp_dir)
+os.makedirs(gs_model_dir, exist_ok=True)
+
+# 预设参数
+gs_gender_models_url = "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/hub/voice_classification/models.zip"
+gs_svc_emb_url = ""
+gs_svc_model_url = ""
+gs_volume_bin_url = "https://av-audit-sync-in-1256122840.cos.ap-mumbai.myqcloud.com/dataset/AIMeiSheng/ebur128_tool"
+
+
+class GSWorkerAttr:
+ def __init__(self, input_data):
+ vocal_url = input_data["vocal_url"]
+ female_svc_source_url = input_data["female_svc_url"]
+ male_svc_source_url = input_data["male_svc_url"]
+ st_tm = input_data["st_tm"] # 单位是s
+ ed_tm = input_data["ed_tm"] # 单位是s
+
+ self.distinct_id = hashlib.md5(vocal_url.encode()).hexdigest()
+ self.vocal_url = vocal_url
+ self.target_url = input_data["target_url"]
+
+ ext = vocal_url.split(".")[-1]
+ self.vocal_path = os.path.join(gs_tmp_dir, self.distinct_id + f"_in.{ext}")
+ self.target_wav_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.wav")
+ self.target_wav_ad_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out_ad.wav")
+ self.target_path = os.path.join(gs_tmp_dir, self.distinct_id + "_out.m4a")
+
+ self.female_svc_source_url = female_svc_source_url
+ self.male_svc_source_url = male_svc_source_url
+
+ ext = female_svc_source_url.split(".")[-1]
+ self.female_svc_source_path = hashlib.md5(female_svc_source_url.encode()).hexdigest() + "." + ext
+ ext = male_svc_source_url.split(".")[-1]
+ self.male_svc_source_path = hashlib.md5(male_svc_source_url.encode()).hexdigest() + "." + ext
+ self.st_tm = st_tm
+ self.ed_tm = ed_tm
+ self.target_loudness = input_data["target_loudness"]
+
+ self.tmp_dir = os.path.join(gs_tmp_dir, self.distinct_id)
+ if os.path.exists(self.tmp_dir):
+ shutil.rmtree(self.tmp_dir)
+ os.makedirs(self.tmp_dir)
+
+ def __del__(self):
+ if os.path.exists(self.tmp_dir):
+ shutil.rmtree(self.tmp_dir)
+
+
+def init_gender_model():
+ """
+ 下载模型
+ :return:
+ """
+ dst_model_dir = os.path.join(gs_model_dir, "voice_classification")
+ if not os.path.exists(dst_model_dir):
+ dst_zip_path = os.path.join(gs_model_dir, "models.zip")
+ if not download2disk(gs_gender_models_url, dst_zip_path):
+ logging.fatal(f"download gender_model err={gs_gender_models_url}")
+ cmd = f"cd {gs_model_dir}; unzip {dst_zip_path}; mv models voice_classification; rm -f {dst_zip_path}"
+ os.system(cmd)
+ if not os.path.exists(dst_model_dir):
+ logging.fatal(f"unzip {dst_zip_path} err")
+
+ music_voice_pure_model = os.path.join(dst_model_dir, "voice_005_rec_v5.pth")
+ music_voice_no_pure_model = os.path.join(dst_model_dir, "voice_10_v5.pth")
+ gender_pure_model = os.path.join(dst_model_dir, "gender_8k_ratev5_v6_adam.pth")
+ gender_no_pure_model = os.path.join(dst_model_dir, "gender_8k_v6_adam.pth")
+ vc = VoiceClass(music_voice_pure_model, music_voice_no_pure_model, gender_pure_model, gender_no_pure_model)
+ return vc
+
+
+def init_svc_model():
+ emb_model_path = os.path.join(gs_model_dir, "RawNet3_weights.pt")
+ if not os.path.exists(emb_model_path):
+ if not download2disk(gs_svc_emb_url, emb_model_path):
+ logging.fatal(f"download svc_emb_model err={gs_svc_emb_url}")
+ embed_model = get_embed_model(emb_model_path)
+ hubert_model = load_hubert()
+
+ svc_filename = gs_svc_model_url.split("/")[-1]
+ svc_model_path = os.path.join(gs_model_dir, svc_filename)
+ if not os.path.exists(svc_model_path):
+ if not download2disk(gs_svc_model_url, svc_model_path):
+ logging.fatal(f"download svc_model err={gs_svc_model_url}")
+
+ # 此处内部会生成全局模型
+ get_vc(svc_model_path)
+ return embed_model, hubert_model
+
+
+def volume_adjustment(wav_path, target_loudness, out_path):
+ """
+ 音量调整
+ :param wav_path:
+ :param target_loudness:
+ :param out_path:
+ :return:
+ """
+ volume_bin_path = os.path.join(gs_model_dir, "ebur128_tool")
+ if not os.path.exists(volume_bin_path):
+ if not download2disk(gs_volume_bin_url, volume_bin_path):
+ logging.fatal(f"download volume_bin err={gs_volume_bin_url}")
+ cmd = f"{volume_bin_path} {wav_path} {target_loudness} {out_path}"
+ os.system(cmd)
+
+
+class SVCOnline:
+
+ def __init__(self):
+ st = time.time()
+ self.gender_model = init_gender_model()
+ self.embed_model, self.hubert_model = init_svc_model()
+ logging.info(f"svc init finished, sp = {time.time() - st}")
+
+ def gender_process(self, worker_attr):
+ st = time.time()
+ gender, female_rate, is_pure = self.gender_model.process(worker_attr.vocal_path)
+ logging.info(
+ f"{worker_attr.vocal_url}, gender={gender}, female_rate={female_rate}, is_pure={is_pure}, "
+ f"gender_process sp = {time.time() - st}")
+ if gender == 0:
+ gender = 'female'
+ elif gender == 1:
+ gender = 'male'
+ elif female_rate > 0.5:
+ gender = 'female'
+ else:
+ gender = 'male'
+ logging.info(f"{worker_attr.vocal_url}, modified gender={gender}")
+ return gender
+
+ def process(self, worker_attr):
+ gender = self.gender_process(worker_attr)
+ song_path = worker_attr.female_svc_source_path
+ if gender == "male":
+ song_path = worker_attr.male_svc_source_path
+ params = {'gender': gender, 'tst': worker_attr.st_ms, "tnd": worker_attr.ed_tm, 'delay': 0, 'song_path': None}
+ st = time.time()
+ similar = process_svc(song_path, worker_attr.vocal_path, worker_attr.target_wav_path, params)
+ logging.info(f"{worker_attr.vocal_url}, similar={similar} process svc sp = {time.time() - st}")
diff --git a/AIMeiSheng/meisheng_svc_final.py b/AIMeiSheng/meisheng_svc_final.py
index 6359fb9..e5a6b3f 100644
--- a/AIMeiSheng/meisheng_svc_final.py
+++ b/AIMeiSheng/meisheng_svc_final.py
@@ -1,212 +1,215 @@
import os,sys
import time
import shutil
import glob
import hashlib
import librosa
import soundfile
import gradio as gr
import pandas as pd
import numpy as np
sys.path.append('./RawNet3/')
from infererence_fang_meisheng import get_embed, get_embed_model
from myinfer_multi_spk_embed_in_dec_diff_fi_meisheng import svc_main,load_hubert, get_vc, get_rmvpe
from gender_classify import load_gender_model
gs_simple_mixer_path = "/data/gpu_env_common/bin/simple_mixer" ##混音执行文件
tmp_workspace_name = "batch_test_ocean_fi"#工作空间名
song_folder = "./data_meisheng/" ##song folder
gs_work_dir = f"./data_meisheng/{tmp_workspace_name}" #工作空间路径
pth_model_path = "./weights/xusong_v2_org_version_alldata_embed1_enzx_diff_fi_e15_s244110.pth" ##模型文件
cur_dir = os.path.abspath(os.path.dirname(__file__))
-abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/'
-
+abs_path = os.path.join(cur_dir,song_folder,tmp_workspace_name) + '/'
+f0_method = None
def mix(in_path, acc_path, dst_path):
# svc转码到442
svc_442_file = in_path + "_442.wav"
st = time.time()
cmd = "ffmpeg -i {} -ar 44100 -ac 2 -y {} -loglevel fatal".format(in_path, svc_442_file)
os.system(cmd)
if not os.path.exists(svc_442_file):
return -1
print("transcode,{},sp={}".format(in_path, time.time() - st))
# 混合
st = time.time()
cmd = "{} {} {} {} 1".format(gs_simple_mixer_path, svc_442_file, acc_path, dst_path)
os.system(cmd)
print("mixer,{},sp={}".format(in_path, time.time() - st))
def load_model():
    """Preload every model used by the SVC pipeline.

    Loads the speaker-embedding model and the hubert model, calls ``get_vc``
    with the checkpoint path, and stores the rmvpe pitch extractor in the
    module-level ``f0_method``.

    :return: (embed_model, hubert_model)
    """
    global f0_method
    # get_embed_model() requires the weights path as an argument. Pass the
    # location that used to be hard-coded inside it so this loader keeps
    # reading the same file.
    embed_model = get_embed_model(
        "/data/bingxiao.fang/speaker_identify/RawNet/python/RawNet3/models/weights/model.pt"
    )
    hubert_model = load_hubert()
    get_vc(pth_model_path)
    f0_method = get_rmvpe()
    print("model preload finish!!!")
    return embed_model, hubert_model
embed_model, hubert_model = load_model() ##提前加载模型
gender_model = load_gender_model()
def pyin_process_single_rmvpe(input_file):
global f0_method
+ if f0_method is None:
+ f0_method = get_rmvpe()
+
rate = 16000 #44100
# 读取音频文件
y, sr = librosa.load(input_file, sr=rate)
len_s = len(y)/sr
lim_s = 15 #10
if(len_s > lim_s):
y1 = y[:sr*lim_s]
y2 = y[-sr*lim_s:]
f0 = f0_method.infer_from_audio(y1, thred=0.03)
f0 = f0[f0 < 600]
valid_f0 = f0[f0 > 50]
mean_pitch1 = np.mean(valid_f0)
f0 = f0_method.infer_from_audio(y2, thred=0.03)
f0 = f0[f0 < 600]
valid_f0 = f0[f0 > 50]
mean_pitch2 = np.mean(valid_f0)
if abs(mean_pitch1 - mean_pitch2) > 55:
mean_pitch_cur = min(mean_pitch1, mean_pitch2)
else:
mean_pitch_cur = (mean_pitch1 + mean_pitch2) / 2
else:
f0 = f0_method.infer_from_audio(y, thred=0.03)
f0 = f0[f0 < 600]
valid_f0 = f0[f0 > 50]
mean_pitch_cur = np.mean(valid_f0)
return mean_pitch_cur
def meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras):
##计算pitch
f0up_key = pyin_process_single_rmvpe(target_wav)
## get embed
get_embed(target_wav, embed_npy, embed_model)
print("svc main start...")
svc_main(song_wav,svc_out_path,pth_model_path,embed_npy,f0up_key,hubert_model,paras)
print("svc main finished!!")
return 0
def process_svc(song_wav, target_wav, svc_out_path, paras):
    """Resolve the working paths of one conversion and run ``meisheng_svc``.

    :param song_wav: source song vocal, used exactly as given.
    :param target_wav: target-timbre audio. A relative path is reduced to its
        file name and looked up in the workspace ``abs_path``; an absolute path
        is used unchanged.
    :param svc_out_path: output wav, resolved the same way as ``target_wav``.
    :param paras: extra parameters forwarded to ``meisheng_svc``.
    :return: the value returned by ``meisheng_svc``.
    """

    def _resolve(path):
        # Callers that already work with absolute paths (e.g. files under /tmp)
        # must not be redirected into the workspace directory.
        if os.path.isabs(path):
            return path
        return abs_path + os.path.basename(path)

    target_wav = _resolve(target_wav)
    svc_out_path = _resolve(svc_out_path)
    # The embedding is stored next to the target audio; splitext also handles
    # extensions that are not exactly three characters long.
    embed_npy = os.path.splitext(target_wav)[0] + '.npy'
    similar = meisheng_svc(song_wav, target_wav, svc_out_path, embed_npy, paras)
    return similar
def get_svc(target_yinse_wav, song_name, paras):
'''
:param target_yinse_wav: 目标音色
:param song_name: 歌曲名字
;param paras: 其他参数
:return: svc路径名
'''
##清空工作空间临时路径
if os.path.exists(gs_work_dir):
#shutil.rmtree(gs_work_dir)
cmd = f"rm -rf {gs_work_dir}/*"
os.system(cmd)
else:
os.makedirs(gs_work_dir)
gender = paras['gender']##为了确定歌曲
##目标音色读取
f_dst = os.path.join(gs_work_dir, os.path.basename(target_yinse_wav))
#print("dir :", f_dst,"target_yinse_wav:",target_yinse_wav)
#shutil.move(target_yinse_wav, f_dst) ##放在工作目录
shutil.copy(target_yinse_wav, f_dst)
target_yinse_wav = f_dst
##歌曲/伴奏 读取(路径需要修改)
song_wav = os.path.join("{}{}/{}/vocal321.wav".format(song_folder, gender, song_name)) # 歌曲vocal
inf_acc_path = os.path.join("{}{}/{}/acc.wav".format(song_folder, gender, song_name))
#song_wav = './xusong_long.wav'
svc_out_path = os.path.join(gs_work_dir, "svc.wav") ###svc结果名字
print("inputMsg:", song_wav, target_yinse_wav, svc_out_path)
## svc process
st = time.time()
print("start inference...")
similar = process_svc(song_wav, target_yinse_wav, svc_out_path,paras)
print("svc finished!!")
print("time cost = {}".format(time.time() - st))
print("out path name {} ".format(svc_out_path))
#'''
##加混响
print("add reverbration...")
svc_out_path_effect = svc_out_path[:-4] + '_effect.wav'
cmd = f"/data/gpu_env_common/bin/effect_tool {svc_out_path} {svc_out_path_effect}"
print("cmd :", cmd)
os.system(cmd)
# # 人声伴奏合并
print("add acc...")
out_path = svc_out_path_effect[:-4] + '_music.wav'
mix(svc_out_path_effect, inf_acc_path, out_path)
print("time cost = {}".format(time.time() - st))
print("out path name {} ".format(out_path))
#'''
return svc_out_path
if __name__=='__main__':
###gender predict
target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a"
gender, female_rate, is_pure = gender_model.process(target_yinse_wav)
print('=====================')
print("gender:{}, female_rate:{},is_pure:{}".format(gender,female_rate,is_pure))
if gender == 0:
gender = 'female'
elif gender == 1:
gender = 'male'
elif female_rate > 0.5:
gender = 'female'
else:
gender = 'male'
print("modified gender:{} ".format(gender))
print('=====================')
###接口函数
'''
target_yinse_wav = "./raw/meisheng_yinse/female/changying.wav" #需要完整路径
song_name = "drivers_license" #"Levitating" ##路径会自动添加(要更改)
paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None} ##单位都是ms
#paras = {'gender': 'female', 'tst': 0, "tnd": 30, 'delay': 0} ###片段svc测试
#'''
#'''
#target_yinse_wav = "./raw/meisheng_yinse/female/target_yinse_cloris.m4a"
song_name = "lost_stars"
#paras = {'gender': 'female', 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None}
paras = {'gender': gender, 'tst': 0, "tnd": None, 'delay': 0, 'song_path': None }
get_svc(target_yinse_wav, song_name, paras)
#'''
diff --git a/tools/ebur128_tool/CMakeLists.txt b/tools/ebur128_tool/CMakeLists.txt
new file mode 100644
index 0000000..3017d49
--- /dev/null
+++ b/tools/ebur128_tool/CMakeLists.txt
@@ -0,0 +1,19 @@
+cmake_minimum_required(VERSION 2.8)
+project(ebur128_tool)
+
+set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/lib)
+
+include_directories(../ref/alimter/inc)
+include_directories(../ref/waves/inc)
+include_directories(../ref/ebur128/inc)
+
+add_subdirectory("../ref/alimter" ${PROJECT_SOURCE_DIR}/ref/alimter)
+add_subdirectory("../ref/waves" ${PROJECT_SOURCE_DIR}/ref/waves)
+add_subdirectory("../ref/ebur128" ${PROJECT_SOURCE_DIR}/ref/ebur128)
+
+add_executable(ebur128_tool ebur128_tool.cpp)
+
+target_link_libraries(ebur128_tool
+ ${LIBRARY_OUTPUT_PATH}/libalimiter.a
+ ${LIBRARY_OUTPUT_PATH}/libwaves.a
+ ${LIBRARY_OUTPUT_PATH}/libebur128.a)
\ No newline at end of file
diff --git a/tools/ebur128_tool/ebur128_tool.cpp b/tools/ebur128_tool/ebur128_tool.cpp
new file mode 100644
index 0000000..c3d171c
--- /dev/null
+++ b/tools/ebur128_tool/ebur128_tool.cpp
@@ -0,0 +1,107 @@
+//
+// Created by Administrator on 2024/7/8.
+//
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "alimiter.h"
+#include "ebur128.h"
+#include "WaveFile.h"
+
+#define PROC_LEN 1024
+/**
+ * Measure the integrated (gated) loudness of a whole interleaved 16-bit buffer.
+ * @param nChannel number of interleaved channels
+ * @param nSampleRate sample rate in Hz
+ * @param pData interleaved samples
+ * @param nLength total number of samples in pData (frames * channels)
+ * @param gated_loudness [out] integrated loudness reported by libebur128
+ * @return 0 on success, -1 when the analyser cannot be created,
+ *         -2 when feeding samples fails, -3 when the loudness query fails
+ */
+int ebur128_whole(int nChannel, int nSampleRate, short *pData, const int nLength, double &gated_loudness)
+{
+    if (nChannel <= 0)
+    {
+        return -1;
+    }
+    ebur128_state *st = ebur128_init(nChannel, nSampleRate, EBUR128_MODE_I);
+    if (NULL == st)
+    {
+        return -1;
+    }
+
+    // Walk the buffer in whole frames so every chunk starts on a frame boundary,
+    // including channel counts that do not divide PROC_LEN.
+    const int nTotalFrames = nLength / nChannel;
+    int nChunkFrames = PROC_LEN / nChannel;
+    if (nChunkFrames < 1)
+    {
+        nChunkFrames = 1;
+    }
+    int nFramePos = 0;
+    while (nFramePos < nTotalFrames)
+    {
+        int nFrames = nChunkFrames;
+        if (nTotalFrames - nFramePos < nChunkFrames)
+        {
+            nFrames = nTotalFrames - nFramePos;
+        }
+        if (ebur128_add_frames_short(st, pData + nFramePos * nChannel, nFrames) != 0)
+        {
+            // Release the analyser on the error path as well.
+            ebur128_destroy(&st);
+            return -2;
+        }
+        nFramePos += nFrames;
+    }
+
+    gated_loudness = -1;
+    const int nRet = ebur128_loudness_global(st, &gated_loudness);
+    ebur128_destroy(&st);
+    return nRet == 0 ? 0 : -3;
+}
+
+/**
+ * Loudness normalisation tool: ./ebur128_tool input_wav target_loudness dst_wav
+ *
+ * Measures the integrated loudness of input_wav, applies the gain that moves it to
+ * target_loudness, runs the result through the limiter and writes dst_wav as float wav.
+ *
+ * @return 0 on success, -1 bad arguments, -2 input cannot be read,
+ *         -3 input has no audio data, -4 loudness measurement failed
+ */
+int main(int argc, char* argv[]) {
+    if (argc < 4)
+    {
+        printf("input error! example: ./main input_wav target_loudness dst_wav\n");
+        return -1;
+    }
+
+    const std::string vocal_path = argv[1];
+    const double target_loudness = atof(argv[2]);
+    const std::string out_vocal_path = argv[3];
+
+    // Read the whole input as interleaved float samples.
+    CWaveFile vocal_wav = CWaveFile(vocal_path.c_str(), false);
+    if (!vocal_wav.GetStatus())
+    {
+        printf("%s not ok!\n", vocal_path.c_str());
+        return -2;
+    }
+    const int channels = vocal_wav.GetChannels();
+    const int total_frames = vocal_wav.GetTotalFrames();
+    if (channels <= 0 || total_frames <= 0)
+    {
+        printf("%s has no audio data!\n", vocal_path.c_str());
+        return -3;
+    }
+    const int vocal_buf_len = channels * total_frames;
+    std::vector<float> vocal_buf(vocal_buf_len);
+    std::vector<short> short_vocal_buf(vocal_buf_len);
+    vocal_wav.ReadFrameAsfloat(vocal_buf.data(), total_frames);
+    for (int i = 0; i < vocal_buf_len; i++)
+    {
+        // Clamp before converting: a float outside the short range is undefined behaviour.
+        float scaled = vocal_buf[i] * 32767.f;
+        if (scaled > 32767.f)
+        {
+            scaled = 32767.f;
+        }
+        else if (scaled < -32768.f)
+        {
+            scaled = -32768.f;
+        }
+        short_vocal_buf[i] = static_cast<short>(scaled);
+    }
+
+    double vocal_gated_loudness = 0;
+    if (ebur128_whole(channels, vocal_wav.GetSampleRate(), short_vocal_buf.data(),
+                      vocal_buf_len, vocal_gated_loudness) != 0)
+    {
+        printf("measure loudness of %s failed!\n", vocal_path.c_str());
+        return -4;
+    }
+
+    // db holds (loudness difference / 20), i.e. the exponent of the linear gain.
+    float db = 0.f;
+    float ebur128_rate = 1.f;
+    if (std::isfinite(vocal_gated_loudness))
+    {
+        db = (target_loudness - vocal_gated_loudness) / 20.f;
+        ebur128_rate = pow(10.f, db);
+    }
+    // A non-finite measurement (e.g. silent input) keeps unity gain instead of
+    // producing an infinite gain and NaN samples.
+
+    printf("vocal_gated_loudness = %f, db = %f, gain = %f\n", vocal_gated_loudness, db, ebur128_rate);
+    SUPERSOUND::Alimiter limiter;
+    limiter.SetParam(vocal_wav.GetSampleRate(), channels);
+    // NOTE(review): the limiter reports a latency (GetLatecy) that is not compensated
+    // here, and Filter is fed one interleaved sample at a time - confirm both are intended.
+    for (int i = 0; i < vocal_buf_len; i++)
+    {
+        float out = vocal_buf[i] * ebur128_rate;
+        limiter.Filter(&out, &out, 1);
+        vocal_buf[i] = out;
+    }
+
+    CWaveFile out_wav = CWaveFile(out_vocal_path.c_str(), true);
+    out_wav.SetChannels(channels);
+    out_wav.SetSampleRate(vocal_wav.GetSampleRate());
+    out_wav.SetSampleFormat(SF_IEEE_FLOAT);
+    out_wav.SetupDone();
+    out_wav.WriteFrame(vocal_buf.data(), total_frames);
+    return 0;
+}
\ No newline at end of file
diff --git a/tools/ref/alimter/CMakeLists.txt b/tools/ref/alimter/CMakeLists.txt
new file mode 100644
index 0000000..9748c4d
--- /dev/null
+++ b/tools/ref/alimter/CMakeLists.txt
@@ -0,0 +1,3 @@
+include_directories(inc)
+AUX_SOURCE_DIRECTORY(src DIR_ALIMTER_SRCS)
+add_library(alimiter ${DIR_ALIMTER_SRCS})
\ No newline at end of file
diff --git a/tools/ref/alimter/inc/alimiter.h b/tools/ref/alimter/inc/alimiter.h
new file mode 100644
index 0000000..8022d39
--- /dev/null
+++ b/tools/ref/alimter/inc/alimiter.h
@@ -0,0 +1,99 @@
+
+/***************************************************************************
+* email : yijiangyang@tencent.com *
+***************************************************************************/
+
+//+ ----------------------------------------------------+
+//+ _oo0oo_ +
+//+ o8888888o +
+//+ 88" . "88 +
+//+ (| -_- |) +
+//+ 0\ = /0 +
+//+ ___/`---'\___ +
+//+ .' \\| |// '. +
+//+ / \\||| : |||// \ +
+//+ / _||||| -:- |||||- \ +
+//+ | | \\\ - /// | | +
+//+ | \_| ''\---/'' |_/ | +
+//+ \ .-\__ '-' ___/-. / +
+//+ ___'. .' /--.--\ `. .'___ +
+//+ ."" '< `.___\_<|>_/___.' >' "". +
+//+ | | : `- \`.;`\ _ /`;.`/ - ` : | | +
+//+ \ \ `_. \_ __\ /__ _/ .-` / / +
+//+ =====`-.____`.___ \_____/___.-`___.-'===== +
+//+ `=---=' +
+//+ +
+//+ +
+//+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +
+//+ +
+//+ 佛祖保佑 永无BUG +
+//+ ----------------------------------------------------+
+
+//实现 FFMPEG 中的限制器,这个压限器对频谱友好,但是压得比较厉害
+
+#ifndef __ALIMITER_H__
+#define __ALIMITER_H__
+
+#include <stdint.h>
+#define ERROR_SUPERSOUND_SUCCESS 0
+#define ERROR_SUPERSOUND_PARAM -1
+#define ERROR_SUPERSOUND_MEMORY -2
+// State of the limiter. Alimiter::Flush() resets every field to its default while
+// keeping the three heap arrays (buffer, nextpos, nextdelta) and the two size fields.
+// NOTE(review): field names follow the FFmpeg limiter this file says it re-implements;
+// meanings not stated below are not established by the visible code.
+typedef struct AudioLimiterContext
+{
+ // Defaults set by Flush(): limit = 1, attack = 5 / 1000, release = 50 / 1000, att = 1.
+ float limit;
+ float attack;
+ float release;
+ float att;
+ // Defaults set by Flush(): level_in = 1, level_out = 32000 / 32768.0.
+ float level_in;
+ float level_out;
+ // Defaults set by Flush(): auto_release = 0, auto_level = 1.
+ int32_t auto_release;
+ int32_t auto_level;
+ float asc;
+ int32_t asc_c;
+ // Flush() sets asc_pos to -1.
+ int32_t asc_pos;
+ float asc_coeff;
+
+ // Heap arrays; Flush() clears buffer_max_size elements of each.
+ float *buffer;
+ // GetLatecy() reports buffer_size divided by the channel count.
+ int32_t buffer_size;
+ int32_t buffer_max_size;
+ int32_t pos;
+ int32_t *nextpos;
+ float *nextdelta;
+
+ float delta;
+ int32_t nextiter;
+ int32_t nextlen;
+ int32_t asc_changed;
+}AudioLimiterContext;
+
+namespace SUPERSOUND
+{
+
+
+// Audio limiter wrapping an AudioLimiterContext.
+// Usage seen in ebur128_tool: construct, SetParam(sample rate, channels), then Filter().
+class Alimiter
+{
+public:
+ Alimiter();
+ ~Alimiter();
+
+public:
+ // Resets the limiter state to its defaults; allocated buffers are kept and cleared.
+ void Flush();
+ // Returns the context's buffer_size divided by the channel count.
+ int32_t GetLatecy();
+ // Configures sample rate (fs) and channel count.
+ // NOTE(review): presumably returns one of the ERROR_SUPERSOUND_* codes - confirm.
+ int32_t SetParam(int32_t fs, int32_t channels);
+ // Processes num items from input into output; ebur128_tool calls it in place
+ // (input == output) with num = 1.
+ // NOTE(review): whether num counts samples or frames is not visible here.
+ void Filter(float * input, float * output, int32_t num);
+
+private:
+ void Uninit();
+ int32_t config_input();
+ float get_rdelta(AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc);
+
+private:
+ AudioLimiterContext m_alimiterCtx;
+ int m_nChannels;
+ int m_nFs;
+};
+
+
+}
+
+#endif /* __ALIMITER_H__ */
\ No newline at end of file
diff --git a/tools/ref/alimter/src/alimiter.cpp b/tools/ref/alimter/src/alimiter.cpp
new file mode 100644
index 0000000..abbd622
--- /dev/null
+++ b/tools/ref/alimter/src/alimiter.cpp
@@ -0,0 +1,306 @@
+
+#include "alimiter.h"
+#include <string.h>
+#include <math.h>
+#include <stdio.h>
+#include <new>
+
+// Larger / smaller of two values. Function-like macros: each argument may be
+// evaluated twice, so do not pass expressions with side effects.
+#define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#define MIN(a,b) (((a) < (b)) ? (a) : (b))
+// Median of three values; Filter() uses it as MIDDLE(v, -limit, limit) to clamp v.
+#define MIDDLE(x, y, z) ((x)<(y)?((y)<(z)?(y):(x)<(z)?(z):(x)):((y)>(z)?(y):(x)>(z)?(z):(x)))
+// delete[]s an array pointer if it is non-null and then sets it to NULL.
+// Expands to a braced block, so `ptr` must be an assignable lvalue.
+#define SAFE_DELETE_PTR(ptr) \
+{ \
+ if(ptr) \
+ { \
+ delete [] ptr; \
+ ptr = NULL; \
+ } \
+}
+
+namespace SUPERSOUND
+{
+
+
+// Creates an unconfigured limiter: no arrays allocated, no sample rate or
+// channel count, and all limiter settings at the defaults chosen by Flush().
+Alimiter::Alimiter()
+    : m_nChannels(0)
+    , m_nFs(0)
+{
+    // Zero the context before Flush() so it sees null array pointers and
+    // zero sizes instead of indeterminate values.
+    memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx));
+
+    Flush();
+}
+
+// Releases the arrays owned by the limiter context.
+Alimiter::~Alimiter()
+{
+    this->Uninit();
+}
+
+// Resets the limiter to its default settings and empties the delay line.
+//
+// The three arrays and both size fields survive the reset; every other field
+// of the context is returned to the defaults listed below. Safe to call before
+// SetParam(): null arrays are skipped.
+void Alimiter::Flush()
+{
+    // Save what must survive the memset of the whole context further down.
+    float * const buffer = m_alimiterCtx.buffer;
+    float * const nextdelta = m_alimiterCtx.nextdelta;
+    int32_t * const nextpos = m_alimiterCtx.nextpos;
+    const int32_t buffer_max_size = m_alimiterCtx.buffer_max_size;
+    const int32_t buffer_size = m_alimiterCtx.buffer_size;
+
+    // Size each clear from the pointee type. The original cleared the int32_t
+    // queue with sizeof(float), which is only right while both are 4 bytes.
+    if(buffer)
+        memset(buffer, 0, sizeof(*buffer) * buffer_max_size);
+    if(nextdelta)
+        memset(nextdelta, 0, sizeof(*nextdelta) * buffer_max_size);
+    if(nextpos)
+        memset(nextpos, -1, sizeof(*nextpos) * buffer_max_size); // all bytes 0xFF => every slot reads -1 (empty)
+
+    memset(&m_alimiterCtx, 0, sizeof(m_alimiterCtx));
+
+    // Default settings.
+    m_alimiterCtx.level_in = 1;
+    m_alimiterCtx.level_out = 32000 / 32768.0;
+    m_alimiterCtx.limit = 1;
+    m_alimiterCtx.attack = 5;
+    m_alimiterCtx.release = 50;
+    m_alimiterCtx.auto_release = 0;
+    m_alimiterCtx.asc_coeff = 0.5;
+    m_alimiterCtx.auto_level = 1;
+
+    // attack / release are entered as 5 and 50 and divided by 1000 here; the
+    // rest of the code multiplies them by the sample rate.
+    m_alimiterCtx.attack /= 1000;
+    m_alimiterCtx.release /= 1000;
+    m_alimiterCtx.att = 1;       // unity gain
+    m_alimiterCtx.asc_pos = -1;
+    m_alimiterCtx.asc_coeff = pow(0.5f, m_alimiterCtx.asc_coeff - 0.5f) * 2 * -1;
+
+    // Put the preserved allocations back.
+    m_alimiterCtx.buffer = buffer;
+    m_alimiterCtx.nextdelta = nextdelta;
+    m_alimiterCtx.nextpos = nextpos;
+    m_alimiterCtx.buffer_max_size = buffer_max_size;
+    m_alimiterCtx.buffer_size = buffer_size;
+}
+
+// Returns the ring length divided by the channel count, i.e. the look-ahead
+// expressed in frames.
+//
+// Returns 0 while the limiter is unconfigured; the original divided by
+// m_nChannels unconditionally, which is an integer division by zero until
+// SetParam() has stored a channel count.
+int32_t Alimiter::GetLatecy()
+{
+    if(m_nChannels <= 0)
+        return 0;
+
+    return m_alimiterCtx.buffer_size / m_nChannels;
+}
+
+// Configures the limiter for a sample rate and an interleaved channel count.
+//
+// fs       - sample rate; must be > 0.
+// channels - number of interleaved channels; must be > 0.
+//
+// Returns ERROR_SUPERSOUND_SUCCESS, ERROR_SUPERSOUND_PARAM for non-positive
+// arguments (or whatever config_input() rejects), or ERROR_SUPERSOUND_MEMORY.
+// Calling it again with the values already in effect is a no-op.
+int32_t Alimiter::SetParam( int32_t fs, int32_t channels )
+{
+    // Reject bad values before touching any state: channels == 0 would reach
+    // a modulo by zero inside config_input().
+    if((fs <= 0) || (channels <= 0))
+        return ERROR_SUPERSOUND_PARAM;
+
+    if((fs == m_nFs) && (channels == m_nChannels))
+        return ERROR_SUPERSOUND_SUCCESS;
+
+    m_nChannels = channels;
+    m_nFs = fs;
+
+    const int32_t result = config_input();
+    if(result != ERROR_SUPERSOUND_SUCCESS)
+    {
+        // Forget the cached rate so an identical retry is not mistaken for
+        // "already configured" by the equality check above. The channel count
+        // is left non-zero on purpose so code dividing by it stays defined.
+        m_nFs = 0;
+        return result;
+    }
+
+    // The ring length and frame stride just changed; the write position and
+    // the peak queue from the previous layout no longer describe the ring.
+    Flush();
+
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+// Limits a block of interleaved audio.
+//
+// input  - `num` interleaved samples (num / m_nChannels frames).
+// output - receives the same number of samples; each output frame is taken
+//          from the ring slot one frame ahead of the write position, so the
+//          output lags the input by the look-ahead ring.
+// num    - sample count, not frame count.
+//
+// Per frame: store the scaled input in the ring, and if its peak exceeds the
+// limit, plan a gain ramp (s->delta) that reaches limit/peak by the time that
+// frame leaves the ring. Pending peaks and their release slopes are kept in the
+// nextpos / nextdelta queue. The delayed frame is multiplied by the running
+// gain s->att, clamped to +/-limit and scaled by level * level_out.
+//
+// NOTE(review): divides by m_nChannels and takes `% buffer_size`; both are zero
+// until SetParam() has succeeded, so callers presumably must configure first.
+void Alimiter::Filter( float * input, float * output, int32_t num )
+{
+ // From here on `num` counts frames.
+ num = num / m_nChannels;
+ int channels = m_nChannels;
+ int buffer_size = m_alimiterCtx.buffer_size;
+ float * buffer = m_alimiterCtx.buffer;
+ float release = m_alimiterCtx.release;
+ float limit = m_alimiterCtx.limit;
+ float * nextdelta = m_alimiterCtx.nextdelta;
+ // With auto_level set, the output is scaled by 1 / limit.
+ float level = m_alimiterCtx.auto_level ? 1 / limit : 1;
+ float level_out = m_alimiterCtx.level_out;
+ float level_in = m_alimiterCtx.level_in;
+ int *nextpos = m_alimiterCtx.nextpos;
+
+ float * buf;
+ float * dst;
+ float * src;
+ int n, c, i;
+ AudioLimiterContext * s = &m_alimiterCtx;
+
+ dst = output;
+ src = input;
+
+ for (n = 0; n < num; n++) {
+ float peak = 0;
+
+ // Write the incoming frame into the ring and take its absolute peak.
+ for (c = 0; c < channels; c++) {
+ float sample = src[c] * level_in;
+
+ buffer[s->pos + c] = sample;
+ peak = MAX(peak, fabs(sample));
+ }
+
+ if (s->auto_release && peak > limit) {
+ s->asc += peak;
+ s->asc_c++;
+ }
+
+ if (peak > limit) {
+ // patt: gain that brings this peak down to the limit.
+ float patt = MIN(limit / peak, 1);
+ float rdelta = get_rdelta(s, release, m_nFs,
+ peak, limit, patt, 0);
+ // Slope needed to move from the current gain to limit/peak over the ring length.
+ float delta = (limit / peak - s->att) / buffer_size * channels;
+ int found = 0;
+
+ if (delta < s->delta) {
+ // Steeper than the ramp in progress: this peak becomes the only queued one.
+ s->delta = delta;
+ nextpos[0] = s->pos;
+ nextpos[1] = -1;
+ nextdelta[0] = rdelta;
+ s->nextlen = 1;
+ s->nextiter= 0;
+ } else {
+ // Otherwise look for a queued peak whose follow-up slope must be lowered
+ // so that the gain also reaches this peak's target in time.
+ for (i = s->nextiter; i < s->nextiter + s->nextlen; i++) {
+ int j = i % buffer_size;
+ float ppeak, pdelta;
+
+ // NOTE(review): only offsets +0 and +1 are inspected, which looks like a
+ // two-channel assumption (with one channel, +1 is the next frame) - confirm.
+ ppeak = fabs(buffer[nextpos[j]]) > fabs(buffer[nextpos[j] + 1]) ?
+ fabs(buffer[nextpos[j]]) : fabs(buffer[nextpos[j] + 1]);
+ pdelta = (limit / peak - limit / ppeak) / (((buffer_size - nextpos[j] + s->pos) % buffer_size) / channels);
+ if (pdelta < nextdelta[j]) {
+ nextdelta[j] = pdelta;
+ found = 1;
+ break;
+ }
+ }
+ if (found) {
+ // Truncate the queue after the adjusted entry and append this peak.
+ s->nextlen = i - s->nextiter + 1;
+ nextpos[(s->nextiter + s->nextlen) % buffer_size] = s->pos;
+ nextdelta[(s->nextiter + s->nextlen) % buffer_size] = rdelta;
+ nextpos[(s->nextiter + s->nextlen + 1) % buffer_size] = -1;
+ s->nextlen++;
+ }
+ }
+ }
+
+ // Frame that is output now: the slot one frame ahead of the write position.
+ buf = &s->buffer[(s->pos + channels) % buffer_size];
+ peak = 0;
+ for (c = 0; c < channels; c++) {
+ float sample = buf[c];
+
+ peak = MAX(peak, fabs(sample));
+ }
+
+ if (s->pos == s->asc_pos && !s->asc_changed)
+ s->asc_pos = -1;
+
+ if (s->auto_release && s->asc_pos == -1 && peak > limit) {
+ s->asc -= peak;
+ s->asc_c--;
+ }
+
+ // Advance the gain ramp by one frame and apply it.
+ s->att += s->delta;
+
+ for (c = 0; c < channels; c++)
+ dst[c] = buf[c] * s->att;
+
+ // The outgoing frame is the peak at the head of the queue: switch to its
+ // follow-up slope and pop it.
+ if ((s->pos + channels) % buffer_size == nextpos[s->nextiter]) {
+ if (s->auto_release) {
+ s->delta = get_rdelta(s, release, m_nFs,
+ peak, limit, s->att, 1);
+ if (s->nextlen > 1) {
+ int pnextpos = nextpos[(s->nextiter + 1) % buffer_size];
+ float ppeak = fabs(buffer[pnextpos]) > fabs(buffer[pnextpos + 1]) ?
+ fabs(buffer[pnextpos]) :
+ fabs(buffer[pnextpos + 1]);
+ float pdelta = (limit / ppeak - s->att) /
+ (((buffer_size + pnextpos -
+ ((s->pos + channels) % buffer_size)) %
+ buffer_size) / channels);
+ if (pdelta < s->delta)
+ s->delta = pdelta;
+ }
+ } else {
+ s->delta = nextdelta[s->nextiter];
+ s->att = limit / peak;
+ }
+
+ s->nextlen -= 1;
+ nextpos[s->nextiter] = -1;
+ s->nextiter = (s->nextiter + 1) % buffer_size;
+ }
+
+ // Gain fully recovered: stop ramping and empty the queue.
+ if (s->att > 1.) {
+ s->att = 1.;
+ s->delta = 0.;
+ s->nextiter = 0;
+ s->nextlen = 0;
+ nextpos[0] = -1;
+ }
+
+ // Keep the gain strictly positive and start recovering.
+ if (s->att <= 0.) {
+ s->att = 0.000001f;
+ s->delta = (1 - s->att) / (m_nFs * release);
+ }
+
+ // Snap values within 1e-6 of their resting point.
+ if (s->att != 1 && (1 - s->att) < 0.000001f)
+ s->att = 1;
+
+ if (s->delta != 0 && fabs(s->delta) < 0.000001f)
+ s->delta = 0;
+
+ // Hard clamp to [-limit, limit], then apply the output scaling.
+ for (c = 0; c < channels; c++)
+ dst[c] = MIDDLE(dst[c], -limit, limit) * level * level_out;
+
+ s->pos = (s->pos + channels) % buffer_size;
+ src += channels;
+ dst += channels;
+ }
+}
+
+// Frees the three arrays owned by the context and nulls the pointers, so a
+// second call is harmless. delete[] on a null pointer is a no-op.
+void Alimiter::Uninit()
+{
+    delete [] m_alimiterCtx.buffer;
+    m_alimiterCtx.buffer = NULL;
+
+    delete [] m_alimiterCtx.nextdelta;
+    m_alimiterCtx.nextdelta = NULL;
+
+    delete [] m_alimiterCtx.nextpos;
+    m_alimiterCtx.nextpos = NULL;
+}
+
+// Sizes the limiter for the current m_nFs / m_nChannels.
+//
+// Allocates (only when the existing arrays are too small) room for 100 ms of
+// interleaved audio plus one frame, and sets the active ring length from the
+// attack time, rounded down to a whole number of frames.
+//
+// Returns ERROR_SUPERSOUND_PARAM when the rate or channel count is not
+// positive or the resulting ring would be empty, ERROR_SUPERSOUND_MEMORY when
+// an allocation fails, otherwise ERROR_SUPERSOUND_SUCCESS. On any failure the
+// previous arrays and sizes are left untouched.
+int32_t Alimiter::config_input()
+{
+    // m_nChannels == 0 would make the modulo below divide by zero.
+    if((m_nFs <= 0) || (m_nChannels <= 0))
+        return ERROR_SUPERSOUND_PARAM;
+
+    const int obuffer_size = int(m_nFs * m_nChannels * 100 / 1000. + m_nChannels);
+    if(obuffer_size < m_nChannels)
+        return ERROR_SUPERSOUND_PARAM;
+
+    // Active ring length: attack time in samples, whole frames only.
+    int32_t buffer_size = int(m_nFs * m_alimiterCtx.attack * m_nChannels);
+    buffer_size -= buffer_size % m_nChannels;
+
+    // Filter() computes `% buffer_size` and indexes the arrays up to it, so an
+    // empty ring, or one larger than the allocation, must be refused here.
+    if((buffer_size <= 0) || (buffer_size > obuffer_size))
+        return ERROR_SUPERSOUND_PARAM;
+
+    if(obuffer_size > m_alimiterCtx.buffer_max_size)
+    {
+        // Allocate all three replacements first and commit only if every one
+        // succeeded, so a failure cannot leave a mix of old and new arrays.
+        float * new_buffer = new(std::nothrow) float[obuffer_size];
+        float * new_nextdelta = new(std::nothrow) float[obuffer_size];
+        int32_t * new_nextpos = new(std::nothrow) int32_t[obuffer_size];
+
+        if((new_buffer == NULL) || (new_nextdelta == NULL) || (new_nextpos == NULL))
+        {
+            delete [] new_buffer;
+            delete [] new_nextdelta;
+            delete [] new_nextpos;
+            return ERROR_SUPERSOUND_MEMORY;
+        }
+
+        memset(new_buffer, 0, sizeof(float) * obuffer_size);
+        memset(new_nextdelta, 0, sizeof(float) * obuffer_size);
+        memset(new_nextpos, -1, sizeof(int32_t) * obuffer_size); // every slot reads -1 (empty)
+
+        SAFE_DELETE_PTR(m_alimiterCtx.buffer);
+        SAFE_DELETE_PTR(m_alimiterCtx.nextdelta);
+        SAFE_DELETE_PTR(m_alimiterCtx.nextpos);
+
+        m_alimiterCtx.buffer = new_buffer;
+        m_alimiterCtx.nextdelta = new_nextdelta;
+        m_alimiterCtx.nextpos = new_nextpos;
+        m_alimiterCtx.buffer_max_size = obuffer_size;
+    }
+
+    m_alimiterCtx.buffer_size = buffer_size;
+
+    return ERROR_SUPERSOUND_SUCCESS;
+}
+
+// Computes the per-frame gain increase to use after a peak.
+//
+// s           - limiter context (reads auto_release, asc, asc_c, asc_coeff).
+// release     - release time as stored in the context.
+// sample_rate - sample rate in Hz.
+// peak        - not used by the computation.
+// limit       - limiter ceiling.
+// patt        - gain the recovery starts from.
+// asc         - non-zero to allow the averaged (asc) adjustment.
+//
+// The plain step is (1 - patt) / (sample_rate * release). When `asc` is set,
+// auto_release is enabled and peaks have been accumulated, a gain derived from
+// the accumulated peaks may replace it with a smaller step, never below one
+// tenth of the plain step.
+float Alimiter::get_rdelta( AudioLimiterContext *s, float release, int sample_rate, float peak, float limit, float patt, int asc )
+{
+    const float plain_step = (1 - patt) / (sample_rate * release);
+
+    // The adjustment applies only when requested, enabled and fed with data.
+    if (!(asc && s->auto_release && s->asc_c > 0))
+        return plain_step;
+
+    const float avg_att = limit / (s->asc_coeff * s->asc) * (float)s->asc_c;
+    if (!(avg_att > patt))
+        return plain_step;
+
+    const float avg_step = (avg_att - patt) / (sample_rate * release);
+    const float floor_step = plain_step / 10;
+    const float candidate = (avg_step > floor_step) ? avg_step : floor_step;
+
+    // Only ever slow the recovery down.
+    return (candidate < plain_step) ? candidate : plain_step;
+}
+
+
+}
\ No newline at end of file
diff --git a/tools/ref/ebur128/CMakeLists.txt b/tools/ref/ebur128/CMakeLists.txt
new file mode 100644
index 0000000..18a5a86
--- /dev/null
+++ b/tools/ref/ebur128/CMakeLists.txt
@@ -0,0 +1,3 @@
+# Library target for the bundled ebur128 sources: every file found in src/,
+# with inc/ on the include path.
+include_directories(inc)
+aux_source_directory(src DIR_EBUR128_SRCS)
+add_library(ebur128 ${DIR_EBUR128_SRCS})
\ No newline at end of file
diff --git a/tools/ref/ebur128/inc/ebur128.h b/tools/ref/ebur128/inc/ebur128.h
new file mode 100644
index 0000000..faa66c6
--- /dev/null
+++ b/tools/ref/ebur128/inc/ebur128.h
@@ -0,0 +1,425 @@
+/* See COPYING file for copyright and license details. */
+
+#ifndef EBUR128_H_
+#define EBUR128_H_
+
+/** \file ebur128.h
+ * \brief libebur128 - a library for loudness measurement according to
+ * the EBU R128 standard.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define EBUR128_VERSION_MAJOR 1
+#define EBUR128_VERSION_MINOR 2
+#define EBUR128_VERSION_PATCH 4
+
+#include <stddef.h> /* for size_t */
+
+/** \enum channel
+ * Use these values when setting the channel map with ebur128_set_channel().
+ * See definitions in ITU R-REC-BS 1770-4
+ *
+ * The first five positions each have two names with the same value (for
+ * example EBUR128_LEFT and EBUR128_Mp030 are both 1).
+ */
+enum channel {
+ EBUR128_UNUSED = 0, /**< unused channel (for example LFE channel) */
+ EBUR128_LEFT = 1,
+ EBUR128_Mp030 = 1, /**< itu M+030 */
+ EBUR128_RIGHT = 2,
+ EBUR128_Mm030 = 2, /**< itu M-030 */
+ EBUR128_CENTER = 3,
+ EBUR128_Mp000 = 3, /**< itu M+000 */
+ EBUR128_LEFT_SURROUND = 4,
+ EBUR128_Mp110 = 4, /**< itu M+110 */
+ EBUR128_RIGHT_SURROUND = 5,
+ EBUR128_Mm110 = 5, /**< itu M-110 */
+ EBUR128_DUAL_MONO, /**< a channel that is counted twice */
+ EBUR128_MpSC, /**< itu M+SC */
+ EBUR128_MmSC, /**< itu M-SC */
+ EBUR128_Mp060, /**< itu M+060 */
+ EBUR128_Mm060, /**< itu M-060 */
+ EBUR128_Mp090, /**< itu M+090 */
+ EBUR128_Mm090, /**< itu M-090 */
+ EBUR128_Mp135, /**< itu M+135 */
+ EBUR128_Mm135, /**< itu M-135 */
+ EBUR128_Mp180, /**< itu M+180 */
+ EBUR128_Up000, /**< itu U+000 */
+ EBUR128_Up030, /**< itu U+030 */
+ EBUR128_Um030, /**< itu U-030 */
+ EBUR128_Up045, /**< itu U+045 */
+ EBUR128_Um045, /**< itu U-045 */
+ EBUR128_Up090, /**< itu U+090 */
+ EBUR128_Um090, /**< itu U-090 */
+ EBUR128_Up110, /**< itu U+110 */
+ EBUR128_Um110, /**< itu U-110 */
+ EBUR128_Up135, /**< itu U+135 */
+ EBUR128_Um135, /**< itu U-135 */
+ EBUR128_Up180, /**< itu U+180 */
+ EBUR128_Tp000, /**< itu T+000 */
+ EBUR128_Bp000, /**< itu B+000 */
+ EBUR128_Bp045, /**< itu B+045 */
+ EBUR128_Bm045 /**< itu B-045 */
+};
+
+/** \enum error
+ * Error return values.
+ */
+enum error {
+ EBUR128_SUCCESS = 0, /**< the call succeeded */
+ EBUR128_ERROR_NOMEM, /**< a memory allocation failed */
+ EBUR128_ERROR_INVALID_MODE, /**< the required mode was not set at init (also returned by ebur128_loudness_window() for a window larger than the current one) */
+ EBUR128_ERROR_INVALID_CHANNEL_INDEX, /**< the channel index is invalid */
+ EBUR128_ERROR_NO_CHANGE /**< the new setting equals the current one; nothing was changed */
+};
+
+/** \enum mode
+ * Use these values in ebur128_init (or'ed). Try to use the lowest possible
+ * modes that suit your needs, as performance will be better.
+ */
+enum mode {
+ /** can call ebur128_loudness_momentary */
+ EBUR128_MODE_M = (1 << 0),
+ /** can call ebur128_loudness_shortterm */
+ EBUR128_MODE_S = (1 << 1) | EBUR128_MODE_M,
+ /** can call ebur128_loudness_global_* and ebur128_relative_threshold */
+ EBUR128_MODE_I = (1 << 2) | EBUR128_MODE_M,
+ /** can call ebur128_loudness_range */
+ EBUR128_MODE_LRA = (1 << 3) | EBUR128_MODE_S,
+ /** can call ebur128_sample_peak */
+ EBUR128_MODE_SAMPLE_PEAK = (1 << 4) | EBUR128_MODE_M,
+ /** can call ebur128_true_peak */
+ EBUR128_MODE_TRUE_PEAK = (1 << 5) | EBUR128_MODE_M
+ | EBUR128_MODE_SAMPLE_PEAK,
+ /** uses histogram algorithm to calculate loudness */
+ EBUR128_MODE_HISTOGRAM = (1 << 6)
+};
+
+/** forward declaration of ebur128_state_internal */
+struct ebur128_state_internal;
+
+/** \brief Contains information about the state of a loudness measurement.
+ *
+ * You should not need to modify this struct directly.
+ */
+typedef struct {
+ int mode; /**< The current mode. */
+ unsigned int channels; /**< The number of channels. */
+ unsigned long samplerate; /**< The sample rate. */
+ struct ebur128_state_internal* d; /**< Internal state. */
+} ebur128_state;
+
+/** \brief Get library version number. Do not pass null pointers here.
+ *
+ * @param major major version number of library
+ * @param minor minor version number of library
+ * @param patch patch version number of library
+ */
+void ebur128_get_version(int* major, int* minor, int* patch);
+
+/** \brief Initialize library state.
+ *
+ * @param channels the number of channels.
+ * @param samplerate the sample rate.
+ * @param mode see the mode enum for possible values.
+ * @return an initialized library state, or NULL on error.
+ */
+ebur128_state* ebur128_init(unsigned int channels,
+ unsigned long samplerate,
+ int mode);
+
+/** \brief Destroy library state.
+ *
+ * @param st pointer to a library state.
+ */
+void ebur128_destroy(ebur128_state** st);
+
+/** \brief Set channel type.
+ *
+ * The default is:
+ * - 0 -> EBUR128_LEFT
+ * - 1 -> EBUR128_RIGHT
+ * - 2 -> EBUR128_CENTER
+ * - 3 -> EBUR128_UNUSED
+ * - 4 -> EBUR128_LEFT_SURROUND
+ * - 5 -> EBUR128_RIGHT_SURROUND
+ *
+ * @param st library state.
+ * @param channel_number zero based channel index.
+ * @param value channel type from the "channel" enum.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_set_channel(ebur128_state* st,
+ unsigned int channel_number,
+ int value);
+
+/** \brief Change library parameters.
+ *
+ * Note that the channel map will be reset when setting a different number of
+ * channels. The current unfinished block will be lost.
+ *
+ * @param st library state.
+ * @param channels new number of channels.
+ * @param samplerate new sample rate.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be
+ * invalid and must be destroyed.
+ * - EBUR128_ERROR_NO_CHANGE if channels and sample rate were not changed.
+ */
+int ebur128_change_parameters(ebur128_state* st,
+ unsigned int channels,
+ unsigned long samplerate);
+
+/** \brief Set the maximum window duration.
+ *
+ * Set the maximum duration that will be used for ebur128_window_loudness().
+ * Note that this destroys the current content of the audio buffer.
+ *
+ * @param st library state.
+ * @param window duration of the window in ms.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM on memory allocation error. The state will be
+ * invalid and must be destroyed.
+ * - EBUR128_ERROR_NO_CHANGE if window duration not changed.
+ */
+int ebur128_set_max_window(ebur128_state* st, unsigned long window);
+
+/** \brief Set the maximum history.
+ *
+ * Set the maximum history that will be stored for loudness integration.
+ * More history provides more accurate results, but requires more resources.
+ *
+ * Applies to ebur128_loudness_range() and ebur128_loudness_global() when
+ * EBUR128_MODE_HISTOGRAM is not set.
+ *
+ * Default is ULONG_MAX (at least ~50 days).
+ * Minimum is 3000ms for EBUR128_MODE_LRA and 400ms for EBUR128_MODE_M.
+ *
+ * @param st library state.
+ * @param history duration of history in ms.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NO_CHANGE if history not changed.
+ */
+int ebur128_set_max_history(ebur128_state* st, unsigned long history);
+
+/** \brief Add frames to be processed.
+ *
+ * @param st library state.
+ * @param src array of source frames. Channels must be interleaved.
+ * @param frames number of frames. Not number of samples!
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM on memory allocation error.
+ */
+int ebur128_add_frames_short(ebur128_state* st,
+ const short* src,
+ size_t frames);
+/** \brief See \ref ebur128_add_frames_short */
+int ebur128_add_frames_int(ebur128_state* st,
+ const int* src,
+ size_t frames);
+/** \brief See \ref ebur128_add_frames_short */
+int ebur128_add_frames_float(ebur128_state* st,
+ const float* src,
+ size_t frames);
+/** \brief See \ref ebur128_add_frames_short */
+int ebur128_add_frames_double(ebur128_state* st,
+ const double* src,
+ size_t frames);
+
+/** \brief Get global integrated loudness in LUFS.
+ *
+ * @param st library state.
+ * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative
+ * infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set.
+ */
+int ebur128_loudness_global(ebur128_state* st, double* out);
+/** \brief Get global integrated loudness in LUFS across multiple instances.
+ *
+ * @param sts array of library states.
+ * @param size length of sts
+ * @param out integrated loudness in LUFS. -HUGE_VAL if result is negative
+ * infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not been set.
+ */
+int ebur128_loudness_global_multiple(ebur128_state** sts,
+ size_t size,
+ double* out);
+
+/** \brief Get momentary loudness (last 400ms) in LUFS.
+ *
+ * @param st library state.
+ * @param out momentary loudness in LUFS. -HUGE_VAL if result is negative
+ * infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ */
+int ebur128_loudness_momentary(ebur128_state* st, double* out);
+/** \brief Get short-term loudness (last 3s) in LUFS.
+ *
+ * @param st library state.
+ * @param out short-term loudness in LUFS. -HUGE_VAL if result is negative
+ * infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_S" has not been set.
+ */
+int ebur128_loudness_shortterm(ebur128_state* st, double* out);
+
+/** \brief Get loudness of the specified window in LUFS.
+ *
+ * window must not be larger than the current window set in st.
+ * The current window can be changed by calling ebur128_set_max_window().
+ *
+ * @param st library state.
+ * @param window window in ms to calculate loudness.
+ * @param out loudness in LUFS. -HUGE_VAL if result is negative infinity.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if window larger than current window in st.
+ */
+int ebur128_loudness_window(ebur128_state* st,
+ unsigned long window,
+ double* out);
+
+/** \brief Get loudness range (LRA) of programme in LU.
+ *
+ * Calculates loudness range according to EBU 3342.
+ *
+ * @param st library state.
+ * @param out loudness range (LRA) in LU. Will not be changed in case of
+ * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be
+ * returned in this case.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM in case of memory allocation error.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set.
+ */
+int ebur128_loudness_range(ebur128_state* st, double* out);
+/** \brief Get loudness range (LRA) in LU across multiple instances.
+ *
+ * Calculates loudness range according to EBU 3342.
+ *
+ * @param sts array of library states.
+ * @param size length of sts
+ * @param out loudness range (LRA) in LU. Will not be changed in case of
+ * error. EBUR128_ERROR_NOMEM or EBUR128_ERROR_INVALID_MODE will be
+ * returned in this case.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_NOMEM in case of memory allocation error.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_LRA" has not been set.
+ */
+int ebur128_loudness_range_multiple(ebur128_state** sts,
+ size_t size,
+ double* out);
+
+/** \brief Get maximum sample peak from all frames that have been processed.
+ *
+ * The equation to convert to dBFS is: 20 * log10(out)
+ *
+ * @param st library state
+ * @param channel_number channel to analyse
+ * @param out maximum sample peak in float format (1.0 is 0 dBFS)
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not
+ * been set.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_sample_peak(ebur128_state* st,
+ unsigned int channel_number,
+ double* out);
+
+/** \brief Get maximum sample peak from the last call to add_frames().
+ *
+ * The equation to convert to dBFS is: 20 * log10(out)
+ *
+ * @param st library state
+ * @param channel_number channel to analyse
+ * @param out maximum sample peak in float format (1.0 is 0 dBFS)
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_SAMPLE_PEAK" has not
+ * been set.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_prev_sample_peak(ebur128_state* st,
+ unsigned int channel_number,
+ double* out);
+
+/** \brief Get maximum true peak from all frames that have been processed.
+ *
+ * Uses an implementation defined algorithm to calculate the true peak. Do not
+ * try to compare resulting values across different versions of the library,
+ * as the algorithm may change.
+ *
+ * The current implementation uses a custom polyphase FIR interpolator to
+ * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for
+ * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz.
+ *
+ * The equation to convert to dBTP is: 20 * log10(out)
+ *
+ * @param st library state
+ * @param channel_number channel to analyse
+ * @param out maximum true peak in float format (1.0 is 0 dBTP)
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not
+ * been set.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_true_peak(ebur128_state* st,
+ unsigned int channel_number,
+ double* out);
+
+/** \brief Get maximum true peak from the last call to add_frames().
+ *
+ * Uses an implementation defined algorithm to calculate the true peak. Do not
+ * try to compare resulting values across different versions of the library,
+ * as the algorithm may change.
+ *
+ * The current implementation uses a custom polyphase FIR interpolator to
+ * calculate true peak. Will oversample 4x for sample rates < 96000 Hz, 2x for
+ * sample rates < 192000 Hz and leave the signal unchanged for 192000 Hz.
+ *
+ * The equation to convert to dBTP is: 20 * log10(out)
+ *
+ * @param st library state
+ * @param channel_number channel to analyse
+ * @param out maximum true peak in float format (1.0 is 0 dBTP)
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_TRUE_PEAK" has not
+ * been set.
+ * - EBUR128_ERROR_INVALID_CHANNEL_INDEX if invalid channel index.
+ */
+int ebur128_prev_true_peak(ebur128_state* st,
+ unsigned int channel_number,
+ double* out);
+
+/** \brief Get relative threshold in LUFS.
+ *
+ * @param st library state
+ * @param out relative threshold in LUFS.
+ * @return
+ * - EBUR128_SUCCESS on success.
+ * - EBUR128_ERROR_INVALID_MODE if mode "EBUR128_MODE_I" has not
+ * been set.
+ */
+int ebur128_relative_threshold(ebur128_state* st, double* out);
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* EBUR128_H_ */
diff --git a/tools/ref/ebur128/src/ebur128.c b/tools/ref/ebur128/src/ebur128.c
new file mode 100644
index 0000000..6c10f1e
--- /dev/null
+++ b/tools/ref/ebur128/src/ebur128.c
@@ -0,0 +1,1333 @@
+/* See COPYING file for copyright and license details. */
+
+#include "ebur128.h"
+
+#include <float.h>
+#include <limits.h>
+#include <math.h> /* You may have to define _USE_MATH_DEFINES if you use MSVC */
+#include <stdio.h>
+#include <stdlib.h>
+
+/* This can be replaced by any BSD-like queue implementation. */
+#include <sys/queue.h>
+
+#define CHECK_ERROR(condition, errorcode, goto_point) \
+ if ((condition)) { \
+ errcode = (errorcode); \
+ goto goto_point; \
+ }
+
+STAILQ_HEAD(ebur128_double_queue, ebur128_dq_entry);
+struct ebur128_dq_entry {
+ double z;
+ STAILQ_ENTRY(ebur128_dq_entry) entries;
+};
+
+#define ALMOST_ZERO 0.000001
+
+typedef struct { /* Data structure for polyphase FIR interpolator */
+ unsigned int factor; /* Interpolation factor of the interpolator */
+ unsigned int taps; /* Taps (prefer odd to increase zero coeffs) */
+ unsigned int channels; /* Number of channels */
+ unsigned int delay; /* Size of delay buffer */
+ struct {
+ unsigned int count; /* Number of coefficients in this subfilter */
+ unsigned int* index; /* Delay index of corresponding filter coeff */
+ double* coeff; /* List of subfilter coefficients */
+ }* filter; /* List of subfilters (one for each factor) */
+ float** z; /* List of delay buffers (one for each channel) */
+ unsigned int zi; /* Current delay buffer index */
+} interpolator;
+
+struct ebur128_state_internal {
+ /** Filtered audio data (used as ring buffer). */
+ double* audio_data;
+ /** Size of audio_data array. */
+ size_t audio_data_frames;
+ /** Current index for audio_data. */
+ size_t audio_data_index;
+ /** How many frames are needed for a gating block. Will correspond to 400ms
+ * of audio at initialization, and 100ms after the first block (75% overlap
+ * as specified in the 2011 revision of BS1770). */
+ unsigned long needed_frames;
+ /** The channel map. Has as many elements as there are channels. */
+ int* channel_map;
+ /** How many samples fit in 100ms (rounded). */
+ unsigned long samples_in_100ms;
+ /** BS.1770 filter coefficients (numerator). */
+ double b[5];
+ /** BS.1770 filter coefficients (denominator). */
+ double a[5];
+ /** BS.1770 filter state. */
+ double v[5][5];
+ /** Linked list of block energies. */
+ struct ebur128_double_queue block_list;
+ unsigned long block_list_max;
+ unsigned long block_list_size;
+ /** Linked list of 3s-block energies, used to calculate LRA. */
+ struct ebur128_double_queue short_term_block_list;
+ unsigned long st_block_list_max;
+ unsigned long st_block_list_size;
+ int use_histogram;
+ unsigned long *block_energy_histogram;
+ unsigned long *short_term_block_energy_histogram;
+ /** Keeps track of when a new short term block is needed. */
+ size_t short_term_frame_counter;
+ /** Maximum sample peak, one per channel */
+ double* sample_peak;
+ double* prev_sample_peak;
+ /** Maximum true peak, one per channel */
+ double* true_peak;
+ double* prev_true_peak;
+ interpolator* interp;
+ float* resampler_buffer_input;
+ size_t resampler_buffer_input_frames;
+ float* resampler_buffer_output;
+ size_t resampler_buffer_output_frames;
+ /** The maximum window duration in ms. */
+ unsigned long window;
+ unsigned long history;
+};
+
+static double relative_gate = -10.0;
+
+/* Those will be calculated when initializing the library */
+static double relative_gate_factor;
+static double minus_twenty_decibels;
+static double histogram_energies[1000];
+static double histogram_energy_boundaries[1001];
+
+/* Creates a polyphase FIR interpolator.
+ *
+ * taps     - length of the windowed-sinc prototype filter.
+ * factor   - interpolation factor; one subfilter is built per phase.
+ * channels - number of interleaved channels; one delay buffer each.
+ *
+ * Returns the new interpolator, or NULL if an argument is zero or an
+ * allocation fails (nothing is leaked in that case). The caller releases it
+ * with interp_destroy().
+ */
+static interpolator* interp_create(unsigned int taps, unsigned int factor, unsigned int channels) {
+  interpolator* interp;
+  unsigned int j = 0;
+
+  /* factor == 0 would divide by zero below; zero taps/channels would request
+   * zero-sized allocations, which may legitimately return NULL. */
+  if (taps == 0 || factor == 0 || channels == 0) {
+    return NULL;
+  }
+
+  interp = calloc(1, sizeof(interpolator));
+  if (!interp) {
+    return NULL;
+  }
+
+  interp->taps = taps;
+  interp->factor = factor;
+  interp->channels = channels;
+  /* Delay line length: taps / factor, rounded up. */
+  interp->delay = (interp->taps + interp->factor - 1) / interp->factor;
+
+  /* Initialize the filter memory
+   * One subfilter per interpolation factor. */
+  interp->filter = calloc(interp->factor, sizeof(*interp->filter));
+  if (!interp->filter) {
+    goto fail;
+  }
+  for (j = 0; j < interp->factor; j++) {
+    interp->filter[j].index = calloc(interp->delay, sizeof(unsigned int));
+    interp->filter[j].coeff = calloc(interp->delay, sizeof(double));
+    if (!interp->filter[j].index || !interp->filter[j].coeff) {
+      goto fail;
+    }
+  }
+  /* One delay buffer per channel. */
+  interp->z = calloc(interp->channels, sizeof(float*));
+  if (!interp->z) {
+    goto fail;
+  }
+  for (j = 0; j < interp->channels; j++) {
+    interp->z[j] = calloc( interp->delay, sizeof(float) );
+    if (!interp->z[j]) {
+      goto fail;
+    }
+  }
+
+  /* Calculate the filter coefficients */
+  for (j = 0; j < interp->taps; j++) {
+    /* Calculate sinc */
+    double m = (double)j - (double)(interp->taps - 1) / 2.0;
+    double c = 1.0;
+    if (fabs(m) > ALMOST_ZERO) {
+      c = sin(m * M_PI / interp->factor) / (m * M_PI / interp->factor);
+    }
+    /* Apply Hanning window */
+    c *= 0.5 * (1 - cos(2 * M_PI * j / (interp->taps - 1)));
+
+    if (fabs(c) > ALMOST_ZERO) { /* Ignore any zero coeffs. */
+      /* Put the coefficient into the correct subfilter */
+      unsigned int f = j % interp->factor;
+      unsigned int t = interp->filter[f].count++;
+      interp->filter[f].coeff[t] = c;
+      interp->filter[f].index[t] = j / interp->factor;
+    }
+  }
+  return interp;
+
+fail:
+  /* Everything came from calloc, so slots that were never filled are still
+   * NULL and free() ignores them. */
+  if (interp->filter) {
+    for (j = 0; j < interp->factor; j++) {
+      free(interp->filter[j].index);
+      free(interp->filter[j].coeff);
+    }
+    free(interp->filter);
+  }
+  if (interp->z) {
+    for (j = 0; j < interp->channels; j++) {
+      free(interp->z[j]);
+    }
+    free(interp->z);
+  }
+  free(interp);
+  return NULL;
+}
+
+/* Releases an interpolator built by interp_create(): the per-phase index and
+ * coefficient arrays, the per-channel delay buffers, then the object itself.
+ * A NULL argument is ignored. */
+static void interp_destroy(interpolator* interp) {
+  unsigned int phase = 0;
+  unsigned int chan = 0;
+
+  if (interp == NULL) {
+    return;
+  }
+
+  while (phase < interp->factor) {
+    free(interp->filter[phase].index);
+    free(interp->filter[phase].coeff);
+    ++phase;
+  }
+  free(interp->filter);
+
+  while (chan < interp->channels) {
+    free(interp->z[chan]);
+    ++chan;
+  }
+  free(interp->z);
+
+  free(interp);
+}
+
+/* Runs the interpolator over a block of interleaved audio.
+ *
+ * interp - interpolator from interp_create(); its delay buffers and write
+ *          index are updated, so state carries over between calls.
+ * frames - number of input frames in `in`.
+ * in     - frames * channels interleaved input samples.
+ * out    - receives frames * factor * channels interleaved samples; the
+ *          caller must provide that much room.
+ *
+ * Returns the number of output frames, frames * factor.
+ */
+static size_t interp_process(interpolator* interp, size_t frames, float* in, float* out) {
+ size_t frame = 0;
+ unsigned int chan = 0;
+ unsigned int f = 0;
+ unsigned int t = 0;
+ /* Samples written to `out` per input frame. */
+ unsigned int out_stride = interp->channels * interp->factor;
+ float* outp = 0;
+ double acc = 0;
+ double c = 0;
+
+ for (frame = 0; frame < frames; frame++) {
+ for (chan = 0; chan < interp->channels; chan++) {
+ /* Add sample to delay buffer */
+ interp->z[chan][interp->zi] = *in++;
+ /* Apply coefficients */
+ outp = out + chan;
+ /* One output sample per subfilter, spaced `channels` apart so the output stays interleaved. */
+ for (f = 0; f < interp->factor; f++) {
+ acc = 0.0;
+ for (t = 0; t < interp->filter[f].count; t++) {
+ /* Step back from the newest sample by the coefficient's delay, wrapping around the ring. */
+ int i = (int)interp->zi - (int)interp->filter[f].index[t];
+ if (i < 0) {
+ i += interp->delay;
+ }
+ c = interp->filter[f].coeff[t];
+ acc += interp->z[chan][i] * c;
+ }
+ *outp = (float)acc;
+ outp += interp->channels;
+ }
+ }
+ out += out_stride;
+ /* Advance the shared delay-buffer write index (circular). */
+ interp->zi++;
+ if (interp->zi == interp->delay) {
+ interp->zi = 0;
+ }
+ }
+
+ return frames * interp->factor;
+}
+
+/* Computes the weighting-filter coefficients for st->samplerate and clears the
+ * filter state.
+ *
+ * Two second-order sections are derived (pb/pa, then rb/ra) and multiplied
+ * together as polynomials, giving the five numerator coefficients st->d->b and
+ * the five denominator coefficients st->d->a. The state array st->d->v is set
+ * to zero.
+ *
+ * NOTE(review): the constants presumably describe the BS.1770 pre-filter
+ * (a shelving stage followed by a high-pass stage) - confirm against the
+ * standard before changing them.
+ */
+static void ebur128_init_filter(ebur128_state* st) {
+ int i, j;
+
+ /* Parameters of the first section. */
+ double f0 = 1681.974450955533;
+ double G = 3.999843853973347;
+ double Q = 0.7071752369554196;
+
+ double K = tan(M_PI * f0 / (double) st->samplerate);
+ double Vh = pow(10.0, G / 20.0);
+ double Vb = pow(Vh, 0.4996667741545416);
+
+ /* pb/pa: numerator/denominator of the first section.
+  * rb/ra: numerator/denominator of the second section (rb is fixed at 1, -2, 1). */
+ double pb[3] = {0.0, 0.0, 0.0};
+ double pa[3] = {1.0, 0.0, 0.0};
+ double rb[3] = {1.0, -2.0, 1.0};
+ double ra[3] = {1.0, 0.0, 0.0};
+
+ double a0 = 1.0 + K / Q + K * K ;
+ pb[0] = (Vh + Vb * K / Q + K * K) / a0;
+ pb[1] = 2.0 * (K * K - Vh) / a0;
+ pb[2] = (Vh - Vb * K / Q + K * K) / a0;
+ pa[1] = 2.0 * (K * K - 1.0) / a0;
+ pa[2] = (1.0 - K / Q + K * K) / a0;
+
+ /* fprintf(stderr, "%.14f %.14f %.14f %.14f %.14f\n",
+ b1[0], b1[1], b1[2], a1[1], a1[2]); */
+
+ /* Parameters of the second section. */
+ f0 = 38.13547087602444;
+ Q = 0.5003270373238773;
+ K = tan(M_PI * f0 / (double) st->samplerate);
+
+ ra[1] = 2.0 * (K * K - 1.0) / (1.0 + K / Q + K * K);
+ ra[2] = (1.0 - K / Q + K * K) / (1.0 + K / Q + K * K);
+
+ /* fprintf(stderr, "%.14f %.14f\n", a2[1], a2[2]); */
+
+ /* Polynomial product of the two numerators. */
+ st->d->b[0] = pb[0] * rb[0];
+ st->d->b[1] = pb[0] * rb[1] + pb[1] * rb[0];
+ st->d->b[2] = pb[0] * rb[2] + pb[1] * rb[1] + pb[2] * rb[0];
+ st->d->b[3] = pb[1] * rb[2] + pb[2] * rb[1];
+ st->d->b[4] = pb[2] * rb[2];
+
+ /* Polynomial product of the two denominators. */
+ st->d->a[0] = pa[0] * ra[0];
+ st->d->a[1] = pa[0] * ra[1] + pa[1] * ra[0];
+ st->d->a[2] = pa[0] * ra[2] + pa[1] * ra[1] + pa[2] * ra[0];
+ st->d->a[3] = pa[1] * ra[2] + pa[2] * ra[1];
+ st->d->a[4] = pa[2] * ra[2];
+
+ /* Clear the whole 5 x 5 filter state. */
+ for (i = 0; i < 5; ++i) {
+ for (j = 0; j < 5; ++j) {
+ st->d->v[i][j] = 0.0;
+ }
+ }
+}
+
+/* Allocates st->d->channel_map (one entry per channel) and fills in the
+ * default layout:
+ *   - exactly 4 channels: left, right, left surround, right surround;
+ *   - exactly 5 channels: left, right, center, left surround, right surround;
+ *   - any other count:    left, right, center, unused, left surround,
+ *                         right surround, and unused for every further one.
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM if the allocation fails.
+ */
+static int ebur128_init_channel_map(ebur128_state* st) {
+  static const int quad_layout[4] = {
+    EBUR128_LEFT, EBUR128_RIGHT,
+    EBUR128_LEFT_SURROUND, EBUR128_RIGHT_SURROUND
+  };
+  static const int five_layout[5] = {
+    EBUR128_LEFT, EBUR128_RIGHT, EBUR128_CENTER,
+    EBUR128_LEFT_SURROUND, EBUR128_RIGHT_SURROUND
+  };
+  static const int generic_layout[6] = {
+    EBUR128_LEFT, EBUR128_RIGHT, EBUR128_CENTER, EBUR128_UNUSED,
+    EBUR128_LEFT_SURROUND, EBUR128_RIGHT_SURROUND
+  };
+  const int* layout = generic_layout;
+  size_t known = 6;
+  size_t idx;
+  int* map = (int*) malloc(st->channels * sizeof(int));
+
+  st->d->channel_map = map;
+  if (map == NULL) {
+    return EBUR128_ERROR_NOMEM;
+  }
+
+  if (st->channels == 4) {
+    layout = quad_layout;
+    known = 4;
+  } else if (st->channels == 5) {
+    layout = five_layout;
+    known = 5;
+  }
+
+  /* Channels beyond the table are marked unused. */
+  for (idx = 0; idx < st->channels; ++idx) {
+    map[idx] = (idx < known) ? layout[idx] : EBUR128_UNUSED;
+  }
+  return EBUR128_SUCCESS;
+}
+
+/* Sets up the oversampler: an interpolator plus its input and output staging
+ * buffers.
+ *
+ * Below 96000 Hz the signal is oversampled 4x, below 192000 Hz 2x; at 192000 Hz
+ * and above no interpolator is created and all three fields stay NULL.
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM if any allocation fails. On
+ * failure everything allocated here has been released and the three fields are
+ * NULL.
+ */
+static int ebur128_init_resampler(ebur128_state* st) {
+  int errcode = EBUR128_SUCCESS;
+
+  /* Start from a known state so every exit path leaves valid pointers. */
+  st->d->interp = NULL;
+  st->d->resampler_buffer_input = NULL;
+  st->d->resampler_buffer_output = NULL;
+
+  if (st->samplerate < 96000) {
+    st->d->interp = interp_create(49, 4, st->channels);
+  } else if (st->samplerate < 192000) {
+    st->d->interp = interp_create(49, 2, st->channels);
+  } else {
+    /* No oversampling at this rate. */
+    goto exit;
+  }
+  CHECK_ERROR(!st->d->interp, EBUR128_ERROR_NOMEM, exit)
+
+  st->d->resampler_buffer_input_frames = st->d->samples_in_100ms * 4;
+  st->d->resampler_buffer_input = malloc(st->d->resampler_buffer_input_frames *
+                                         st->channels *
+                                         sizeof(float));
+  CHECK_ERROR(!st->d->resampler_buffer_input, EBUR128_ERROR_NOMEM, free_interp)
+
+  st->d->resampler_buffer_output_frames =
+      st->d->resampler_buffer_input_frames *
+      st->d->interp->factor;
+  st->d->resampler_buffer_output = malloc
+      (st->d->resampler_buffer_output_frames *
+       st->channels *
+       sizeof(float));
+  CHECK_ERROR(!st->d->resampler_buffer_output, EBUR128_ERROR_NOMEM, free_input)
+
+  return errcode;
+
+  /* Cleanup runs in reverse order of acquisition. The original had the two
+   * labels the other way round, so a failed output-buffer allocation freed
+   * the input buffer but leaked the interpolator. */
+free_input:
+  free(st->d->resampler_buffer_input);
+  st->d->resampler_buffer_input = NULL;
+free_interp:
+  interp_destroy(st->d->interp);
+  st->d->interp = NULL;
+exit:
+  return errcode;
+}
+
+/* Frees both resampler scratch buffers and the interpolator, then resets
+ * the three pointers to NULL. */
+static void ebur128_destroy_resampler(ebur128_state* st) {
+  struct ebur128_state_internal* d = st->d;
+
+  free(d->resampler_buffer_input);
+  d->resampler_buffer_input = NULL;
+
+  free(d->resampler_buffer_output);
+  d->resampler_buffer_output = NULL;
+
+  interp_destroy(d->interp);
+  d->interp = NULL;
+}
+
+/* Writes the library version (the EBUR128_VERSION_* constants) into the
+ * three output parameters. */
+void ebur128_get_version(int* major, int* minor, int* patch) {
+  major[0] = EBUR128_VERSION_MAJOR;
+  minor[0] = EBUR128_VERSION_MINOR;
+  patch[0] = EBUR128_VERSION_PATCH;
+}
+
+/* Creates and initialises a measurement state for `channels` channels at
+ * `samplerate` Hz with the given EBUR128_MODE_* bit mask.
+ *
+ * Returns the new state, or NULL when channels is 0, samplerate is below 5,
+ * the mode contains neither the EBUR128_MODE_S nor the EBUR128_MODE_M bits,
+ * or an allocation fails. On failure everything allocated so far is released
+ * through the chain of cleanup labels at the end of the function; each label
+ * frees one resource and falls through to the labels for older resources. */
+ebur128_state* ebur128_init(unsigned int channels,
+                            unsigned long samplerate,
+                            int mode) {
+  int result;
+  int errcode;
+  ebur128_state* st;
+  unsigned int i;
+  size_t j;
+
+  if (channels == 0 || samplerate < 5) {
+    return NULL;
+  }
+
+  st = (ebur128_state*) malloc(sizeof(ebur128_state));
+  CHECK_ERROR(!st, 0, exit)
+  st->d = (struct ebur128_state_internal*)
+          malloc(sizeof(struct ebur128_state_internal));
+  CHECK_ERROR(!st->d, 0, free_state)
+  st->channels = channels;
+  errcode = ebur128_init_channel_map(st);
+  CHECK_ERROR(errcode, 0, free_internal)
+
+  /* per-channel peak trackers, all starting at zero */
+  st->d->sample_peak = (double*) malloc(channels * sizeof(double));
+  CHECK_ERROR(!st->d->sample_peak, 0, free_channel_map)
+  st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double));
+  CHECK_ERROR(!st->d->prev_sample_peak, 0, free_sample_peak)
+  st->d->true_peak = (double*) malloc(channels * sizeof(double));
+  CHECK_ERROR(!st->d->true_peak, 0, free_prev_sample_peak)
+  st->d->prev_true_peak = (double*) malloc(channels * sizeof(double));
+  CHECK_ERROR(!st->d->prev_true_peak, 0, free_true_peak)
+  for (i = 0; i < channels; ++i) {
+    st->d->sample_peak[i] = 0.0;
+    st->d->prev_sample_peak[i] = 0.0;
+    st->d->true_peak[i] = 0.0;
+    st->d->prev_true_peak[i] = 0.0;
+  }
+
+  st->d->use_histogram = mode & EBUR128_MODE_HISTOGRAM ? 1 : 0;
+  st->d->history = ULONG_MAX;
+  st->samplerate = samplerate;
+  st->d->samples_in_100ms = (st->samplerate + 5) / 10;
+  st->mode = mode;
+  if ((mode & EBUR128_MODE_S) == EBUR128_MODE_S) {
+    st->d->window = 3000;
+  } else if ((mode & EBUR128_MODE_M) == EBUR128_MODE_M) {
+    st->d->window = 400;
+  } else {
+    goto free_prev_true_peak;
+  }
+  st->d->audio_data_frames = st->samplerate * st->d->window / 1000;
+  if (st->d->audio_data_frames % st->d->samples_in_100ms) {
+    /* round up to multiple of samples_in_100ms */
+    st->d->audio_data_frames = st->d->audio_data_frames
+                             + st->d->samples_in_100ms
+                             - (st->d->audio_data_frames % st->d->samples_in_100ms);
+  }
+  st->d->audio_data = (double*) malloc(st->d->audio_data_frames *
+                                       st->channels *
+                                       sizeof(double));
+  /* prev_true_peak is already allocated here, so unwinding has to start at
+   * free_prev_true_peak; starting one label later would leak it */
+  CHECK_ERROR(!st->d->audio_data, 0, free_prev_true_peak)
+  for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) {
+    st->d->audio_data[j] = 0.0;
+  }
+
+  ebur128_init_filter(st);
+
+  if (st->d->use_histogram) {
+    st->d->block_energy_histogram = malloc(1000 * sizeof(unsigned long));
+    CHECK_ERROR(!st->d->block_energy_histogram, 0, free_audio_data)
+    for (i = 0; i < 1000; ++i) {
+      st->d->block_energy_histogram[i] = 0;
+    }
+  } else {
+    st->d->block_energy_histogram = NULL;
+  }
+  if (st->d->use_histogram) {
+    st->d->short_term_block_energy_histogram = malloc(1000 * sizeof(unsigned long));
+    CHECK_ERROR(!st->d->short_term_block_energy_histogram, 0, free_block_energy_histogram)
+    for (i = 0; i < 1000; ++i) {
+      st->d->short_term_block_energy_histogram[i] = 0;
+    }
+  } else {
+    st->d->short_term_block_energy_histogram = NULL;
+  }
+  STAILQ_INIT(&st->d->block_list);
+  st->d->block_list_size = 0;
+  st->d->block_list_max = st->d->history / 100;
+  STAILQ_INIT(&st->d->short_term_block_list);
+  st->d->st_block_list_size = 0;
+  st->d->st_block_list_max = st->d->history / 3000;
+  st->d->short_term_frame_counter = 0;
+
+  result = ebur128_init_resampler(st);
+  CHECK_ERROR(result, 0, free_short_term_block_energy_histogram)
+
+  /* the first block needs 400ms of audio data */
+  st->d->needed_frames = st->d->samples_in_100ms * 4;
+  /* start at the beginning of the buffer */
+  st->d->audio_data_index = 0;
+
+  /* initialize static constants */
+  relative_gate_factor = pow(10.0, relative_gate / 10.0);
+  minus_twenty_decibels = pow(10.0, -20.0 / 10.0);
+  histogram_energy_boundaries[0] = pow(10.0, (-70.0 + 0.691) / 10.0);
+  if (st->d->use_histogram) {
+    for (i = 0; i < 1000; ++i) {
+      histogram_energies[i] = pow(10.0, ((double) i / 10.0 - 69.95 + 0.691) / 10.0);
+    }
+    for (i = 1; i < 1001; ++i) {
+      histogram_energy_boundaries[i] = pow(10.0, ((double) i / 10.0 - 70.0 + 0.691) / 10.0);
+    }
+  }
+
+  return st;
+
+free_short_term_block_energy_histogram:
+  free(st->d->short_term_block_energy_histogram);
+free_block_energy_histogram:
+  free(st->d->block_energy_histogram);
+free_audio_data:
+  free(st->d->audio_data);
+free_prev_true_peak:
+  free(st->d->prev_true_peak);
+free_true_peak:
+  free(st->d->true_peak);
+free_prev_sample_peak:
+  free(st->d->prev_sample_peak);
+free_sample_peak:
+  free(st->d->sample_peak);
+free_channel_map:
+  free(st->d->channel_map);
+free_internal:
+  free(st->d);
+free_state:
+  free(st);
+exit:
+  return NULL;
+}
+
+/* Releases every resource owned by *st (buffers, queued block entries, the
+ * resampler, the internal struct and the state itself) and sets *st to
+ * NULL. */
+void ebur128_destroy(ebur128_state** st) {
+  struct ebur128_state_internal* d = (*st)->d;
+  struct ebur128_dq_entry* node;
+
+  /* plain heap buffers */
+  free(d->block_energy_histogram);
+  free(d->short_term_block_energy_histogram);
+  free(d->audio_data);
+  free(d->channel_map);
+  free(d->sample_peak);
+  free(d->prev_sample_peak);
+  free(d->true_peak);
+  free(d->prev_true_peak);
+
+  /* drain the gating block list */
+  for (; !STAILQ_EMPTY(&d->block_list); free(node)) {
+    node = STAILQ_FIRST(&d->block_list);
+    STAILQ_REMOVE_HEAD(&d->block_list, entries);
+  }
+  /* drain the short-term block list */
+  for (; !STAILQ_EMPTY(&d->short_term_block_list); free(node)) {
+    node = STAILQ_FIRST(&d->short_term_block_list);
+    STAILQ_REMOVE_HEAD(&d->short_term_block_list, entries);
+  }
+
+  ebur128_destroy_resampler(*st);
+  free(d);
+  free(*st);
+  *st = NULL;
+}
+
+/* Runs `frames` frames from resampler_buffer_input through the interpolator
+ * and raises prev_true_peak[c] to the largest absolute output sample seen on
+ * each channel c. */
+static void ebur128_check_true_peak(ebur128_state* st, size_t frames) {
+  const size_t produced = interp_process(st->d->interp, frames,
+                                         st->d->resampler_buffer_input,
+                                         st->d->resampler_buffer_output);
+  size_t frame, ch;
+
+  for (frame = 0; frame < produced; ++frame) {
+    /* start of this frame in the interleaved output buffer */
+    const size_t base = frame * st->channels;
+    for (ch = 0; ch < st->channels; ++ch) {
+      const float sample = st->d->resampler_buffer_output[base + ch];
+      double* peak = &st->d->prev_true_peak[ch];
+
+      if (sample > *peak) {
+        *peak = sample;
+      } else if (-sample > *peak) {
+        *peak = -sample;
+      }
+    }
+  }
+}
+/*
+ * Denormal handling helpers used by the EBUR128_FILTER functions.
+ *
+ * With __SSE2_MATH__ defined: TURN_ON_FTZ saves the MXCSR register in a
+ * local named mxcsr and sets the flush-to-zero flag, TURN_OFF_FTZ restores
+ * the saved value, and FLUSH_MANUALLY expands to nothing.
+ *
+ * Otherwise a compile-time warning is emitted, TURN_ON_FTZ / TURN_OFF_FTZ
+ * expand to nothing, and FLUSH_MANUALLY zeroes the filter state values
+ * v[ci][1..4] whose magnitude is below DBL_MIN. FLUSH_MANUALLY expects
+ * `st` and `ci` to be in scope at the point of use.
+ */
+#ifdef __SSE2_MATH__
+#include <xmmintrin.h>
+#define TURN_ON_FTZ \
+ unsigned int mxcsr = _mm_getcsr(); \
+ _mm_setcsr(mxcsr | _MM_FLUSH_ZERO_ON);
+#define TURN_OFF_FTZ _mm_setcsr(mxcsr);
+#define FLUSH_MANUALLY
+#else
+#warning "manual FTZ is being used, please enable SSE2 (-msse2 -mfpmath=sse)"
+#define TURN_ON_FTZ
+#define TURN_OFF_FTZ
+#define FLUSH_MANUALLY \
+ st->d->v[ci][4] = fabs(st->d->v[ci][4]) < DBL_MIN ? 0.0 : st->d->v[ci][4]; \
+ st->d->v[ci][3] = fabs(st->d->v[ci][3]) < DBL_MIN ? 0.0 : st->d->v[ci][3]; \
+ st->d->v[ci][2] = fabs(st->d->v[ci][2]) < DBL_MIN ? 0.0 : st->d->v[ci][2]; \
+ st->d->v[ci][1] = fabs(st->d->v[ci][1]) < DBL_MIN ? 0.0 : st->d->v[ci][1];
+#endif
+/*
+ * EBUR128_FILTER(type, min_scale, max_scale) generates
+ *   static void ebur128_filter_<type>(ebur128_state* st, const type* src,
+ *                                     size_t frames)
+ * which processes `frames` interleaved frames from src:
+ *
+ *  - scaling_factor is the larger of -min_scale and max_scale; every sample
+ *    is divided by it before use.
+ *  - With EBUR128_MODE_SAMPLE_PEAK set, the largest absolute scaled sample
+ *    per channel is folded into prev_sample_peak[c].
+ *  - With EBUR128_MODE_TRUE_PEAK set and an interpolator present, the scaled
+ *    samples are copied as float into resampler_buffer_input and
+ *    ebur128_check_true_peak is called.
+ *  - Each channel whose channel_map entry minus one is not negative is run
+ *    through the filter described by a[1..4] / b[0..4], using the state
+ *    v[ci][0..4]; the result is written to audio_data starting at
+ *    audio_data_index. EBUR128_DUAL_MONO uses filter state 0.
+ *
+ * The function does not advance audio_data_index itself.
+ * It is instantiated below for short, int, float and double.
+ */
+#define EBUR128_FILTER(type, min_scale, max_scale) \
+static void ebur128_filter_##type(ebur128_state* st, const type* src, \
+ size_t frames) { \
+ static double scaling_factor = \
+ -((double) (min_scale)) > (double) (max_scale) ? \
+ -((double) (min_scale)) : (double) (max_scale); \
+ double* audio_data = st->d->audio_data + st->d->audio_data_index; \
+ size_t i, c; \
+ \
+ TURN_ON_FTZ \
+ \
+ if ((st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK) { \
+ for (c = 0; c < st->channels; ++c) { \
+ double max = 0.0; \
+ for (i = 0; i < frames; ++i) { \
+ if (src[i * st->channels + c] > max) { \
+ max = src[i * st->channels + c]; \
+ } else if (-src[i * st->channels + c] > max) { \
+ max = -1.0 * src[i * st->channels + c]; \
+ } \
+ } \
+ max /= scaling_factor; \
+ if (max > st->d->prev_sample_peak[c]) st->d->prev_sample_peak[c] = max; \
+ } \
+ } \
+ if ((st->mode & EBUR128_MODE_TRUE_PEAK) == EBUR128_MODE_TRUE_PEAK && \
+ st->d->interp) { \
+ for (c = 0; c < st->channels; ++c) { \
+ for (i = 0; i < frames; ++i) { \
+ st->d->resampler_buffer_input[i * st->channels + c] = \
+ (float) (src[i * st->channels + c] / scaling_factor); \
+ } \
+ } \
+ ebur128_check_true_peak(st, frames); \
+ } \
+ for (c = 0; c < st->channels; ++c) { \
+ int ci = st->d->channel_map[c] - 1; \
+ if (ci < 0) continue; \
+ else if (ci == EBUR128_DUAL_MONO - 1) ci = 0; /*dual mono */ \
+ for (i = 0; i < frames; ++i) { \
+ st->d->v[ci][0] = (double) (src[i * st->channels + c] / scaling_factor) \
+ - st->d->a[1] * st->d->v[ci][1] \
+ - st->d->a[2] * st->d->v[ci][2] \
+ - st->d->a[3] * st->d->v[ci][3] \
+ - st->d->a[4] * st->d->v[ci][4]; \
+ audio_data[i * st->channels + c] = \
+ st->d->b[0] * st->d->v[ci][0] \
+ + st->d->b[1] * st->d->v[ci][1] \
+ + st->d->b[2] * st->d->v[ci][2] \
+ + st->d->b[3] * st->d->v[ci][3] \
+ + st->d->b[4] * st->d->v[ci][4]; \
+ st->d->v[ci][4] = st->d->v[ci][3]; \
+ st->d->v[ci][3] = st->d->v[ci][2]; \
+ st->d->v[ci][2] = st->d->v[ci][1]; \
+ st->d->v[ci][1] = st->d->v[ci][0]; \
+ } \
+ FLUSH_MANUALLY \
+ } \
+ TURN_OFF_FTZ \
+}
+EBUR128_FILTER(short, SHRT_MIN, SHRT_MAX)
+EBUR128_FILTER(int, INT_MIN, INT_MAX)
+EBUR128_FILTER(float, -1.0f, 1.0f)
+EBUR128_FILTER(double, -1.0, 1.0)
+
+/* Converts a mean-square energy value into loudness:
+ * 10 * log10(energy) - 0.691. */
+static double ebur128_energy_to_loudness(double energy) {
+  const double decibels = 10 * (log(energy) / log(10.0));
+
+  return decibels - 0.691;
+}
+
+/* Binary search over histogram_energy_boundaries[0..1000]: returns the
+ * index i in [0, 999] of the bin whose lower boundary is the last one not
+ * greater than `energy` (index 0 when energy is below every boundary). */
+static size_t find_histogram_index(double energy) {
+  size_t lo = 0;
+  size_t hi = 1000;
+
+  /* shrink [lo, hi] until it spans exactly one bin */
+  while (hi - lo != 1) {
+    const size_t mid = (lo + hi) / 2;
+
+    if (energy >= histogram_energy_boundaries[mid]) {
+      lo = mid;
+    } else {
+      hi = mid;
+    }
+  }
+
+  return lo;
+}
+/*
+ * Computes the channel-weighted mean-square energy of the most recent
+ * frames_per_block frames in the audio_data ring buffer.
+ *
+ * Channels mapped to EBUR128_UNUSED are skipped. The six EBUR128_Mp/Mm
+ * positions listed below are weighted by 1.41 and EBUR128_DUAL_MONO by 2.0.
+ * When fewer than frames_per_block frames lie before audio_data_index, the
+ * remainder is taken from the end of the buffer (wrap-around).
+ *
+ * If optional_output is non-NULL the energy is stored there and nothing else
+ * happens. Otherwise an energy of at least histogram_energy_boundaries[0] is
+ * recorded: either in block_energy_histogram, or as an entry appended to
+ * block_list (the oldest entry is reused once block_list_max is reached).
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM when a list entry cannot
+ * be allocated.
+ */
+static int ebur128_calc_gating_block(ebur128_state* st, size_t frames_per_block,
+ double* optional_output) {
+ size_t i, c;
+ double sum = 0.0;
+ double channel_sum;
+ for (c = 0; c < st->channels; ++c) {
+ if (st->d->channel_map[c] == EBUR128_UNUSED) {
+ continue;
+ }
+ channel_sum = 0.0;
+ if (st->d->audio_data_index < frames_per_block * st->channels) { /* block wraps around the buffer end */
+ for (i = 0; i < st->d->audio_data_index / st->channels; ++i) {
+ channel_sum += st->d->audio_data[i * st->channels + c] *
+ st->d->audio_data[i * st->channels + c];
+ }
+ for (i = st->d->audio_data_frames -
+ (frames_per_block -
+ st->d->audio_data_index / st->channels);
+ i < st->d->audio_data_frames; ++i) {
+ channel_sum += st->d->audio_data[i * st->channels + c] *
+ st->d->audio_data[i * st->channels + c];
+ }
+ } else { /* block is contiguous, ending at audio_data_index */
+ for (i = st->d->audio_data_index / st->channels - frames_per_block;
+ i < st->d->audio_data_index / st->channels;
+ ++i) {
+ channel_sum += st->d->audio_data[i * st->channels + c] *
+ st->d->audio_data[i * st->channels + c];
+ }
+ }
+ if (st->d->channel_map[c] == EBUR128_Mp110 ||
+ st->d->channel_map[c] == EBUR128_Mm110 ||
+ st->d->channel_map[c] == EBUR128_Mp060 ||
+ st->d->channel_map[c] == EBUR128_Mm060 ||
+ st->d->channel_map[c] == EBUR128_Mp090 ||
+ st->d->channel_map[c] == EBUR128_Mm090) {
+ channel_sum *= 1.41;
+ } else if (st->d->channel_map[c] == EBUR128_DUAL_MONO) {
+ channel_sum *= 2.0;
+ }
+ sum += channel_sum;
+ }
+ sum /= (double) frames_per_block;
+ if (optional_output) {
+ *optional_output = sum;
+ return EBUR128_SUCCESS;
+ } else if (sum >= histogram_energy_boundaries[0]) {
+ if (st->d->use_histogram) {
+ ++st->d->block_energy_histogram[find_histogram_index(sum)];
+ } else {
+ struct ebur128_dq_entry* block;
+ if (st->d->block_list_size == st->d->block_list_max) { /* list full: recycle the oldest entry */
+ block = STAILQ_FIRST(&st->d->block_list);
+ STAILQ_REMOVE_HEAD(&st->d->block_list, entries);
+ } else {
+ block = (struct ebur128_dq_entry*) malloc(sizeof(struct ebur128_dq_entry));
+ if (!block) {
+ return EBUR128_ERROR_NOMEM;
+ }
+ st->d->block_list_size++;
+ }
+ block->z = sum;
+ STAILQ_INSERT_TAIL(&st->d->block_list, block, entries);
+ }
+ return EBUR128_SUCCESS;
+ } else {
+ return EBUR128_SUCCESS;
+ }
+}
+
+/* Assigns channel type `value` to channel `channel_number`.
+ *
+ * Returns 0 on success and 1 when the channel index is out of range or when
+ * EBUR128_DUAL_MONO is requested for anything other than channel 0 of a
+ * one-channel state (a message is then printed to stderr). */
+int ebur128_set_channel(ebur128_state* st,
+                        unsigned int channel_number,
+                        int value) {
+  const int is_sole_mono_channel =
+      (st->channels == 1 && channel_number == 0);
+
+  if (channel_number >= st->channels) {
+    return 1;
+  }
+  if (value == EBUR128_DUAL_MONO && !is_sole_mono_channel) {
+    fprintf(stderr, "EBUR128_DUAL_MONO only works with mono files!\n");
+    return 1;
+  }
+
+  st->d->channel_map[channel_number] = value;
+  return 0;
+}
+/*
+ * Re-configures an existing state for a new channel count and/or sample
+ * rate.
+ *
+ * Returns EBUR128_ERROR_NOMEM when channels is 0 or samplerate is below 5
+ * (the same code that is used for allocation failures), and
+ * EBUR128_ERROR_NO_CHANGE when both values equal the current ones.
+ *
+ * Otherwise audio_data is reallocated and zeroed; if the channel count
+ * changed, the channel map is rebuilt with the defaults and the four
+ * per-channel peak arrays are reallocated and zeroed; if the sample rate
+ * changed, samples_in_100ms is recomputed and the filter re-initialised.
+ * The resampler is destroyed and re-created, and needed_frames,
+ * audio_data_index and short_term_frame_counter are reset.
+ *
+ * NOTE(review): on an allocation failure the function returns with the
+ * state partially rebuilt (already-freed pointers are NULL, st->channels is
+ * already updated) - confirm callers treat the state as unusable then.
+ */
+int ebur128_change_parameters(ebur128_state* st,
+ unsigned int channels,
+ unsigned long samplerate) {
+ int errcode = EBUR128_SUCCESS;
+ size_t j;
+
+ if (channels == 0 || samplerate < 5) {
+ return EBUR128_ERROR_NOMEM;
+ }
+
+ if (channels == st->channels &&
+ samplerate == st->samplerate) {
+ return EBUR128_ERROR_NO_CHANGE;
+ }
+
+ free(st->d->audio_data);
+ st->d->audio_data = NULL;
+
+ if (channels != st->channels) {
+ unsigned int i;
+
+ free(st->d->channel_map); st->d->channel_map = NULL;
+ free(st->d->sample_peak); st->d->sample_peak = NULL;
+ free(st->d->prev_sample_peak); st->d->prev_sample_peak = NULL;
+ free(st->d->true_peak); st->d->true_peak = NULL;
+ free(st->d->prev_true_peak); st->d->prev_true_peak = NULL;
+ st->channels = channels;
+
+ errcode = ebur128_init_channel_map(st);
+ CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit)
+
+ st->d->sample_peak = (double*) malloc(channels * sizeof(double));
+ CHECK_ERROR(!st->d->sample_peak, EBUR128_ERROR_NOMEM, exit)
+ st->d->prev_sample_peak = (double*) malloc(channels * sizeof(double));
+ CHECK_ERROR(!st->d->prev_sample_peak, EBUR128_ERROR_NOMEM, exit)
+ st->d->true_peak = (double*) malloc(channels * sizeof(double));
+ CHECK_ERROR(!st->d->true_peak, EBUR128_ERROR_NOMEM, exit)
+ st->d->prev_true_peak = (double*) malloc(channels * sizeof(double));
+ CHECK_ERROR(!st->d->prev_true_peak, EBUR128_ERROR_NOMEM, exit)
+ for (i = 0; i < channels; ++i) {
+ st->d->sample_peak[i] = 0.0;
+ st->d->prev_sample_peak[i] = 0.0;
+ st->d->true_peak[i] = 0.0;
+ st->d->prev_true_peak[i] = 0.0;
+ }
+ }
+ if (samplerate != st->samplerate) {
+ st->samplerate = samplerate;
+ st->d->samples_in_100ms = (st->samplerate + 5) / 10;
+ ebur128_init_filter(st);
+ }
+ st->d->audio_data_frames = st->samplerate * st->d->window / 1000;
+ if (st->d->audio_data_frames % st->d->samples_in_100ms) {
+ /* round up to multiple of samples_in_100ms */
+ st->d->audio_data_frames = st->d->audio_data_frames
+ + st->d->samples_in_100ms
+ - (st->d->audio_data_frames % st->d->samples_in_100ms);
+ }
+ st->d->audio_data = (double*) malloc(st->d->audio_data_frames *
+ st->channels *
+ sizeof(double));
+ CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit)
+ for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) {
+ st->d->audio_data[j] = 0.0;
+ }
+
+ ebur128_destroy_resampler(st);
+ errcode = ebur128_init_resampler(st);
+ CHECK_ERROR(errcode, EBUR128_ERROR_NOMEM, exit)
+
+ /* the first block needs 400ms of audio data */
+ st->d->needed_frames = st->d->samples_in_100ms * 4;
+ /* start at the beginning of the buffer */
+ st->d->audio_data_index = 0;
+ /* reset short term frame counter */
+ st->d->short_term_frame_counter = 0;
+
+exit:
+ return errcode;
+}
+/*
+ * Sets the length (in ms) of the audio_data window.
+ *
+ * The requested window is raised to 3000 when the EBUR128_MODE_S bits are
+ * set, otherwise to 400 when the EBUR128_MODE_M bits are set. Returns
+ * EBUR128_ERROR_NO_CHANGE when the resulting window equals the current one.
+ *
+ * Otherwise audio_data is freed and reallocated for the new window (rounded
+ * up to a multiple of samples_in_100ms) and zeroed, and needed_frames,
+ * audio_data_index and short_term_frame_counter are reset, so previously
+ * buffered audio is discarded.
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM when the allocation fails
+ * (audio_data is NULL in that case).
+ */
+int ebur128_set_max_window(ebur128_state* st, unsigned long window)
+{
+ int errcode = EBUR128_SUCCESS;
+ size_t j;
+
+ if ((st->mode & EBUR128_MODE_S) == EBUR128_MODE_S && window < 3000) {
+ window = 3000;
+ } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && window < 400) {
+ window = 400;
+ }
+ if (window == st->d->window) {
+ return EBUR128_ERROR_NO_CHANGE;
+ }
+
+ st->d->window = window;
+ free(st->d->audio_data);
+ st->d->audio_data = NULL;
+ st->d->audio_data_frames = st->samplerate * st->d->window / 1000;
+ if (st->d->audio_data_frames % st->d->samples_in_100ms) {
+ /* round up to multiple of samples_in_100ms */
+ st->d->audio_data_frames = st->d->audio_data_frames
+ + st->d->samples_in_100ms
+ - (st->d->audio_data_frames % st->d->samples_in_100ms);
+ }
+ st->d->audio_data = (double*) malloc(st->d->audio_data_frames *
+ st->channels *
+ sizeof(double));
+ CHECK_ERROR(!st->d->audio_data, EBUR128_ERROR_NOMEM, exit)
+ for (j = 0; j < st->d->audio_data_frames * st->channels; ++j) {
+ st->d->audio_data[j] = 0.0;
+ }
+
+ /* the first block needs 400ms of audio data */
+ st->d->needed_frames = st->d->samples_in_100ms * 4;
+ /* start at the beginning of the buffer */
+ st->d->audio_data_index = 0;
+ /* reset short term frame counter */
+ st->d->short_term_frame_counter = 0;
+
+exit:
+ return errcode;
+}
+
+/* Limits how much history (in ms) the block lists may hold.
+ *
+ * The value is raised to 3000 when the EBUR128_MODE_LRA bits are set,
+ * otherwise to 400 when the EBUR128_MODE_M bits are set. Returns
+ * EBUR128_ERROR_NO_CHANGE when the result equals the current history.
+ * Otherwise the list limits are recomputed, the oldest entries beyond the
+ * new limits are freed, and EBUR128_SUCCESS is returned. */
+int ebur128_set_max_history(ebur128_state* st, unsigned long history)
+{
+  struct ebur128_state_internal* d = st->d;
+  struct ebur128_dq_entry* oldest;
+
+  if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA && history < 3000) {
+    history = 3000;
+  } else if ((st->mode & EBUR128_MODE_M) == EBUR128_MODE_M && history < 400) {
+    history = 400;
+  }
+  if (history == d->history) {
+    return EBUR128_ERROR_NO_CHANGE;
+  }
+
+  d->history = history;
+  d->block_list_max = d->history / 100;
+  d->st_block_list_max = d->history / 3000;
+
+  /* trim the gating block list from the front */
+  for (; d->block_list_size > d->block_list_max; d->block_list_size--) {
+    oldest = STAILQ_FIRST(&d->block_list);
+    STAILQ_REMOVE_HEAD(&d->block_list, entries);
+    free(oldest);
+  }
+  /* trim the short-term block list from the front */
+  for (; d->st_block_list_size > d->st_block_list_max;
+       d->st_block_list_size--) {
+    oldest = STAILQ_FIRST(&d->short_term_block_list);
+    STAILQ_REMOVE_HEAD(&d->short_term_block_list, entries);
+    free(oldest);
+  }
+  return EBUR128_SUCCESS;
+}
+/*
+ * ebur128_energy_shortterm is declared here because the functions generated
+ * by EBUR128_ADD_FRAMES call it before its definition further down.
+ *
+ * EBUR128_ADD_FRAMES(type) generates
+ *   int ebur128_add_frames_<type>(ebur128_state* st, const type* src,
+ *                                 size_t frames)
+ * which feeds `frames` interleaved frames from src into the state:
+ *
+ *  - prev_sample_peak / prev_true_peak are reset to 0 for this call.
+ *  - Input is consumed in chunks of needed_frames. After each complete
+ *    chunk a gating block over samples_in_100ms * 4 frames is calculated
+ *    (EBUR128_MODE_I), and for EBUR128_MODE_LRA the short-term energy is
+ *    recorded whenever short_term_frame_counter reaches
+ *    samples_in_100ms * 30; the counter is then set back to
+ *    samples_in_100ms * 20. needed_frames becomes samples_in_100ms and
+ *    audio_data_index wraps to 0 when the buffer is full.
+ *  - A trailing partial chunk is filtered and buffered, and needed_frames
+ *    is reduced accordingly.
+ *  - Finally the per-call peaks are merged into sample_peak / true_peak.
+ *
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM when a block entry cannot
+ * be allocated. Instantiated below for short, int, float and double.
+ */
+static int ebur128_energy_shortterm(ebur128_state* st, double* out);
+#define EBUR128_ADD_FRAMES(type) \
+int ebur128_add_frames_##type(ebur128_state* st, \
+ const type* src, size_t frames) { \
+ size_t src_index = 0; \
+ unsigned int c = 0; \
+ for (c = 0; c < st->channels; c++) { \
+ st->d->prev_sample_peak[c] = 0.0; \
+ st->d->prev_true_peak[c] = 0.0; \
+ } \
+ while (frames > 0) { \
+ if (frames >= st->d->needed_frames) { \
+ ebur128_filter_##type(st, src + src_index, st->d->needed_frames); \
+ src_index += st->d->needed_frames * st->channels; \
+ frames -= st->d->needed_frames; \
+ st->d->audio_data_index += st->d->needed_frames * st->channels; \
+ /* calculate the new gating block */ \
+ if ((st->mode & EBUR128_MODE_I) == EBUR128_MODE_I) { \
+ if (ebur128_calc_gating_block(st, st->d->samples_in_100ms * 4, NULL)) {\
+ return EBUR128_ERROR_NOMEM; \
+ } \
+ } \
+ if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \
+ st->d->short_term_frame_counter += st->d->needed_frames; \
+ if (st->d->short_term_frame_counter == st->d->samples_in_100ms * 30) { \
+ struct ebur128_dq_entry* block; \
+ double st_energy; \
+ if (ebur128_energy_shortterm(st, &st_energy) == EBUR128_SUCCESS && \
+ st_energy >= histogram_energy_boundaries[0]) { \
+ if (st->d->use_histogram) { \
+ ++st->d->short_term_block_energy_histogram[ \
+ find_histogram_index(st_energy)];\
+ } else { \
+ if (st->d->st_block_list_size == st->d->st_block_list_max) { \
+ block = STAILQ_FIRST(&st->d->short_term_block_list); \
+ STAILQ_REMOVE_HEAD(&st->d->short_term_block_list, entries); \
+ } else { \
+ block = (struct ebur128_dq_entry*) \
+ malloc(sizeof(struct ebur128_dq_entry)); \
+ if (!block) return EBUR128_ERROR_NOMEM; \
+ st->d->st_block_list_size++; \
+ } \
+ block->z = st_energy; \
+ STAILQ_INSERT_TAIL(&st->d->short_term_block_list, \
+ block, entries); \
+ } \
+ } \
+ st->d->short_term_frame_counter = st->d->samples_in_100ms * 20; \
+ } \
+ } \
+ /* 100ms are needed for all blocks besides the first one */ \
+ st->d->needed_frames = st->d->samples_in_100ms; \
+ /* reset audio_data_index when buffer full */ \
+ if (st->d->audio_data_index == st->d->audio_data_frames * st->channels) {\
+ st->d->audio_data_index = 0; \
+ } \
+ } else { \
+ ebur128_filter_##type(st, src + src_index, frames); \
+ st->d->audio_data_index += frames * st->channels; \
+ if ((st->mode & EBUR128_MODE_LRA) == EBUR128_MODE_LRA) { \
+ st->d->short_term_frame_counter += frames; \
+ } \
+ st->d->needed_frames -= frames; \
+ frames = 0; \
+ } \
+ } \
+ for (c = 0; c < st->channels; c++) { \
+ if (st->d->prev_sample_peak[c] > st->d->sample_peak[c]) { \
+ st->d->sample_peak[c] = st->d->prev_sample_peak[c]; \
+ } \
+ if (st->d->prev_true_peak[c] > st->d->true_peak[c]) { \
+ st->d->true_peak[c] = st->d->prev_true_peak[c]; \
+ } \
+ } \
+ return EBUR128_SUCCESS; \
+}
+EBUR128_ADD_FRAMES(short)
+EBUR128_ADD_FRAMES(int)
+EBUR128_ADD_FRAMES(float)
+EBUR128_ADD_FRAMES(double)
+
+/* Adds the stored gating blocks of `st` to two running totals:
+ * *above_thresh_counter grows by the number of blocks and
+ * *relative_threshold by the sum of their energies. Both are accumulated
+ * into, not reset, so the caller can combine several states.
+ *
+ * Always returns EBUR128_SUCCESS. */
+static int ebur128_calc_relative_threshold(ebur128_state* st,
+                                           size_t* above_thresh_counter,
+                                           double* relative_threshold) {
+  if (!st->d->use_histogram) {
+    struct ebur128_dq_entry* block;
+
+    STAILQ_FOREACH(block, &st->d->block_list, entries) {
+      ++*above_thresh_counter;
+      *relative_threshold += block->z;
+    }
+  } else {
+    size_t bin;
+
+    /* each bin contributes its count times the bin's energy */
+    for (bin = 0; bin < 1000; ++bin) {
+      *relative_threshold += st->d->block_energy_histogram[bin] *
+                             histogram_energies[bin];
+      *above_thresh_counter += st->d->block_energy_histogram[bin];
+    }
+  }
+
+  return EBUR128_SUCCESS;
+}
+/*
+ * Computes the gated loudness over `size` states; NULL entries in sts are
+ * skipped.
+ *
+ * Returns EBUR128_ERROR_INVALID_MODE if any non-NULL state lacks the
+ * EBUR128_MODE_I bits.
+ *
+ * First pass: the mean energy of all stored blocks is multiplied by
+ * relative_gate_factor to obtain the relative threshold. Second pass: the
+ * mean energy of the blocks at or above that threshold (histogram bins from
+ * start_index upwards, or list entries with z >= threshold) is converted to
+ * loudness and written to *out.
+ *
+ * If there are no blocks in either pass, *out is -HUGE_VAL and
+ * EBUR128_SUCCESS is returned.
+ */
+static int ebur128_gated_loudness(ebur128_state** sts, size_t size,
+ double* out) {
+ struct ebur128_dq_entry* it;
+ double gated_loudness = 0.0;
+ double relative_threshold = 0.0;
+ size_t above_thresh_counter = 0;
+ size_t i, j, start_index;
+
+ for (i = 0; i < size; i++) {
+ if (sts[i] && (sts[i]->mode & EBUR128_MODE_I) != EBUR128_MODE_I) {
+ return EBUR128_ERROR_INVALID_MODE;
+ }
+ }
+
+ for (i = 0; i < size; i++) {
+ if (!sts[i]) {
+ continue;
+ }
+ ebur128_calc_relative_threshold(sts[i], &above_thresh_counter, &relative_threshold);
+ }
+ if (!above_thresh_counter) {
+ *out = -HUGE_VAL;
+ return EBUR128_SUCCESS;
+ }
+
+ relative_threshold /= (double)above_thresh_counter;
+ relative_threshold *= relative_gate_factor;
+
+ above_thresh_counter = 0; /* reused to count the blocks that pass the relative gate */
+ if (relative_threshold < histogram_energy_boundaries[0]) {
+ start_index = 0;
+ } else {
+ start_index = find_histogram_index(relative_threshold);
+ if (relative_threshold > histogram_energies[start_index]) {
+ ++start_index;
+ }
+ }
+ for (i = 0; i < size; i++) {
+ if (!sts[i]) {
+ continue;
+ }
+ if (sts[i]->d->use_histogram) {
+ for (j = start_index; j < 1000; ++j) {
+ gated_loudness += sts[i]->d->block_energy_histogram[j] *
+ histogram_energies[j];
+ above_thresh_counter += sts[i]->d->block_energy_histogram[j];
+ }
+ } else {
+ STAILQ_FOREACH(it, &sts[i]->d->block_list, entries) {
+ if (it->z >= relative_threshold) {
+ ++above_thresh_counter;
+ gated_loudness += it->z;
+ }
+ }
+ }
+ }
+ if (!above_thresh_counter) {
+ *out = -HUGE_VAL;
+ return EBUR128_SUCCESS;
+ }
+ gated_loudness /= (double) above_thresh_counter;
+ *out = ebur128_energy_to_loudness(gated_loudness);
+ return EBUR128_SUCCESS;
+}
+
+/* Writes the relative gating threshold of `st`, as loudness, to *out.
+ *
+ * Returns EBUR128_ERROR_INVALID_MODE when the EBUR128_MODE_I bits are not
+ * set. With no stored blocks *out is -70.0. Otherwise the mean block energy
+ * times relative_gate_factor is converted to loudness. */
+int ebur128_relative_threshold(ebur128_state* st, double* out) {
+  double energy_sum = 0.0;
+  size_t block_count = 0;
+
+  if ((st->mode & EBUR128_MODE_I) != EBUR128_MODE_I) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+
+  ebur128_calc_relative_threshold(st, &block_count, &energy_sum);
+
+  if (block_count) {
+    energy_sum /= (double)block_count;
+    energy_sum *= relative_gate_factor;
+    *out = ebur128_energy_to_loudness(energy_sum);
+  } else {
+    *out = -70.0;
+  }
+  return EBUR128_SUCCESS;
+}
+
+/* Gated loudness of a single state; forwards to ebur128_gated_loudness with
+ * a one-element state list. */
+int ebur128_loudness_global(ebur128_state* st, double* out) {
+  ebur128_state* single[1];
+
+  single[0] = st;
+  return ebur128_gated_loudness(single, 1, out);
+}
+
+/* Gated loudness across `size` states; thin public wrapper around
+ * ebur128_gated_loudness. */
+int ebur128_loudness_global_multiple(ebur128_state** sts, size_t size,
+                                     double* out) {
+  const int status = ebur128_gated_loudness(sts, size, out);
+
+  return status;
+}
+
+/* Stores in *out the energy of the last interval_frames frames.
+ *
+ * Returns EBUR128_ERROR_INVALID_MODE when the interval is longer than the
+ * audio_data buffer, EBUR128_SUCCESS otherwise. */
+static int ebur128_energy_in_interval(ebur128_state* st,
+                                      size_t interval_frames,
+                                      double* out) {
+  if (interval_frames <= st->d->audio_data_frames) {
+    ebur128_calc_gating_block(st, interval_frames, out);
+    return EBUR128_SUCCESS;
+  }
+  return EBUR128_ERROR_INVALID_MODE;
+}
+
+/* Energy of the last 3 seconds (30 * samples_in_100ms frames). */
+static int ebur128_energy_shortterm(ebur128_state* st, double* out) {
+  const size_t three_seconds = st->d->samples_in_100ms * 30;
+
+  return ebur128_energy_in_interval(st, three_seconds, out);
+}
+
+/* Loudness of the last 400ms (4 * samples_in_100ms frames).
+ *
+ * Returns the error from ebur128_energy_in_interval if there is one.
+ * Otherwise *out is -HUGE_VAL for an energy of zero or less, else the
+ * energy converted to loudness, and EBUR128_SUCCESS is returned. */
+int ebur128_loudness_momentary(ebur128_state* st, double* out) {
+  double energy;
+  const int status =
+      ebur128_energy_in_interval(st, st->d->samples_in_100ms * 4, &energy);
+
+  if (status) {
+    return status;
+  }
+  *out = (energy <= 0.0) ? -HUGE_VAL : ebur128_energy_to_loudness(energy);
+  return EBUR128_SUCCESS;
+}
+
+/* Loudness of the last 3 seconds.
+ *
+ * Returns the error from ebur128_energy_shortterm if there is one.
+ * Otherwise *out is -HUGE_VAL for an energy of zero or less, else the
+ * energy converted to loudness, and EBUR128_SUCCESS is returned. */
+int ebur128_loudness_shortterm(ebur128_state* st, double* out) {
+  double energy;
+  const int status = ebur128_energy_shortterm(st, &energy);
+
+  if (status) {
+    return status;
+  }
+  *out = (energy <= 0.0) ? -HUGE_VAL : ebur128_energy_to_loudness(energy);
+  return EBUR128_SUCCESS;
+}
+
+/* Loudness of the last `window` milliseconds.
+ *
+ * Returns the error from ebur128_energy_in_interval if there is one (the
+ * window must fit into the audio_data buffer). Otherwise *out is -HUGE_VAL
+ * for an energy of zero or less, else the energy converted to loudness, and
+ * EBUR128_SUCCESS is returned. */
+int ebur128_loudness_window(ebur128_state* st,
+                            unsigned long window,
+                            double* out) {
+  double energy;
+  size_t interval_frames = st->samplerate * window / 1000;
+  const int status = ebur128_energy_in_interval(st, interval_frames, &energy);
+
+  if (status) {
+    return status;
+  }
+  *out = (energy <= 0.0) ? -HUGE_VAL : ebur128_energy_to_loudness(energy);
+  return EBUR128_SUCCESS;
+}
+
+/* qsort comparator for doubles in ascending order: returns -1, 0 or 1. */
+static int ebur128_double_cmp(const void *p1, const void *p2) {
+  const double lhs = *(const double*) p1;
+  const double rhs = *(const double*) p2;
+
+  if (lhs < rhs) {
+    return -1;
+  }
+  if (lhs > rhs) {
+    return 1;
+  }
+  return 0;
+}
+
+/* EBU - TECH 3342
+ *
+ * Computes the loudness range over `size` states and writes it to *out;
+ * NULL entries in sts are skipped.
+ *
+ * Returns EBUR128_ERROR_INVALID_MODE when a non-NULL state lacks the
+ * EBUR128_MODE_LRA bits, or when the states do not agree on
+ * EBUR128_MODE_HISTOGRAM (the first entry decides which variant is used).
+ *
+ * Both variants gate the short-term energies at minus_twenty_decibels times
+ * their mean, then report the loudness difference between the entries at the
+ * 95% and 10% positions of what remains. The histogram variant works on the
+ * summed short_term_block_energy_histogram bins; the list variant copies all
+ * short_term_block_list energies into a temporary array and sorts it.
+ *
+ * *out is 0.0 when there are no short-term blocks or none pass the gate.
+ * Returns EBUR128_SUCCESS, or EBUR128_ERROR_NOMEM when the temporary array
+ * cannot be allocated.
+ */
+int ebur128_loudness_range_multiple(ebur128_state** sts, size_t size,
+ double* out) {
+ size_t i, j;
+ struct ebur128_dq_entry* it;
+ double* stl_vector;
+ size_t stl_size;
+ double* stl_relgated;
+ size_t stl_relgated_size;
+ double stl_power, stl_integrated;
+ /* High and low percentile energy */
+ double h_en, l_en;
+ int use_histogram = 0;
+
+ for (i = 0; i < size; ++i) {
+ if (sts[i]) {
+ if ((sts[i]->mode & EBUR128_MODE_LRA) != EBUR128_MODE_LRA) {
+ return EBUR128_ERROR_INVALID_MODE;
+ }
+ if (i == 0 && sts[i]->mode & EBUR128_MODE_HISTOGRAM) {
+ use_histogram = 1;
+ } else if (use_histogram != !!(sts[i]->mode & EBUR128_MODE_HISTOGRAM)) {
+ return EBUR128_ERROR_INVALID_MODE;
+ }
+ }
+ }
+
+ if (use_histogram) {
+ unsigned long hist[1000] = { 0 };
+ size_t percentile_low, percentile_high;
+ size_t index;
+
+ stl_size = 0;
+ stl_power = 0.0;
+ for (i = 0; i < size; ++i) {
+ if (!sts[i]) {
+ continue;
+ }
+ for (j = 0; j < 1000; ++j) {
+ hist[j] += sts[i]->d->short_term_block_energy_histogram[j];
+ stl_size += sts[i]->d->short_term_block_energy_histogram[j];
+ stl_power += sts[i]->d->short_term_block_energy_histogram[j]
+ * histogram_energies[j];
+ }
+ }
+ if (!stl_size) {
+ *out = 0.0;
+ return EBUR128_SUCCESS;
+ }
+
+ stl_power /= stl_size;
+ stl_integrated = minus_twenty_decibels * stl_power;
+
+ if (stl_integrated < histogram_energy_boundaries[0]) {
+ index = 0;
+ } else {
+ index = find_histogram_index(stl_integrated);
+ if (stl_integrated > histogram_energies[index]) {
+ ++index;
+ }
+ }
+ stl_size = 0; /* now counts only the blocks at or above the gate bin */
+ for (j = index; j < 1000; ++j) {
+ stl_size += hist[j];
+ }
+ if (!stl_size) {
+ *out = 0.0;
+ return EBUR128_SUCCESS;
+ }
+
+ percentile_low = (size_t) ((stl_size - 1) * 0.1 + 0.5);
+ percentile_high = (size_t) ((stl_size - 1) * 0.95 + 0.5);
+
+ stl_size = 0; /* reused as running block count while walking the bins */
+ j = index;
+ while (stl_size <= percentile_low) {
+ stl_size += hist[j++];
+ }
+ l_en = histogram_energies[j - 1];
+ while (stl_size <= percentile_high) {
+ stl_size += hist[j++];
+ }
+ h_en = histogram_energies[j - 1];
+ *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en);
+ return EBUR128_SUCCESS;
+
+ } else {
+ stl_size = 0;
+ for (i = 0; i < size; ++i) {
+ if (!sts[i]) {
+ continue;
+ }
+ STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) {
+ ++stl_size;
+ }
+ }
+ if (!stl_size) {
+ *out = 0.0;
+ return EBUR128_SUCCESS;
+ }
+ stl_vector = (double*) malloc(stl_size * sizeof(double));
+ if (!stl_vector) {
+ return EBUR128_ERROR_NOMEM;
+ }
+
+ j = 0;
+ for (i = 0; i < size; ++i) {
+ if (!sts[i]) {
+ continue;
+ }
+ STAILQ_FOREACH(it, &sts[i]->d->short_term_block_list, entries) {
+ stl_vector[j] = it->z;
+ ++j;
+ }
+ }
+ qsort(stl_vector, stl_size, sizeof(double), ebur128_double_cmp);
+ stl_power = 0.0;
+ for (i = 0; i < stl_size; ++i) {
+ stl_power += stl_vector[i];
+ }
+ stl_power /= (double) stl_size;
+ stl_integrated = minus_twenty_decibels * stl_power;
+
+ stl_relgated = stl_vector;
+ stl_relgated_size = stl_size;
+ while (stl_relgated_size > 0 && *stl_relgated < stl_integrated) { /* skip sorted entries below the gate */
+ ++stl_relgated;
+ --stl_relgated_size;
+ }
+
+ if (stl_relgated_size) {
+ h_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.95 + 0.5)];
+ l_en = stl_relgated[(size_t) ((stl_relgated_size - 1) * 0.1 + 0.5)];
+ free(stl_vector);
+ *out = ebur128_energy_to_loudness(h_en) - ebur128_energy_to_loudness(l_en);
+ return EBUR128_SUCCESS;
+ } else {
+ free(stl_vector);
+ *out = 0.0;
+ return EBUR128_SUCCESS;
+ }
+ }
+}
+
+/* Loudness range of a single state; forwards to
+ * ebur128_loudness_range_multiple with a one-element state list. */
+int ebur128_loudness_range(ebur128_state* st, double* out) {
+  ebur128_state* single[1];
+
+  single[0] = st;
+  return ebur128_loudness_range_multiple(single, 1, out);
+}
+
+/* Writes the overall sample peak of channel `channel_number` to *out.
+ *
+ * Returns EBUR128_ERROR_INVALID_MODE when the EBUR128_MODE_SAMPLE_PEAK bits
+ * are not set, EBUR128_ERROR_INVALID_CHANNEL_INDEX when the channel index is
+ * out of range, EBUR128_SUCCESS otherwise. */
+int ebur128_sample_peak(ebur128_state* st,
+                        unsigned int channel_number,
+                        double* out) {
+  const int mode_enabled =
+      (st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK;
+
+  if (!mode_enabled) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  if (channel_number >= st->channels) {
+    return EBUR128_ERROR_INVALID_CHANNEL_INDEX;
+  }
+  *out = st->d->sample_peak[channel_number];
+  return EBUR128_SUCCESS;
+}
+
+/* Writes prev_sample_peak of channel `channel_number` (the sample peak
+ * recorded by the most recent add_frames call) to *out.
+ *
+ * Returns EBUR128_ERROR_INVALID_MODE when the EBUR128_MODE_SAMPLE_PEAK bits
+ * are not set, EBUR128_ERROR_INVALID_CHANNEL_INDEX when the channel index is
+ * out of range, EBUR128_SUCCESS otherwise. */
+int ebur128_prev_sample_peak(ebur128_state* st,
+                             unsigned int channel_number,
+                             double* out) {
+  const int mode_enabled =
+      (st->mode & EBUR128_MODE_SAMPLE_PEAK) == EBUR128_MODE_SAMPLE_PEAK;
+
+  if (!mode_enabled) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  if (channel_number >= st->channels) {
+    return EBUR128_ERROR_INVALID_CHANNEL_INDEX;
+  }
+  *out = st->d->prev_sample_peak[channel_number];
+  return EBUR128_SUCCESS;
+}
+
+/* Writes the overall true peak of channel `channel_number` to *out; the
+ * result is the larger of the stored true peak and the stored sample peak.
+ *
+ * Returns EBUR128_ERROR_INVALID_MODE when the EBUR128_MODE_TRUE_PEAK bits
+ * are not set, EBUR128_ERROR_INVALID_CHANNEL_INDEX when the channel index is
+ * out of range, EBUR128_SUCCESS otherwise. */
+int ebur128_true_peak(ebur128_state* st,
+                      unsigned int channel_number,
+                      double* out) {
+  const double* oversampled = st->d->true_peak;
+  const double* plain = st->d->sample_peak;
+
+  if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  if (channel_number >= st->channels) {
+    return EBUR128_ERROR_INVALID_CHANNEL_INDEX;
+  }
+
+  if (oversampled[channel_number] > plain[channel_number]) {
+    *out = oversampled[channel_number];
+  } else {
+    *out = plain[channel_number];
+  }
+  return EBUR128_SUCCESS;
+}
+
+/* Writes the true peak of the most recent add_frames call for channel
+ * `channel_number` to *out; the result is the larger of prev_true_peak and
+ * prev_sample_peak.
+ *
+ * Returns EBUR128_ERROR_INVALID_MODE when the EBUR128_MODE_TRUE_PEAK bits
+ * are not set, EBUR128_ERROR_INVALID_CHANNEL_INDEX when the channel index is
+ * out of range, EBUR128_SUCCESS otherwise. */
+int ebur128_prev_true_peak(ebur128_state* st,
+                           unsigned int channel_number,
+                           double* out) {
+  const double* oversampled = st->d->prev_true_peak;
+  const double* plain = st->d->prev_sample_peak;
+
+  if ((st->mode & EBUR128_MODE_TRUE_PEAK) != EBUR128_MODE_TRUE_PEAK) {
+    return EBUR128_ERROR_INVALID_MODE;
+  }
+  if (channel_number >= st->channels) {
+    return EBUR128_ERROR_INVALID_CHANNEL_INDEX;
+  }
+
+  if (oversampled[channel_number] > plain[channel_number]) {
+    *out = oversampled[channel_number];
+  } else {
+    *out = plain[channel_number];
+  }
+  return EBUR128_SUCCESS;
+}
\ No newline at end of file
diff --git a/tools/ref/waves/CMakeLists.txt b/tools/ref/waves/CMakeLists.txt
new file mode 100644
index 0000000..3045b00
--- /dev/null
+++ b/tools/ref/waves/CMakeLists.txt
@@ -0,0 +1,3 @@
+# Headers for this library live in inc/.
+include_directories(inc)
+# Collect every source file found in src/ into DIR_WAVES_SRCS.
+aux_source_directory(src DIR_WAVES_SRCS)
+# Build the "waves" library from the collected sources.
+add_library(waves ${DIR_WAVES_SRCS})
\ No newline at end of file
diff --git a/tools/ref/waves/inc/ExtraMono.h b/tools/ref/waves/inc/ExtraMono.h
new file mode 100644
index 0000000..280fab0
--- /dev/null
+++ b/tools/ref/waves/inc/ExtraMono.h
@@ -0,0 +1,230 @@
+
+#include <string>
+#include <string.h>
+
+// Byte widths of the 32-bit and 16-bit fields read by fa_read_u32 / fa_read_u16.
+#define SIZE_LONG 4
+#define SIZE_SHORT 2
+
+// Length in bytes of a chunk identifier such as "RIFF", "WAVE", "fmt " or "data".
+#define SIZE_FLAG 4
+// The only encoding format tag GetWaveHeadLen accepts.
+#define FMT_TAG 0x0001
+
+#define BITS_PER_BYTE 8
+
+// Capacity, in 16-bit samples, of the buffers ExtraMono allocates.
+#ifndef AFS_CMPL_MAX_WAV
+#define AFS_CMPL_MAX_WAV 15360000 // 16 minutes of audio (960 s * 16000 samples/s)
+#endif
+
+//+---------------------------------------------------------------------------+
+//+ Read one 32-bit little-endian value from a file.
+//+
+//+ fp: stream opened for binary reading, positioned on the value.
+//+ Returns the assembled value. Bytes that could not be read (EOF or I/O
+//+ error) count as zero, so a short read yields a deterministic result
+//+ instead of whatever happened to be on the stack.
+//+---------------------------------------------------------------------------+
+unsigned long fa_read_u32(FILE* fp)
+{
+    // Zero-filled so that a short fread() leaves well-defined bytes behind.
+    unsigned char bytes[SIZE_LONG] = { 0 };
+    unsigned long value = 0;
+
+    (void)fread(bytes, sizeof(unsigned char), SIZE_LONG, fp);
+
+    // Least significant byte comes first in the file.
+    for (int i = SIZE_LONG - 1; i >= 0; --i)
+    {
+        value = (value << 8) | (unsigned long)bytes[i];
+    }
+    return value;
+}
+
+//+---------------------------------------------------------------------------+
+//+ Read one 16-bit little-endian value from a file.
+//+
+//+ fp: stream opened for binary reading, positioned on the value.
+//+ Returns the assembled value. Bytes that could not be read (EOF or I/O
+//+ error) count as zero, so a short read yields a deterministic result
+//+ instead of whatever happened to be on the stack.
+//+---------------------------------------------------------------------------+
+unsigned short fa_read_u16(FILE *fp)
+{
+    // Zero-filled so that a short fread() leaves well-defined bytes behind.
+    unsigned char bytes[SIZE_SHORT] = { 0 };
+
+    (void)fread(bytes, sizeof(unsigned char), SIZE_SHORT, fp);
+
+    // Least significant byte comes first in the file.
+    return (unsigned short)(bytes[0] | (bytes[1] << 8));
+}
+
+// Parse the RIFF/WAVE header of pszFile and report where the audio data begins.
+//
+// pszFile:  path of the WAVE file to inspect.
+// channels: receives the channel count stored in the "fmt " chunk.
+// nPos:     receives the byte offset of the channel-count field (the number
+//           of header bytes that precede it).
+// nLength:  receives the total file length in bytes.
+//
+// Returns the byte offset of the first sample, i.e. the position right after
+// the "data" chunk's size field, or -1 when the file cannot be opened, is not
+// a RIFF/WAVE file whose first chunk is a format-0x0001 "fmt " chunk, or ends
+// before a "data" chunk is found. Failures are reported on stdout/stderr.
+//
+// The offset is taken from the stream position rather than derived as
+// "file length - data size", so chunks that follow the audio data or an
+// inaccurate data-size field no longer distort the result.
+int GetWaveHeadLen(const char* pszFile,unsigned short &channels, int &nPos, int& nLength)
+{
+    unsigned char tag[SIZE_FLAG];
+    unsigned char raw[SIZE_LONG];
+    unsigned long x_size;
+    unsigned long n_skip;
+    unsigned short format;
+    long nDataStart;
+    int nCnt = 0;
+
+    FILE* pWavFile = fopen(pszFile, "rb");
+    if ( pWavFile == NULL )
+    {
+        printf("Input file can not be opened!\n");
+        return -1;
+    }
+
+    // Total file length.
+    fseek(pWavFile, 0, SEEK_END );
+    nLength = ftell(pWavFile);
+    fseek(pWavFile, 0, SEEK_SET );
+
+    // The resource identifier must be "RIFF".
+    if ( fread(tag, sizeof(unsigned char), SIZE_FLAG, pWavFile) != (size_t)SIZE_FLAG
+         || memcmp(tag, "RIFF", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Resource flag is not RIFF!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    // Skip the RIFF size field.
+    fseek(pWavFile, SIZE_LONG, SEEK_CUR);
+    nCnt += SIZE_LONG;
+
+    // The file identifier must be "WAVE".
+    if ( fread(tag, sizeof(unsigned char), SIZE_FLAG, pWavFile) != (size_t)SIZE_FLAG
+         || memcmp(tag, "WAVE", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "File flag is not WAVE\n");
+        fclose(pWavFile);
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    // The first chunk must be "fmt ".
+    if ( fread(tag, sizeof(unsigned char), SIZE_FLAG, pWavFile) != (size_t)SIZE_FLAG
+         || memcmp(tag, "fmt ", (size_t)SIZE_FLAG) != 0 )
+    {
+        fprintf(stderr, "Format flag is not FMT!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+    nCnt += SIZE_FLAG;
+
+    x_size = fa_read_u32(pWavFile);
+    nCnt += SIZE_LONG;
+
+    // The encoding format must be 0x0001.
+    format = fa_read_u16(pWavFile);
+    nCnt += SIZE_SHORT;
+    if ( format != FMT_TAG )
+    {
+        fprintf(stderr, "Encoding format is not 0x0001!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+
+    // nCnt now equals the offset of the channel-count field.
+    channels = fa_read_u16(pWavFile);
+
+    // Skip sample rate, byte rate, block align and bits per sample; none of
+    // them is needed here.
+    fseek(pWavFile, 2*SIZE_LONG + 2*SIZE_SHORT, SEEK_CUR);
+
+    // Skip whatever the format chunk holds beyond its 16 standard bytes. A
+    // declared size below 16 would wrap the unsigned subtraction, so reject it.
+    if ( x_size < (unsigned long)(4*SIZE_SHORT + 2*SIZE_LONG) )
+    {
+        fprintf(stderr, "Format chunk is too short!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+    x_size -= (4*SIZE_SHORT + 2*SIZE_LONG);
+    if ( x_size != 0 )
+    {
+        fseek(pWavFile, (long)x_size, SEEK_CUR);
+    }
+
+    // Walk the chunk list until "data" shows up. Every read is checked so a
+    // file without a data chunk ends the search instead of looping forever.
+    for (;;)
+    {
+        if ( fread(tag, sizeof(unsigned char), SIZE_FLAG, pWavFile) != (size_t)SIZE_FLAG )
+        {
+            fprintf(stderr, "Data chunk not found!\n");
+            fclose(pWavFile);
+            return -1;
+        }
+        if ( memcmp(tag, "data", SIZE_FLAG) == 0 )
+        {
+            break;
+        }
+
+        n_skip = fa_read_u32(pWavFile);
+        if ( fseek(pWavFile, (long)n_skip, SEEK_CUR) != 0 )
+        {
+            fprintf(stderr, "Data chunk not found!\n");
+            fclose(pWavFile);
+            return -1;
+        }
+    }
+
+    // Consume the data-size field; the samples start right behind it.
+    if ( fread(raw, sizeof(unsigned char), SIZE_LONG, pWavFile) != (size_t)SIZE_LONG )
+    {
+        fprintf(stderr, "Data chunk not found!\n");
+        fclose(pWavFile);
+        return -1;
+    }
+    nDataStart = ftell(pWavFile);
+    fclose(pWavFile);
+
+    nPos = nCnt;
+    return (int)nDataStart;
+}
+
+// Extract the first channel of a 16-bit WAVE file.
+//
+// sInput:  path of the source WAVE file.
+// sOutput: path of the file to create.
+//
+// At most AFS_CMPL_MAX_WAV 16-bit words of the input are processed.
+//  - Mono input: the samples are written to sOutput without the header.
+//  - Multi-channel input: the header is copied with its channel count set to
+//    1, followed by the first sample of every frame.
+//
+// Returns true on success; false when a file cannot be opened, nothing can be
+// read, or the header cannot be parsed by GetWaveHeadLen.
+//
+// NOTE(review): as before, block align, byte rate and the chunk sizes in the
+// copied header are not rewritten, and the mono path emits raw samples while
+// the multi-channel path keeps the header - confirm both are intended.
+bool ExtraMono(const std::string &sInput, const std::string &sOutput)
+{
+    FILE *pIn = fopen(sInput.c_str(), "rb");
+    if ( NULL == pIn )
+    {
+        printf("Fopen Error %s", sInput.c_str());
+        return false;
+    }
+
+    FILE *pOut = fopen(sOutput.c_str(), "wb");
+    if ( NULL == pOut )
+    {
+        printf("Fopen2 Error %s", sOutput.c_str());
+        fclose(pIn);
+        return false;
+    }
+
+    short *pBuf = new short[AFS_CMPL_MAX_WAV];
+    short *pMono = NULL;
+    bool bOk = false;
+
+    const int nLen = (int)fread(pBuf, sizeof(short), AFS_CMPL_MAX_WAV, pIn);
+    if ( nLen <= 0 )
+    {
+        perror("Fread Error!");
+    }
+    else
+    {
+        unsigned short channels = 0;
+        int nPos = 0;
+        int nLength = 0;
+        const int nHeadByte = GetWaveHeadLen(sInput.c_str(), channels, nPos, nLength);
+        const int nHeadShort = nHeadByte / 2;
+
+        if ( nHeadByte < 0 || channels == 0 || nHeadShort > nLen || nPos / 2 >= nHeadShort )
+        {
+            // Header could not be parsed, or it does not fit in what was read.
+        }
+        else if ( channels == 1 )
+        {
+            fwrite(pBuf + nHeadShort, sizeof(short), nLen - nHeadShort, pOut);
+            bOk = true;
+        }
+        else
+        {
+            pMono = new short[AFS_CMPL_MAX_WAV];
+            memcpy(pMono, pBuf, nHeadShort * sizeof(short));
+
+            // nPos is a byte offset, so the channel-count field sits at 16-bit
+            // index nPos / 2. Store 1 in little-endian byte order.
+            const unsigned char one[2] = { 1, 0 };
+            memcpy(&pMono[nPos / 2], one, sizeof(one));
+
+            // Keep the first sample of every complete frame.
+            const short *pWav = pBuf + nHeadShort;
+            const int nFrames = (nLen - nHeadShort) / channels;
+            for ( int i = 0; i < nFrames; i++ )
+            {
+                pMono[nHeadShort + i] = pWav[i * channels];
+            }
+
+            // Write only what was filled in: the header plus the mono samples.
+            fwrite(pMono, sizeof(short), nHeadShort + nFrames, pOut);
+            bOk = true;
+        }
+    }
+
+    delete []pMono;
+    delete []pBuf;
+    fclose(pIn);
+    fclose(pOut);
+    return bOk;
+}
diff --git a/tools/ref/waves/inc/WaveFile.h b/tools/ref/waves/inc/WaveFile.h
new file mode 100644
index 0000000..8b57806
--- /dev/null
+++ b/tools/ref/waves/inc/WaveFile.h
@@ -0,0 +1,74 @@
+#ifndef WAVE_FILE_H
+#define WAVE_FILE_H
+
+#include <stdio.h>
+#include <stdint.h>
+
+
+/* Sample encodings handled by CWaveFile. The low 8 bits of each value are the
+ * sample width in bits (CWaveFile::SetupDone derives SampleSize with
+ * "Format & 0xFF"); the IEEE floating-point formats additionally carry 0x100. */
+typedef enum SAMPLE_FORMAT
+{
+ SF_U8 = 8,
+ SF_S16 = 16,
+ SF_S24 = 24,
+ SF_S32 = 32,
+ SF_IEEE_FLOAT = 0x100 + 32,
+ SF_IEEE_DOUBLE = 0x100 + 64,
+ SF_MAX,
+} SAMPLE_FORMAT;
+
+/* Main processing object: reads or writes one WAVE file. **/
+class CWaveFile
+{
+public:
+ /* Takes the file name and whether the file is opened for writing (true) or reading (false) **/
+ CWaveFile(const char* Filename, bool Write);
+ virtual ~CWaveFile();
+
+public:
+ int GetChannels();
+ int GetSampleRate();
+ double GetDuration(); // in seconds
+ uint32_t GetChannelMask();
+ void SetChannels(int Channels);
+ void SetSampleRate(int SampleRate);
+ void SetSampleFormat(SAMPLE_FORMAT Format);
+ void SetChannelMask(uint32_t Mask);
+ void Stat();
+ // Writes the header from the current channel / rate / format / mask settings.
+ void SetupDone();
+ // The ReadFrameAs* functions convert Frames frames into FrameSamples
+ // (interleaved, Channels values per frame) and return false on failure.
+ bool ReadFrameAsS16(short* FrameSamples, int Frames = 1);
+ bool ReadFrameAsDouble(double* FrameSamples, int Frames = 1);
+ bool ReadFrameAsfloat(float* FrameSamples, int Frames = 1);
+ void WriteRaw(void* Raw, int Size);
+ void WriteFrame(uint8_t* FrameSamples, int Frames = 1);
+ void WriteFrame(short* FrameSamples, int Frames = 1);
+ void WriteFrame(int32_t* FrameSamples, int Frames = 1);
+ void WriteFrameS24(int32_t* FrameSamples, int Frames = 1);
+ void WriteFrame(double* FrameSamples, int Frames = 1);
+ void WriteFrame(float* FrameSamples, int Frames=1);
+ void Seek(int FramePos, int Where = SEEK_SET);
+ // True when the constructor opened (and, for reading, parsed) the file.
+ bool GetStatus();
+ SAMPLE_FORMAT GetFormat();
+ int GetTotalFrames();
+ int GetFramesRead();
+
+
+protected:
+ FILE* File;
+ int Channels; /* number of channels **/
+ int SampleRate; /* sample rate **/
+ SAMPLE_FORMAT Format; /* sample format **/
+ int SampleSize; // Measured in Bits
+ unsigned int FrameStartPos; /* file offset where the audio data starts **/
+ unsigned long TotalFrames; /* total frame count: data chunk size divided by the block align **/
+ unsigned long FramesRead;
+ double Duration; /* duration **/
+
+ bool ReadOnly; /* true when the file was opened for reading rather than writing **/
+
+ uint32_t ChannelMask;
+
+ bool m_bOK; /* whether the file was opened successfully **/
+};
+
+
+#endif
\ No newline at end of file
diff --git a/tools/ref/waves/src/WaveFile.cpp b/tools/ref/waves/src/WaveFile.cpp
new file mode 100644
index 0000000..83b83d7
--- /dev/null
+++ b/tools/ref/waves/src/WaveFile.cpp
@@ -0,0 +1,824 @@
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <errno.h>
+
+#if WIN32
+#else
+#include <inttypes.h>
+#endif
+
+#include "WaveFile.h"
+
+// Speaker position bits - presumably the values combined into a channel mask
+// (see CWaveFile::SetChannelMask); none is referenced by the code in this file.
+#define SPEAKER_FRONT_LEFT 0x1
+#define SPEAKER_FRONT_RIGHT 0x2
+#define SPEAKER_FRONT_CENTER 0x4
+#define SPEAKER_LOW_FREQUENCY 0x8
+#define SPEAKER_BACK_LEFT 0x10
+#define SPEAKER_BACK_RIGHT 0x20
+#define SPEAKER_FRONT_LEFT_OF_CENTER 0x40
+#define SPEAKER_FRONT_RIGHT_OF_CENTER 0x80
+#define SPEAKER_BACK_CENTER 0x100
+#define SPEAKER_SIDE_LEFT 0x200
+#define SPEAKER_SIDE_RIGHT 0x400
+#define SPEAKER_TOP_CENTER 0x800
+#define SPEAKER_TOP_FRONT_LEFT 0x1000
+#define SPEAKER_TOP_FRONT_CENTER 0x2000
+#define SPEAKER_TOP_FRONT_RIGHT 0x4000
+#define SPEAKER_TOP_BACK_LEFT 0x8000
+#define SPEAKER_TOP_BACK_CENTER 0x10000
+#define SPEAKER_TOP_BACK_RIGHT 0x20000
+#define SPEAKER_RESERVED 0x80000000
+
+
+#define SPEAKER_REAR_CENTER_SURROUND SPEAKER_BACK_CENTER
+
+// Channel layout identifiers; not referenced by the code in this file.
+#define DCA_MONO 0
+#define DCA_CHANNEL 1
+#define DCA_STEREO 2
+#define DCA_STEREO_SUMDIFF 3
+#define DCA_STEREO_TOTAL 4
+#define DCA_3F 5
+#define DCA_2F1R 6
+#define DCA_3F1R 7
+#define DCA_2F2R 8
+#define DCA_3F2R 9
+#define DCA_4F2R 10
+
+#define DCA_DOLBY 101 /* FIXME */
+
+#define DCA_CHANNEL_MAX DCA_3F2R /* We don't handle anything above that */
+#define DCA_CHANNEL_BITS 6
+#define DCA_CHANNEL_MASK 0x3F
+
+#define DCA_LFE 0x80
+#define DCA_ADJUST_LEVEL 0x100
+
+// Format tags found in (and written to) the first two bytes of the "fmt " chunk.
+#define WAVE_FORMAT_PCM 0x0001
+#define WAVE_FORMAT_IEEE_FLOAT 0x0003
+#define WAVE_FORMAT_EXTENSIBLE 0xFFFE
+
+// 44-byte header template used by CWaveFile::SetupDone when no channel mask is
+// set. Byte layout:
+//    0  "RIFF"          4  RIFF size (placeholder; rewritten by the destructor)
+//    8  "WAVE"         12  "fmt "          16  fmt chunk size = 16
+//   20  format tag (PCM; SetupDone stores IEEE_FLOAT here for float formats)
+//   22  channels       24  sample rate     28  bytes per second
+//   32  block align    34  bits per sample (all filled in by SetupDone)
+//   36  "data"         40  data size (placeholder; rewritten by the destructor)
+static uint8_t wav_header[] = {
+ 'R', 'I', 'F', 'F', 0xfc, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+ 'f', 'm', 't', ' ', 16, 0, 0, 0,
+ WAVE_FORMAT_PCM, WAVE_FORMAT_PCM >> 8,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0,
+ 'd', 'a', 't', 'a', 0xd8, 0xff, 0xff, 0xff
+};
+
+// 68-byte header template used by CWaveFile::SetupDone when a channel mask is
+// set (format tag WAVE_FORMAT_EXTENSIBLE). Byte layout:
+//    0  "RIFF"          4  RIFF size (placeholder; rewritten by the destructor)
+//    8  "WAVE"         12  "fmt "          16  fmt chunk size = 40
+//   20  format tag     22  channels        24  sample rate
+//   28  bytes per second                   32  block align
+//   34  bits per sample                    36  extension size = 22
+//   38  second bits-per-sample field       40  channel mask
+//   44  16-byte sub-format identifier whose first two bytes hold the sample
+//       type (IEEE_FLOAT here; SetupDone stores PCM for integer formats)
+//   60  "data"         64  data size (placeholder; rewritten by the destructor)
+// Offsets 22-34, 38 and 40 are filled in by SetupDone.
+static uint8_t wavmulti_header[] = {
+ 'R', 'I', 'F', 'F', 0xf0, 0xff, 0xff, 0xff, 'W', 'A', 'V', 'E',
+ 'f', 'm', 't', ' ', 40, 0, 0, 0,
+ (uint8_t)(WAVE_FORMAT_EXTENSIBLE & 0xFF), WAVE_FORMAT_EXTENSIBLE >> 8,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 0, 22, 0,
+ 0, 0, 0, 0, 0, 0,
+ WAVE_FORMAT_IEEE_FLOAT, WAVE_FORMAT_IEEE_FLOAT >> 8,
+ 0, 0, 0, 0, 0x10, 0x00, 0x80, 0, 0, 0xaa, 0, 0x38, 0x9b, 0x71,
+ 'd', 'a', 't', 'a', 0xb4, 0xff, 0xff, 0xff
+};
+
+// Write the four bytes of value to buf, least significant byte first.
+static void store4 (uint8_t * buf, int value)
+{
+    for (int i = 0; i < 4; i++)
+    {
+        // Conversion to uint8_t keeps only the low 8 bits of the shifted value.
+        buf[i] = value >> (8 * i);
+    }
+}
+
+// Write the low two bytes of value to buf, least significant byte first.
+static void store2 (uint8_t * buf, int value)
+{
+    uint8_t * const low = buf;
+    uint8_t * const high = buf + 1;
+
+    // Conversion to uint8_t keeps only the low 8 bits of each expression.
+    *low = value;
+    *high = value >> 8;
+}
+
+
+// Scan forward from the current position of file for the chunk named chunk_id.
+//
+// Each chunk starts with a 4-byte identifier and a 32-bit little-endian size.
+// Chunks that do not match are skipped, including the pad byte that RIFF
+// appends to odd-sized chunks so that the next chunk starts on an even offset.
+//
+// Returns the size of the matching chunk, leaving the stream positioned on the
+// first byte of its payload, or 0 when the file ends (or cannot be seeked)
+// before a match is found.
+static uint32_t find_chunk(FILE * file, const uint8_t chunk_id[4])
+{
+    uint8_t buffer[8];
+    while (1) {
+        size_t chunksize;
+        size_t s = fread(buffer, 1, 8, file);
+        if (s < 8)
+            return 0;
+        chunksize = (uint32_t)buffer[4] | ((uint32_t)buffer[5] << 8) |
+            ((uint32_t)buffer[6] << 16) | ((uint32_t)buffer[7] << 24);
+        if (!memcmp(buffer, chunk_id, 4))
+            return chunksize;
+        // Odd-sized chunks are followed by one pad byte that is not counted
+        // in the size field.
+        if (fseek(file, (long)(chunksize + (chunksize & 1)), SEEK_CUR) != 0)
+            return 0;
+    }
+}
+
+
+// Open Filename for writing (Write == true) or for reading (Write == false).
+//
+// Writing: the object starts out as 44100 Hz, 2 channels, signed 16 bit, no
+// channel mask; no header is written here (see SetupDone).
+//
+// Reading: the RIFF/WAVE header is parsed. Accepted are "fmt " chunks of 16,
+// 18 or 40 bytes with format tag PCM, IEEE_FLOAT or EXTENSIBLE, 1..32
+// channels, 8/16/24/32-bit samples (32-bit may be float) or 64-bit float, and
+// a block align / byte rate consistent with those values. The stream is left
+// on the first sample of the "data" chunk.
+//
+// GetStatus() reports whether construction succeeded. Every member receives a
+// defined value even when it did not, and on a parse failure the file is
+// closed again.
+CWaveFile::CWaveFile(const char* Filename, bool Write)
+    : File(NULL), Channels(0), SampleRate(0), Format(SF_S16), SampleSize(0),
+      FrameStartPos(0), TotalFrames(0), FramesRead(0), Duration(0),
+      ReadOnly(false), ChannelMask(0), m_bOK(false)
+{
+    /* Open the file */
+    File = fopen(Filename, Write ? "wb":"rb");
+    if ( !File )
+        return;
+
+    /* Initial parameters for a file that is being written */
+    if ( Write )
+    {
+        SampleRate = 44100;
+        Channels = 2;
+        Format = SF_S16;
+        SampleSize = 16;
+        ChannelMask = 0;
+        m_bOK = true;
+        return;
+    }
+
+    ReadOnly = true;
+
+    size_t s;
+    uint8_t buffer[8];
+    uint8_t *fmt = NULL;
+    uint32_t v;
+    uint32_t avg_bps;
+    uint32_t block_align;
+    unsigned short FormatType;
+    unsigned short SampleType;
+
+    static const uint8_t riff[4] = { 'R', 'I', 'F', 'F' };
+    static const uint8_t wave[4] = { 'W', 'A', 'V', 'E' };
+    static const uint8_t fmt_[4] = { 'f', 'm', 't', ' ' };
+    static const uint8_t data[4] = { 'd', 'a', 't', 'a' };
+
+    /* The first four bytes must be "RIFF"; the next four hold the RIFF size */
+    s = fread(buffer, 1, 8, File);
+    if (s < 8)
+        goto err2;
+
+    if (memcmp(buffer, riff, 4))
+        goto err2;
+
+    /* Bytes 8..11 must be "WAVE" */
+    /* TODO: check size (in buffer[4..8]) */
+    s = fread(buffer, 1, 4, File);
+    if (s < 4)
+        goto err2;
+
+    if (memcmp(buffer, wave, 4))
+        goto err2;
+
+    s = find_chunk(File, fmt_);
+    if ( s != 16 && s != 18 && s != 40 )
+        goto err2;
+
+    fmt = (uint8_t*)malloc(s);
+    if (!fmt)
+        goto err2;
+
+    if (fread(fmt, 1, s, File) != s)
+        goto err3;
+
+    /* wFormatTag */
+    v = (uint32_t)fmt[0] | ((uint32_t)fmt[1] << 8);
+    if (v != WAVE_FORMAT_PCM && v != WAVE_FORMAT_IEEE_FLOAT && v != WAVE_FORMAT_EXTENSIBLE)
+        goto err3;
+
+    FormatType = v;
+
+    if (s == 40 && WAVE_FORMAT_EXTENSIBLE == v)
+    {
+        /* The real sample type is the first 16-bit word of the sub-format
+           field, 24 bytes into the fmt chunk (offset 0x2C of a file whose fmt
+           payload starts at 0x14). Read byte-wise so host byte order does not
+           matter. */
+        v = (uint32_t)fmt[24] | ((uint32_t)fmt[25] << 8);
+    }
+
+    SampleType = v;
+
+    /* wChannels */
+    v = (uint32_t)fmt[2] | ((uint32_t)fmt[3] << 8);
+
+    Channels = v;
+
+    if (v < 1 || v > 32)
+        goto err3;
+
+    /* dwSamplesPerSec */
+    SampleRate = (uint32_t)fmt[4] | ((uint32_t)fmt[5] << 8) |
+        ((uint32_t)fmt[6] << 16) | ((uint32_t)fmt[7] << 24);
+
+    /* dwAvgBytesPerSec */
+    avg_bps = (uint32_t)fmt[8] | ((uint32_t)fmt[9] << 8) |
+        ((uint32_t)fmt[10] << 16) | ((uint32_t)fmt[11] << 24);
+
+    /* wBlockAlign */
+    block_align = (uint32_t)fmt[12] | ((uint32_t)fmt[13] << 8);
+
+    /* wBitsPerSample */
+    SampleSize = (uint32_t)fmt[14] | ((uint32_t)fmt[15] << 8);
+
+    switch (SampleSize)
+    {
+    case 8:
+        Format = SF_U8;
+        break;
+    case 16:
+        Format = SF_S16;
+        break;
+    case 24:
+        Format = SF_S24;
+        break;
+    case 32:
+        Format = (SampleType == WAVE_FORMAT_IEEE_FLOAT) ? SF_IEEE_FLOAT : SF_S32;
+        break;
+    case 64:
+        if (SampleType != WAVE_FORMAT_IEEE_FLOAT)
+            goto err3;
+        Format = SF_IEEE_DOUBLE;
+        break;
+    default:
+        goto err3;
+    }
+
+    /* Block align and byte rate must agree with the other fields */
+    if (block_align != Channels * (SampleSize / 8))
+        goto err3;
+
+    if (avg_bps != block_align * SampleRate)
+        goto err3;
+
+    v = find_chunk(File, data);
+
+    if (v == 0 || v % block_align != 0)
+        goto err3;
+
+    TotalFrames = v / block_align;
+
+    FramesRead = 0;
+
+    /* Duration in seconds; left at 0 when the header carries no usable rate */
+    if (SampleRate > 0)
+        Duration = (double)TotalFrames / SampleRate;
+
+    /* The channel mask occupies bytes 20..23 of the fmt chunk, which exist
+       only in the 40-byte layout. */
+    if (FormatType == WAVE_FORMAT_EXTENSIBLE && s == 40)
+    {
+        ChannelMask = (uint32_t)fmt[20] | ((uint32_t)fmt[21] << 8) |
+            ((uint32_t)fmt[22] << 16) | ((uint32_t)fmt[23] << 24);
+    }
+    else
+    {
+        ChannelMask = 0;
+    }
+
+    FrameStartPos = ftell(File);
+
+    free(fmt);
+    m_bOK = true;
+    return;
+
+err3:
+    free(fmt);
+err2:
+    fclose(File);
+
+    File = NULL;
+}
+
+// True when the constructor finished successfully (m_bOK was set).
+bool CWaveFile::GetStatus()
+{
+    return this->m_bOK;
+}
+
+// Sample format currently stored in the object.
+SAMPLE_FORMAT CWaveFile::GetFormat()
+{
+    return this->Format;
+}
+
+// Total frame count, converted from the unsigned long member to int.
+int CWaveFile::GetTotalFrames()
+{
+    return (int)this->TotalFrames;
+}
+
+// Frame counter maintained by the ReadFrameAs* functions and Seek, converted
+// from the unsigned long member to int.
+int CWaveFile::GetFramesRead()
+{
+    return (int)this->FramesRead;
+}
+
+// Close the file. For a file opened for writing, the data-chunk size (stored
+// in the four bytes before FrameStartPos) and the RIFF size (at offset 4) are
+// first patched to match what was actually written. Both are emitted through
+// store4 so they are little-endian like the rest of the header, whatever the
+// host byte order.
+//
+// NOTE(review): the patch positions rely on FrameStartPos, which SetupDone
+// sets for files being written - confirm callers always call SetupDone first.
+CWaveFile::~CWaveFile()
+{
+    if (File == NULL)
+        return;
+
+    if (!ReadOnly)
+    {
+        uint8_t Encoded[4];
+
+        // Bytes written after the header.
+        unsigned int Size = ftell(File) - FrameStartPos;// 44;
+
+        store4(Encoded, Size);
+        fseek(File, FrameStartPos - 4, SEEK_SET);
+        fwrite(Encoded, 4, 1, File);
+
+        // RIFF size: everything after the first 8 bytes of the file.
+        Size += FrameStartPos - 8;
+
+        store4(Encoded, Size);
+        fseek(File, 4, SEEK_SET);
+        fwrite(Encoded, 4, 1, File);
+    }
+
+    fclose(File);
+}
+
+// Sample rate currently stored in the object.
+int CWaveFile::GetSampleRate()
+{
+    return this->SampleRate;
+}
+
+// Store the sample rate; SetupDone writes it into the header.
+void CWaveFile::SetSampleRate(int SampleRate)
+{
+    CWaveFile::SampleRate = SampleRate;
+}
+
+// Write the WAVE header at the start of the file from the current settings and
+// remember where the audio data begins (FrameStartPos).
+//
+// With a non-zero ChannelMask the 68-byte wavmulti_header template is used and
+// the mask is stored in it; otherwise the 44-byte wav_header template is used.
+// SampleSize is re-derived from the low 8 bits of Format.
+void CWaveFile::SetupDone()
+{
+    unsigned char Header[68];
+    const bool UseMask = (ChannelMask != 0);
+    const uint8_t* Template = UseMask ? wavmulti_header : wav_header;
+    const size_t HeaderBytes = UseMask ? sizeof(wavmulti_header) : sizeof(wav_header);
+
+    fseek(File, 0, SEEK_SET);
+
+    SampleSize = Format & 0xFF;
+    const int BytesPerSample = SampleSize / 8;
+
+    memcpy(Header, Template, HeaderBytes);
+
+    // Each template defaults to one sample type; patch it when Format differs.
+    if (UseMask)
+    {
+        if (Format < SF_IEEE_FLOAT)
+        {
+            store2(Header + 44, WAVE_FORMAT_PCM);
+        }
+    }
+    else if (Format >= SF_IEEE_FLOAT)
+    {
+        store2(Header + 20, WAVE_FORMAT_IEEE_FLOAT);
+    }
+
+    // Fields shared by both layouts.
+    store2(Header + 22, Channels);
+    store4(Header + 24, SampleRate);
+    store4(Header + 28, BytesPerSample * SampleRate * Channels);
+    store2(Header + 32, BytesPerSample * Channels);
+    store2(Header + 34, BytesPerSample * 8);
+
+    // Fields that exist only in the channel-mask layout.
+    if (UseMask)
+    {
+        store2(Header + 38, BytesPerSample * 8);
+        store4(Header + 40, ChannelMask);
+    }
+
+    fwrite(Header, HeaderBytes, 1, File);
+
+    FrameStartPos = ftell(File);
+}
+
+
+// Move the stream to frame FramePos and set FramesRead to FramePos.
+//
+// The byte offset passed to fseek is FrameStartPos plus FramePos frames of
+// Channels * (SampleSize / 8) bytes each.
+//
+// NOTE(review): the original comment says Where is ignored, yet it is handed
+// to fseek unchanged. The offset and the FramesRead update only make sense for
+// SEEK_SET - confirm which behaviour is intended.
+void CWaveFile::Seek(int FramePos, int Where)
+{
+    const unsigned int ByteOffset = FrameStartPos + FramePos * Channels* (SampleSize / 8);
+
+    fseek(File, ByteOffset, Where);
+
+    FramesRead = FramePos;
+}
+
+// Channel count currently stored in the object.
+int CWaveFile::GetChannels()
+{
+    return this->Channels;
+}
+
+// Store the channel count; SetupDone writes it into the header.
+void CWaveFile::SetChannels(int Channels)
+{
+    CWaveFile::Channels = Channels;
+}
+
+// Store the sample format; SetupDone derives SampleSize from it.
+void CWaveFile::SetSampleFormat(SAMPLE_FORMAT Format)
+{
+    CWaveFile::Format = Format;
+}
+
+// Channel mask currently stored in the object (0 when none is set).
+uint32_t CWaveFile::GetChannelMask()
+{
+    return this->ChannelMask;
+}
+
+// Store the channel mask; a non-zero mask makes SetupDone use the
+// wavmulti_header layout.
+void CWaveFile::SetChannelMask(uint32_t Mask)
+{
+    this->ChannelMask = Mask;
+}
+
+// Read Frames frames and convert them to signed 16-bit samples.
+//
+// FrameSamples: receives Frames * Channels interleaved samples.
+// Frames:       number of frames to read.
+//
+// Returns false when all frames have already been consumed
+// (FramesRead >= TotalFrames), when the file ends early, or when Format is not
+// one of the known formats. FramesRead is advanced by Frames before reading.
+//
+// Conversions: 8-bit unsigned is re-centred and scaled by 256; 24- and 32-bit
+// integers keep their top 16 bits; float/double samples are scaled by 32768,
+// truncated and clamped to [-32768, 32767] (NaN becomes 0). Samples are read
+// one at a time, so any Frames * Channels product is handled without an
+// intermediate buffer.
+bool CWaveFile::ReadFrameAsS16(short* FrameSamples, int Frames)
+{
+    if (FramesRead >= TotalFrames)
+        return false;
+
+    FramesRead += Frames;
+
+    size_t SampleBytes = 0;
+    switch (Format)
+    {
+    case SF_S16:
+        // Already in the target format: read the whole block directly.
+        return (size_t)Frames == fread(FrameSamples, sizeof(FrameSamples[0])*Channels, Frames, File);
+    case SF_U8:
+        SampleBytes = 1;
+        break;
+    case SF_S24:
+        SampleBytes = 3;
+        break;
+    case SF_S32:
+    case SF_IEEE_FLOAT:
+        SampleBytes = 4;
+        break;
+    case SF_IEEE_DOUBLE:
+        SampleBytes = 8;
+        break;
+    default:
+        return false;
+    }
+
+    const int SampleCount = Frames * Channels;
+    for (int i = 0; i < SampleCount; i++)
+    {
+        uint8_t Raw[8];
+        double Scaled = 0.0;
+
+        if (1 != fread(Raw, SampleBytes, 1, File))
+            return false;
+
+        switch (Format)
+        {
+        case SF_U8:
+            FrameSamples[i] = (short)((Raw[0] - 128) * 256);
+            continue;
+        case SF_S24:
+            // Little-endian: the two most significant bytes are Raw[1], Raw[2].
+            FrameSamples[i] = (short)(unsigned short)(Raw[1] | (Raw[2] << 8));
+            continue;
+        case SF_S32:
+            FrameSamples[i] = (short)(unsigned short)(Raw[2] | (Raw[3] << 8));
+            continue;
+        case SF_IEEE_FLOAT:
+        {
+            float Value;
+            memcpy(&Value, Raw, sizeof(Value));
+            Scaled = Value * 32768.0;
+            break;
+        }
+        default: // SF_IEEE_DOUBLE
+        {
+            double Value;
+            memcpy(&Value, Raw, sizeof(Value));
+            Scaled = Value * 32768.0;
+            break;
+        }
+        }
+
+        // Keep the conversion to short in range; +1.0 would otherwise overflow.
+        if (Scaled != Scaled)
+            Scaled = 0.0;
+        else if (Scaled > 32767.0)
+            Scaled = 32767.0;
+        else if (Scaled < -32768.0)
+            Scaled = -32768.0;
+
+        FrameSamples[i] = (short)Scaled;
+    }
+    return true;
+}
+
+// Read Frames frames and convert them to float samples.
+//
+// FrameSamples: receives Frames * Channels interleaved samples.
+// Frames:       number of frames to read.
+//
+// Returns false when all frames have already been consumed
+// (FramesRead >= TotalFrames), when fewer than Frames frames could be read, or
+// when Format is not one of the known formats. FramesRead is advanced by
+// Frames before reading.
+//
+// Conversions: 8-bit unsigned becomes (x - 128) / 128; 16-bit is divided by
+// 32768; 24- and 32-bit integers are aligned to 32 bits and divided by 2^31;
+// 64-bit float samples are read as doubles and narrowed to float.
+bool CWaveFile::ReadFrameAsfloat(float* FrameSamples, int Frames)
+{
+    if (FramesRead >= TotalFrames)
+        return false;
+
+    FramesRead += Frames;
+
+    size_t SampleBytes = 0;
+    switch (Format)
+    {
+    case SF_IEEE_FLOAT:
+        // Already in the target format: read the whole block directly and
+        // require all of it, as the other formats do.
+        return (size_t)Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+    case SF_U8:
+        SampleBytes = 1;
+        break;
+    case SF_S16:
+        SampleBytes = 2;
+        break;
+    case SF_S24:
+        SampleBytes = 3;
+        break;
+    case SF_S32:
+        SampleBytes = 4;
+        break;
+    case SF_IEEE_DOUBLE:
+        SampleBytes = 8;
+        break;
+    default:
+        return false;
+    }
+
+    const int SampleCount = Frames * Channels;
+    for (int i = 0; i < SampleCount; i++)
+    {
+        uint8_t Raw[8];
+
+        if (1 != fread(Raw, SampleBytes, 1, File))
+            return false;
+
+        switch (Format)
+        {
+        case SF_U8:
+            FrameSamples[i] = (Raw[0] - 128) / 128.0;
+            break;
+        case SF_S16:
+        {
+            const int16_t Value = (int16_t)(uint16_t)(Raw[0] | (Raw[1] << 8));
+            FrameSamples[i] = Value / 32768.0;
+            break;
+        }
+        case SF_S24:
+        {
+            // Place the 24 bits in the top of a 32-bit word to keep the sign.
+            const uint32_t Bits = ((uint32_t)Raw[0] << 8) | ((uint32_t)Raw[1] << 16) |
+                ((uint32_t)Raw[2] << 24);
+            FrameSamples[i] = (int32_t)Bits / 2147483648.0;
+            break;
+        }
+        case SF_S32:
+        {
+            const uint32_t Bits = (uint32_t)Raw[0] | ((uint32_t)Raw[1] << 8) |
+                ((uint32_t)Raw[2] << 16) | ((uint32_t)Raw[3] << 24);
+            FrameSamples[i] = (int32_t)Bits / 2147483648.0;
+            break;
+        }
+        default: // SF_IEEE_DOUBLE
+        {
+            // The file holds 8-byte doubles; they cannot be read straight
+            // into a float buffer.
+            double Value;
+            memcpy(&Value, Raw, sizeof(Value));
+            FrameSamples[i] = (float)Value;
+            break;
+        }
+        }
+    }
+    return true;
+}
+
+// Read Frames frames and convert them to double samples.
+//
+// FrameSamples: receives Frames * Channels interleaved samples.
+// Frames:       number of frames to read.
+//
+// Returns false when all frames have already been consumed
+// (FramesRead >= TotalFrames), when the file ends early, or when Format is not
+// one of the known formats. FramesRead is advanced by Frames before reading.
+//
+// Conversions: 8-bit unsigned becomes (x - 128) / 128; 16-bit is divided by
+// 32768; 24- and 32-bit integers are aligned to 32 bits and divided by 2^31;
+// 32-bit float samples are widened. Samples are read one at a time, so any
+// Frames * Channels product is handled without an intermediate buffer.
+bool CWaveFile::ReadFrameAsDouble(double* FrameSamples, int Frames)
+{
+    if (FramesRead >= TotalFrames)
+        return false;
+
+    FramesRead += Frames;
+
+    size_t SampleBytes = 0;
+    switch (Format)
+    {
+    case SF_IEEE_DOUBLE:
+        // Already in the target format: read the whole block directly.
+        return (size_t)Frames == fread(FrameSamples, sizeof(FrameSamples[0]) * Channels, Frames, File);
+    case SF_U8:
+        SampleBytes = 1;
+        break;
+    case SF_S16:
+        SampleBytes = 2;
+        break;
+    case SF_S24:
+        SampleBytes = 3;
+        break;
+    case SF_S32:
+    case SF_IEEE_FLOAT:
+        SampleBytes = 4;
+        break;
+    default:
+        return false;
+    }
+
+    const int SampleCount = Frames * Channels;
+    for (int i = 0; i < SampleCount; i++)
+    {
+        uint8_t Raw[4];
+
+        if (1 != fread(Raw, SampleBytes, 1, File))
+            return false;
+
+        switch (Format)
+        {
+        case SF_U8:
+            FrameSamples[i] = (Raw[0] - 128) / 128.0;
+            break;
+        case SF_S16:
+        {
+            const int16_t Value = (int16_t)(uint16_t)(Raw[0] | (Raw[1] << 8));
+            FrameSamples[i] = Value / 32768.0;
+            break;
+        }
+        case SF_S24:
+        {
+            // Place the 24 bits in the top of a 32-bit word to keep the sign.
+            const uint32_t Bits = ((uint32_t)Raw[0] << 8) | ((uint32_t)Raw[1] << 16) |
+                ((uint32_t)Raw[2] << 24);
+            FrameSamples[i] = (int32_t)Bits / 2147483648.0;
+            break;
+        }
+        case SF_S32:
+        {
+            const uint32_t Bits = (uint32_t)Raw[0] | ((uint32_t)Raw[1] << 8) |
+                ((uint32_t)Raw[2] << 16) | ((uint32_t)Raw[3] << 24);
+            FrameSamples[i] = (int32_t)Bits / 2147483648.0;
+            break;
+        }
+        default: // SF_IEEE_FLOAT
+        {
+            float Value;
+            memcpy(&Value, Raw, sizeof(Value));
+            FrameSamples[i] = (double)Value;
+            break;
+        }
+        }
+    }
+    return true;
+}
+
+// Write Size bytes from Raw to the file as a single block.
+void CWaveFile::WriteRaw(void* Raw, int Size)
+{
+    const size_t ByteCount = Size;
+
+    fwrite(Raw, ByteCount, 1, File);
+}
+
+
+// Write Frames frames of 8-bit samples; each frame holds Channels samples.
+void CWaveFile::WriteFrame(uint8_t* FrameSamples, int Frames)
+{
+    const size_t FrameBytes = sizeof(*FrameSamples) * Channels;
+
+    fwrite(FrameSamples, FrameBytes, Frames, File);
+}
+
+// Write Frames frames of 16-bit samples; each frame holds Channels samples.
+void CWaveFile::WriteFrame(short* FrameSamples, int Frames)
+{
+    const size_t FrameBytes = sizeof(*FrameSamples) * Channels;
+
+    fwrite(FrameSamples, FrameBytes, Frames, File);
+}
+
+// Write Frames frames of 32-bit samples; each frame holds Channels samples.
+void CWaveFile::WriteFrame(int32_t* FrameSamples, int Frames)
+{
+    const size_t FrameBytes = sizeof(*FrameSamples) * Channels;
+
+    fwrite(FrameSamples, FrameBytes, Frames, File);
+}
+
+// Write Frames frames of 24-bit samples.
+//
+// FrameSamples: Frames * Channels interleaved samples, each carried in the low
+//               24 bits of an int32_t.
+// Frames:       number of frames to write; every frame is written, not only
+//               the first one.
+//
+// The three bytes of each sample are emitted least significant first, so the
+// output does not depend on the host byte order.
+void CWaveFile::WriteFrameS24(int32_t* FrameSamples, int Frames)
+{
+    const int SampleCount = Frames * Channels;
+
+    for (int i = 0; i < SampleCount; i++)
+    {
+        const uint32_t Bits = (uint32_t)FrameSamples[i];
+        const uint8_t Packed[3] = {
+            (uint8_t)(Bits & 0xFF),
+            (uint8_t)((Bits >> 8) & 0xFF),
+            (uint8_t)((Bits >> 16) & 0xFF)
+        };
+
+        fwrite(Packed, sizeof(Packed), 1, File);
+    }
+}
+
+// Write Frames frames of double samples; each frame holds Channels samples.
+void CWaveFile::WriteFrame(double* FrameSamples, int Frames)
+{
+    const size_t FrameBytes = sizeof(*FrameSamples) * Channels;
+
+    fwrite(FrameSamples, FrameBytes, Frames, File);
+}
+
+// Write Frames frames of float samples; each frame holds Channels samples.
+void CWaveFile::WriteFrame(float* FrameSamples, int Frames)
+{
+    const size_t FrameBytes = sizeof(*FrameSamples) * Channels;
+
+    fwrite(FrameSamples, FrameBytes, Frames, File);
+}
+
+
+// Value of the Duration member (declared in the header as seconds).
+double CWaveFile::GetDuration()
+{
+    return this->Duration;
+}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Sun, Jan 12, 08:33 (1 d, 15 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1346411
Default Alt Text
(141 KB)
Attached To
R350 av_svc
Event Timeline
Log In to Comment