require 'phash'

# Audio part of the pHash bindings: native function declarations plus Ruby
# helpers to read audio, hash it and compare two hashes.
module Phash
  # read audio
  #
  # param filename - path and name of audio file to read
  # param sr - sample rate conversion
  # param channels - nb channels to convert to (always 1) unused
  # param buf - preallocated buffer
  # param buflen - (in/out) param for buf length
  # param nbsecs - float value for duration (in secs) to read from file
  #
  # return float* - float pointer to start of buffer - one channel of audio, NULL if error
  #
  # float* ph_readaudio(const char *filename, int sr, int channels, float *sigbuf, int &buflen, const float nbsecs = 0);
  #
  attach_function :ph_readaudio, [:string, :int, :int, :pointer, :pointer, :float], :pointer, :blocking => true

  # audio hash calculation
  # purpose: hash calculation for each frame in the buffer.
  #   Each value is computed from successive overlapping frames of the input buffer.
  #   The value is based on the bark scale values of the frame fft spectrum. The value
  #   is computed from temporal and spectral differences on the bark scale.
  #
  # param buf - pointer to start of buffer
  # param N - length of buffer
  # param sr - sample rate on which to base the audiohash
  # param nb_frames - (out) number of frames in audio buf and length of audiohash buffer returned
  #
  # return uint32 pointer to audio hash, NULL for error
  #
  # uint32_t* ph_audiohash(float *buf, int nbbuf, const int sr, int &nbframes);
  #
  attach_function :ph_audiohash, [:pointer, :int, :int, :pointer], :pointer, :blocking => true

  # distance function between two hashes
  #
  # param hash_a - first hash
  # param Na - length of first hash
  # param hash_b - second hash
  # param Nb - length of second hash
  # param threshold - threshold value to compare successive blocks, 0.25, 0.30, 0.35
  # param block_size - length of block_size, 256
  # param Nc - (out) length of confidence score vector
  #
  # return double - ptr to confidence score vector
  #
  # double* ph_audio_distance_ber(uint32_t *hash_a, const int Na, uint32_t *hash_b, const int Nb, const float threshold, const int block_size, int &Nc);
  #
  attach_function :ph_audio_distance_ber, [:pointer, :int, :pointer, :int, :float, :int, :pointer], :pointer, :blocking => true

  # Used to release the vector returned by ph_audio_distance_ber
  attach_function :free, [:pointer], :void

  class << self
    # Sample rate used when the caller passes nil
    DEFAULT_SAMPLE_RATE = 8000

    # Read audio file specified by path and optional length using ph_readaudio
    #
    # path - anything responding to to_s
    # length - duration in seconds to read, converted with to_f (nil becomes 0.0)
    # sample_rate - sample rate passed to ph_readaudio, DEFAULT_SAMPLE_RATE if nil
    #
    # Returns Data wrapping the sample buffer and its length, or nil if
    # ph_readaudio returned NULL
    def audio_data(path, length = 0, sample_rate = nil)
      sample_rate ||= DEFAULT_SAMPLE_RATE

      audio_data_length_p = FFI::MemoryPointer.new :int
      begin
        audio_data = ph_readaudio(path.to_s, sample_rate, 1, nil, audio_data_length_p, length.to_f)
        # A NULL result arrives as a null FFI::Pointer object (truthy), not as nil
        return nil if null_pointer?(audio_data)

        Data.new(audio_data, audio_data_length_p.get_int(0))
      ensure
        # Release the out-parameter on the error path as well
        audio_data_length_p.free
      end
    end

    # Get hash of audio data using ph_audiohash
    #
    # audio_data - object responding to data (pointer to samples) and length
    # sample_rate - sample rate passed to ph_audiohash, DEFAULT_SAMPLE_RATE if nil
    #
    # Returns AudioHash, or nil if ph_audiohash returned NULL
    def audio_data_hash(audio_data, sample_rate = nil)
      sample_rate ||= DEFAULT_SAMPLE_RATE

      hash_data_length_p = FFI::MemoryPointer.new :int
      begin
        hash_data = ph_audiohash(audio_data.data, audio_data.length, sample_rate, hash_data_length_p)
        # A NULL result arrives as a null FFI::Pointer object (truthy), not as nil
        return nil if null_pointer?(hash_data)

        AudioHash.new(hash_data, hash_data_length_p.get_int(0))
      ensure
        hash_data_length_p.free
      end
    end

    # Use audio_data and audio_data_hash to compute hash for file at path, specify max length in seconds to read
    #
    # Returns AudioHash, or nil if either step returned nil
    def audio_hash(path, length = nil, sample_rate = nil)
      sample_rate ||= DEFAULT_SAMPLE_RATE

      samples = audio_data(path, length, sample_rate)
      return nil unless samples

      audio_data_hash(samples, sample_rate)
    end

    # Get distance between two audio hashes using ph_audio_distance_ber
    #
    # hash_a, hash_b - AudioHash instances, ArgumentError is raised otherwise
    # threshold - converted with to_f before being passed to the native call
    # block_size - converted with to_i and capped to the shorter hash length
    #
    # Returns array of doubles (confidence score vector), or nil if
    # ph_audio_distance_ber returned NULL
    def audio_distance_ber(hash_a, hash_b, threshold = 0.25, block_size = 256)
      raise ArgumentError, 'hash_a is not an AudioHash' unless hash_a.is_a?(AudioHash)
      raise ArgumentError, 'hash_b is not an AudioHash' unless hash_b.is_a?(AudioHash)

      distance_vector_length_p = FFI::MemoryPointer.new :int
      begin
        block_size = [block_size.to_i, hash_a.length, hash_b.length].min
        distance_vector = ph_audio_distance_ber(hash_a.data, hash_a.length, hash_b.data, hash_b.length, threshold.to_f, block_size, distance_vector_length_p)
        # A NULL result arrives as a null FFI::Pointer object (truthy), not as nil
        return nil if null_pointer?(distance_vector)

        begin
          # Copy the scores into a Ruby array before the native vector is released
          distance_vector.get_array_of_double(0, distance_vector_length_p.get_int(0))
        ensure
          free(distance_vector)
        end
      ensure
        distance_vector_length_p.free
      end
    end

    # Get similarity from audio_distance_ber
    #
    # Extra arguments are passed through to audio_distance_ber.
    #
    # Returns the maximum of the distance vector, or nil if no vector was returned
    def audio_similarity(hash_a, hash_b, *args)
      distance = audio_distance_ber(hash_a, hash_b, *args)
      distance && distance.max
    end

    private

    # True for nil and for pointer objects reporting null?
    def null_pointer?(pointer)
      pointer.nil? || (pointer.respond_to?(:null?) && pointer.null?)
    end
  end

  # Class to store audio hash and compare to other
  class AudioHash < HashData
  end

  # Class to store audio file hash and compare to other
  class Audio < FileHash
    attr_reader :length

    # Audio path and optional length in seconds to read
    #
    # sample_rate - passed on to Phash.audio_hash, which substitutes its default for nil
    def initialize(path, length = nil, sample_rate = nil)
      @path, @length, @sample_rate = path, length, sample_rate
    end

    # Compute the hash for the stored path using Phash.audio_hash
    def compute_phash
      Phash.audio_hash(@path, @length, @sample_rate)
    end
  end
end