feat: 🎸 (xgplayer-transmuxer) support opus codec for audio

This commit is contained in:
gemstone 2024-08-14 17:07:03 +08:00
parent f6445b33e5
commit d6c698c851
8 changed files with 270 additions and 37 deletions

View File

@ -1,4 +1,5 @@
export { AVC } from './avc'
export { AAC } from './aac'
export { AVC } from './avc'
export { HEVC } from './hevc'
export { NALu } from './nalu'
export { OPUS } from './opus'

View File

@ -0,0 +1,80 @@
/**
* Opus documentation, https://opus-codec.org/docs/
* Ogg Encapsulation for the Opus Audio Codec, https://datatracker.ietf.org/doc/html/rfc7845.html
*
* Packet Organization:
*
* Page 0 Pages 1 ... n Pages (n+1) ...
* +------------+ +---+ +---+ ... +---+ +-----------+ +---------+ +--
* | | | | | | | | | | | | |
* |+----------+| |+-----------------+| |+-------------------+ +-----
* |||ID Header|| || Comment Header || ||Audio Data Packet 1| | ...
* |+----------+| |+-----------------+| |+-------------------+ +-----
* | | | | | | | | | | | | |
* +------------+ +---+ +---+ ... +---+ +-----------+ +---------+ +--
* ^ ^ ^
* | | |
* | | Mandatory Page Break
* | |
* | ID header is contained on a single page
* |
* 'Beginning Of Stream'
*/
export class OPUS {
  /**
   * Duration of one Opus frame.
   *
   * Always returns 20; neither argument is consulted.
   * NOTE(review): presumably milliseconds (the common 20 ms Opus frame) — confirm against callers.
   *
   * @param {*} samples unused
   * @param {number} [timescale=1000] unused
   * @returns {number}
   */
  static getFrameDuration (samples, timescale = 1000) {
    return 20
  }

  /**
   * Parses the Opus Identification Header ("OpusHead", RFC 7845 section 5.1).
   *
   * Layout read here (multi-byte fields are little-endian):
   *   0..7   magic signature 'OpusHead'
   *   8      version
   *   9      output channel count
   *   10..11 pre-skip
   *   12..15 input sample rate
   *   16..17 output gain (signed)
   *   18     channel mapping family
   *
   * @param {Uint8Array} data header packet bytes, starting at the magic signature
   * @returns {{outputGain: number, sampleRate: number, channelCount: number, config: Uint8Array, codec: string, originCodec: string}|undefined}
   *   undefined when `data` is empty or the header declares a sample rate of 0
   * @throws {Error} when the magic signature is wrong or the header is truncated
   */
  static parseHeaderPackets (data) {
    if (!data.length) return

    const MAGIC = 'OpusHead'
    // Smallest legal OpusHead: every fixed field up to and including the mapping family byte.
    const MIN_HEADER_LENGTH = 19

    // A buffer shorter than the signature simply fails the comparison below
    // instead of feeding `undefined` to String.fromCodePoint.
    const magicSignature = String.fromCodePoint(...data.subarray(0, MAGIC.length))
    if (magicSignature !== MAGIC) {
      throw new Error('Invalid Opus MagicSignature')
    }
    if (data.length < MIN_HEADER_LENGTH) {
      throw new Error('Invalid Opus header: truncated OpusHead packet')
    }

    const dv = new DataView(data.buffer, data.byteOffset, data.byteLength)
    const channelCount = data[9]
    const sampleRate = dv.getUint32(12, true)
    const outputGain = dv.getInt16(16, true)

    // play as no audio track stream
    if (!sampleRate) return

    // Everything after the 8-byte magic signature, as a view over the same buffer (no copy).
    const config = new Uint8Array(data.buffer, data.byteOffset + MAGIC.length, data.byteLength - MAGIC.length)

    return {
      outputGain,
      sampleRate,
      channelCount,
      config,
      codec: 'opus',
      originCodec: 'opus'
    }
  }
}

View File

@ -1,15 +1,15 @@
import { AudioCodecType, AudioSample, WarningType } from '../model'
import { AAC } from '../codec'
import { AAC, OPUS } from '../codec'
import { isSafari } from '../utils'
const LARGE_AV_FIRST_FRAME_GAP = 500 // ms
const AUDIO_GAP_OVERLAP_THRESHOLD_COUNT = 3
const MAX_SILENT_FRAME_DURATION = 1000 // ms
const AUDIO_EXCETION_LOG_EMIT_DURATION = 5000 // 5s
const AUDIO_EXCEPTION_LOG_EMIT_DURATION = 5000 // 5s
const MAX_VIDEO_FRAME_DURATION = 1000 // ms
const MAX_DTS_DELTA_WITH_NEXT_CHUNK = 200 // ms
const VIDEO_EXCETION_LOG_EMIT_DURATION = 5000 // 5s
const TRACK_BREACKED_CHECK_TIME = 5
const VIDEO_EXCEPTION_LOG_EMIT_DURATION = 5000 // 5s
const TRACK_BROKEN_CHECK_TIME = 5
export class FlvFixer {
constructor (videoTrack, audioTrack, metadataTrack) {
@ -152,7 +152,7 @@ export class FlvFixer {
if (Math.abs(vDelta) > MAX_DTS_DELTA_WITH_NEXT_CHUNK) {
// emit large delta of first sample with expect
if (Math.abs(firstSample.dts - this._lastVideoExceptionChunkFirstDtsDot) > VIDEO_EXCETION_LOG_EMIT_DURATION) {
if (Math.abs(firstSample.dts - this._lastVideoExceptionChunkFirstDtsDot) > VIDEO_EXCEPTION_LOG_EMIT_DURATION) {
this._lastVideoExceptionChunkFirstDtsDot = firstSample.dts
videoTrack.warnings.push({
@ -164,8 +164,8 @@ export class FlvFixer {
})
}
// only video breaked
if (this._videoTimestampBreak >= TRACK_BREACKED_CHECK_TIME) {
// only video break
if (this._videoTimestampBreak >= TRACK_BROKEN_CHECK_TIME) {
this._videoNextDts = firstSample.dts
this._videoTimestampBreak = 0
} else {
@ -192,8 +192,8 @@ export class FlvFixer {
if (sampleDuration > MAX_VIDEO_FRAME_DURATION || sampleDuration < 0) {
this._videoTimestampBreak++
// emit stream breaked
if (Math.abs(dts - this._lastVideoExceptionLargeGapDot) > VIDEO_EXCETION_LOG_EMIT_DURATION) {
// emit stream break
if (Math.abs(dts - this._lastVideoExceptionLargeGapDot) > VIDEO_EXCEPTION_LOG_EMIT_DURATION) {
this._lastVideoExceptionLargeGapDot = dts
videoTrack.warnings.push({
type: WarningType.LARGE_VIDEO_GAP,
@ -266,7 +266,7 @@ export class FlvFixer {
_resetBaseDtsWhenStreamBreaked () {
/**
* timestamp breaked
* timestamp break
* _audioNextDts
* ---------------------|
* (_baseDts) _videoNextDts
@ -297,13 +297,33 @@ export class FlvFixer {
_doFixAudioInternal (audioTrack, samples, timescale) {
if (!audioTrack.sampleDuration) {
audioTrack.sampleDuration = audioTrack.codecType === AudioCodecType.AAC
? AAC.getFrameDuration(audioTrack.timescale, timescale)
: this._getG711Duration(audioTrack)
switch (audioTrack.codecType) {
case AudioCodecType.AAC: {
audioTrack.sampleDuration = AAC.getFrameDuration(audioTrack.timescale, timescale)
break
}
case AudioCodecType.OPUS: {
audioTrack.sampleDuration = OPUS.getFrameDuration(audioTrack.samples, timescale)
break
}
case AudioCodecType.G711PCMA:
case AudioCodecType.G711PCMU: {
audioTrack.sampleDuration = this._getG711Duration(audioTrack)
break
}
default:
console.error('can\'t fix audio codecType:', audioTrack.codecType)
break
}
}
const refSampleDuration = audioTrack.sampleDuration
const sampleDurationInSampleRate = audioTrack.codecType === AudioCodecType.AAC ? 1024 : refSampleDuration * audioTrack.timescale / 1000
const sampleDurationInSampleRate =
audioTrack.codecType === AudioCodecType.OPUS
? 20
: audioTrack.codecType === AudioCodecType.AAC
? 1024
: (refSampleDuration * audioTrack.timescale) / 1000
if (this._audioNextPts === undefined) {
const samp0 = samples[0]
@ -315,8 +335,8 @@ export class FlvFixer {
const sample = samples[i]
let delta = sample.pts - nextPts
// only audio breaked
if (i === 0 && this._audioTimestampBreak >= TRACK_BREACKED_CHECK_TIME && this._keyFrameInNextChunk) {
// only audio break
if (i === 0 && this._audioTimestampBreak >= TRACK_BROKEN_CHECK_TIME && this._keyFrameInNextChunk) {
nextPts = this._audioNextPts = sample.dts
delta = 0
this._audioTimestampBreak = 0
@ -329,7 +349,7 @@ export class FlvFixer {
const silentFrame = this._getSilentFrame(audioTrack) || samples[0].data.subarray()
const count = Math.floor(delta / refSampleDuration)
if (Math.abs(sample.pts - this._lastAudioExceptionGapDot) > AUDIO_EXCETION_LOG_EMIT_DURATION) {
if (Math.abs(sample.pts - this._lastAudioExceptionGapDot) > AUDIO_EXCEPTION_LOG_EMIT_DURATION) {
this._lastAudioExceptionGapDot = sample.pts
audioTrack.warnings.push({
type: WarningType.AUDIO_FILLED,
@ -354,7 +374,7 @@ export class FlvFixer {
// delta >= -500ms
} else if (delta <= -AUDIO_GAP_OVERLAP_THRESHOLD_COUNT * refSampleDuration && delta >= -1 * MAX_SILENT_FRAME_DURATION) {
// need discard frames
if (Math.abs(sample.pts - this._lastAudioExceptionOverlapDot) > AUDIO_EXCETION_LOG_EMIT_DURATION) {
if (Math.abs(sample.pts - this._lastAudioExceptionOverlapDot) > AUDIO_EXCEPTION_LOG_EMIT_DURATION) {
this._lastAudioExceptionOverlapDot = sample.pts
audioTrack.warnings.push({
@ -371,7 +391,7 @@ export class FlvFixer {
if (Math.abs(delta) > MAX_SILENT_FRAME_DURATION) {
this._audioTimestampBreak++
if (Math.abs(sample.pts - this._lastAudioExceptionLargeGapDot) > AUDIO_EXCETION_LOG_EMIT_DURATION) {
if (Math.abs(sample.pts - this._lastAudioExceptionLargeGapDot) > AUDIO_EXCEPTION_LOG_EMIT_DURATION) {
this._lastAudioExceptionLargeGapDot = sample.pts
audioTrack.warnings.push({
type: WarningType.LARGE_AUDIO_GAP,
@ -385,8 +405,15 @@ export class FlvFixer {
}
}
sample.dts = sample.pts = nextPts
sample.duration = sampleDurationInSampleRate
if (audioTrack.codecType === AudioCodecType.OPUS) {
const lastSample = samples[samples.length - 1]
if (lastSample) {
lastSample.duration = sample.pts - lastSample.pts
}
} else {
sample.dts = sample.pts = nextPts
sample.duration = sampleDurationInSampleRate
}
this._audioNextPts += refSampleDuration
}
}

View File

@ -3,6 +3,8 @@ import { FlvFixer } from './fixer'
import { concatUint8Array, Logger, readBig32 } from '../utils'
import { AAC, AVC, HEVC, NALu } from '../codec'
import { AMF } from './amf'
import { FlvSoundFormat } from './soundFormat'
import { OPUS } from '../codec/opus'
const logger = new Logger('FlvDemuxer')
@ -132,7 +134,7 @@ export class FlvDemuxer {
}
audioTrack.formatTimescale = videoTrack.formatTimescale = videoTrack.timescale = metadataTrack.timescale = 1000
audioTrack.timescale = audioTrack.sampleRate || 0
audioTrack.timescale = audioTrack.codecType === AudioCodecType.OPUS ? 1000 : audioTrack.sampleRate || 0
if (!audioTrack.exist() && audioTrack.hasSample()) {
audioTrack.reset()
@ -209,6 +211,11 @@ export class FlvDemuxer {
return readBig32(data, 5) >= 9
}
/**
* @param {Uint8Array} data
* @param {number} pts
* @private
*/
_parseAudio (data, pts) {
if (!data.length) return
@ -216,16 +223,17 @@ export class FlvDemuxer {
const track = this.audioTrack
if (
format !== 10 && // AAC
format !== 7 && // G.711 A-law logarithmic PCM
format !== 8 // G.711 mu-law logarithmic PCM
format !== FlvSoundFormat.AAC &&
format !== FlvSoundFormat.G711A &&
format !== FlvSoundFormat.G711M &&
format !== FlvSoundFormat.OPUS
) {
logger.warn(`Unsupported sound format: ${format}`)
track.reset()
return
}
if (format !== 10) {
if (format !== FlvSoundFormat.AAC && format !== FlvSoundFormat.OPUS) {
const soundRate = (data[0] & 0x0c) >> 2
const soundSize = (data[0] & 0x02) >> 1
const soundType = (data[0] & 0x01)
@ -235,18 +243,59 @@ export class FlvDemuxer {
}
switch (format) {
case 7 /* G.711 A-law logarithmic PCM */:
case 8 /* G.711 mu-law logarithmic PCM */:
case FlvSoundFormat.G711A:
case FlvSoundFormat.G711M:
this._parseG711(data, pts, format)
break
case 10 /* AAC */:
case FlvSoundFormat.AAC:
this._parseAac(data, pts)
break
case FlvSoundFormat.OPUS:
this._parseOpus(data, pts)
break
default:
break
}
}
/**
* @param {Uint8Array} data
* @param {number} pts
* @private
*/
_parseOpus (data, pts) {
const track = this.audioTrack
const packetType = data[1]
track.codecType = AudioCodecType.OPUS
switch (packetType) {
case 0 /* Header Packets */: {
const ret = OPUS.parseHeaderPackets(data.subarray(2))
if (ret) {
track.codec = ret.codec
track.channelCount = ret.channelCount
track.sampleRate = ret.sampleRate
track.config = ret.config
track.sampleDuration = OPUS.getFrameDuration([], track.timescale)
} else {
track.reset()
logger.warn('Cannot parse AudioSpecificConfig', data)
}
break
}
case 1 /* Raw OPUS frame data */: {
if (pts === undefined || pts === null) return
const newSample = new AudioSample(pts, data.subarray(2), track.sampleDuration)
track.samples.push(newSample)
break
}
default:
logger.warn(`Unknown OpusPacketType: ${packetType}`)
}
}
_parseG711 (data, pts, format) {
const track = this.audioTrack
track.codecType = format === 7 ? AudioCodecType.G711PCMA : AudioCodecType.G711PCMU

View File

@ -0,0 +1,12 @@
/**
 * Sound format identifiers of an FLV audio tag, used by the FLV demuxer to
 * pick an audio parser.
 * NOTE(review): 13 for Opus looks like an extension beyond the legacy FLV format table — confirm with the stream producer.
 *
 * Frozen so the shared table cannot be altered at runtime.
 *
 * @enum {number}
 */
export const FlvSoundFormat = Object.freeze({
  MP3: 2,
  G711A: 7, // G.711 A-law logarithmic PCM
  G711M: 8, // G.711 mu-law logarithmic PCM
  AAC: 10, // AAC
  OPUS: 13 // Opus
})

View File

@ -80,7 +80,8 @@ export class AudioTrack {
this.codec &&
(this.codecType === AudioCodecType.AAC ||
this.codecType === AudioCodecType.G711PCMA ||
this.codecType === AudioCodecType.G711PCMU)
this.codecType === AudioCodecType.G711PCMU ||
this.codecType === AudioCodecType.OPUS)
)
}

View File

@ -16,7 +16,8 @@ export const VideoCodecType = {
export const AudioCodecType = {
AAC: 'aac',
G711PCMA: 'g7110a',
G711PCMU: 'g7110m'
G711PCMU: 'g7110m',
OPUS: 'opus'
}
/** @enum {string} */

View File

@ -1,11 +1,13 @@
import { TrackType, VideoCodecType } from '../model'
import { concatUint8Array, parse /* hashVal */ } from '../utils'
import { AudioCodecType, TrackType, VideoCodecType } from '../model'
import { concatUint8Array, parse } from '../utils'
import Buffer from './buffer'
// import Crypto from './crypto/crypto'
const UINT32_MAX = 2 ** 32 - 1
export class MP4 {
static types = [
'Opus',
'dOps',
'av01',
'av1C',
'avc1',
@ -343,13 +345,17 @@ export class MP4 {
static stsd (track) {
let content
if (track.type === 'audio') {
if (track.useEME && track.enca) {
content = MP4.enca(track)
// console.log('[remux],enca, len,', content.byteLength, track.type, hashVal(content.toString()))
} else {
content = MP4.mp4a(track)
// console.log('[remux],mp4a, len,', content.byteLength, track.type, hashVal(content.toString()))
if (track.codecType === AudioCodecType.OPUS) {
content = MP4.opus(track)
} else {
content = MP4.mp4a(track)
}
}
} else if (track.useEME && track.encv) {
content = MP4.encv(track)
@ -717,6 +723,62 @@ export class MP4 {
return esds
}
/**
* https://opus-codec.org/docs/opus_in_isobmff.html
*/
static opus (track) {
const opusAudioDescription = new Uint8Array([
0x00, 0x00, 0x00, // version
0x00, 0x00, 0x00, // reserved
0x00, 0x01, // data_reference_index
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // reserved
track.channelCount,
0x00, 0x10, // sampleSize
0x00, 0x00, // pre_defined
0x00, 0x00, // reserved
(track.sampleRate >> 8) & 0xff,
track.sampleRate & 0xff, // sampleRate
0x00, 0x00
])
const opusSpecificConfig = track.config.length ? MP4.dOps(track) : []
return MP4.box(MP4.types.Opus, opusAudioDescription, opusSpecificConfig)
}
static dOps (track) {
if (track.config) {
track.config[4] = (track.sampleRate >>> 24) & 0xFF
track.config[5] = (track.sampleRate >>> 16) & 0xFF
track.config[6] = (track.sampleRate >>> 8) & 0xFF
track.config[7] = (track.sampleRate) & 0xFF
return MP4.box(MP4.types.dOps, track.config)
}
// const {channelCount, channelConfigCode, sampleRate } = track
// const mapping = channelConfigCode?.map((c) => {
// return [
// (c >>> 4) & 0xFF, // Channel mapping family
// c & 0xFF // Channel mapping index
// ]
// }).flat() || []
// const data = new Uint8Array([
// 0x00, // version
// channelCount, // channelCount
// 0x00, 0x00, // preSkip
// (sampleRate >>> 24) & 0xFF,
// (sampleRate >>> 17) & 0xFF,
// (sampleRate >>> 8) & 0xFF,
// (sampleRate >>> 0) & 0xFF,
// 0x00, 0x00, // Global Gain
// ... mapping
// ])
// return data
}
static mvex (tracks) {
// const mehd = MP4.box(MP4.types.mehd, this.extension(0, 0), Buffer.writeUint32(tracks[0].tkhdDuration))
// const mvex = MP4.box(MP4.types.mvex, mehd, MP4.trex1(1), MP4.trex2(2))