mirror of
https://github.com/bytedance/xgplayer.git
synced 2025-04-05 11:18:46 +08:00
feat: 🎸 (xgplayer-transmuxer) support opus codec for audio
This commit is contained in:
parent
f6445b33e5
commit
d6c698c851
@ -1,4 +1,5 @@
|
||||
export { AVC } from './avc'
|
||||
export { AAC } from './aac'
|
||||
export { AVC } from './avc'
|
||||
export { HEVC } from './hevc'
|
||||
export { NALu } from './nalu'
|
||||
export { OPUS } from './opus'
|
||||
|
80
packages/xgplayer-transmuxer/src/codec/opus.js
Normal file
80
packages/xgplayer-transmuxer/src/codec/opus.js
Normal file
@ -0,0 +1,80 @@
|
||||
/**
 * Opus documentation, https://opus-codec.org/docs/
 * Ogg Encapsulation for the Opus Audio Codec, https://datatracker.ietf.org/doc/html/rfc7845.html
 *
 * Packet Organization:
 *
 *      Page 0         Pages 1 ... n        Pages (n+1) ...
 *   +------------+ +---+ +---+ ... +---+ +-----------+ +---------+ +--
 *   |            | |   | |   |     |   | |           | |         | |
 *   |+----------+| |+-----------------+| |+-------------------+ +-----
 *   |||ID Header|| ||  Comment Header || ||Audio Data Packet 1| | ...
 *   |+----------+| |+-----------------+| |+-------------------+ +-----
 *   |            | |   | |   |     |   | |           | |         | |
 *   +------------+ +---+ +---+ ... +---+ +-----------+ +---------+ +--
 *   ^      ^                            ^
 *   |      |                            |
 *   |      |                            Mandatory Page Break
 *   |      |
 *   |      ID header is contained on a single page
 *   |
 *   'Beginning Of Stream'
 */

// Magic signature that opens the Identification Header
const OPUS_MAGIC_SIGNATURE = 'OpusHead'

// Number of bytes parseHeaderPackets reads: signature (0-7), version (8),
// channel count (9), pre-skip (10-11), input sample rate (12-15), output gain (16-17)
const OPUS_ID_HEADER_MIN_SIZE = 18

/**
 * Helpers for Opus audio carried in a container.
 */
export class OPUS {
  /**
   * Returns the duration assigned to one Opus frame.
   *
   * The value is a fixed 20 and neither argument is consulted; both are kept
   * so the signature lines up with the other codec helpers.
   * NOTE(review): 20 is presumably milliseconds (a 1000 timescale) - confirm against callers.
   *
   * @param {Array} [samples] unused
   * @param {number} [timescale=1000] unused
   * @returns {number} always 20
   */
  static getFrameDuration (samples, timescale = 1000) {
    return 20
  }

  /**
   * Identification Header + Comment Header
   *
   * Parses the Identification Header found at the start of `data`. All
   * multi-byte fields are read little-endian.
   *
   * @param {Uint8Array} data header packet bytes, starting at the 'OpusHead' signature
   * @returns {{outputGain: number, sampleRate: number, channelCount: number, config: Uint8Array, codec: string, originCodec: string} | undefined}
   *   the parsed header, or undefined when `data` is empty or the header declares
   *   a sample rate of 0. `config` is a view (not a copy) over `data`'s buffer,
   *   covering everything after the 8-byte signature.
   * @throws {Error} when the signature is not 'OpusHead' or the header is truncated
   */
  static parseHeaderPackets (data) {
    if (!data.length) return

    // Compare byte by byte: a packet shorter than the signature fails here with
    // the same error as a wrong signature instead of an incidental RangeError.
    for (let i = 0; i < OPUS_MAGIC_SIGNATURE.length; i++) {
      if (data[i] !== OPUS_MAGIC_SIGNATURE.charCodeAt(i)) {
        throw new Error('Invalid Opus MagicSignature')
      }
    }

    // Make sure every field read below is actually present.
    if (data.length < OPUS_ID_HEADER_MIN_SIZE) {
      throw new Error('Invalid Opus Identification Header')
    }

    const dv = new DataView(data.buffer, data.byteOffset, data.byteLength)

    // data[8] is the version and data[10..11] the pre-skip; neither is needed
    // here, both stay available to consumers through `config`.
    const channelCount = data[9]
    const sampleRate = dv.getUint32(12, true)
    const outputGain = dv.getInt16(16, true)

    // play as no audio track stream
    if (!sampleRate) return

    const codec = 'opus'
    const originCodec = 'opus'
    const config = new Uint8Array(data.buffer, data.byteOffset + 8, data.byteLength - 8)

    return {
      outputGain,
      sampleRate,
      channelCount,
      config,
      codec,
      originCodec
    }
  }
}
|
@ -1,15 +1,15 @@
|
||||
import { AudioCodecType, AudioSample, WarningType } from '../model'
|
||||
import { AAC } from '../codec'
|
||||
import { AAC, OPUS } from '../codec'
|
||||
import { isSafari } from '../utils'
|
||||
|
||||
const LARGE_AV_FIRST_FRAME_GAP = 500 // ms
|
||||
const AUDIO_GAP_OVERLAP_THRESHOLD_COUNT = 3
|
||||
const MAX_SILENT_FRAME_DURATION = 1000 // ms
|
||||
const AUDIO_EXCETION_LOG_EMIT_DURATION = 5000 // 5s
|
||||
const AUDIO_EXCEPTION_LOG_EMIT_DURATION = 5000 // 5s
|
||||
const MAX_VIDEO_FRAME_DURATION = 1000 // ms
|
||||
const MAX_DTS_DELTA_WITH_NEXT_CHUNK = 200 // ms
|
||||
const VIDEO_EXCETION_LOG_EMIT_DURATION = 5000 // 5s
|
||||
const TRACK_BREACKED_CHECK_TIME = 5
|
||||
const VIDEO_EXCEPTION_LOG_EMIT_DURATION = 5000 // 5s
|
||||
const TRACK_BROKEN_CHECK_TIME = 5
|
||||
|
||||
export class FlvFixer {
|
||||
constructor (videoTrack, audioTrack, metadataTrack) {
|
||||
@ -152,7 +152,7 @@ export class FlvFixer {
|
||||
|
||||
if (Math.abs(vDelta) > MAX_DTS_DELTA_WITH_NEXT_CHUNK) {
|
||||
// emit large delta of first sample with expect
|
||||
if (Math.abs(firstSample.dts - this._lastVideoExceptionChunkFirstDtsDot) > VIDEO_EXCETION_LOG_EMIT_DURATION) {
|
||||
if (Math.abs(firstSample.dts - this._lastVideoExceptionChunkFirstDtsDot) > VIDEO_EXCEPTION_LOG_EMIT_DURATION) {
|
||||
this._lastVideoExceptionChunkFirstDtsDot = firstSample.dts
|
||||
|
||||
videoTrack.warnings.push({
|
||||
@ -164,8 +164,8 @@ export class FlvFixer {
|
||||
})
|
||||
}
|
||||
|
||||
// only video breaked
|
||||
if (this._videoTimestampBreak >= TRACK_BREACKED_CHECK_TIME) {
|
||||
// only video break
|
||||
if (this._videoTimestampBreak >= TRACK_BROKEN_CHECK_TIME) {
|
||||
this._videoNextDts = firstSample.dts
|
||||
this._videoTimestampBreak = 0
|
||||
} else {
|
||||
@ -192,8 +192,8 @@ export class FlvFixer {
|
||||
|
||||
if (sampleDuration > MAX_VIDEO_FRAME_DURATION || sampleDuration < 0) {
|
||||
this._videoTimestampBreak++
|
||||
// emit stream breaked
|
||||
if (Math.abs(dts - this._lastVideoExceptionLargeGapDot) > VIDEO_EXCETION_LOG_EMIT_DURATION) {
|
||||
// emit stream break
|
||||
if (Math.abs(dts - this._lastVideoExceptionLargeGapDot) > VIDEO_EXCEPTION_LOG_EMIT_DURATION) {
|
||||
this._lastVideoExceptionLargeGapDot = dts
|
||||
videoTrack.warnings.push({
|
||||
type: WarningType.LARGE_VIDEO_GAP,
|
||||
@ -266,7 +266,7 @@ export class FlvFixer {
|
||||
|
||||
_resetBaseDtsWhenStreamBreaked () {
|
||||
/**
|
||||
* timestamp breaked
|
||||
* timestamp break
|
||||
* _audioNextDts
|
||||
* ---------------------|
|
||||
* (_baseDts) _videoNextDts
|
||||
@ -297,13 +297,33 @@ export class FlvFixer {
|
||||
|
||||
_doFixAudioInternal (audioTrack, samples, timescale) {
|
||||
if (!audioTrack.sampleDuration) {
|
||||
audioTrack.sampleDuration = audioTrack.codecType === AudioCodecType.AAC
|
||||
? AAC.getFrameDuration(audioTrack.timescale, timescale)
|
||||
: this._getG711Duration(audioTrack)
|
||||
switch (audioTrack.codecType) {
|
||||
case AudioCodecType.AAC: {
|
||||
audioTrack.sampleDuration = AAC.getFrameDuration(audioTrack.timescale, timescale)
|
||||
break
|
||||
}
|
||||
case AudioCodecType.OPUS: {
|
||||
audioTrack.sampleDuration = OPUS.getFrameDuration(audioTrack.samples, timescale)
|
||||
break
|
||||
}
|
||||
case AudioCodecType.G711PCMA:
|
||||
case AudioCodecType.G711PCMU: {
|
||||
audioTrack.sampleDuration = this._getG711Duration(audioTrack)
|
||||
break
|
||||
}
|
||||
default:
|
||||
console.error('can\'t fix audio codecType:', audioTrack.codecType)
|
||||
break
|
||||
}
|
||||
}
|
||||
const refSampleDuration = audioTrack.sampleDuration
|
||||
|
||||
const sampleDurationInSampleRate = audioTrack.codecType === AudioCodecType.AAC ? 1024 : refSampleDuration * audioTrack.timescale / 1000
|
||||
const sampleDurationInSampleRate =
|
||||
audioTrack.codecType === AudioCodecType.OPUS
|
||||
? 20
|
||||
: audioTrack.codecType === AudioCodecType.AAC
|
||||
? 1024
|
||||
: (refSampleDuration * audioTrack.timescale) / 1000
|
||||
|
||||
if (this._audioNextPts === undefined) {
|
||||
const samp0 = samples[0]
|
||||
@ -315,8 +335,8 @@ export class FlvFixer {
|
||||
const sample = samples[i]
|
||||
let delta = sample.pts - nextPts
|
||||
|
||||
// only audio breaked
|
||||
if (i === 0 && this._audioTimestampBreak >= TRACK_BREACKED_CHECK_TIME && this._keyFrameInNextChunk) {
|
||||
// only audio break
|
||||
if (i === 0 && this._audioTimestampBreak >= TRACK_BROKEN_CHECK_TIME && this._keyFrameInNextChunk) {
|
||||
nextPts = this._audioNextPts = sample.dts
|
||||
delta = 0
|
||||
this._audioTimestampBreak = 0
|
||||
@ -329,7 +349,7 @@ export class FlvFixer {
|
||||
const silentFrame = this._getSilentFrame(audioTrack) || samples[0].data.subarray()
|
||||
const count = Math.floor(delta / refSampleDuration)
|
||||
|
||||
if (Math.abs(sample.pts - this._lastAudioExceptionGapDot) > AUDIO_EXCETION_LOG_EMIT_DURATION) {
|
||||
if (Math.abs(sample.pts - this._lastAudioExceptionGapDot) > AUDIO_EXCEPTION_LOG_EMIT_DURATION) {
|
||||
this._lastAudioExceptionGapDot = sample.pts
|
||||
audioTrack.warnings.push({
|
||||
type: WarningType.AUDIO_FILLED,
|
||||
@ -354,7 +374,7 @@ export class FlvFixer {
|
||||
// delta >= -500ms
|
||||
} else if (delta <= -AUDIO_GAP_OVERLAP_THRESHOLD_COUNT * refSampleDuration && delta >= -1 * MAX_SILENT_FRAME_DURATION) {
|
||||
// need discard frames
|
||||
if (Math.abs(sample.pts - this._lastAudioExceptionOverlapDot) > AUDIO_EXCETION_LOG_EMIT_DURATION) {
|
||||
if (Math.abs(sample.pts - this._lastAudioExceptionOverlapDot) > AUDIO_EXCEPTION_LOG_EMIT_DURATION) {
|
||||
this._lastAudioExceptionOverlapDot = sample.pts
|
||||
|
||||
audioTrack.warnings.push({
|
||||
@ -371,7 +391,7 @@ export class FlvFixer {
|
||||
if (Math.abs(delta) > MAX_SILENT_FRAME_DURATION) {
|
||||
this._audioTimestampBreak++
|
||||
|
||||
if (Math.abs(sample.pts - this._lastAudioExceptionLargeGapDot) > AUDIO_EXCETION_LOG_EMIT_DURATION) {
|
||||
if (Math.abs(sample.pts - this._lastAudioExceptionLargeGapDot) > AUDIO_EXCEPTION_LOG_EMIT_DURATION) {
|
||||
this._lastAudioExceptionLargeGapDot = sample.pts
|
||||
audioTrack.warnings.push({
|
||||
type: WarningType.LARGE_AUDIO_GAP,
|
||||
@ -385,8 +405,15 @@ export class FlvFixer {
|
||||
}
|
||||
}
|
||||
|
||||
sample.dts = sample.pts = nextPts
|
||||
sample.duration = sampleDurationInSampleRate
|
||||
if (audioTrack.codecType === AudioCodecType.OPUS) {
|
||||
const lastSample = samples[samples.length - 1]
|
||||
if (lastSample) {
|
||||
lastSample.duration = sample.pts - lastSample.pts
|
||||
}
|
||||
} else {
|
||||
sample.dts = sample.pts = nextPts
|
||||
sample.duration = sampleDurationInSampleRate
|
||||
}
|
||||
this._audioNextPts += refSampleDuration
|
||||
}
|
||||
}
|
||||
|
@ -3,6 +3,8 @@ import { FlvFixer } from './fixer'
|
||||
import { concatUint8Array, Logger, readBig32 } from '../utils'
|
||||
import { AAC, AVC, HEVC, NALu } from '../codec'
|
||||
import { AMF } from './amf'
|
||||
import { FlvSoundFormat } from './soundFormat'
|
||||
import { OPUS } from '../codec/opus'
|
||||
|
||||
const logger = new Logger('FlvDemuxer')
|
||||
|
||||
@ -132,7 +134,7 @@ export class FlvDemuxer {
|
||||
}
|
||||
|
||||
audioTrack.formatTimescale = videoTrack.formatTimescale = videoTrack.timescale = metadataTrack.timescale = 1000
|
||||
audioTrack.timescale = audioTrack.sampleRate || 0
|
||||
audioTrack.timescale = audioTrack.codecType === AudioCodecType.OPUS ? 1000 : audioTrack.sampleRate || 0
|
||||
|
||||
if (!audioTrack.exist() && audioTrack.hasSample()) {
|
||||
audioTrack.reset()
|
||||
@ -209,6 +211,11 @@ export class FlvDemuxer {
|
||||
return readBig32(data, 5) >= 9
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Uint8Array} data
|
||||
* @param {number} pts
|
||||
* @private
|
||||
*/
|
||||
_parseAudio (data, pts) {
|
||||
if (!data.length) return
|
||||
|
||||
@ -216,16 +223,17 @@ export class FlvDemuxer {
|
||||
const track = this.audioTrack
|
||||
|
||||
if (
|
||||
format !== 10 && // AAC
|
||||
format !== 7 && // G.711 A-law logarithmic PCM
|
||||
format !== 8 // G.711 mu-law logarithmic PCM
|
||||
format !== FlvSoundFormat.AAC &&
|
||||
format !== FlvSoundFormat.G711A &&
|
||||
format !== FlvSoundFormat.G711M &&
|
||||
format !== FlvSoundFormat.OPUS
|
||||
) {
|
||||
logger.warn(`Unsupported sound format: ${format}`)
|
||||
track.reset()
|
||||
return
|
||||
}
|
||||
|
||||
if (format !== 10) {
|
||||
if (format !== FlvSoundFormat.AAC && format !== FlvSoundFormat.OPUS) {
|
||||
const soundRate = (data[0] & 0x0c) >> 2
|
||||
const soundSize = (data[0] & 0x02) >> 1
|
||||
const soundType = (data[0] & 0x01)
|
||||
@ -235,18 +243,59 @@ export class FlvDemuxer {
|
||||
}
|
||||
|
||||
switch (format) {
|
||||
case 7 /* G.711 A-law logarithmic PCM */:
|
||||
case 8 /* G.711 mu-law logarithmic PCM */:
|
||||
case FlvSoundFormat.G711A:
|
||||
case FlvSoundFormat.G711M:
|
||||
this._parseG711(data, pts, format)
|
||||
break
|
||||
case 10 /* AAC */:
|
||||
case FlvSoundFormat.AAC:
|
||||
this._parseAac(data, pts)
|
||||
break
|
||||
case FlvSoundFormat.OPUS:
|
||||
this._parseOpus(data, pts)
|
||||
break
|
||||
default:
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Uint8Array} data
|
||||
* @param {number} pts
|
||||
* @private
|
||||
*/
|
||||
_parseOpus (data, pts) {
|
||||
const track = this.audioTrack
|
||||
const packetType = data[1]
|
||||
|
||||
track.codecType = AudioCodecType.OPUS
|
||||
|
||||
switch (packetType) {
|
||||
case 0 /* Header Packets */: {
|
||||
const ret = OPUS.parseHeaderPackets(data.subarray(2))
|
||||
if (ret) {
|
||||
track.codec = ret.codec
|
||||
track.channelCount = ret.channelCount
|
||||
track.sampleRate = ret.sampleRate
|
||||
track.config = ret.config
|
||||
track.sampleDuration = OPUS.getFrameDuration([], track.timescale)
|
||||
} else {
|
||||
track.reset()
|
||||
logger.warn('Cannot parse AudioSpecificConfig', data)
|
||||
}
|
||||
break
|
||||
}
|
||||
case 1 /* Raw OPUS frame data */: {
|
||||
if (pts === undefined || pts === null) return
|
||||
|
||||
const newSample = new AudioSample(pts, data.subarray(2), track.sampleDuration)
|
||||
track.samples.push(newSample)
|
||||
break
|
||||
}
|
||||
default:
|
||||
logger.warn(`Unknown OpusPacketType: ${packetType}`)
|
||||
}
|
||||
}
|
||||
|
||||
_parseG711 (data, pts, format) {
|
||||
const track = this.audioTrack
|
||||
track.codecType = format === 7 ? AudioCodecType.G711PCMA : AudioCodecType.G711PCMU
|
||||
|
12
packages/xgplayer-transmuxer/src/flv/soundFormat.js
Normal file
12
packages/xgplayer-transmuxer/src/flv/soundFormat.js
Normal file
@ -0,0 +1,12 @@
|
||||
/**
 * Numeric sound-format identifiers used to classify FLV audio data.
 * @enum {number}
 */
export const FlvSoundFormat = {
  // MPEG layer 3
  MP3: 2,
  // G.711 A-law logarithmic PCM
  G711A: 7,
  // G.711 mu-law logarithmic PCM
  G711M: 8,
  // AAC
  AAC: 10,
  // Opus
  OPUS: 13
}
|
||||
|
@ -80,7 +80,8 @@ export class AudioTrack {
|
||||
this.codec &&
|
||||
(this.codecType === AudioCodecType.AAC ||
|
||||
this.codecType === AudioCodecType.G711PCMA ||
|
||||
this.codecType === AudioCodecType.G711PCMU)
|
||||
this.codecType === AudioCodecType.G711PCMU ||
|
||||
this.codecType === AudioCodecType.OPUS)
|
||||
)
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,8 @@ export const VideoCodecType = {
|
||||
export const AudioCodecType = {
  // AAC
  AAC: 'aac',
  // G.711 PCM, A-law and mu-law variants
  G711PCMA: 'g7110a',
  G711PCMU: 'g7110m',
  // Opus
  OPUS: 'opus'
}
|
||||
|
||||
/** @enum {string} */
|
||||
|
@ -1,11 +1,13 @@
|
||||
import { TrackType, VideoCodecType } from '../model'
|
||||
import { concatUint8Array, parse /* hashVal */ } from '../utils'
|
||||
import { AudioCodecType, TrackType, VideoCodecType } from '../model'
|
||||
import { concatUint8Array, parse } from '../utils'
|
||||
import Buffer from './buffer'
|
||||
// import Crypto from './crypto/crypto'
|
||||
|
||||
const UINT32_MAX = 2 ** 32 - 1
|
||||
|
||||
export class MP4 {
|
||||
static types = [
|
||||
'Opus',
|
||||
'dOps',
|
||||
'av01',
|
||||
'av1C',
|
||||
'avc1',
|
||||
@ -343,13 +345,17 @@ export class MP4 {
|
||||
|
||||
static stsd (track) {
|
||||
let content
|
||||
|
||||
if (track.type === 'audio') {
|
||||
if (track.useEME && track.enca) {
|
||||
content = MP4.enca(track)
|
||||
// console.log('[remux],enca, len,', content.byteLength, track.type, hashVal(content.toString()))
|
||||
} else {
|
||||
content = MP4.mp4a(track)
|
||||
// console.log('[remux],mp4a, len,', content.byteLength, track.type, hashVal(content.toString()))
|
||||
if (track.codecType === AudioCodecType.OPUS) {
|
||||
content = MP4.opus(track)
|
||||
} else {
|
||||
content = MP4.mp4a(track)
|
||||
}
|
||||
}
|
||||
} else if (track.useEME && track.encv) {
|
||||
content = MP4.encv(track)
|
||||
@ -717,6 +723,62 @@ export class MP4 {
|
||||
return esds
|
||||
}
|
||||
|
||||
/**
|
||||
* https://opus-codec.org/docs/opus_in_isobmff.html
|
||||
*/
|
||||
static opus (track) {
|
||||
const opusAudioDescription = new Uint8Array([
|
||||
0x00, 0x00, 0x00, // version
|
||||
0x00, 0x00, 0x00, // reserved
|
||||
0x00, 0x01, // data_reference_index
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // reserved
|
||||
track.channelCount,
|
||||
0x00, 0x10, // sampleSize
|
||||
0x00, 0x00, // pre_defined
|
||||
0x00, 0x00, // reserved
|
||||
(track.sampleRate >> 8) & 0xff,
|
||||
track.sampleRate & 0xff, // sampleRate
|
||||
0x00, 0x00
|
||||
])
|
||||
|
||||
const opusSpecificConfig = track.config.length ? MP4.dOps(track) : []
|
||||
return MP4.box(MP4.types.Opus, opusAudioDescription, opusSpecificConfig)
|
||||
}
|
||||
|
||||
static dOps (track) {
|
||||
if (track.config) {
|
||||
track.config[4] = (track.sampleRate >>> 24) & 0xFF
|
||||
track.config[5] = (track.sampleRate >>> 16) & 0xFF
|
||||
track.config[6] = (track.sampleRate >>> 8) & 0xFF
|
||||
track.config[7] = (track.sampleRate) & 0xFF
|
||||
|
||||
return MP4.box(MP4.types.dOps, track.config)
|
||||
}
|
||||
|
||||
// const {channelCount, channelConfigCode, sampleRate } = track
|
||||
|
||||
// const mapping = channelConfigCode?.map((c) => {
|
||||
// return [
|
||||
// (c >>> 4) & 0xFF, // Channel mapping family
|
||||
// c & 0xFF // Channel mapping index
|
||||
// ]
|
||||
// }).flat() || []
|
||||
|
||||
// const data = new Uint8Array([
|
||||
// 0x00, // version
|
||||
// channelCount, // channelCount
|
||||
// 0x00, 0x00, // preSkip
|
||||
// (sampleRate >>> 24) & 0xFF,
|
||||
// (sampleRate >>> 17) & 0xFF,
|
||||
// (sampleRate >>> 8) & 0xFF,
|
||||
// (sampleRate >>> 0) & 0xFF,
|
||||
// 0x00, 0x00, // Global Gain
|
||||
// ... mapping
|
||||
// ])
|
||||
|
||||
// return data
|
||||
}
|
||||
|
||||
static mvex (tracks) {
|
||||
// const mehd = MP4.box(MP4.types.mehd, this.extension(0, 0), Buffer.writeUint32(tracks[0].tkhdDuration))
|
||||
// const mvex = MP4.box(MP4.types.mvex, mehd, MP4.trex1(1), MP4.trex2(2))
|
||||
|
Loading…
x
Reference in New Issue
Block a user