From 5fb194061032b91e8eab39a2d3771bb326831d75 Mon Sep 17 00:00:00 2001 From: "liujing.cyan" Date: Thu, 20 Jun 2024 22:07:59 +0800 Subject: [PATCH] =?UTF-8?q?feat(xgplayer-mp4=E3=80=81xgplayer-transmuxer):?= =?UTF-8?q?=20=E6=94=AF=E6=8C=81fmp4=20+=20av1=E8=A7=A3=E6=9E=90=E6=92=AD?= =?UTF-8?q?=E6=94=BE=E3=80=81seek=E7=AD=89=E8=83=BD=E5=8A=9B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/xgplayer-mp4-loader/src/loader.js | 59 +++++- packages/xgplayer-mp4-loader/src/utils.js | 170 +++++++++++++++++- packages/xgplayer-mp4/src/mp4.js | 33 +++- packages/xgplayer-mp4/src/mp4Plugin.js | 5 +- packages/xgplayer-mp4/src/util/download.js | 2 +- .../xgplayer-transmuxer/src/model/types.js | 1 + .../src/model/video-track.js | 3 + .../src/mp4/fmp4-demuxer.js | 154 +++++++++++++++- .../src/mp4/fmp4-remuxer.js | 54 ++++-- .../xgplayer-transmuxer/src/mp4/mp4-parser.js | 86 ++++++++- packages/xgplayer-transmuxer/src/mp4/mp4.js | 30 +++- .../src/utils/bit-reader.ts | 38 ++++ .../src/utils/byte-reader.ts | 4 +- 13 files changed, 593 insertions(+), 46 deletions(-) create mode 100644 packages/xgplayer-transmuxer/src/utils/bit-reader.ts diff --git a/packages/xgplayer-mp4-loader/src/loader.js b/packages/xgplayer-mp4-loader/src/loader.js index 14358b217..e3dcb29c2 100644 --- a/packages/xgplayer-mp4-loader/src/loader.js +++ b/packages/xgplayer-mp4-loader/src/loader.js @@ -3,7 +3,7 @@ import { MP4Parser } from 'xgplayer-transmuxer' import { getConfig } from './config' import { MediaError } from './error' import { Cache } from './cache' -import { isNumber, moovToMeta, moovToSegments } from './utils' +import { isNumber, moovToMeta, moovToSegments, sidxToSegments } from './utils' import EventEmitter from 'eventemitter3' export class MP4Loader extends EventEmitter { @@ -124,7 +124,42 @@ export class MP4Loader extends EventEmitter { // throw new MediaError('cannot parse moov box', moov.data) } - const segments = moovToSegments(parsedMoov, this._config.segmentDuration) + let segments = moovToSegments(parsedMoov, this._config.segmentDuration) + + // 当box存在但不完整时,补全box + const getCompletedBox = async (name) => { + const box = MP4Parser.findBox(this.buffer, [name])[0] + if (box) { + if (box.size > box.data.length) { + const res = await this.loadData([box.start, box.start + box.size - 1], cache, config) + if (res) { + return MP4Parser.findBox(res.data, [name])[0] + } + } else { + return box + } + } + } + + // 现在的分段式range加载逻辑不适用于fmp4,需要判断太多条件 + // 因为fmp4的samples信息存放在moof中,而解析moof的range需要依赖sidx + // 而sidx和moof的size都是动态的(且sidx不一定存在),导致每个环节都需要判断是否满足解析条件以及对应的兜底处理 + // todo: 后续加载逻辑需要改为【开区间range+主动取消】才能更好的处理fmp4 + let isFragmentMP4 = false + if (!(segments && segments.videoSegments.length && segments.audioSegments.length)) { + const sidx = await getCompletedBox('sidx') + if (sidx) { + const parsedSidx = MP4Parser.sidx(sidx) + if (parsedSidx) { + segments = sidxToSegments(parsedMoov, parsedSidx) + isFragmentMP4 = true + } + } else { + // 无 sidx box 场景,当前架构只能通过模拟加载完整的fmp4来解析出segments,这样会导致fetch加载的数据量特别大,loading时间长 + // 更倾向于使用【开区间range+主动取消】方案异步读取,todo + } + } + if (!segments) { this._error = true onProgress(null, state, options, {err:'cannot parse segments'}) @@ -132,7 +167,7 @@ export class MP4Loader extends EventEmitter { // throw new MediaError('cannot parse segments', moov.data) } - this.meta = moovToMeta(parsedMoov) + this.meta = moovToMeta(parsedMoov, isFragmentMP4) const { videoSegments, audioSegments } = segments this.videoSegments = videoSegments this.audioSegments = audioSegments @@ -182,12 +217,26 @@ export class MP4Loader extends EventEmitter { throw new MediaError('cannot parse moov box', moov.data) } - const segments = moovToSegments(parsedMoov, this._config.segmentDuration) + let segments = moovToSegments(parsedMoov, this._config.segmentDuration) if (!segments) { throw new MediaError('cannot parse segments', moov.data) } - this.meta = moovToMeta(parsedMoov) + let parsedSidx + if (!(segments.videoSegments.length && segments.audioSegments.length)) { + const moof = MP4Parser.findBox(this.buffer, ['moof'])[0] + const sidx = MP4Parser.findBox(this.buffer, ['sidx'])[0] + if (moof && moof.size <= moof.data.length && sidx) { + const parsedMoof = MP4Parser.moof(moof) + + parsedSidx = MP4Parser.sidx(sidx) + if (parsedMoof && parsedSidx) { + segments = sidxToSegments(parsedMoov, parsedSidx, parsedMoof) + } + } + } + + this.meta = moovToMeta(parsedMoov, parsedSidx) const { videoSegments, audioSegments } = segments this.videoSegments = videoSegments this.audioSegments = audioSegments diff --git a/packages/xgplayer-mp4-loader/src/utils.js b/packages/xgplayer-mp4-loader/src/utils.js index 3fe988a60..0e0ce8366 100644 --- a/packages/xgplayer-mp4-loader/src/utils.js +++ b/packages/xgplayer-mp4-loader/src/utils.js @@ -1,3 +1,161 @@ +const TFHDFlag = { + BASE_DATA_OFFSET: 1, + SAMPLE_DESC: 2, + SAMPLE_DUR: 8, + SAMPLE_SIZE: 16, + SAMPLE_FLAG: 32, + DUR_EMPTY: 65536, + DEFAULT_BASE_IS_MOOF: 131072 +} +const TRUNFlag = { + DATA_OFFSET: 1, + FIRST_FLAG: 4, + DURATION: 256, + SIZE: 512, + FLAG: 1024, + CTS_OFFSET: 2048 +} +const SampleFlag = { + DEGRADATION_PRIORITY_MASK: 65535, + IS_NON_SYNC: 65536, + PADDING_MASK: 917504, + REDUNDANCY_MASK: 3145728, + DEPENDED_MASK: 12582912, + DEPENDS_MASK: 50331648, + DEPENDS_NO: 33554432, + DEPENDS_YES: 16777216 +} + +export function trafToSegments (traf, trex = {}, moofOffset, segDuration, timescale) { + const { tfhd, trun, tfdt } = traf + const { samples: trunSamples, flags: trunFlags } = trun + const { flags: tfhdFlags } = tfhd + + // const defaultSampleDescriptionIndex = tfhdFlags & TFHDFlag.SAMPLE_DESC ? tfhd.sampleDescriptionIndex : (trex.defaultSampleDescriptionIndex || 1) + const defaultSampleDuration = tfhdFlags & TFHDFlag.SAMPLE_DUR ? tfhd.defaultSampleDuration : (trex.defaultSampleDuration || 0) + const defaultSampleSize = tfhdFlags & TFHDFlag.SAMPLE_SIZE ? tfhd.defaultSampleSize : (trex.defaultSampleSize || 0) + const defaultSampleFlags = tfhdFlags & TFHDFlag.SAMPLE_FLAG ? tfhd.defaultSampleFlags : (trex.defaultSampleFlags || 0) + const startOffset = tfhdFlags & TFHDFlag.BASE_DATA_OFFSET ? tfhd.baseDataOffset : (tfhdFlags & TFHDFlag.DEFAULT_BASE_IS_MOOF ? moofOffset : 0) + + const frames = [] + const gops = [] + + for (let lastDts = 0, startTime = 0, gopId = 0, totalOffset = startOffset, i = 0; i < trunSamples.length; i++) { + const frame = {} + frame.index = i + frame.size = trunFlags & TRUNFlag.SIZE ? trunSamples[i].size : defaultSampleSize + frame.duration = trunFlags & TRUNFlag.DURATION ? trunSamples[i].duration : defaultSampleDuration + frame.dts = lastDts > 0 ? lastDts : (tfdt ? tfdt.baseMediaDecodeTime : 0) + frame.startTime = startTime + if (trunFlags & TRUNFlag.CTS_OFFSET) { + frame.pts = frame.dts + trunSamples[i].cts + } else { + frame.pts = frame.dts + } + lastDts = frame.dts + frame.duration + startTime += frame.duration + + let sampleFlags = defaultSampleFlags + if (trunFlags & TRUNFlag.FLAG) { + sampleFlags = trunSamples[i].flags + } else if (0 === i && trunFlags & TRUNFlag.FIRST_FLAG) { + sampleFlags = trun.firstSampleFlag + } + frame.offset = totalOffset + frame.keyframe = !(sampleFlags & (SampleFlag.IS_NON_SYNC | SampleFlag.DEPENDS_YES)) + totalOffset += frame.size + frames.push(frame) + if (frame.keyframe) { + gopId++ + gops.push([frame]) + } else if (gops.length) { + gops[gops.length - 1].push(frame) + } + frame.gopId = gopId + } + + const len = frames.length + if (!len || (!frames[0].keyframe)) return [] + + let time = 0 + let lastFrame + const segments = [] + const scaledDuration = segDuration * timescale + let segmentFrames = [] + + // 合并gop至segments,以segDuration作为参考 + for (let i = 0, len = gops.length; i < len; i++) { + time += gops[i].reduce((wret, w) => wret + w.duration, 0) + segmentFrames = segmentFrames.concat(gops[i]) + + if (time >= scaledDuration || i === gops.length - 1) { + lastFrame = segmentFrames[segmentFrames.length - 1] + segments.push({ + index: segments.length, + startTime: (segments[segments.length - 1]?.endTime || segmentFrames[0].startTime / timescale), + endTime: (lastFrame.startTime + lastFrame.duration) / timescale, + duration: time / timescale, + range: [segmentFrames[0].offset, lastFrame.offset + lastFrame.size], + frames: segmentFrames + }) + time = 0 + segmentFrames = [] + } + } + + return segments +} + +export function sidxToSegments (moov, sidx) { + const tracks = moov.trak + if (!tracks || !tracks.length) return + const videoTrack = tracks.find(t => t.mdia?.hdlr?.handlerType === 'vide') + const audioTrack = tracks.find(t => t.mdia?.hdlr?.handlerType === 'soun') + if (!videoTrack && !audioTrack) return + + let audioSegments = [] + let videoSegments = [] + if (sidx) { + const segments = [] + let prevTime = 0 + let prevOffset = sidx.start + sidx.size + sidx.references.forEach((ref, i) => { + segments.push({ + index: i, + startTime: prevTime, + endTime: prevTime + (ref.subsegment_duration / sidx.timescale), + duration: ref.subsegment_duration / sidx.timescale, + range: [prevOffset, prevOffset + ref.referenced_size], + frames: [] + }) + prevTime += ref.subsegment_duration / sidx.timescale + prevOffset += ref.referenced_size + }) + audioSegments = segments + videoSegments = segments + } else { + // 如果sidx不存在,则代表后续的segments无法通过seek读取 + // 把整段fmp4当作一个segment,使用开区间range即可 + const getTrakSegments = (box) => { + if (!box) return [] + return [{ + index: 0, + startTime: 0, + endTime: box.duration / box.timescale, + duration: box.duration / box.timescale, + range: [moov.start + moov.size, ''], + frames: [] + }] + } + videoSegments = getTrakSegments(moov.mvhd.duration ? moov.mvhd : videoTrack.mdia?.mdhd) + audioSegments = getTrakSegments(moov.mvhd.duration ? moov.mvhd : audioTrack.mdia?.mdhd) + } + + return { + videoSegments, + audioSegments + } +} export function moovToSegments (moov, duration) { const tracks = moov.trak @@ -65,7 +223,7 @@ function getSegments (segDuration, timescale, stts, stsc, stsz, stco, stss, ctts let chunkIndex = 0 let chunkRunIndex = 0 let offsetInChunk = 0 - let lastSampleInChunk = stscEntries[0].samplesPerChunk + let lastSampleInChunk = stscEntries[0]?.samplesPerChunk let lastChunkInRun = stscEntries[1] ? stscEntries[1].firstChunk - 1 : Infinity let dts = 0 let gopId = -1 @@ -118,7 +276,7 @@ function getSegments (segDuration, timescale, stts, stsc, stsz, stco, stss, ctts }) const l = frames.length - if (!l || (stss && !frames[0].keyframe)) return + if (!l || (stss && !frames[0].keyframe)) return [] const segments = [] let segFrames = [] @@ -166,11 +324,10 @@ function getSegments (segDuration, timescale, stts, stsc, stsz, stco, stss, ctts } } } - return segments } -export function moovToMeta (moov) { +export function moovToMeta (moov, isFragmentMP4) { let videoCodec = '' let audioCodec = '' let width = 0 @@ -197,7 +354,7 @@ export function moovToMeta (moov) { width = e1.width height = e1.height videoTimescale = videoTrack.mdia?.mdhd?.timescale - videoCodec = (e1.avcC || e1.hvcC)?.codec + videoCodec = (e1.avcC || e1.hvcC || e1.av1C)?.codec if (e1.type === 'encv') { defaultKID = e1.sinf?.schi?.tenc.default_KID } @@ -226,7 +383,8 @@ export function moovToMeta (moov) { audioSampleRate, duration, audioTimescale, - moov + moov, + isFragmentMP4 } } } diff --git a/packages/xgplayer-mp4/src/mp4.js b/packages/xgplayer-mp4/src/mp4.js index 72b48d124..4d9b5a6e7 100644 --- a/packages/xgplayer-mp4/src/mp4.js +++ b/packages/xgplayer-mp4/src/mp4.js @@ -1,6 +1,6 @@ import EventEmitter from 'eventemitter3' import Concat from 'concat-typed-array' -import { MP4Demuxer, FMP4Remuxer } from 'xgplayer-transmuxer' +import { MP4Demuxer, FMP4Demuxer, FMP4Remuxer } from 'xgplayer-transmuxer' import { ERROR_CODES, NetWorkError, ParserError, ERROR_TYPES } from './error' import util from './util' import MP4Loader from 'xgplayer-mp4-loader' @@ -68,6 +68,7 @@ class MP4 extends EventEmitter { ...options.reqOptions, openLog: checkOpenLog() }) + this.fMP4Demuxer = null this.MP4Demuxer = null this.FMP4Remuxer = null this._needInitSegment = true @@ -348,6 +349,10 @@ class MP4 extends EventEmitter { this.log('>>>>>getSubRange time,',time, JSON.stringify(range)) if (this.videoTrak) { const videoSeg = fragIndex < this.videoTrak.length ? this.videoTrak[fragIndex] : this.videoTrak[this.videoTrak.length - 1] + if (videoSeg.frames.length === 0) { + this.log('>>>>>getSubRange video, no frames') + return range + } const keyFrameList = videoSeg.frames.filter(getKeyFrameList) const videoTimescale = this.meta.videoTimescale let startTime = keyFrameList[0].startTime / videoTimescale @@ -373,6 +378,10 @@ class MP4 extends EventEmitter { i = 1 if (this.audioTrak) { const audioSeg = fragIndex < this.audioTrak.length ? this.audioTrak[fragIndex] : this.audioTrak[this.audioTrak.length - 1] + if (audioSeg.frames.length === 0) { + this.log('>>>>>getSubRange video, no frames') + return range + } const frameList = audioSeg.frames const audioTimescale = this.meta.audioTimescale i = Math.floor((time * audioTimescale - frameList[0].startTime) / audioSeg.frames[0].duration) @@ -421,7 +430,7 @@ class MP4 extends EventEmitter { const videoIndexRange = this.getSamplesRange(fragIndex, 'video') const audioIndexRange = this.getSamplesRange(fragIndex, 'audio') const range = [start, start + buffer.byteLength] - if (this.transmuxerWorkerControl) { + if (this.transmuxerWorkerControl && !this.meta.isFragmentMP4) { // todo: fmp4 demux worker const context = { range, state, @@ -431,12 +440,20 @@ class MP4 extends EventEmitter { this.transmuxerWorkerControl.transmux(this.workerSequence, buffer, start, videoIndexRange, audioIndexRange, this.meta.moov, this.useEME, this.kidValue, context) } else { try { - if (!this.MP4Demuxer) { - this.MP4Demuxer = new MP4Demuxer(this.videoTrak, this.audioTrak, null,{openLog: checkOpenLog()}) + let demuxRet + if (this.meta.isFragmentMP4) { + if (!this.fMP4Demuxer) { + this.fMP4Demuxer = new FMP4Demuxer() + } + demuxRet = this.fMP4Demuxer.demuxPart(buffer, start, this.meta.moov) + } else { + if (!this.MP4Demuxer) { + this.MP4Demuxer = new MP4Demuxer(this.videoTrak, this.audioTrak, null,{openLog: checkOpenLog()}) + } + demuxRet = this.MP4Demuxer.demuxPart(buffer, start, videoIndexRange, audioIndexRange, this.meta.moov, this.useEME, this.kidValue) } - const demuxRet = this.MP4Demuxer.demuxPart(buffer, start, videoIndexRange, audioIndexRange, this.meta.moov, this.useEME, this.kidValue) if (!this.FMP4Remuxer && (!this.checkCodecH265() || this.options.supportHevc)) { - this.FMP4Remuxer = new FMP4Remuxer(this.MP4Demuxer.videoTrack, this.MP4Demuxer.audioTrack, {openLog: checkOpenLog()}) + this.FMP4Remuxer = new FMP4Remuxer(demuxRet.videoTrack, demuxRet.audioTrack, {openLog: checkOpenLog()}) } let res this.log('[mux], videoTimeRange,',demuxRet.videoTrack ? [demuxRet.videoTrack.startPts, demuxRet.videoTrack.endPts] : null, ',audioTimeRange,',demuxRet.audioTrack ? [demuxRet.audioTrack.startPts, demuxRet.audioTrack.endPts] : null) @@ -481,8 +498,9 @@ class MP4 extends EventEmitter { const range = [] switch (type) { case 'video': - if (this.videoTrak && fragmentIdx < this.videoTrak.length ) { + if (this.videoTrak && fragmentIdx < this.videoTrak.length) { const frames = this.videoTrak[fragmentIdx].frames + if (!frames.length) break range.push(frames[0].index) range.push(frames[frames.length - 1].index) } @@ -490,6 +508,7 @@ class MP4 extends EventEmitter { case 'audio': if (this.audioTrak && fragmentIdx < this.audioTrak.length ) { const frames = this.audioTrak[fragmentIdx].frames + if (!frames.length) break range.push(frames[0].index) range.push(frames[frames.length - 1].index) } diff --git a/packages/xgplayer-mp4/src/mp4Plugin.js b/packages/xgplayer-mp4/src/mp4Plugin.js index 4f06b4594..56120fa3d 100644 --- a/packages/xgplayer-mp4/src/mp4Plugin.js +++ b/packages/xgplayer-mp4/src/mp4Plugin.js @@ -368,15 +368,14 @@ export default class Mp4Plugin extends BasePlugin { await this.mse.unbindMedia() this.mse = null } - const isHvc = this.mp4 && this.mp4.checkCodecH265() const hasVideo = !!meta.videoCodec const hasAudio = !!meta.audioCodec let codec if (hasVideo && hasAudio) { - codec = isHvc ? 'video/mp4; codecs="hev1.1.6.L93.B0, mp4a.40.5"' : 'video/mp4; codecs="avc1.64001E, mp4a.40.5"' + codec = `video/mp4; codecs="${meta.videoCodec}, mp4a.40.5"` } else if (hasVideo) { - codec = isHvc ? 'video/mp4; codecs="hev1.1.6.L93.B0"' : 'video/mp4; codecs="avc1.64001E"' + codec = `video/mp4; codecs="${meta.videoCodec}"` } else { codec = 'video/mp4; codecs="mp4a.40.5"' } diff --git a/packages/xgplayer-mp4/src/util/download.js b/packages/xgplayer-mp4/src/util/download.js index c8dcb46de..b4dcacee5 100644 --- a/packages/xgplayer-mp4/src/util/download.js +++ b/packages/xgplayer-mp4/src/util/download.js @@ -1,7 +1,7 @@ class Download { constructor (filename, content) { const aLink = document.createElement('a') - const blob = new Blob([content]) + const blob = new Blob(content) const evt = document.createEvent('MouseEvents') evt.initEvent('click', false, false) aLink.download = filename diff --git a/packages/xgplayer-transmuxer/src/model/types.js b/packages/xgplayer-transmuxer/src/model/types.js index 48492f2d1..18991cb80 100644 --- a/packages/xgplayer-transmuxer/src/model/types.js +++ b/packages/xgplayer-transmuxer/src/model/types.js @@ -7,6 +7,7 @@ export const TrackType = { /** @enum {string} */ export const VideoCodecType = { + AV1: 'av1', AVC: 'avc', HEVC: 'hevc' } diff --git a/packages/xgplayer-transmuxer/src/model/video-track.js b/packages/xgplayer-transmuxer/src/model/video-track.js index edfb55680..ac5acd2da 100644 --- a/packages/xgplayer-transmuxer/src/model/video-track.js +++ b/packages/xgplayer-transmuxer/src/model/video-track.js @@ -111,6 +111,9 @@ export class VideoTrack { * @returns {boolean} */ exist () { + if (/av01/.test(this.codec)) { + return true + } return !!(this.pps.length && this.sps.length && this.codec) } diff --git a/packages/xgplayer-transmuxer/src/mp4/fmp4-demuxer.js b/packages/xgplayer-transmuxer/src/mp4/fmp4-demuxer.js index 15921b45a..75753bc9e 100644 --- a/packages/xgplayer-transmuxer/src/mp4/fmp4-demuxer.js +++ b/packages/xgplayer-transmuxer/src/mp4/fmp4-demuxer.js @@ -1,14 +1,166 @@ import { VideoTrack, AudioTrack, MetadataTrack, VideoSample, AudioSample } from '../model' -import { readBig32 } from '../utils' +import { concatUint8Array, readBig32 } from '../utils' import { MP4Parser } from './mp4-parser' export class FMP4Demuxer { + __loadedMoofWraps = [] + __lastRemainData = null + __lastRemainDataStart = 0 + __nextMoofStart = -1 + + constructor (videoTrack, audioTrack, metadataTrack) { this.videoTrack = videoTrack || new VideoTrack() this.audioTrack = audioTrack || new AudioTrack() this.metadataTrack = metadataTrack || new MetadataTrack() } + demuxPart (partData, partDataStart, moov) { + const { videoTrack, audioTrack } = this + const videoExist = videoTrack.exist() + const audioExist = audioTrack.exist() + + const isAV01 = /av01/.test(videoTrack.codec) + videoTrack.samples = [] + audioTrack.samples = [] + + let data = partData + let dataStart = partDataStart + if (this.__lastRemainData) { + const lastRemainDataEnd = this.__lastRemainDataStart + this.__lastRemainData.byteLength + // 如果遗留数据和新数据之间存在非包含关系的重叠,则需要合并,否则丢弃遗留数据 + // 如果遗留数据和新数据之间不存在重叠,需要丢弃遗留数据,保证后续的新数据可以连续的解析 + const continuous = partDataStart <= lastRemainDataEnd && partDataStart > this.__lastRemainDataStart && partDataStart + partData.byteLength > lastRemainDataEnd + if (continuous) { + // data = 遗留数据 + 新数据,为了程序健壮性,即可能存在重复的range,需要subarray规避 + const noDuplicateData = partData.subarray(this.__lastRemainData.byteLength + this.__lastRemainDataStart - partDataStart) + data = concatUint8Array(this.__lastRemainData, noDuplicateData) + dataStart = this.__lastRemainDataStart + this.__lastRemainData = null + } else { + this.__lastRemainData = null + this.__lastRemainDataStart = 0 + this.__nextMoofStart = -1 + } + } + + if (!moov) { + const moovBox = MP4Parser.findBox(data, ['moov'])[0] + if (!moovBox) throw new Error('cannot found moov box') + moov = MP4Parser.moov(moovBox) + } + + if (data) { + const dataEnd = dataStart + data.byteLength + if (!videoExist && !audioExist) { + MP4Parser.moovToTrack(moov, videoTrack, audioTrack) + } + // findBox要求参数由box header作为起始,故需要定位到moof的起始点,通过前一个moof计算出下一个moof的start位置,并保证解析的内容大于header的长度(8) + const moofBoxes = [] + if (this.__nextMoofStart < 0) { + MP4Parser.findBox(data, ['moof'], dataStart).forEach(v => moofBoxes.push(v)) + } else if (this.__nextMoofStart >= dataStart && this.__nextMoofStart <= dataEnd - 8) { + MP4Parser.findBox(data.subarray(this.__nextMoofStart - dataStart), ['moof'], this.__nextMoofStart).forEach(v => moofBoxes.push(v)) + } + moofBoxes.filter(moofBox => moofBox.size <= moofBox.data.length).forEach(moofBox => { + const moof = MP4Parser.moof(moofBox) + // 通过(trun.dataOffset + all samples的合并值)最大值计算得到下一个moof.start,也可以通过mdat box获取,此处为前者逻辑 + this.__nextMoofStart = moof.start + Math.max(...moof.traf.map(v => v.trun.samples.reduce((ret, w) => ret + w.size, v.trun.dataOffset || 0))) + this.__loadedMoofWraps.push({ + start: moof.start, + nextMoofStart: this.__nextMoofStart, + moof + }) + this.__loadedMoofWraps.sort((p, n) => p.start - n.start) // 排序,兼容seek + }) + + for (const moofWrap of this.__loadedMoofWraps) { + // 跳过不需要解析的moof,减少非必要的损耗 + if (moofWrap.start > dataEnd || moofWrap.nextMoofStart < dataStart) { + continue + } + const moofStart = moofWrap.start + const tracks = MP4Parser.moofToSamples(moofWrap.moof, videoTrack, audioTrack) + + const videoBaseMediaDecodeTime = videoTrack.baseMediaDecodeTime + const audioBaseMediaDecodeTime = audioTrack.baseMediaDecodeTime + let nalSize + Object.keys(tracks).forEach(k => { + // eslint-disable-next-line + if (videoTrack.id == k) { + tracks[k] + .some(x => { + const xStart = x.offset += moofStart + if (xStart < dataStart) { + return + } + // 如果当前sample已经超过了数据范围,则中断遍历 + if (xStart + x.size > dataEnd) { + return true + } + const sample = new VideoSample((x.pts || x.dts) + videoBaseMediaDecodeTime, x.dts + videoBaseMediaDecodeTime) + sample.duration = x.duration + sample.gopId = x.gopId + if (x.keyframe) sample.setToKeyframe() + const sampleData = data.subarray(xStart - dataStart, xStart - dataStart + x.size) + sample.data = sampleData + if (!isAV01) { // av1编码的数据不需要解析nals + let start = 0 + const len = sampleData.length - 1 + while (start < len) { + nalSize = readBig32(sampleData, start) + start += 4 + sample.units.push(sampleData.subarray(start, start + nalSize)) + start += nalSize + } + } + this.__lastRemainDataStart = xStart + x.size + videoTrack.samples.push(sample) + }) + // eslint-disable-next-line eqeqeq + } else if (audioTrack.id == k) { + tracks[k] + .some(x => { + const xStart = x.offset + moofStart + if (xStart < dataStart) { + return + } + if (xStart + x.size > dataEnd) { + return true + } + const sampleData = data.subarray(xStart - dataStart, xStart - dataStart + x.size) + audioTrack.samples.push(new AudioSample(x.dts + audioBaseMediaDecodeTime, sampleData, x.duration)) + this.__lastRemainDataStart = xStart + x.size + }) + } + }) + } + } + + // 保存未解析的数据,下次解析时合并使用 + if (this.__lastRemainDataStart > dataStart && this.__lastRemainDataStart < data.byteLength + dataStart) { + this.__lastRemainData = data.subarray(this.__lastRemainDataStart - dataStart) + } else { + this.__lastRemainData = data + this.__lastRemainDataStart = dataStart + } + + // fmp4中sample.pts是通过tfdt的baseMediaDecodeTime + sample.duration计算得出的 + // 所以此处sample的pts是多少不重要,反而需要确保demuxPart中baseMediaDecodeTime的值为起始帧的pts,才能保证remux后解析的结果与原始数据一致 + if (videoTrack.samples.length) { + videoTrack.baseMediaDecodeTime = videoTrack.samples[0].pts + } + if (audioTrack.samples.length) { + audioTrack.baseMediaDecodeTime = audioTrack.samples[0].pts + } + + return { + videoTrack, + audioTrack, + metadataTrack: this.metadataTrack + } + } + demux (videoData, audioData) { const { videoTrack, audioTrack } = this const videoExist = videoTrack.exist() diff --git a/packages/xgplayer-transmuxer/src/mp4/fmp4-remuxer.js b/packages/xgplayer-transmuxer/src/mp4/fmp4-remuxer.js index 5afee24e7..eac814527 100644 --- a/packages/xgplayer-transmuxer/src/mp4/fmp4-remuxer.js +++ b/packages/xgplayer-transmuxer/src/mp4/fmp4-remuxer.js @@ -82,29 +82,47 @@ export class FMP4Remuxer { track.samples[0].flag = { dependsOn: 2, isNonSyncSample: 0 } } const samples = track.samples + const isAV01 = /av01/.test(track.codec) let mdatSize = 0 - samples.forEach((s) => { - mdatSize += s.units.reduce((t, c) => (t + c.byteLength), 0) - mdatSize += (s.units.length * 4) - }) + if (isAV01) { + samples.forEach((s) => { + mdatSize += s.data.byteLength + }) + } else { + samples.forEach((s) => { + mdatSize += s.units.reduce((t, c) => (t + c.byteLength), 0) + mdatSize += (s.units.length * 4) + }) + } const mdata = new Uint8Array(mdatSize) - const mdatView = new DataView(mdata.buffer) - - for (let i = 0, l = samples.length, offset = 0, sample; i < l; i++) { - sample = samples[i] - - let sampleSize = 0 - sample.units.forEach((u) => { - mdatView.setUint32(offset, u.byteLength) - offset += 4 - mdata.set(u, offset) - offset += u.byteLength - sampleSize += (4 + u.byteLength) - }) - sample.size = sampleSize + + // av1没有uints,直接写入data即可 + // todo: H.265/H.264为什么要拼接nals/uints而不直接用data? + if (isAV01) { + for (let i = 0, l = samples.length, offset = 0, sample; i < l; i++) { + sample = samples[i] + mdata.set(sample.data, offset) + sample.size = sample.data.byteLength + offset += sample.size + } + } else { + const mdatView = new DataView(mdata.buffer) + for (let i = 0, l = samples.length, offset = 0, sample; i < l; i++) { + sample = samples[i] + + let sampleSize = 0 + sample.units.forEach((u) => { + mdatView.setUint32(offset, u.byteLength) + offset += 4 + mdata.set(u, offset) + offset += u.byteLength + sampleSize += (4 + u.byteLength) + }) + sample.size = sampleSize + } } const mdat = MP4.mdat(mdata) diff --git a/packages/xgplayer-transmuxer/src/mp4/mp4-parser.js b/packages/xgplayer-transmuxer/src/mp4/mp4-parser.js index 3dea55dc9..b177bc5e1 100644 --- a/packages/xgplayer-transmuxer/src/mp4/mp4-parser.js +++ b/packages/xgplayer-transmuxer/src/mp4/mp4-parser.js @@ -2,6 +2,7 @@ import { AudioCodecType, VideoCodecType } from '../model' import { getAvcCodec, readBig16, readBig24, readBig32, readBig64, combineToFloat, toDegree } from '../utils' import { AAC } from '../codec' import { ByteReader } from '../utils/byte-reader' +import { BitReader } from '../utils/bit-reader' export class MP4Parser { static findBox (data, names, start = 0) { const ret = [] @@ -318,6 +319,8 @@ export class MP4Parser { ret.entryCount = readBig32(data) ret.entries = MP4Parser.findBox(data.subarray(4), [], start + 4).map(b => { switch (b.type) { + case 'av01': + return MP4Parser.av01(b) case 'avc1': case 'avc2': case 'avc3': @@ -397,6 +400,78 @@ export class MP4Parser { }) } + static colr (box) { + return parseBox(box, false, (ret, data) => { + const byte = ByteReader.fromUint8(data) + ret.data = box.data + ret.colorType = byte.readString(4) + // Array.from(data.subarray(0, 4)).map(v => String.fromCharCode(v)).join('') + if (ret.colorType === 'nclx') { + ret.colorPrimaries = byte.read(2) + ret.transferCharacteristics = byte.read(2) + ret.matrixCoefficients = byte.read(2) + ret.fullRangeFlag = byte.read(1) >> 7 + } else if (ret.colorType === 'rICC' || ret.colorType === 'prof') { + ret.iccProfile = data.readToUint8() + } + }) + } + + static av01 (box) { + return parseBox(box, false, (ret, data, start) => { + const bodyStart = parseVisualSampleEntry(ret, data) + const bodyData = data.subarray(bodyStart) + start += bodyStart + ret.av1C = MP4Parser.av1C(MP4Parser.findBox(bodyData, ['av1C'], start)[0]) + ret.colr = MP4Parser.colr(MP4Parser.findBox(bodyData, ['colr'], start)[0]) + }) + } + + static av1C (box) { + return parseBox(box, false, (ret, data) => { + ret.data = box.data + + const byte = ByteReader.fromUint8(data) + const bit = BitReader.fromByte(byte, 4) + + ret.marker = bit.read(1) + ret.version = bit.read(7) + + ret.seqProfile = bit.read(3) + ret.seqLevelIdx0 = bit.read(5) + + ret.seqTier0 = bit.read(1) + ret.highBitdepth = bit.read(1) + ret.twelveBit = bit.read(1) + ret.monochrome = bit.read(1) + ret.chromaSubsamplingX = bit.read(1) + ret.chromaSubsamplingY = bit.read(1) + ret.chromaSamplePosition = bit.read(2) + ret.reserved = bit.read(3) + ret.initialPresentationDelayPresent = bit.read(1) + + if (ret.initialPresentationDelayPresent) { + ret.initialPresentationDelayMinusOne = bit.read(4) + } else { + ret.initialPresentationDelayMinusOne = 0 + } + ret.configOBUs = byte.readToUint8() + + let bitdepth + if (ret.seqLevelIdx0 === 2 && ret.highBitdepth === 1) { + bitdepth = ret.twelveBit === 1 ? '12' : '10' + } else if (ret.seqProfile <= 2) { + bitdepth = ret.highBitdepth === 1 ? '10' : '08' + } + ret.codec = [ + 'av01', + ret.seqProfile, + (ret.seqLevelIdx0 < 10 ? '0' + ret.seqLevelIdx0 : ret.seqLevelIdx0) + (ret.seqTier0 ? 'H' : 'M'), + bitdepth + ].join('.') + }) + } + static avc1 (box) { return parseBox(box, false, (ret, data, start) => { const bodyStart = parseVisualSampleEntry(ret, data) @@ -794,7 +869,12 @@ export class MP4Parser { if (e1.pasp) { v.sarRatio = [e1.pasp.hSpacing, e1.pasp.vSpacing] } - if (e1.hvcC) { + if (e1.av1C) { + v.codecType = VideoCodecType.AV1 + v.codec = e1.av1C.codec + v.av1C = e1.av1C.data + v.colr = e1.colr.data + } else if (e1.hvcC) { v.codecType = VideoCodecType.HEVC v.codec = e1.hvcC.codec v.vps = e1.hvcC.vps @@ -1103,7 +1183,9 @@ function parseAudioSampleEntry (ret, data) { function parseBox (box, isFullBox, parse) { if (!box) return - if (box.size !== box.data.length) throw new Error(`box ${box.type} size !== data.length`) + if (box.size !== box.data.length) { + throw new Error(`box ${box.type} size !== data.length`) + } const ret = { start: box.start, size: box.size, diff --git a/packages/xgplayer-transmuxer/src/mp4/mp4.js b/packages/xgplayer-transmuxer/src/mp4/mp4.js index 89e6780ff..d827ab288 100644 --- a/packages/xgplayer-transmuxer/src/mp4/mp4.js +++ b/packages/xgplayer-transmuxer/src/mp4/mp4.js @@ -6,6 +6,8 @@ const UINT32_MAX = 2 ** 32 - 1 export class MP4 { static types = [ + 'av01', + 'av1C', 'avc1', 'avcC', 'hvc1', @@ -352,6 +354,8 @@ export class MP4 { } else if (track.useEME && track.encv) { content = MP4.encv(track) // console.log('[remux],encv, len,', content.byteLength, track.type, hashVal(content.toString())) + } else if (track.av1C) { + content = MP4.av01(track) } else { content = MP4.avc1hev1(track) // console.log('[remux],avc1hev1, len,', content.byteLength, track.type, hashVal(content.toString())) @@ -492,7 +496,31 @@ export class MP4 { const schi = MP4.schi(data) return MP4.box(MP4.types.sinf, content, MP4.box(MP4.types.frma, frma), MP4.box(MP4.types.schm, schm), schi) } - + static av01 (track) { + return MP4.box(MP4.types.av01, new Uint8Array([ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // data_reference_index + 0x00, 0x00, // pre_defined + 0x00, 0x00, // reserved + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // pre_defined + (track.width >> 8) & 0xff, track.width & 0xff, // width + (track.height >> 8) & 0xff, track.height & 0xff, // height + 0x00, 0x48, 0x00, 0x00, // horizresolution + 0x00, 0x48, 0x00, 0x00, // vertresolution + 0x00, 0x00, 0x00, 0x00, // reserved + 0x00, 0x01, // frame_count + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, // compressor name + 0x00, 0x18, // depth + 0x11, 0x11 // pre_defined = -1 //todo + ]), track.av1C, track.colr) + } static avc1hev1 (track) { const isHevc = track.codecType === VideoCodecType.HEVC const typ = isHevc ? MP4.types.hvc1 : MP4.types.avc1 diff --git a/packages/xgplayer-transmuxer/src/utils/bit-reader.ts b/packages/xgplayer-transmuxer/src/utils/bit-reader.ts new file mode 100644 index 000000000..1f962449f --- /dev/null +++ b/packages/xgplayer-transmuxer/src/utils/bit-reader.ts @@ -0,0 +1,38 @@ +import { ByteReader } from './byte-reader' + +export class BitReader { + private val: number + size: number + offset = 0 + constructor(val: number, size: number) { + this.val = val + this.size = size + } + static fromByte(byte: ByteReader, len: number) { + return new BitReader(byte.read(len), len << 3) + } + skip(len: number) { + this.offset += len + } + read(len: number) { + const unreadLength = this.size - this.offset - len + + if (unreadLength >= 0) { + let bits = 0, i = 0 + this.offset += len + // 32位及以上的整数不支持位移运算,使用 / + Math.pow 规避 + if (this.size > 31) { + for (; i < len; i++) { + bits += Math.pow(2, i) + } + return this.val / Math.pow(2, unreadLength) & bits + } else { + for (; i < len; i++) { + bits += 1 << i + } + return this.val >>> unreadLength & bits + } + } + throw new Error(`the number of the read operation exceeds the total length limit of bits`) + } +} \ No newline at end of file diff --git a/packages/xgplayer-transmuxer/src/utils/byte-reader.ts b/packages/xgplayer-transmuxer/src/utils/byte-reader.ts index d4f680f7e..38a86cb45 100644 --- a/packages/xgplayer-transmuxer/src/utils/byte-reader.ts +++ b/packages/xgplayer-transmuxer/src/utils/byte-reader.ts @@ -6,7 +6,7 @@ export class ByteReader { constructor(buf: ArrayBuffer, offset: number, len: number) { this.dv = new DataView(buf) this.start = this.offset = offset || this.dv.byteOffset - this.end = len ? this.start + len : this.dv.byteLength + this.end = len ? this.start + len : this.start + this.dv.byteLength } static fromUint8(uint8: Uint8Array) { return new ByteReader(uint8.buffer, uint8.byteOffset, uint8.byteLength) @@ -105,7 +105,7 @@ export class ByteReader { readToBuffer(len?: number) { let buffer: ArrayBuffer if (this.offset || len) { - buffer = this.dv.buffer.slice(this.offset, len ? this.offset + len : undefined) + buffer = this.dv.buffer.slice(this.offset, len ? this.offset + len : this.end) } else { buffer = this.dv.buffer }