ZigZag Sin
登 陆
上一篇:avcC Box 下一篇:stts Box

esds Box

Time
2022-2-21 11:39 阅读 381

具体 Box 分析

Esds Box

介绍

该 Box 则包含了音频的编码信息和音频码率信息,所以解码音频时非常关键。Esds中可以分为三层,每层为包含关系,分别为 MP4ESDescr,MP4DecConfigDescr,MP4DecSpecificDescr。

工具分析

image-20211225210145180

直接分析字段值,这个 Box 是 Full Box,这里 length、type 和 version/flags 就不分析了,具体可以看上面,下面我们就直接分析 Data 数据。这里要注意上面红框标注的 80 80 80,这个相当于分隔符(实际上是描述符长度字段的扩展字节),对应 Esds 每一层的数据。当然有一些 MP4 文件的 Esds Box 是没有这个 80 80 80 的,那就直接解析就行了。

| 名称 | 实际值(16进制) | 具体值(10进制 / ASCII) | 说明 |
| --- | --- | --- | --- |
| es description(ed) tag | 03 | 3 | 基本流描述标记:默认0x03 |
| ed tag size(跳过80 80 80) | 1F | 31 | 表示后面有31字节 |
| ed track id | 00 00 | 0 | 表示音频的原始es数据的id是0,一般一路音频,这个值就默认是0; |
| ed flag | 00 | 0 | |
| decoder config descriptor(dcd) tag | 04 | 4 | 默认值0x04 |
| dcd tag size | 14 | 20 | 长度 |
| dcd mpeg-4 audio | 40 | 64 | 0x40 是 Audio ISO/IEC 14496-3 |
| dcd audio stream | 15 | 21 | 一般默认0x15 |
| dcd buffersize db | 00 60 00 | 24576 | 建议的解码器缓存大小 |
| dcd max bitrate | 00 02 EE 00 | 192000(187.5kb/s) | 音频数据最大码率 |
| dcd avg bitrate | 00 02 EE 00 | 192000(187.5kb/s) | 音频数据平均码率 |
| decoder specific info description(dsid) tag | 05 | 5 | 解码规格标记,默认值:0x05 |
| dsid tag size | 02 | 2 | 解码规格标记及其后面值大小 |
| dsid audio specific config(asc) | 11 90 | 0b00010001 0b10010000 | 音频规格数据,见下面各个bit位解释 |
| asc object type | 11 | 0b00010 | 5bit,AAC LC |
| asc frequency index | 11 90 | 0b0011 | 4bit,48000 Hz |
| asc channel configuration | 90 | 0b0010 | 4bit,双声道 |
| asc frame length flag | 90 | 0b0 | 1bit,每个包包含 1024 个采样(samples),也就是一帧音频的大小。 |
| asc depends on core coder | 90 | 0b0 | 1bit,不太重要 |
| asc extension flag | 90 | 0b0 | 1bit,不太重要 |

image-20211225214356402

上面分析基本是和程序是一致的,下面是对上面各个字段不同值的补充解释。

ed flag:

一般默认00:0x00:00000000

其中每个bit还代表是否后面有相应的字段。

第一bit为1,则有16bit的dependsOn_ES_ID字段;

第二bit为1,则有8bit的URLlength字段(其后跟随URLstring);

第三bit为1,则有16bit的OCR_ES_ID字段;

最后5bit,代表streamPriority

asc object type**(5bit)**:

0: Null

1: AAC Main

2: AAC LC (Low Complexity)

3: AAC SSR (Scalable Sample Rate)

asc frequency index**(4bit)**

0: 96000 Hz

1: 88200 Hz

2: 64000 Hz

3: 48000 Hz

4: 44100 Hz

5: 32000 Hz

6: 24000 Hz

7: 22050 Hz

8: 16000 Hz

9: 12000 Hz

10: 11025 Hz

11: 8000 Hz

12: 7350 Hz

13: Reserved

14: Reserved

15: frequency is written explicitly

asc channel configuration**(4bit)**

0: Defined in AOT Specific Config

1: 1channel: front-center 单声道

2: 2channels:front-left, front-right 双声道

3: 3channels:front-center,front-left, front-right 3声道

asc frame length flag**(1bit)**:

0: Each packet contains 1024 samples

1: Each packet contains 960 samples

参照: https://wiki.multimedia.cx/index.php?title=MPEG-4_Audio

代码定义

// BaseBox.h  

// ...
// 其他 Box 的定义

// Box wrapper for the MP4 `esds` box. The public fields below are filled in by
// PrintDataInfo() and mirror the three nested descriptors of the box:
// ES descriptor -> decoder config descriptor -> decoder specific info.
class TimeEsdsBox : public BaseBox {
public:
    TimeEsdsBox(BoxHeader h, Timebyte *d) : BaseBox(h, d) {}

    // Parses the box payload into the fields below and prints them.
    void PrintDataInfo() override;

    // ---- Full-box header ----
    Timebyte version{0};
    Timebyte flags{0};

    // ---- ES descriptor ("ed") ----
    unsigned char es_description_tag{0};  // elementary stream descriptor tag, 0x03 by default
    unsigned char ed_tag_szie{0};         // length of the descriptor
    unsigned short ed_track_id{0};        // ES id: id of the raw audio stream
    unsigned char ed_flag{0};             // usually 0x00

    // ---- Decoder config descriptor ("dcd") ----
    unsigned char decoder_config_descriptor_tag{0};  // decoder config descriptor tag, 0x04 by default
    unsigned char dcd_tag_size{0};                   // length field
    unsigned char dcd_mepg_audio{0};                 // 0x40 for MP4 audio
    unsigned char dcd_audio_stream{0};               // usually 0x15
    unsigned int dcd_buffersize_db{0};               // suggested decoder buffer size (3 bytes in the stream)
    unsigned int dcd_max_bitrate{0};                 // maximum bitrate of the audio data
    unsigned int dcd_avg_bitrate{0};                 // average bitrate of the audio data

    // ---- Decoder specific info ("dsid") ----
    unsigned char decoder_specific_info_description_tag{0};  // decoder specific info tag, 0x05 by default
    unsigned char dsid_tag_szie{0};                          // size of the data following the tag

    // Audio specific config ("asc"): 2 bytes in the stream, split into bit fields.
    unsigned char asc_object_type{0};            // 5 bits: audio object type (coding profile)
    unsigned char asc_frequency_index{0};        // 4 bits: sampling frequency index
    unsigned char asc_channel_configuration{0};  // 4 bits: channel configuration
    unsigned char asc_frame_length_flag{0};      // 1 bit: size of one audio frame
    // ... the remaining 2 bits are not stored for now
};
// TimeEsdsBox.cpp

// Parses the esds payload (`data`, `h.GetDataSize()` bytes) into the public
// fields of this object and prints every decoded field to stdout.
//
// Layout handled, in order: full-box header (version + 3 flag bytes), the ES
// descriptor, the decoder config descriptor and the decoder specific info
// (first two bytes of the audio specific config).
//
// Each descriptor tag may be followed by the three bytes 0x80 0x80 0x80 before
// its one-byte size; when they are absent, the three bytes already consumed are
// the size and the next two bytes of the descriptor.
// NOTE(review): only the "zero or exactly three 0x80 bytes" size encodings are
// handled, and multi-byte fields are assumed big-endian (as the fallback
// branches already do) - confirm against TimeBufferStream.
void TimeEsdsBox::PrintDataInfo() {
    TimeBufferStream bufferStream(data, h.GetDataSize());

    // ---- Full-box header ----
    version = bufferStream.GetUChar();
    // `flags` is a single Timebyte, so the three flag bytes are staged in a
    // local buffer instead of being copied over the neighbouring members.
    // NOTE(review): assumes Timebyte is one byte wide and GetLenData copies
    // bytes in stream order - confirm.
    unsigned char flagBytes[3] = {0};
    bufferStream.GetLenData(flagBytes, 3);
    flags = flagBytes[0];

    // ---- ES descriptor ----
    {
        es_description_tag = bufferStream.GetUChar();
        unsigned char lead[3] = {0};
        bufferStream.GetLenData(lead, 3);
        const bool hasMarker = lead[0] == 0x80 && lead[1] == 0x80 && lead[2] == 0x80;
        if (hasMarker) {
            ed_tag_szie = bufferStream.GetUChar();
            ed_track_id = bufferStream.GetUShort();
        } else {
            // No 0x80 0x80 0x80: the bytes just read are size + 16-bit ES id.
            ed_tag_szie = lead[0];
            ed_track_id = static_cast<unsigned short>(lead[1] << 8 | lead[2]);
        }
        ed_flag = bufferStream.GetUChar();
    }

    // ---- Decoder config descriptor ----
    {
        decoder_config_descriptor_tag = bufferStream.GetUChar();
        unsigned char lead[3] = {0};
        bufferStream.GetLenData(lead, 3);
        const bool hasMarker = lead[0] == 0x80 && lead[1] == 0x80 && lead[2] == 0x80;
        if (hasMarker) {
            dcd_tag_size = bufferStream.GetUChar();
            dcd_mepg_audio = bufferStream.GetUChar();
            dcd_audio_stream = bufferStream.GetUChar();
        } else {
            // Fix: this size belongs to the decoder config descriptor; it used
            // to be written to ed_tag_szie, clobbering the ES descriptor size.
            dcd_tag_size = lead[0];
            dcd_mepg_audio = lead[1];
            dcd_audio_stream = lead[2];
        }

        // The buffer size is a 3-byte field. Assemble it explicitly so the
        // result does not depend on the host byte order.
        unsigned char sizeBytes[3] = {0};
        bufferStream.GetLenData(sizeBytes, 3);
        dcd_buffersize_db = (static_cast<unsigned int>(sizeBytes[0]) << 16)
                            | (static_cast<unsigned int>(sizeBytes[1]) << 8)
                            | static_cast<unsigned int>(sizeBytes[2]);

        dcd_max_bitrate = bufferStream.GetUInt();
        dcd_avg_bitrate = bufferStream.GetUInt();
    }

    // ---- Decoder specific info ----
    {
        decoder_specific_info_description_tag = bufferStream.GetUChar();
        unsigned char lead[3] = {0};
        unsigned short audioConfig = 0;
        bufferStream.GetLenData(lead, 3);
        const bool hasMarker = lead[0] == 0x80 && lead[1] == 0x80 && lead[2] == 0x80;
        if (hasMarker) {
            dsid_tag_szie = bufferStream.GetUChar();
            audioConfig = bufferStream.GetUShort();
        } else {
            dsid_tag_szie = lead[0];
            audioConfig = static_cast<unsigned short>(lead[1] << 8 | lead[2]);
        }

        // 16-bit audio specific config, most significant bit first:
        //   5 bits object type | 4 bits frequency index |
        //   4 bits channel configuration | 1 bit frame length flag | 2 bits unused here
        asc_object_type = (audioConfig & 0xf800) >> 11;
        asc_frequency_index = (audioConfig & 0x780) >> 7;
        asc_channel_configuration = (audioConfig & 0x78) >> 3;
        // Fix: the frame length bit used to overwrite asc_channel_configuration.
        asc_frame_length_flag = (audioConfig & 0b100) >> 2;
    }

    printf("===========================\n");
    h.to_string();
    printf("es_description_tag: %d\n", es_description_tag);
    printf("ed_tag_szie: %d\n", ed_tag_szie);
    printf("ed_track_id: %d\n", ed_track_id);
    printf("ed_flag: %d\n", ed_flag);

    printf("decoder_config_descriptor_tag: %d\n", decoder_config_descriptor_tag);
    printf("dcd_tag_size: %d\n", dcd_tag_size);
    printf("dcd_mepg_audio: %d\n", dcd_mepg_audio);
    printf("dcd_audio_stream: %d\n", dcd_audio_stream);
    printf("dcd_buffersize_db: %u\n", dcd_buffersize_db);
    // Fix: "%ud" printed the value followed by a literal 'd'; "%u" is the
    // unsigned conversion.
    printf("dcd_max_bitrate: %u => max: %.2f Kb/s\n", dcd_max_bitrate,
           static_cast<double>(dcd_max_bitrate) / 1000.0);
    printf("dcd_avg_bitrate: %u => avg: %.2f Kb/s\n", dcd_avg_bitrate,
           static_cast<double>(dcd_avg_bitrate) / 1000.0);

    printf("decoder_specific_info_description_tag: %d\n", decoder_specific_info_description_tag);
    printf("dsid_tag_szie: %d\n", dsid_tag_szie);

    // Lookup tables indexed by the raw field value. Values past the end of a
    // table (less common ones) print nothing.
    static const char *const kObjectTypeLines[] = {
        "asc_object_type: Null\n",
        "asc_object_type: AAC Main\n",
        "asc_object_type:  AAC LC (Low Complexity)\n",
        "asc_object_type:  AAC SSR (Scalable Sample Rate)\n",
    };
    static const int kSampleRatesHz[] = {
        96000, 88200, 64000, 48000, 44100, 32000, 24000,
        22050, 16000, 12000, 11025, 8000, 7350,
    };
    static const char *const kChannelLines[] = {
        "channels: Defined in AOT Specifc Config\n",
        "channels: 1 channel: front-center\n",
        "channels: 2 channels:front-left, front-right\n",
        "channels: 3 channels:front-center,front-left, front-right \n",
    };
    static const char *const kFrameLengthLines[] = {
        "packet: Each packet contains 1024 samples\n",
        "packet: Each packet contains 960 samples\n",
    };

    const unsigned int objectTypeCount = sizeof(kObjectTypeLines) / sizeof(kObjectTypeLines[0]);
    const unsigned int sampleRateCount = sizeof(kSampleRatesHz) / sizeof(kSampleRatesHz[0]);
    const unsigned int channelCount = sizeof(kChannelLines) / sizeof(kChannelLines[0]);
    const unsigned int frameLengthCount = sizeof(kFrameLengthLines) / sizeof(kFrameLengthLines[0]);

    if (asc_object_type < objectTypeCount) { // 5bit
        printf("%s", kObjectTypeLines[asc_object_type]);
    }
    if (asc_frequency_index < sampleRateCount) { // 4bit
        printf("samples: %d Hz\n", kSampleRatesHz[asc_frequency_index]);
    }
    if (asc_channel_configuration < channelCount) { // 4bit
        printf("%s", kChannelLines[asc_channel_configuration]);
    }
    if (asc_frame_length_flag < frameLengthCount) { // 1bit
        printf("%s", kFrameLengthLines[asc_frame_length_flag]);
    }
}

上一篇:avcC Box 下一篇:stts Box
给我买个键盘吧。。。求打赏。。。
欢迎加群,一起交流~~~