Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for RTSP H263 #63

Merged
merged 7 commits into from
Jun 16, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Add support for RTSP H263
Added H263 RTP Packet reader and added support for H263 playback through
RTSP.

Change-Id: I348cc4d8e974b5275409b816a9d52aa29f593233
  • Loading branch information
rakeshnitb committed Mar 30, 2022
commit 55bfe71384bfd55fcddc14fddf1cac53e9c22416
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,17 @@ public final class RtpPayloadFormat {

private static final String RTP_MEDIA_AC3 = "AC3";
private static final String RTP_MEDIA_MPEG4_GENERIC = "MPEG4-GENERIC";
private static final String RTP_MEDIA_H263_1998 = "H263-1998";
private static final String RTP_MEDIA_H263_2000 = "H263-2000";
private static final String RTP_MEDIA_H264 = "H264";
private static final String RTP_MEDIA_H265 = "H265";

/** Returns whether the format of a {@link MediaDescription} is supported. */
public static boolean isFormatSupported(MediaDescription mediaDescription) {
switch (Ascii.toUpperCase(mediaDescription.rtpMapAttribute.mediaEncoding)) {
case RTP_MEDIA_AC3:
case RTP_MEDIA_H263_1998:
case RTP_MEDIA_H263_2000:
case RTP_MEDIA_H264:
case RTP_MEDIA_H265:
case RTP_MEDIA_MPEG4_GENERIC:
Expand All @@ -65,6 +69,9 @@ public static String getMimeTypeFromRtpMediaType(String mediaType) {
switch (Ascii.toUpperCase(mediaType)) {
case RTP_MEDIA_AC3:
return MimeTypes.AUDIO_AC3;
case RTP_MEDIA_H263_1998:
case RTP_MEDIA_H263_2000:
return MimeTypes.VIDEO_H263;
case RTP_MEDIA_H264:
return MimeTypes.VIDEO_H264;
case RTP_MEDIA_H265:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@

private static final String GENERIC_CONTROL_ATTR = "*";

/** Default width and height for H263. */
private static final int DEFAULT_H263_WIDTH = 352;
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
private static final int DEFAULT_H263_HEIGHT = 288;

/** The track's associated {@link RtpPayloadFormat}. */
public final RtpPayloadFormat payloadFormat;
/** The track's URI. */
Expand Down Expand Up @@ -121,6 +125,10 @@ public int hashCode() {
checkArgument(!fmtpParameters.isEmpty());
processAacFmtpAttribute(formatBuilder, fmtpParameters, channelCount, clockRate);
break;
case MimeTypes.VIDEO_H263:
// H263 does not require an FMTP attribute, so set a default width and height.
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
formatBuilder.setWidth(DEFAULT_H263_WIDTH).setHeight(DEFAULT_H263_HEIGHT);
break;
case MimeTypes.VIDEO_H264:
checkArgument(!fmtpParameters.isEmpty());
processH264FmtpAttribute(formatBuilder, fmtpParameters);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public RtpPayloadReader createPayloadReader(RtpPayloadFormat payloadFormat) {
return new RtpAc3Reader(payloadFormat);
case MimeTypes.AUDIO_AAC:
return new RtpAacReader(payloadFormat);
case MimeTypes.VIDEO_H263:
return new RtpH263Reader(payloadFormat);
case MimeTypes.VIDEO_H264:
return new RtpH264Reader(payloadFormat);
case MimeTypes.VIDEO_H265:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
/*
* Copyright 2022 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package androidx.media3.exoplayer.rtsp.reader;

import static androidx.media3.common.util.Assertions.checkStateNotNull;
import static androidx.media3.common.util.Util.castNonNull;

import androidx.media3.common.C;
import androidx.media3.common.ParserException;
import androidx.media3.common.util.Log;
import androidx.media3.common.util.ParsableByteArray;
import androidx.media3.common.util.Util;
import androidx.media3.exoplayer.rtsp.RtpPacket;
import androidx.media3.exoplayer.rtsp.RtpPayloadFormat;
import androidx.media3.extractor.ExtractorOutput;
import androidx.media3.extractor.TrackOutput;
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;

/**
* Parses an H263 byte stream carried on RTP packets, and extracts individual H263 video frames as
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
* defined in RFC4629.
*/
/* package */ final class RtpH263Reader implements RtpPayloadReader {
private static final String TAG = "RtpH263Reader";

private static final long MEDIA_CLOCK_FREQUENCY = 90_000;

/** VOP unit type. */
private static final int I_VOP = 0;
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved

private final RtpPayloadFormat payloadFormat;

private @MonotonicNonNull TrackOutput trackOutput;

private long firstReceivedTimestamp;
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
private int previousSequenceNumber;
/** The combined size of a sample that is fragmented into multiple RTP packets. */
private int fragmentedSampleSizeBytes;
private static int width;
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
private static int height;
private static boolean isKeyFrame;
private boolean isOutputFormatSet;
private long startTimeOffsetUs;

/**
 * Creates a reader for H263 RTP payloads.
 *
 * @param payloadFormat The {@link RtpPayloadFormat} of the track this reader consumes.
 */
public RtpH263Reader(RtpPayloadFormat payloadFormat) {
  this.payloadFormat = payloadFormat;
  // No packet has been seen yet, so timestamp and sequence tracking start unset.
  this.previousSequenceNumber = C.INDEX_UNSET;
  this.firstReceivedTimestamp = C.TIME_UNSET;
  // No sample is being assembled and no output format has been published yet.
  this.isOutputFormatSet = false;
  this.fragmentedSampleSizeBytes = 0;
  isKeyFrame = false;
}

@Override
public void createTracks(ExtractorOutput extractorOutput, int trackId) {
trackOutput = extractorOutput.track(trackId, C.TRACK_TYPE_VIDEO);
castNonNull(trackOutput).format(payloadFormat.format);
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {
  // Intentionally empty: this reader records the first timestamp and the sequence number while
  // consuming packets instead.
}

@Override
public void consume(ParsableByteArray data, long timestamp, int sequenceNumber, boolean rtpMarker)
throws ParserException {
checkStateNotNull(trackOutput);

// H263 payload header, RFC4629 Section 5.1.
// 0 1
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | RR |P|V| PLEN |PEBIT|
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
int currPosition = data.getPosition();
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
int header = data.readUnsignedShort();
boolean pBit = ((header & 0x400) == 0x400);
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved

// Check if optional Video Redundancy Coding or PLEN or PEBIT is present, RFC4629 Section 5.1.
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
if ((header & 0x200) != 0 || (header & 0x1f8) != 0 || (header & 0x7) != 0) {
Log.w(TAG, "Packet discarded due to (VRC != 0) or (PLEN != 0) or (PEBIT != 0)");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • Why not supporting this mode?
  • If you choose to log VRC != 0, you should log the actual value of VRC, PLEN and PEBIT.
  • Mention packet dropped.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We were unable to find any clips that contain these special headers, so we could not add support for them and verify it.
These special headers are used for error resilience.

return;
}
int startCodePayload = data.peekUnsignedByte() & 0xfc;
if (pBit == true) {
if (startCodePayload < 128) {
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
Log.w(TAG, "Picture start Code (PSC) missing, Dropping packet.");
return;
} else {
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
// Setting first two bytes of the start code. Refer RFC4629 Section 5.1.
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
data.getData()[currPosition] = 0;
data.getData()[currPosition + 1] = 0;
data.setPosition(currPosition);
}
} else {
// Check that this packet is in the sequence of the previous packet.
int expectedSequenceNumber = RtpPacket.getNextSequenceNumber(previousSequenceNumber);
if (sequenceNumber != expectedSequenceNumber) {
Log.w(
TAG,
Util.formatInvariant(
"Received RTP packet with unexpected sequence number. Expected: %d; received: %d."
+ " Dropping packet.",
expectedSequenceNumber, sequenceNumber));
return;
}
}

if (fragmentedSampleSizeBytes == 0) {
getBufferFlagsAndResolutionFromVop(data, isOutputFormatSet);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's very strange that a method whose name starts with get does not return anything, and that getBufferFlagsAndResolutionFromVop depends on whether the output format is set (isOutputFormatSet).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The getBufferFlagsAndResolutionFromVop() method produces both the buffer flags and the resolution, so I'm using class variables to hold both values. I'm using the "isOutputFormatSet" variable to check whether the output format has already been set, as we only need to update the actual resolution once during decoding.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, if the method does not return anything, can you rename it to parseVopHeader and specify it sets width/height/flags in the javadoc.

if (!isOutputFormatSet && isKeyFrame == true) {
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
if (width != payloadFormat.format.width || height != payloadFormat.format.height) {
trackOutput.format(
payloadFormat.format.buildUpon().setWidth(width).setHeight(height).build());
}
isOutputFormatSet = true;
}
}
int fragmentSize = data.bytesLeft();
// Write the video sample.
trackOutput.sampleData(data, fragmentSize);
fragmentedSampleSizeBytes += fragmentSize;

if (rtpMarker) {
if (firstReceivedTimestamp == C.TIME_UNSET) {
firstReceivedTimestamp = timestamp;
}
long timeUs = toSampleUs(startTimeOffsetUs, timestamp, firstReceivedTimestamp);
trackOutput.sampleMetadata(
timeUs,
isKeyFrame ? C.BUFFER_FLAG_KEY_FRAME : 0,
fragmentedSampleSizeBytes,
/* offset= */ 0,
/* encryptionData= */ null);
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
fragmentedSampleSizeBytes = 0;
isKeyFrame = false;
}
previousSequenceNumber = sequenceNumber;
}

@Override
public void seek(long nextRtpTimestamp, long timeUs) {
  // Drop whatever part of a fragmented sample was accumulated before the seek.
  fragmentedSampleSizeBytes = 0;
  // Sample times after the seek are computed relative to this RTP timestamp / time offset pair.
  startTimeOffsetUs = timeUs;
  firstReceivedTimestamp = nextRtpTimestamp;
}

// Internal methods.
/**
* Parses VOP Coding type and resolution.
*/
private void getBufferFlagsAndResolutionFromVop(ParsableByteArray data, boolean gotResolution) {
// Search for SHORT_VIDEO_START_MARKER (0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0).
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
int currPosition = data.getPosition();
if (data.readUnsignedInt() >> 10 == 0x20) {
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
int header = data.peekUnsignedByte();
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
int vopType = ((header >> 1) & 0x01);
if (!gotResolution && vopType == I_VOP) {
int sourceFormat = ((header >> 2) & 0x07);
if (sourceFormat == 1) {
width = 128;
rakeshnitb marked this conversation as resolved.
Show resolved Hide resolved
height = 96;
} else {
width = (short) (176 << (sourceFormat - 2));
height = (short) (144 << (sourceFormat - 2));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain a bit here? I'm particularly lost why the shift bits are derived from a sourceFormat. Is the related logic in the H263 spec? I can't find it in RFC4629.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://cs.android.com/android/platform/superproject/+/master:frameworks/av/media/codecs/m4v_h263/dec/src/vop.cpp;l=1128. According to this logic :
sourceFormat 2 is 176x144, sourceFormat 3 is (176*2)x(144*2), sourceFormat 4 is (176*2*2)x(144*2*2), and so on.

}
}
data.setPosition(currPosition);
isKeyFrame = (vopType == I_VOP ? true : false);
return;
}
data.setPosition(currPosition);
isKeyFrame = false;
}

/**
 * Converts an RTP timestamp into a sample time in microseconds.
 *
 * <p>The difference between {@code rtpTimestamp} and {@code firstReceivedRtpTimestamp} is scaled
 * from {@link #MEDIA_CLOCK_FREQUENCY} ticks to microseconds and added to {@code
 * startTimeOffsetUs}.
 *
 * @param startTimeOffsetUs The offset, in microseconds, added to the scaled difference.
 * @param rtpTimestamp The RTP timestamp of the sample.
 * @param firstReceivedRtpTimestamp The RTP timestamp used as the reference point.
 * @return The sample time in microseconds.
 */
private static long toSampleUs(
    long startTimeOffsetUs, long rtpTimestamp, long firstReceivedRtpTimestamp) {
  long elapsedRtpTicks = rtpTimestamp - firstReceivedRtpTimestamp;
  long elapsedUs =
      Util.scaleLargeTimestamp(
          elapsedRtpTicks,
          /* multiplier= */ C.MICROS_PER_SECOND,
          /* divisor= */ MEDIA_CLOCK_FREQUENCY);
  return startTimeOffsetUs + elapsedUs;
}
}