Swift

Capturing video frames

This example shows you how to implement a custom video capturer that uses the device camera as its video source. To use the camera implementation, instantiate the camera capturer in your VonageVideoManager:

publisher.videoCapture = BasicVideoCapturerCamera(preset: AVCaptureSession.Preset.cif352x288, desiredFrameRate: 30)

Initializing and configuring the video capturer

The initializer calls size(from:) to determine the capture resolution and sets up a serial dispatch queue for frame processing, so that capturing images does not block the main (UI) queue.

/// Creates a camera capturer for the given session preset and target
/// frame rate. A dedicated serial queue is created so that frame
/// capture work never runs on the main (UI) queue.
init(preset: AVCaptureSession.Preset, desiredFrameRate: Int) {
    sessionPreset = preset
    self.desiredFrameRate = desiredFrameRate
    captureQueue = DispatchQueue(label: "com.vonage.BasicVideoCapturer")

    super.init()

    // Derive the pixel dimensions from the preset once the instance is
    // fully initialized (size(from:) is an instance method).
    let dimensions = size(from: sessionPreset)
    imageWidth = Int(dimensions.width)
    imageHeight = Int(dimensions.height)
}

The implementation of initCapture uses the AVFoundation framework to set the camera to capture images. It creates an AVCaptureSession, sets the input device, and configures an AVCaptureVideoDataOutput:

/// Builds and configures the AVCaptureSession: the default video device
/// as input, and an NV12 video-data output that delivers frames on
/// `captureQueue`.
func initCapture() {
    let session = AVCaptureSession()
    session.beginConfiguration()
    
    // Set device capture
    session.sessionPreset = sessionPreset
    
    // Bail out silently if no camera is available or the input cannot be
    // created (e.g. missing camera permission); the local session is
    // simply discarded in that case.
    guard let videoDevice = AVCaptureDevice.default(for: .video),
          let deviceInput = try? AVCaptureDeviceInput(device: videoDevice) else { return }
          
    self.inputDevice = deviceInput
    if session.canAddInput(deviceInput) {
        session.addInput(deviceInput)
    }
    
    let outputDevice = AVCaptureVideoDataOutput()
    // Drop frames that arrive while the delegate is still busy rather
    // than letting them queue up.
    outputDevice.alwaysDiscardsLateVideoFrames = true
    // NV12 (bi-planar Y plane + interleaved CbCr plane), matching the
    // OTVideoFormat created later for the publisher.
    outputDevice.videoSettings = [
        kCVPixelBufferPixelFormatTypeKey as String: Int(kCVPixelFormatType_420YpCbCr8BiPlanarVideoRange)
    ]
    
    // Frames are delivered on the serial capture queue, off the main thread.
    outputDevice.setSampleBufferDelegate(self, queue: captureQueue)
    if session.canAddOutput(outputDevice) {
        session.addOutput(outputDevice)
    }
    
    // ... Frame rate configuration (see below)
}

Frames captured by this session are delivered through the AVCaptureVideoDataOutputSampleBufferDelegate protocol, whose delegate method is shown below.

The second part of initCapture configures the frame rate:

    // Set framerate
    // Pick the highest supported rate that fits the desired frame rate.
    let bestFPS = self.bestFrameRate(for: videoDevice)
    do {
        try videoDevice.lockForConfiguration()
        // NOTE(review): if bestFPS is 0 (no supported rate below the
        // desired one), CMTimeScale(0) yields an invalid CMTime —
        // confirm bestFrameRate(for:) cannot return 0 on target devices.
        let duration = CMTime(value: 1, timescale: CMTimeScale(bestFPS))
        // Pinning min == max frame duration locks the device to a fixed rate.
        videoDevice.activeVideoMinFrameDuration = duration
        videoDevice.activeVideoMaxFrameDuration = duration
        videoDevice.unlockForConfiguration()
    } catch {
        print("Error locking configuration")
    }
    
    session.commitConfiguration()
    self.captureSession = session
    
    // Publisher-side format descriptor; NV12 to match the output settings above.
    self.format = OTVideoFormat(nv12WithWidth: UInt32(imageWidth), height: UInt32(imageHeight))

The bestFrameRate(for:) method returns the highest frame rate supported by the capture device that fits within the desired frame rate:

/// Returns the highest frame rate supported by `device` that does not
/// exceed `desiredFrameRate`.
///
/// Each range's maximum rate is derived from its `minFrameDuration`
/// (rate = timescale / value). If every supported rate exceeds the
/// desired one, the lowest supported maximum rate is returned instead,
/// so the caller never receives 0 — a 0 result would be used as a
/// CMTime timescale and produce an invalid frame duration.
private func bestFrameRate(for device: AVCaptureDevice) -> Double {
    var bestRate: Double = 0
    var lowestSupportedRate = Double.greatestFiniteMagnitude

    for range in device.activeFormat.videoSupportedFrameRateRanges {
        // minFrameDuration corresponds to the *maximum* frame rate of the range.
        let duration = range.minFrameDuration
        let currentRate = Double(duration.timescale) / Double(duration.value)
        lowestSupportedRate = min(lowestSupportedRate, currentRate)

        // `<=` (not `<`) so an exact match is accepted — e.g. a
        // supported 30 fps when 30 fps was requested.
        if currentRate > bestRate && currentRate <= Double(desiredFrameRate) {
            bestRate = currentRate
        }
    }

    // Fallback: no supported rate was at or below the desired rate.
    if bestRate == 0 && lowestSupportedRate != Double.greatestFiniteMagnitude {
        bestRate = lowestSupportedRate
    }
    return bestRate
}

Capturing frames for the publisher's video

The start method starts the AVCaptureSession:

//
//  Starts the AVCaptureSession so frames begin flowing to the delegate.
//
//  returns:
//  - a negative value for error
//  - 0 value when all is OK
//
func start() -> Int32 {
    // Honor the documented contract: report an error when capture was
    // never initialized (initCapture not called, or it failed), instead
    // of returning success unconditionally.
    guard let session = self.captureSession else { return -1 }
    self.captureStarted = true
    // NOTE(review): startRunning() blocks until the session starts;
    // Apple recommends calling it off the main thread — confirm call site.
    session.startRunning()
    return 0
}

The delegate method captureOutput(_:didOutput:from:) is called when a new video frame is available.

/// AVCaptureVideoDataOutputSampleBufferDelegate callback: copies the
/// NV12 pixel data out of the sample buffer into a temporary allocation,
/// wraps it in an OTVideoFrame, and hands it to the video consumer.
/// Runs on `captureQueue` (set as the delegate queue in initCapture).
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
    // Ignore frames delivered before start() or before a format exists.
    guard captureStarted, let format = self.format else { return }
    guard let imageBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
    
    let frame = OTVideoFrame(format: format)
    
    // Prepare memory copy for NV12 (2 planes)
    let planeCount = CVPixelBufferGetPlaneCount(imageBuffer)
    // GetDataSize accounts for row padding, so the single allocation is
    // large enough for every plane copied below (bytesPerRow * height).
    let totalSize = CVPixelBufferGetDataSize(imageBuffer)
    let buffer = UnsafeMutablePointer<UInt8>.allocate(capacity: totalSize)
    
    // Plane base addresses are only valid while the pixel buffer is locked.
    CVPixelBufferLockBaseAddress(imageBuffer, .readOnly)
    
    var planePointers = [UnsafeMutablePointer<UInt8>?]()
    var currentDestination = buffer
    
    // Copy planes
    // The Y plane and the interleaved CbCr plane are packed back-to-back
    // into the allocation; each copy includes the plane's row padding.
    for i in 0..<planeCount {
        guard let sourceBaseAddress = CVPixelBufferGetBaseAddressOfPlane(imageBuffer, i) else { continue }
        let planeSize = CVPixelBufferGetBytesPerRowOfPlane(imageBuffer, i) * CVPixelBufferGetHeightOfPlane(imageBuffer, i)
        
        planePointers.append(currentDestination)
        memcpy(currentDestination, sourceBaseAddress, planeSize)
        currentDestination += planeSize
    }
    
    // Set metadata and consume
    // Report the configured rate (min == max frame duration were pinned
    // to the same value during initCapture).
    if let device = self.inputDevice?.device {
         let minDuration = device.activeVideoMinFrameDuration
         frame.format?.estimatedFramesPerSecond = Double(minDuration.timescale) / Double(minDuration.value)
    }
    
    frame.orientation = self.currentDeviceOrientation()
    frame.timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer)
    
    planePointers.withUnsafeBufferPointer { bufferPointer in
        frame.setPlanesWithPointers(bufferPointer.baseAddress, numPlanes: Int(planeCount))
    }
    
    videoCaptureConsumer?.consumeFrame(frame)
    
    // NOTE(review): the buffer is freed immediately after consumeFrame,
    // so this assumes the SDK copies the plane data synchronously —
    // confirm against the OTVideoCaptureConsumer contract.
    buffer.deallocate()
    CVPixelBufferUnlockBaseAddress(imageBuffer, .readOnly)
}

This method performs the following:

  1. Creates an OTVideoFrame instance.
  2. Allocates a memory buffer.
  3. Copies image data from the CVImageBuffer (NV12 format) into the manual buffer. NV12 has two planes (Y and UV), which are copied sequentially.
  4. Tags the frame with a timestamp and orientation.
  5. Calls consumeFrame, passing the frame to the Vonage SDK.