Split audio player

The audio player had 2 roles: - handle the SDL audio output device; - resample input samples to maintain a target latency. Extract the latter to a separate component (an "audio regulator"), independent of SDL.
2024-09-23 23:58:02 +02:00 · 2024-09-23 23:58:02 +02:00 · 0bb3955b95
commit 0bb3955b95
parent 62776fb261
5 changed files with 507 additions and 440 deletions
--- a/app/meson.build
+++ b/app/meson.build
@ -5,6 +5,7 @@ src = [
    'src/adb/adb_parser.c',
    'src/adb/adb_tunnel.c',
    'src/audio_player.c',
+    'src/audio_regulator.c',
    'src/cli.c',
    'src/clock.c',
    'src/compat.c',
--- a/app/src/audio_player.c
+++ b/app/src/audio_player.c
@ -1,143 +1,23 @@
 #include "audio_player.h"

-#include <libavcodec/avcodec.h>
-#include <libavutil/opt.h>
-
 #include "util/log.h"

-//#define SC_AUDIO_PLAYER_DEBUG // uncomment to debug
-
-/**
- * Real-time audio player with configurable latency
- *
- * As input, the player regularly receives AVFrames of decoded audio samples.
- * As output, an SDL callback regularly requests audio samples to be played.
- * In the middle, an audio buffer stores the samples produced but not consumed
- * yet.
- *
- * The goal of the player is to feed the audio output with a latency as low as
- * possible while avoiding buffer underrun (i.e. not being able to provide
- * samples when requested).
- *
- * The player aims to feed the audio output with as little latency as possible
- * while avoiding buffer underrun. To achieve this, it attempts to maintain the
- * average buffering (the number of samples present in the buffer) around a
- * target value. If this target buffering is too low, then buffer underrun will
- * occur frequently. If it is too high, then latency will become unacceptable.
- * This target value is configured using the scrcpy option --audio-buffer.
- *
- * The player cannot adjust the sample input rate (it receives samples produced
- * in real-time) or the sample output rate (it must provide samples as
- * requested by the audio output callback). Therefore, it may only apply
- * compensation by resampling (converting _m_ input samples to _n_ output
- * samples).
- *
- * The compensation itself is applied by libswresample (FFmpeg). It is
- * configured using swr_set_compensation(). An important work for the player
- * is to estimate the compensation value regularly and apply it.
- *
- * The estimated buffering level is the result of averaging the "natural"
- * buffering (samples are produced and consumed by blocks, so it must be
- * smoothed), and making instant adjustments resulting of its own actions
- * (explicit compensation and silence insertion on underflow), which are not
- * smoothed.
- *
- * Buffer underflow events can occur when packets arrive too late. In that case,
- * the player inserts silence. Once the packets finally arrive (late), one
- * strategy could be to drop the samples that were replaced by silence, in
- * order to keep a minimal latency. However, dropping samples in case of buffer
- * underflow is inadvisable, as it would temporarily increase the underflow
- * even more and cause very noticeable audio glitches.
- *
- * Therefore, the player doesn't drop any sample on underflow. The compensation
- * mechanism will absorb the delay introduced by the inserted silence.
- */
-
 /** Downcast frame_sink to sc_audio_player */
 #define DOWNCAST(SINK) container_of(SINK, struct sc_audio_player, frame_sink)

-#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_FLT
 #define SC_SDL_SAMPLE_FMT AUDIO_F32

-#define TO_BYTES(SAMPLES) sc_audiobuf_to_bytes(&ap->buf, (SAMPLES))
-#define TO_SAMPLES(BYTES) sc_audiobuf_to_samples(&ap->buf, (BYTES))
-
 static void SDLCALL
 sc_audio_player_sdl_callback(void *userdata, uint8_t *stream, int len_int) {
    struct sc_audio_player *ap = userdata;

    assert(len_int > 0);
    size_t len = len_int;
-    uint32_t count = TO_SAMPLES(len);

-#ifdef SC_AUDIO_PLAYER_DEBUG
-    LOGD("[Audio] SDL callback requests %" PRIu32 " samples", count);
-#endif
+    assert(len % ap->audioreg.sample_size == 0);
+    uint32_t out_samples = len / ap->audioreg.sample_size;

-    // A lock is necessary in the rare case where the producer needs to drop
-    // samples already pushed (when the buffer is full)
-    sc_mutex_lock(&ap->mutex);
-
-    bool played = atomic_load_explicit(&ap->played, memory_order_relaxed);
-    if (!played) {
-        uint32_t buffered_samples = sc_audiobuf_can_read(&ap->buf);
-        // Wait until the buffer is filled up to at least target_buffering
-        // before playing
-        if (buffered_samples < ap->target_buffering) {
-            LOGV("[Audio] Inserting initial buffering silence: %" PRIu32
-                 " samples", count);
-            // Delay playback starting to reach the target buffering. Fill the
-            // whole buffer with silence (len is small compared to the
-            // arbitrary margin value).
-            memset(stream, 0, len);
-            sc_mutex_unlock(&ap->mutex);
-            return;
-        }
-    }
-
-    uint32_t read = sc_audiobuf_read(&ap->buf, stream, count);
-
-    sc_mutex_unlock(&ap->mutex);
-
-    if (read < count) {
-        uint32_t silence = count - read;
-        // Insert silence. In theory, the inserted silent samples replace the
-        // missing real samples, which will arrive later, so they should be
-        // dropped to keep the latency minimal. However, this would cause very
-        // audible glitches, so let the clock compensation restore the target
-        // latency.
-        LOGD("[Audio] Buffer underflow, inserting silence: %" PRIu32 " samples",
-             silence);
-        memset(stream + TO_BYTES(read), 0, TO_BYTES(silence));
-
-        bool received = atomic_load_explicit(&ap->received,
-                                             memory_order_relaxed);
-        if (received) {
-            // Inserting additional samples immediately increases buffering
-            atomic_fetch_add_explicit(&ap->underflow, silence,
-                                      memory_order_relaxed);
-        }
-    }
-
-    atomic_store_explicit(&ap->played, true, memory_order_relaxed);
-}
-
-static uint8_t *
-sc_audio_player_get_swr_buf(struct sc_audio_player *ap, uint32_t min_samples) {
-    size_t min_buf_size = TO_BYTES(min_samples);
-    if (min_buf_size > ap->swr_buf_alloc_size) {
-        size_t new_size = min_buf_size + 4096;
-        uint8_t *buf = realloc(ap->swr_buf, new_size);
-        if (!buf) {
-            LOG_OOM();
-            // Could not realloc to the requested size
-            return NULL;
-        }
-        ap->swr_buf = buf;
-        ap->swr_buf_alloc_size = new_size;
-    }
-
-    return ap->swr_buf;
+    sc_audio_regulator_pull(&ap->audioreg, stream, out_samples);
 }

 static bool
@ -145,202 +25,14 @@ sc_audio_player_frame_sink_push(struct sc_frame_sink *sink,
                                const AVFrame *frame) {
    struct sc_audio_player *ap = DOWNCAST(sink);

-    SwrContext *swr_ctx = ap->swr_ctx;
-
-    int64_t swr_delay = swr_get_delay(swr_ctx, ap->sample_rate);
-    // No need to av_rescale_rnd(), input and output sample rates are the same.
-    // Add more space (256) for clock compensation.
-    int dst_nb_samples = swr_delay + frame->nb_samples + 256;
-
-    uint8_t *swr_buf = sc_audio_player_get_swr_buf(ap, dst_nb_samples);
-    if (!swr_buf) {
-        return false;
-    }
-
-    int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples,
-                          (const uint8_t **) frame->data, frame->nb_samples);
-    if (ret < 0) {
-        LOGE("Resampling failed: %d", ret);
-        return false;
-    }
-
-    // swr_convert() returns the number of samples which would have been
-    // written if the buffer was big enough.
-    uint32_t samples = MIN(ret, dst_nb_samples);
-#ifdef SC_AUDIO_PLAYER_DEBUG
-    LOGD("[Audio] %" PRIu32 " samples written to buffer", samples);
-#endif
-
-    uint32_t cap = sc_audiobuf_capacity(&ap->buf);
-    if (samples > cap) {
-        // Very very unlikely: a single resampled frame should never
-        // exceed the audio buffer size (or something is very wrong).
-        // Ignore the first bytes in swr_buf to avoid memory corruption anyway.
-        swr_buf += TO_BYTES(samples - cap);
-        samples = cap;
-    }
-
-    uint32_t skipped_samples = 0;
-
-    uint32_t written = sc_audiobuf_write(&ap->buf, swr_buf, samples);
-    if (written < samples) {
-        uint32_t remaining = samples - written;
-
-        // All samples that could be written without locking have been written,
-        // now we need to lock to drop/consume old samples
-        sc_mutex_lock(&ap->mutex);
-
-        // Retry with the lock
-        written += sc_audiobuf_write(&ap->buf,
-                                     swr_buf + TO_BYTES(written),
-                                     remaining);
-        if (written < samples) {
-            remaining = samples - written;
-            // Still insufficient, drop old samples to make space
-            skipped_samples = sc_audiobuf_read(&ap->buf, NULL, remaining);
-            assert(skipped_samples == remaining);
-        }
-
-        sc_mutex_unlock(&ap->mutex);
-
-        if (written < samples) {
-            // Now there is enough space
-            uint32_t w = sc_audiobuf_write(&ap->buf,
-                                           swr_buf + TO_BYTES(written),
-                                           remaining);
-            assert(w == remaining);
-            (void) w;
-        }
-    }
-
-    uint32_t underflow = 0;
-    uint32_t max_buffered_samples;
-    bool played = atomic_load_explicit(&ap->played, memory_order_relaxed);
-    if (played) {
-        underflow = atomic_exchange_explicit(&ap->underflow, 0,
-                                             memory_order_relaxed);
-
-        max_buffered_samples = ap->target_buffering * 11 / 10
-                             + 60 * ap->sample_rate / 1000 /* 60 ms */;
-    } else {
-        // SDL playback not started yet, do not accumulate more than
-        // max_initial_buffering samples, this would cause unnecessary delay
-        // (and glitches to compensate) on start.
-        max_buffered_samples = ap->target_buffering
-                             + 10 * ap->sample_rate / 1000 /* 10 ms */;
-    }
-
-    uint32_t can_read = sc_audiobuf_can_read(&ap->buf);
-    if (can_read > max_buffered_samples) {
-        uint32_t skip_samples = 0;
-
-        sc_mutex_lock(&ap->mutex);
-        can_read = sc_audiobuf_can_read(&ap->buf);
-        if (can_read > max_buffered_samples) {
-            skip_samples = can_read - max_buffered_samples;
-            uint32_t r = sc_audiobuf_read(&ap->buf, NULL, skip_samples);
-            assert(r == skip_samples);
-            (void) r;
-            skipped_samples += skip_samples;
-        }
-        sc_mutex_unlock(&ap->mutex);
-
-        if (skip_samples) {
-            if (played) {
-                LOGD("[Audio] Buffering threshold exceeded, skipping %" PRIu32
-                     " samples", skip_samples);
-#ifdef SC_AUDIO_PLAYER_DEBUG
-            } else {
-                LOGD("[Audio] Playback not started, skipping %" PRIu32
-                     " samples", skip_samples);
-#endif
-            }
-        }
-    }
-
-    atomic_store_explicit(&ap->received, true, memory_order_relaxed);
-    if (!played) {
-        // Nothing more to do
-        return true;
-    }
-
-    // Number of samples added (or removed, if negative) for compensation
-    int32_t instant_compensation = (int32_t) written - frame->nb_samples;
-    // Inserting silence instantly increases buffering
-    int32_t inserted_silence = (int32_t) underflow;
-    // Dropping input samples instantly decreases buffering
-    int32_t dropped = (int32_t) skipped_samples;
-
-    // The compensation must apply instantly, it must not be smoothed
-    ap->avg_buffering.avg += instant_compensation + inserted_silence - dropped;
-    if (ap->avg_buffering.avg < 0) {
-        // Since dropping samples instantly reduces buffering, the difference
-        // is applied immediately to the average value, assuming that the delay
-        // between the producer and the consumer will be caught up.
-        //
-        // However, when this assumption is not valid, the average buffering
-        // may decrease indefinitely. Prevent it to become negative to limit
-        // the consequences.
-        ap->avg_buffering.avg = 0;
-    }
-
-    // However, the buffering level must be smoothed
-    sc_average_push(&ap->avg_buffering, can_read);
-
-#ifdef SC_AUDIO_PLAYER_DEBUG
-    LOGD("[Audio] can_read=%" PRIu32 " avg_buffering=%f",
-         can_read, sc_average_get(&ap->avg_buffering));
-#endif
-
-    ap->samples_since_resync += written;
-    if (ap->samples_since_resync >= ap->sample_rate) {
-        // Recompute compensation every second
-        ap->samples_since_resync = 0;
-
-        float avg = sc_average_get(&ap->avg_buffering);
-        int diff = ap->target_buffering - avg;
-
-        // Enable compensation when the difference exceeds +/- 4ms.
-        // Disable compensation when the difference is lower than +/- 1ms.
-        int threshold = ap->compensation != 0
-                      ? ap->sample_rate     / 1000  /* 1ms */
-                      : ap->sample_rate * 4 / 1000; /* 4ms */
-
-        if (abs(diff) < threshold) {
-            // Do not compensate for small values, the error is just noise
-            diff = 0;
-        } else if (diff < 0 && can_read < ap->target_buffering) {
-            // Do not accelerate if the instant buffering level is below the
-            // target, this would increase underflow
-            diff = 0;
-        }
-        // Compensate the diff over 4 seconds (but will be recomputed after 1
-        // second)
-        int distance = 4 * ap->sample_rate;
-        // Limit compensation rate to 2%
-        int abs_max_diff = distance / 50;
-        diff = CLAMP(diff, -abs_max_diff, abs_max_diff);
-        LOGV("[Audio] Buffering: target=%" PRIu32 " avg=%f cur=%" PRIu32
-             " compensation=%d", ap->target_buffering, avg, can_read, diff);
-
-        if (diff != ap->compensation) {
-            int ret = swr_set_compensation(swr_ctx, diff, distance);
-            if (ret < 0) {
-                LOGW("Resampling compensation failed: %d", ret);
-                // not fatal
-            } else {
-                ap->compensation = diff;
-            }
-        }
-    }
-
-    return true;
+    return sc_audio_regulator_push(&ap->audioreg, frame);
 }

 static bool
 sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
                                const AVCodecContext *ctx) {
    struct sc_audio_player *ap = DOWNCAST(sink);
+
 #ifdef SCRCPY_LAVU_HAS_CHLAYOUT
    assert(ctx->ch_layout.nb_channels > 0 && ctx->ch_layout.nb_channels < 256);
    uint8_t nb_channels = ctx->ch_layout.nb_channels;
@ -355,12 +47,17 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
    int out_bytes_per_sample = av_get_bytes_per_sample(SC_AV_SAMPLE_FMT);
    assert(out_bytes_per_sample > 0);

-    ap->sample_rate = ctx->sample_rate;
+    uint32_t target_buffering_samples =
+        ap->target_buffering_delay * ctx->sample_rate / SC_TICK_FREQ;

-    ap->target_buffering = ap->target_buffering_delay * ap->sample_rate
-                                                      / SC_TICK_FREQ;
+    size_t sample_size = nb_channels * out_bytes_per_sample;
+    bool ok = sc_audio_regulator_init(&ap->audioreg, sample_size, ctx,
+                                      target_buffering_samples);
+    if (!ok) {
+        return false;
+    }

-    uint64_t aout_samples = ap->output_buffer_duration * ap->sample_rate
+    uint64_t aout_samples = ap->output_buffer_duration * ctx->sample_rate
                                                       / SC_TICK_FREQ;
    assert(aout_samples <= 0xFFFF);

@ -377,74 +74,10 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
    ap->device = SDL_OpenAudioDevice(NULL, 0, &desired, &obtained, 0);
    if (!ap->device) {
        LOGE("Could not open audio device: %s", SDL_GetError());
+        sc_audio_regulator_destroy(&ap->audioreg);
        return false;
    }

-    SwrContext *swr_ctx = swr_alloc();
-    if (!swr_ctx) {
-        LOG_OOM();
-        goto error_close_audio_device;
-    }
-    ap->swr_ctx = swr_ctx;
-
-#ifdef SCRCPY_LAVU_HAS_CHLAYOUT
-    av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
-    av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
-#else
-    av_opt_set_channel_layout(swr_ctx, "in_channel_layout",
-                              ctx->channel_layout, 0);
-    av_opt_set_channel_layout(swr_ctx, "out_channel_layout",
-                              ctx->channel_layout, 0);
-#endif
-
-    av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0);
-    av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0);
-
-    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0);
-    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0);
-
-    int ret = swr_init(swr_ctx);
-    if (ret) {
-        LOGE("Failed to initialize the resampling context");
-        goto error_free_swr_ctx;
-    }
-
-    // Use a ring-buffer of the target buffering size plus 1 second between the
-    // producer and the consumer. It's too big on purpose, to guarantee that
-    // the producer and the consumer will be able to access it in parallel
-    // without locking.
-    uint32_t audiobuf_samples = ap->target_buffering + ap->sample_rate;
-
-    bool ok = sc_mutex_init(&ap->mutex);
-    if (!ok) {
-        goto error_free_swr_ctx;
-    }
-
-    size_t sample_size = nb_channels * out_bytes_per_sample;
-    ok = sc_audiobuf_init(&ap->buf, sample_size, audiobuf_samples);
-    if (!ok) {
-        goto error_destroy_mutex;
-    }
-
-    size_t initial_swr_buf_size = TO_BYTES(4096);
-    ap->swr_buf = malloc(initial_swr_buf_size);
-    if (!ap->swr_buf) {
-        LOG_OOM();
-        goto error_destroy_audiobuf;
-    }
-    ap->swr_buf_alloc_size = initial_swr_buf_size;
-
-    // Samples are produced and consumed by blocks, so the buffering must be
-    // smoothed to get a relatively stable value.
-    sc_average_init(&ap->avg_buffering, 128);
-    ap->samples_since_resync = 0;
-
-    ap->received = false;
-    atomic_init(&ap->played, false);
-    atomic_init(&ap->received, false);
-    atomic_init(&ap->underflow, 0);
-    ap->compensation = 0;
-
    // The thread calling open() is the thread calling push(), which fills the
    // audio buffer consumed by the SDL audio thread.
    ok = sc_thread_set_priority(SC_THREAD_PRIORITY_TIME_CRITICAL);
@ -456,17 +89,6 @@ sc_audio_player_frame_sink_open(struct sc_frame_sink *sink,
    SDL_PauseAudioDevice(ap->device, 0);

    return true;
-
-error_destroy_audiobuf:
-    sc_audiobuf_destroy(&ap->buf);
-error_destroy_mutex:
-    sc_mutex_destroy(&ap->mutex);
-error_free_swr_ctx:
-    swr_free(&ap->swr_ctx);
-error_close_audio_device:
-    SDL_CloseAudioDevice(ap->device);
-
-    return false;
 }

 static void
@ -477,10 +99,7 @@ sc_audio_player_frame_sink_close(struct sc_frame_sink *sink) {
    SDL_PauseAudioDevice(ap->device, 1);
    SDL_CloseAudioDevice(ap->device);

-    free(ap->swr_buf);
-    sc_audiobuf_destroy(&ap->buf);
-    sc_mutex_destroy(&ap->mutex);
-    swr_free(&ap->swr_ctx);
+    sc_audio_regulator_destroy(&ap->audioreg);
 }

 void
--- a/app/src/audio_player.h
+++ b/app/src/audio_player.h
@ -5,66 +5,27 @@

 #include <stdatomic.h>
 #include <stdbool.h>
-#include <libavformat/avformat.h>
-#include <libswresample/swresample.h>
 #include <SDL2/SDL.h>

+#include "audio_regulator.h"
 #include "trait/frame_sink.h"
-#include "util/audiobuf.h"
-#include "util/average.h"
-#include "util/thread.h"
 #include "util/tick.h"

 struct sc_audio_player {
    struct sc_frame_sink frame_sink;

-    SDL_AudioDeviceID device;
-
-    sc_mutex mutex;
-
    // The target buffering between the producer and the consumer. This value
    // is directly use for compensation.
    // Since audio capture and/or encoding on the device typically produce
    // blocks of 960 samples (20ms) or 1024 samples (~21.3ms), this target
    // value should be higher.
    sc_tick target_buffering_delay;
-    uint32_t target_buffering; // in samples

-    // SDL audio output buffer size.
+    // SDL audio output buffer size
    sc_tick output_buffer_duration;

-    // Audio buffer to communicate between the receiver and the SDL audio
-    // callback
-    struct sc_audiobuf buf;
-
-    // Resampler (only used from the receiver thread)
-    struct SwrContext *swr_ctx;
-
-    // The sample rate is the same for input and output
-    uint32_t sample_rate;
-
-    // Target buffer for resampling (only used by the receiver thread)
-    uint8_t *swr_buf;
-    size_t swr_buf_alloc_size;
-
-    // Number of buffered samples (may be negative on underflow) (only used by
-    // the receiver thread)
-    struct sc_average avg_buffering;
-    // Count the number of samples to trigger a compensation update regularly
-    // (only used by the receiver thread)
-    uint32_t samples_since_resync;
-
-    // Number of silence samples inserted since the last received packet
-    atomic_uint_least32_t underflow;
-
-    // Current applied compensation value (only used by the receiver thread)
-    int compensation;
-
-    // Set to true the first time a sample is received
-    atomic_bool received;
-
-    // Set to true the first time the SDL callback is called
-    atomic_bool played;
+    SDL_AudioDeviceID device;
+    struct sc_audio_regulator audioreg;
 };

 void
--- a/app/src/audio_regulator.c
+++ b/app/src/audio_regulator.c
@ -0,0 +1,415 @@
+#include "audio_regulator.h"
+
+#include <libavcodec/avcodec.h>
+#include <libavutil/opt.h>
+
+#include "util/log.h"
+
+//#define SC_AUDIO_REGULATOR_DEBUG // uncomment to debug
+
+/**
+ * Real-time audio regulator with configurable latency
+ *
+ * As input, the regulator regularly receives AVFrames of decoded audio samples.
+ * As output, the audio player regularly requests audio samples to be played.
+ * In the middle, an audio buffer stores the samples produced but not consumed
+ * yet.
+ *
+ * The goal of the regulator is to feed the audio player with a latency as low
+ * as possible while avoiding buffer underrun (i.e. not being able to provide
+ * samples when requested).
+ *
+ * To achieve this, it attempts to maintain the average buffering (the number
+ * of samples present in the buffer) around a target value. If this target
+ * buffering is too low, then buffer underrun will occur frequently. If it is
+ * too high, then latency will become unacceptable. This target value is
+ * configured using the scrcpy option --audio-buffer.
+ *
+ * The regulator cannot adjust the sample input rate (it receives samples
+ * produced in real-time) or the sample output rate (it must provide samples as
+ * requested by the audio player). Therefore, it may only apply compensation by
+ * resampling (converting _m_ input samples to _n_ output samples).
+ *
+ * The compensation itself is applied by libswresample (FFmpeg). It is
+ * configured using swr_set_compensation(). An important work for the regulator
+ * is to estimate the compensation value regularly and apply it.
+ *
+ * The estimated buffering level is the result of averaging the "natural"
+ * buffering (samples are produced and consumed by blocks, so it must be
+ * smoothed), and making instant adjustments resulting of its own actions
+ * (explicit compensation and silence insertion on underflow), which are not
+ * smoothed.
+ *
+ * Buffer underflow events can occur when packets arrive too late. In that case,
+ * the regulator inserts silence. Once the packets finally arrive (late), one
+ * strategy could be to drop the samples that were replaced by silence, in
+ * order to keep a minimal latency. However, dropping samples in case of buffer
+ * underflow is inadvisable, as it would temporarily increase the underflow
+ * even more and cause very noticeable audio glitches.
+ *
+ * Therefore, the regulator doesn't drop any sample on underflow. The
+ * compensation mechanism will absorb the delay introduced by the inserted
+ * silence.
+ */
+
+#define TO_BYTES(SAMPLES) sc_audiobuf_to_bytes(&ar->buf, (SAMPLES))
+#define TO_SAMPLES(BYTES) sc_audiobuf_to_samples(&ar->buf, (BYTES))
+
+void
+sc_audio_regulator_pull(struct sc_audio_regulator *ar, uint8_t *out,
+                        uint32_t out_samples) {
+#ifdef SC_AUDIO_REGULATOR_DEBUG
+    LOGD("[Audio] Audio regulator pulls %" PRIu32 " samples", out_samples);
+#endif
+
+    // A lock is necessary in the rare case where the producer needs to drop
+    // samples already pushed (when the buffer is full)
+    sc_mutex_lock(&ar->mutex);
+
+    bool played = atomic_load_explicit(&ar->played, memory_order_relaxed);
+    if (!played) {
+        uint32_t buffered_samples = sc_audiobuf_can_read(&ar->buf);
+        // Wait until the buffer is filled up to at least target_buffering
+        // before playing
+        if (buffered_samples < ar->target_buffering) {
+            LOGV("[Audio] Inserting initial buffering silence: %" PRIu32
+                 " samples", out_samples);
+            // Delay playback starting to reach the target buffering. Fill the
+            // whole buffer with silence (len is small compared to the
+            // arbitrary margin value).
+            memset(out, 0, out_samples * ar->sample_size);
+            sc_mutex_unlock(&ar->mutex);
+            return;
+        }
+    }
+
+    uint32_t read = sc_audiobuf_read(&ar->buf, out, out_samples);
+
+    sc_mutex_unlock(&ar->mutex);
+
+    if (read < out_samples) {
+        uint32_t silence = out_samples - read;
+        // Insert silence. In theory, the inserted silent samples replace the
+        // missing real samples, which will arrive later, so they should be
+        // dropped to keep the latency minimal. However, this would cause very
+        // audible glitches, so let the clock compensation restore the target
+        // latency.
+        LOGD("[Audio] Buffer underflow, inserting silence: %" PRIu32 " samples",
+             silence);
+        memset(out + TO_BYTES(read), 0, TO_BYTES(silence));
+
+        bool received = atomic_load_explicit(&ar->received,
+                                             memory_order_relaxed);
+        if (received) {
+            // Inserting additional samples immediately increases buffering
+            atomic_fetch_add_explicit(&ar->underflow, silence,
+                                      memory_order_relaxed);
+        }
+    }
+
+    atomic_store_explicit(&ar->played, true, memory_order_relaxed);
+}
+
+static uint8_t *
+sc_audio_regulator_get_swr_buf(struct sc_audio_regulator *ar,
+                               uint32_t min_samples) {
+    size_t min_buf_size = TO_BYTES(min_samples);
+    if (min_buf_size > ar->swr_buf_alloc_size) {
+        size_t new_size = min_buf_size + 4096;
+        uint8_t *buf = realloc(ar->swr_buf, new_size);
+        if (!buf) {
+            LOG_OOM();
+            // Could not realloc to the requested size
+            return NULL;
+        }
+        ar->swr_buf = buf;
+        ar->swr_buf_alloc_size = new_size;
+    }
+
+    return ar->swr_buf;
+}
+
+bool
+sc_audio_regulator_push(struct sc_audio_regulator *ar, const AVFrame *frame) {
+    SwrContext *swr_ctx = ar->swr_ctx;
+
+    int64_t swr_delay = swr_get_delay(swr_ctx, ar->sample_rate);
+    // No need to av_rescale_rnd(), input and output sample rates are the same.
+    // Add more space (256) for clock compensation.
+    int dst_nb_samples = swr_delay + frame->nb_samples + 256;
+
+    uint8_t *swr_buf = sc_audio_regulator_get_swr_buf(ar, dst_nb_samples);
+    if (!swr_buf) {
+        return false;
+    }
+
+    int ret = swr_convert(swr_ctx, &swr_buf, dst_nb_samples,
+                          (const uint8_t **) frame->data, frame->nb_samples);
+    if (ret < 0) {
+        LOGE("Resampling failed: %d", ret);
+        return false;
+    }
+
+    // swr_convert() returns the number of samples which would have been
+    // written if the buffer was big enough.
+    uint32_t samples = MIN(ret, dst_nb_samples);
+#ifdef SC_AUDIO_REGULATOR_DEBUG
+    LOGD("[Audio] %" PRIu32 " samples written to buffer", samples);
+#endif
+
+    uint32_t cap = sc_audiobuf_capacity(&ar->buf);
+    if (samples > cap) {
+        // Very very unlikely: a single resampled frame should never
+        // exceed the audio buffer size (or something is very wrong).
+        // Ignore the first bytes in swr_buf to avoid memory corruption anyway.
+        swr_buf += TO_BYTES(samples - cap);
+        samples = cap;
+    }
+
+    uint32_t skipped_samples = 0;
+
+    uint32_t written = sc_audiobuf_write(&ar->buf, swr_buf, samples);
+    if (written < samples) {
+        uint32_t remaining = samples - written;
+
+        // All samples that could be written without locking have been written,
+        // now we need to lock to drop/consume old samples
+        sc_mutex_lock(&ar->mutex);
+
+        // Retry with the lock
+        written += sc_audiobuf_write(&ar->buf,
+                                     swr_buf + TO_BYTES(written),
+                                     remaining);
+        if (written < samples) {
+            remaining = samples - written;
+            // Still insufficient, drop old samples to make space
+            skipped_samples = sc_audiobuf_read(&ar->buf, NULL, remaining);
+            assert(skipped_samples == remaining);
+        }
+
+        sc_mutex_unlock(&ar->mutex);
+
+        if (written < samples) {
+            // Now there is enough space
+            uint32_t w = sc_audiobuf_write(&ar->buf,
+                                           swr_buf + TO_BYTES(written),
+                                           remaining);
+            assert(w == remaining);
+            (void) w;
+        }
+    }
+
+    uint32_t underflow = 0;
+    uint32_t max_buffered_samples;
+    bool played = atomic_load_explicit(&ar->played, memory_order_relaxed);
+    if (played) {
+        underflow = atomic_exchange_explicit(&ar->underflow, 0,
+                                             memory_order_relaxed);
+
+        max_buffered_samples = ar->target_buffering * 11 / 10
+                             + 60 * ar->sample_rate / 1000 /* 60 ms */;
+    } else {
+        // Playback not started yet, do not accumulate more than
+        // max_initial_buffering samples, this would cause unnecessary delay
+        // (and glitches to compensate) on start.
+        max_buffered_samples = ar->target_buffering
+                             + 10 * ar->sample_rate / 1000 /* 10 ms */;
+    }
+
+    uint32_t can_read = sc_audiobuf_can_read(&ar->buf);
+    if (can_read > max_buffered_samples) {
+        uint32_t skip_samples = 0;
+
+        sc_mutex_lock(&ar->mutex);
+        can_read = sc_audiobuf_can_read(&ar->buf);
+        if (can_read > max_buffered_samples) {
+            skip_samples = can_read - max_buffered_samples;
+            uint32_t r = sc_audiobuf_read(&ar->buf, NULL, skip_samples);
+            assert(r == skip_samples);
+            (void) r;
+            skipped_samples += skip_samples;
+        }
+        sc_mutex_unlock(&ar->mutex);
+
+        if (skip_samples) {
+            if (played) {
+                LOGD("[Audio] Buffering threshold exceeded, skipping %" PRIu32
+                     " samples", skip_samples);
+#ifdef SC_AUDIO_REGULATOR_DEBUG
+            } else {
+                LOGD("[Audio] Playback not started, skipping %" PRIu32
+                     " samples", skip_samples);
+#endif
+            }
+        }
+    }
+
+    atomic_store_explicit(&ar->received, true, memory_order_relaxed);
+    if (!played) {
+        // Nothing more to do
+        return true;
+    }
+
+    // Number of samples added (or removed, if negative) for compensation
+    int32_t instant_compensation = (int32_t) written - frame->nb_samples;
+    // Inserting silence instantly increases buffering
+    int32_t inserted_silence = (int32_t) underflow;
+    // Dropping input samples instantly decreases buffering
+    int32_t dropped = (int32_t) skipped_samples;
+
+    // The compensation must apply instantly, it must not be smoothed
+    ar->avg_buffering.avg += instant_compensation + inserted_silence - dropped;
+    if (ar->avg_buffering.avg < 0) {
+        // Since dropping samples instantly reduces buffering, the difference
+        // is applied immediately to the average value, assuming that the delay
+        // between the producer and the consumer will be caught up.
+        //
+        // However, when this assumption is not valid, the average buffering
+        // may decrease indefinitely. Prevent it to become negative to limit
+        // the consequences.
+        ar->avg_buffering.avg = 0;
+    }
+
+    // However, the buffering level must be smoothed
+    sc_average_push(&ar->avg_buffering, can_read);
+
+#ifdef SC_AUDIO_REGULATOR_DEBUG
+    LOGD("[Audio] can_read=%" PRIu32 " avg_buffering=%f",
+         can_read, sc_average_get(&ar->avg_buffering));
+#endif
+
+    ar->samples_since_resync += written;
+    if (ar->samples_since_resync >= ar->sample_rate) {
+        // Recompute compensation every second
+        ar->samples_since_resync = 0;
+
+        float avg = sc_average_get(&ar->avg_buffering);
+        int diff = ar->target_buffering - avg;
+
+        // Enable compensation when the difference exceeds +/- 4ms.
+        // Disable compensation when the difference is lower than +/- 1ms.
+        int threshold = ar->compensation != 0
+                      ? ar->sample_rate     / 1000  /* 1ms */
+                      : ar->sample_rate * 4 / 1000; /* 4ms */
+
+        if (abs(diff) < threshold) {
+            // Do not compensate for small values, the error is just noise
+            diff = 0;
+        } else if (diff < 0 && can_read < ar->target_buffering) {
+            // Do not accelerate if the instant buffering level is below the
+            // target, this would increase underflow
+            diff = 0;
+        }
+        // Compensate the diff over 4 seconds (but will be recomputed after 1
+        // second)
+        int distance = 4 * ar->sample_rate;
+        // Limit compensation rate to 2%
+        int abs_max_diff = distance / 50;
+        diff = CLAMP(diff, -abs_max_diff, abs_max_diff);
+        LOGV("[Audio] Buffering: target=%" PRIu32 " avg=%f cur=%" PRIu32
+             " compensation=%d", ar->target_buffering, avg, can_read, diff);
+
+        if (diff != ar->compensation) {
+            int ret = swr_set_compensation(swr_ctx, diff, distance);
+            if (ret < 0) {
+                LOGW("Resampling compensation failed: %d", ret);
+                // not fatal
+            } else {
+                ar->compensation = diff;
+            }
+        }
+    }
+
+    return true;
+}
+
+bool
+sc_audio_regulator_init(struct sc_audio_regulator *ar, size_t sample_size,
+                        const AVCodecContext *ctx, uint32_t target_buffering) {
+    SwrContext *swr_ctx = swr_alloc();
+    if (!swr_ctx) {
+        LOG_OOM();
+        return false;
+    }
+    ar->swr_ctx = swr_ctx;
+
+#ifdef SCRCPY_LAVU_HAS_CHLAYOUT
+    av_opt_set_chlayout(swr_ctx, "in_chlayout", &ctx->ch_layout, 0);
+    av_opt_set_chlayout(swr_ctx, "out_chlayout", &ctx->ch_layout, 0);
+#else
+    av_opt_set_channel_layout(swr_ctx, "in_channel_layout",
+                              ctx->channel_layout, 0);
+    av_opt_set_channel_layout(swr_ctx, "out_channel_layout",
+                              ctx->channel_layout, 0);
+#endif
+
+    av_opt_set_int(swr_ctx, "in_sample_rate", ctx->sample_rate, 0);
+    av_opt_set_int(swr_ctx, "out_sample_rate", ctx->sample_rate, 0);
+
+    av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", ctx->sample_fmt, 0);
+    av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", SC_AV_SAMPLE_FMT, 0);
+
+    int ret = swr_init(swr_ctx);
+    if (ret) {
+        LOGE("Failed to initialize the resampling context");
+        goto error_free_swr_ctx;
+    }
+
+    bool ok = sc_mutex_init(&ar->mutex);
+    if (!ok) {
+        goto error_free_swr_ctx;
+    }
+
+    ar->target_buffering = target_buffering;
+    ar->sample_size = sample_size;
+    ar->sample_rate = ctx->sample_rate;
+
+    // Use a ring-buffer of the target buffering size plus 1 second between the
+    // producer and the consumer. It's too big on purpose, to guarantee that
+    // the producer and the consumer will be able to access it in parallel
+    // without locking.
+    uint32_t audiobuf_samples = target_buffering + ar->sample_rate;
+
+    ok = sc_audiobuf_init(&ar->buf, sample_size, audiobuf_samples);
+    if (!ok) {
+        goto error_destroy_mutex;
+    }
+
+    size_t initial_swr_buf_size = TO_BYTES(4096);
+    ar->swr_buf = malloc(initial_swr_buf_size);
+    if (!ar->swr_buf) {
+        LOG_OOM();
+        goto error_destroy_audiobuf;
+    }
+    ar->swr_buf_alloc_size = initial_swr_buf_size;
+
+    // Samples are produced and consumed by blocks, so the buffering must be
+    // smoothed to get a relatively stable value.
+    sc_average_init(&ar->avg_buffering, 128);
+    ar->samples_since_resync = 0;
+
+    ar->received = false;
+    atomic_init(&ar->played, false);
+    atomic_init(&ar->received, false);
+    atomic_init(&ar->underflow, 0);
+    ar->compensation = 0;
+
+    return true;
+
+error_destroy_audiobuf:
+    sc_audiobuf_destroy(&ar->buf);
+error_destroy_mutex:
+    sc_mutex_destroy(&ar->mutex);
+error_free_swr_ctx:
+    swr_free(&ar->swr_ctx);
+
+    return false;
+}
+
+void
+sc_audio_regulator_destroy(struct sc_audio_regulator *ar) {
+    free(ar->swr_buf);
+    sc_audiobuf_destroy(&ar->buf);
+    sc_mutex_destroy(&ar->mutex);
+    swr_free(&ar->swr_ctx);
+}
--- a/app/src/audio_regulator.h
+++ b/app/src/audio_regulator.h
@ -0,0 +1,71 @@
+#ifndef SC_AUDIO_REGULATOR_H
+#define SC_AUDIO_REGULATOR_H
+
+#include "common.h"
+
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <libavcodec/avcodec.h>
+#include <libswresample/swresample.h>
+#include "util/audiobuf.h"
+#include "util/average.h"
+#include "util/thread.h"
+
+#define SC_AV_SAMPLE_FMT AV_SAMPLE_FMT_FLT
+
+struct sc_audio_regulator {
+    sc_mutex mutex;
+
+    // Target buffering between the producer and the consumer (in samples)
+    uint32_t target_buffering;
+
+    // Audio buffer to communicate between the receiver and the player
+    struct sc_audiobuf buf;
+
+    // Resampler (only used from the receiver thread)
+    struct SwrContext *swr_ctx;
+
+    // The sample rate is the same for input and output
+    uint32_t sample_rate;
+    // The number of bytes per sample (for all channels)
+    size_t sample_size;
+
+    // Target buffer for resampling (only used by the receiver thread)
+    uint8_t *swr_buf;
+    size_t swr_buf_alloc_size;
+
+    // Number of buffered samples (may be negative on underflow) (only used by
+    // the receiver thread)
+    struct sc_average avg_buffering;
+    // Count the number of samples to trigger a compensation update regularly
+    // (only used by the receiver thread)
+    uint32_t samples_since_resync;
+
+    // Number of silence samples inserted since the last received packet
+    atomic_uint_least32_t underflow;
+
+    // Current applied compensation value (only used by the receiver thread)
+    int compensation;
+
+    // Set to true the first time a sample is received
+    atomic_bool received;
+
+    // Set to true the first time samples are pulled by the player
+    atomic_bool played;
+};
+
+bool
+sc_audio_regulator_init(struct sc_audio_regulator *ar, size_t sample_size,
+                        const AVCodecContext *ctx, uint32_t target_buffering);
+
+void
+sc_audio_regulator_destroy(struct sc_audio_regulator *ar);
+
+bool
+sc_audio_regulator_push(struct sc_audio_regulator *ar, const AVFrame *frame);
+
+void
+sc_audio_regulator_pull(struct sc_audio_regulator *ar, uint8_t *out,
+                        uint32_t samples);
+
+#endif