A delay of about 0.5 seconds occurs when using bungee for playback

Thank you for your support.
I’m developing an app that performs real-time playback using Bungee, but playback through Bungee is delayed by about 0.5 seconds compared to playback without it.
I would like to confirm whether I am using bungee correctly.
I’m using JUCE (v8.0.12) as the framework.
For bungee, I’m using the v2.4.10 tag.
The source code for the part where it’s used is shown below.
I can share more of the code if that would help, so please let me know.

Header

// Bundles a Bungee stretcher with the streaming wrapper that drives it.
//
// Declaration order matters: `stream` is constructed from a reference to
// `stretcher`, so `stretcher` must be declared (and therefore initialised)
// first.
struct BungeeState {
    Bungee::Stretcher<Bungee::Basic> stretcher;
    Bungee::Stream<Bungee::Basic> stream;

    // rates:              input/output sample rates handed to the stretcher.
    // channels:           channel count shared by the stretcher and the stream.
    // maxInputFrameCount: forwarded to the stream's constructor.
    BungeeState(Bungee::SampleRates rates, int channels, int maxInputFrameCount)
        : stretcher(rates, channels),
          stream(stretcher, maxInputFrameCount, channels)
    {
    }
};

...

// Stretcher/stream pair used by getNextAudioBlock(). initBungeeStream() resets
// it to null when no valid source buffer, sample rate or channel count is
// available; getNextAudioBlock() then leaves the (already cleared) output silent.
std::unique_ptr<BungeeState> bungeeState;

Source

void ClipAudioSourceModel::initBungeeStream() {
    if (sr <= kZeroSeconds) {
        bungeeState.reset();
        return;
    }
    const auto* srcBuffer = originalBuffer ? originalBuffer.get() : audioBuffer.get();
    if (!srcBuffer) {
        bungeeState.reset();
        return;
    }
    const double srcRate = originalBuffer ? originalSampleRate : sourceSampleRate;
    if (srcRate <= kZeroSeconds) {
        bungeeState.reset();
        return;
    }
    const int channels = srcBuffer->getNumChannels();
    if (channels <= kZeroSamples) {
        bungeeState.reset();
        return;
    }
    const int maxInputFrameCount = preparedBlockSize * kMaxSpeedMultiplier;
    Bungee::SampleRates rates{static_cast<int>(srcRate), static_cast<int>(sr)};
    bungeeState = std::make_unique<BungeeState>(rates, channels, maxInputFrameCount);
}

// Renders the next block of the clip through the Bungee stream into bufferToFill.
//
// The active output region is cleared first, so every early return below
// produces silence. For each call the function:
//   1. rebuilds the Bungee stream if a reset was requested,
//   2. works out how many source frames are needed for numSamples output
//      frames at the current stretch ratio and sample-rate ratio,
//   3. points inputPtrs_ either directly at the source buffer or at
//      tempInputBuf_ (zero-padded past the end of the source, or expanded
//      according to the loop layout: leading silence, content, blank),
//   4. runs the stream once and advances readPosition by the frames consumed.
void ClipAudioSourceModel::getNextAudioBlock(const juce::AudioSourceChannelInfo& bufferToFill) {
    bufferToFill.clearActiveBufferRegion();

    // Initialize Bungee Stream (on rate change, seek, or ratio change)
    // NOTE(review): initBungeeStream() calls std::make_unique, so a rebuild
    // allocates on whichever thread runs this function — confirm that is
    // acceptable if this is called from the audio callback.
    if (needsStreamReset.exchange(false, std::memory_order_acquire)) {
        initBungeeStream();
    }

    if (!bungeeState || !bufferToFill.buffer) {
        return;
    }

    // Select source buffer (originalBuffer has priority)
    const auto* srcBuffer = originalBuffer ? originalBuffer.get() : audioBuffer.get();
    if (!srcBuffer) {
        return;
    }
    const double srcRate = originalBuffer ? originalSampleRate : sourceSampleRate;
    if (srcRate <= kZeroSeconds || sr <= kZeroSeconds) {
        return;
    }

    const int outputChannels  = bufferToFill.buffer->getNumChannels();
    const int inputChannels   = srcBuffer->getNumChannels();
    const int channelsToWrite = juce::jmin(outputChannels, inputChannels);
    if (channelsToWrite <= kZeroSamples) {
        return;
    }
    // NOTE(review): only the first channelsToWrite entries of inputPtrs_ and
    // outputPtrs_ are refreshed below, while initBungeeStream() builds the
    // stream with the source's channel count. If the output has fewer channels
    // than the source, confirm the stream never touches the remaining entries.

    const int    numSamples   = bufferToFill.numSamples;
    const double stretchRatio = stretchSpeedRatio.load(std::memory_order_relaxed);
    const double pitchRatio   = pitchShiftRatio.load(std::memory_order_relaxed);

    // Loop-related calculations (when no loop, contentDuration = cropEnd - cropStart)
    const double contentDurationSec = loopCount > 0 ? loopEndSec - loopStartSec : cropEnd - cropStart;
    const double loopCycleSec       = loopCount > 0
        ? loopLeadingSec + contentDurationSec + loopBlankSec
        : contentDurationSec;
    const juce::int64 contentSamples   = static_cast<juce::int64>(contentDurationSec * srcRate);
    const juce::int64 leadingSamples   = static_cast<juce::int64>(loopLeadingSec * srcRate);
    const juce::int64 loopCycleSamples = static_cast<juce::int64>(loopCycleSec * srcRate);

    const juce::int64 totalLengthSamples = loopCount > 0
        ? static_cast<juce::int64>(loopCount) * loopCycleSamples
        : contentSamples;

    // Remaining source samples
    const juce::int64 samplesRemaining = juce::jmax(0LL, totalLengthSamples - readPosition);
    if (samplesRemaining <= 0LL) {
        return;
    }

    // Required input samples for output numSamples
    // inputNeeded = numSamples * stretchRatio * srcRate / sr
    const double exactInputNeeded = static_cast<double>(numSamples) * stretchRatio * srcRate / sr;
    if (!(exactInputNeeded > 0.0)) {
        // Written as a negated comparison so that NaN is rejected too, before
        // it can reach the float-to-int conversion below.
        return;
    }
    const int inputSamplesNeeded = static_cast<int>(std::ceil(exactInputNeeded));

    // Clamp in 64 bits first: samplesRemaining can exceed INT_MAX for long
    // looped clips, and narrowing it before the min would corrupt the result.
    const int samplesToRead = static_cast<int>(
        juce::jmin(static_cast<juce::int64>(inputSamplesNeeded), samplesRemaining));
    if (samplesToRead <= kZeroSamples) {
        return;
    }

    // Adjust outputFrameCount proportional to samplesToRead so that Bungee internal speed is correct
    // Reference: cmd/main.cpp → outputFrameCountIdeal = (inputSampleCount * sr) / (speed * srcRate)
    // When speed = stretchRatio: outputFrameCountIdeal = samplesToRead * sr / (stretchRatio * srcRate)
    const double outputFrameCountIdeal =
        static_cast<double>(samplesToRead) * sr / (stretchRatio * srcRate);

    // Set pointers to output buffer (using preallocated outputPtrs_)
    for (int ch = 0; ch < channelsToWrite; ++ch) {
        outputPtrs_[static_cast<size_t>(ch)] = bufferToFill.buffer->getWritePointer(ch, bufferToFill.startSample);
    }

    const juce::int64 bufferTotal = static_cast<juce::int64>(srcBuffer->getNumSamples());

    // NOTE(review): both tempInputBuf_ paths below assume it holds at least
    // channelsToWrite channels of samplesToRead samples — confirm where it is
    // allocated.
    if (loopCount == 0) {
        // ── No loop ──
        const juce::int64 sourceReadPos = static_cast<juce::int64>(cropStart * srcRate) + readPosition;

        if (sourceReadPos + samplesToRead <= bufferTotal) {
            // Within bounds: use direct pointers (using preallocated inputPtrs_)
            for (int ch = 0; ch < channelsToWrite; ++ch) {
                inputPtrs_[static_cast<size_t>(ch)] = srcBuffer->getReadPointer(ch, static_cast<int>(sourceReadPos));
            }
        } else {
            // Crossing boundary: use preallocated tempInputBuf_ with zero-padding
            tempInputBuf_.clear();

            // Same for every channel, so computed once outside the loop.
            const juce::int64 available = juce::jmax(0LL, bufferTotal - sourceReadPos);
            const int copyCount = static_cast<int>(juce::jmin(available, static_cast<juce::int64>(samplesToRead)));

            for (int ch = 0; ch < channelsToWrite; ++ch) {
                if (copyCount > 0) {
                    tempInputBuf_.copyFrom(ch, 0, *srcBuffer, ch, static_cast<int>(sourceReadPos), copyCount);
                }
                inputPtrs_[static_cast<size_t>(ch)] = tempInputBuf_.getReadPointer(ch);
            }
        }
    } else {
        // ── Loop enabled ──
        // A zero-length cycle would make the modulo below divide by zero.
        if (loopCycleSamples <= 0) {
            return;
        }

        // Compute current position in loop cycle and build per-channel buffer
        const juce::int64 loopCyclePos  = readPosition % loopCycleSamples;
        const double contentStartSec    = cropStart + (loopStartSec - startTime);
        const juce::int64 contentSrcPos = static_cast<juce::int64>(contentStartSec * srcRate);
        const juce::int64 contentEnd    = leadingSamples + contentSamples;

        // Expand loop data into preallocated tempInputBuf_ (no heap allocation)
        tempInputBuf_.clear();
        for (int ch = 0; ch < channelsToWrite; ++ch) {
            const float* sourcePtr = srcBuffer->getReadPointer(ch);
            float* buf = tempInputBuf_.getWritePointer(ch);

            for (int i = 0; i < samplesToRead; ++i) {
                const juce::int64 posInCycle = (loopCyclePos + i) % loopCycleSamples;

                // leading/blank: already zero-filled by clear()
                if (posInCycle < leadingSamples || posInCycle >= contentEnd) {
                    continue;
                }

                // Outside the source buffer: also left at the zero from clear()
                const juce::int64 srcIdx = contentSrcPos + (posInCycle - leadingSamples);
                if (srcIdx >= 0 && srcIdx < bufferTotal) {
                    buf[i] = sourcePtr[static_cast<int>(srcIdx)];
                }
            }
            inputPtrs_[static_cast<size_t>(ch)] = tempInputBuf_.getReadPointer(ch);
        }
    }

    // NOTE(review): the value returned by process() is not inspected. Because
    // samplesToRead is rounded up, outputFrameCountIdeal can exceed numSamples
    // by a fraction — confirm against Bungee's Stream.h that the stream can
    // never render more than numSamples frames into bufferToFill.
    bungeeState->stream.process(
        inputPtrs_.data(), outputPtrs_.data(),
        samplesToRead,
        outputFrameCountIdeal,
        pitchRatio);

    readPosition += samplesToRead;
}

Real-time spectral processing always introduces latency equal to the hop size. Just report that as the latency of your plugin, and DAWs will compensate for it.

Thank you for replying.
In my environment, the device’s sample rate is 48 kHz, so the theoretical latency should be around 10 ms, coming from the hop size. However, the actual delay I’m experiencing feels noticeably larger than that.
I can’t help but feel that there must be factors other than the hop size involved.