/*-------------------------------------------------------------------------
 * drawElements Quality Program OpenGL ES 3.0 Module
 * -------------------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Buffer data upload performance tests.
 *//*--------------------------------------------------------------------*/

#include "es3pBufferDataUploadTests.hpp"
#include "glsCalibration.hpp"
#include "tcuTestLog.hpp"
#include "tcuVectorUtil.hpp"
#include "tcuSurface.hpp"
#include "tcuCPUWarmup.hpp"
#include "tcuRenderTarget.hpp"
#include "gluRenderContext.hpp"
#include "gluShaderProgram.hpp"
#include "gluStrUtil.hpp"
#include "gluPixelTransfer.hpp"
#include "gluObjectWrapper.hpp"
#include "glwFunctions.hpp"
#include "glwEnums.hpp"
#include "deClock.h"
#include "deMath.h"
#include "deStringUtil.hpp"
#include "deRandom.hpp"
#include "deMemory.h"
#include "deThread.h"
#include "deMeta.hpp"

#include <algorithm>
#include <iomanip>
#include <limits>

namespace deqp
{
namespace gles3
{
namespace Performance
{
namespace
{

using de::meta::EnableIf;
using de::meta::Not;
using gls::LineParametersWithConfidence;
using gls::theilSenSiegelLinearRegression;

static const char *const s_minimalVertexShader = "#version 300 es\n"
                                                 "in highp vec4 a_position;\n"
                                                 "void main (void)\n"
                                                 "{\n"
                                                 "    gl_Position = a_position;\n"
                                                 "}\n";

static const char *const s_minimalFragnentShader = "#version 300 es\n"
                                                   "layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
                                                   "void main (void)\n"
                                                   "{\n"
                                                   "    dEQP_FragColor = vec4(1.0, 0.0, 0.0, 1.0);\n"
                                                   "}\n";

static const char *const s_colorVertexShader = "#version 300 es\n"
                                               "in highp vec4 a_position;\n"
                                               "in highp vec4 a_color;\n"
                                               "out highp vec4 v_color;\n"
                                               "void main (void)\n"
                                               "{\n"
                                               "    gl_Position = a_position;\n"
                                               "    v_color = a_color;\n"
                                               "}\n";

static const char *const s_colorFragmentShader = "#version 300 es\n"
                                                 "layout(location = 0) out mediump vec4 dEQP_FragColor;\n"
                                                 "in mediump vec4 v_color;\n"
                                                 "void main (void)\n"
                                                 "{\n"
                                                 "    dEQP_FragColor = v_color;\n"
                                                 "}\n";

struct SingleOperationDuration
{
    uint64_t totalDuration;
    uint64_t fitResponseDuration; // used for fitting
};

struct MapBufferRangeDuration
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t allocDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

struct MapBufferRangeDurationNoAlloc
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

struct MapBufferRangeFlushDuration
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t flushDuration;
    uint64_t allocDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

struct MapBufferRangeFlushDurationNoAlloc
{
    uint64_t mapDuration;
    uint64_t unmapDuration;
    uint64_t writeDuration;
    uint64_t flushDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

struct RenderReadDuration
{
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t renderReadDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

struct UnrelatedUploadRenderReadDuration
{
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t renderReadDuration;
    uint64_t totalDuration;

    uint64_t fitResponseDuration;
};

struct UploadRenderReadDuration
{
    uint64_t uploadDuration;
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration;
};

struct UploadRenderReadDurationWithUnrelatedUploadSize
{
    uint64_t uploadDuration;
    uint64_t renderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration;
};

struct RenderUploadRenderReadDuration
{
    uint64_t firstRenderDuration;
    uint64_t uploadDuration;
    uint64_t secondRenderDuration;
    uint64_t readDuration;
    uint64_t totalDuration;
    uint64_t renderReadDuration;

    uint64_t fitResponseDuration;
};

template <typename SampleT>
struct UploadSampleResult
{
    typedef SampleT SampleType;

    int bufferSize;
    int allocatedSize;
    int writtenSize;
    SampleType duration;
};

template <typename SampleT>
struct RenderSampleResult
{
    typedef SampleT SampleType;

    int uploadedDataSize;
    int renderDataSize;
    int unrelatedDataSize;
    int numVertices;
    SampleT duration;
};

struct SingleOperationStatistics
{
    float minTime;
    float maxTime;
    float medianTime;
    float min2DecileTime; // !< minimum value in the 2nd decile
    float max9DecileTime; // !< maximum value in the 9th decile
};

struct SingleCallStatistics
{
    SingleOperationStatistics result;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};

struct MapCallStatistics
{
    SingleOperationStatistics map;
    SingleOperationStatistics unmap;
    SingleOperationStatistics write;
    SingleOperationStatistics alloc;
    SingleOperationStatistics result;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};

struct MapFlushCallStatistics
{
    SingleOperationStatistics map;
    SingleOperationStatistics unmap;
    SingleOperationStatistics write;
    SingleOperationStatistics flush;
    SingleOperationStatistics alloc;
    SingleOperationStatistics result;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};

struct RenderReadStatistics
{
    SingleOperationStatistics render;
    SingleOperationStatistics read;
    SingleOperationStatistics result;
    SingleOperationStatistics total;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};

struct UploadRenderReadStatistics
{
    SingleOperationStatistics upload;
    SingleOperationStatistics render;
    SingleOperationStatistics read;
    SingleOperationStatistics result;
    SingleOperationStatistics total;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};

struct RenderUploadRenderReadStatistics
{
    SingleOperationStatistics firstRender;
    SingleOperationStatistics upload;
    SingleOperationStatistics secondRender;
    SingleOperationStatistics read;
    SingleOperationStatistics result;
    SingleOperationStatistics total;

    float medianRate;
    float maxDiffTime;
    float maxDiff9DecileTime;
    float medianDiffTime;

    float maxRelDiffTime;
    float max9DecileRelDiffTime;
    float medianRelDiffTime;
};

template <typename T>
struct SampleTypeTraits
{
};

template <>
struct SampleTypeTraits<SingleOperationDuration>
{
    typedef SingleCallStatistics StatsType;

    enum
    {
        HAS_MAP_STATS = 0
    };
    enum
    {
        HAS_UNMAP_STATS = 0
    };
    enum
    {
        HAS_WRITE_STATS = 0
    };
    enum
    {
        HAS_FLUSH_STATS = 0
    };
    enum
    {
        HAS_ALLOC_STATS = 0
    };
    enum
    {
        LOG_CONTRIBUTIONS = 0
    };
};

template <>
struct SampleTypeTraits<MapBufferRangeDuration>
{
    typedef MapCallStatistics StatsType;

    enum
    {
        HAS_MAP_STATS = 1
    };
    enum
    {
        HAS_UNMAP_STATS = 1
    };
    enum
    {
        HAS_WRITE_STATS = 1
    };
    enum
    {
        HAS_FLUSH_STATS = 0
    };
    enum
    {
        HAS_ALLOC_STATS = 1
    };
    enum
    {
        LOG_CONTRIBUTIONS = 1
    };
};

template <>
struct SampleTypeTraits<MapBufferRangeDurationNoAlloc>
{
    typedef MapCallStatistics StatsType;

    enum
    {
        HAS_MAP_STATS = 1
    };
    enum
    {
        HAS_UNMAP_STATS = 1
    };
    enum
    {
        HAS_WRITE_STATS = 1
    };
    enum
    {
        HAS_FLUSH_STATS = 0
    };
    enum
    {
        HAS_ALLOC_STATS = 0
    };
    enum
    {
        LOG_CONTRIBUTIONS = 1
    };
};

template <>
struct SampleTypeTraits<MapBufferRangeFlushDuration>
{
    typedef MapFlushCallStatistics StatsType;

    enum
    {
        HAS_MAP_STATS = 1
    };
    enum
    {
        HAS_UNMAP_STATS = 1
    };
    enum
    {
        HAS_WRITE_STATS = 1
    };
    enum
    {
        HAS_FLUSH_STATS = 1
    };
    enum
    {
        HAS_ALLOC_STATS = 1
    };
    enum
    {
        LOG_CONTRIBUTIONS = 1
    };
};

template <>
struct SampleTypeTraits<MapBufferRangeFlushDurationNoAlloc>
{
    typedef MapFlushCallStatistics StatsType;

    enum
    {
        HAS_MAP_STATS = 1
    };
    enum
    {
        HAS_UNMAP_STATS = 1
    };
    enum
    {
        HAS_WRITE_STATS = 1
    };
    enum
    {
        HAS_FLUSH_STATS = 1
    };
    enum
    {
        HAS_ALLOC_STATS = 0
    };
    enum
    {
        LOG_CONTRIBUTIONS = 1
    };
};

template <>
struct SampleTypeTraits<RenderReadDuration>
{
    typedef RenderReadStatistics StatsType;

    enum
    {
        HAS_RENDER_STATS = 1
    };
    enum
    {
        HAS_READ_STATS = 1
    };
    enum
    {
        HAS_UPLOAD_STATS = 0
    };
    enum
    {
        HAS_TOTAL_STATS = 1
    };
    enum
    {
        HAS_FIRST_RENDER_STATS = 0
    };
    enum
    {
        HAS_SECOND_RENDER_STATS = 0
    };

    enum
    {
        LOG_CONTRIBUTIONS = 1
    };
};

template <>
struct SampleTypeTraits<UnrelatedUploadRenderReadDuration>
{
    typedef RenderReadStatistics StatsType;

    enum
    {
        HAS_RENDER_STATS = 1
    };
    enum
    {
        HAS_READ_STATS = 1
    };
    enum
    {
        HAS_UPLOAD_STATS = 0
    };
    enum
    {
        HAS_TOTAL_STATS = 1
    };
    enum
    {
        HAS_FIRST_RENDER_STATS = 0
    };
    enum
    {
        HAS_SECOND_RENDER_STATS = 0
    };

    enum
    {
        LOG_CONTRIBUTIONS = 1
    };
};

template <>
struct SampleTypeTraits<UploadRenderReadDuration>
{
    typedef UploadRenderReadStatistics StatsType;

    enum
    {
        HAS_RENDER_STATS = 1
    };
    enum
    {
        HAS_READ_STATS = 1
    };
    enum
    {
        HAS_UPLOAD_STATS = 1
    };
    enum
    {
        HAS_TOTAL_STATS = 1
    };
    enum
    {
        HAS_FIRST_RENDER_STATS = 0
    };
    enum
    {
        HAS_SECOND_RENDER_STATS = 0
    };

    enum
    {
        LOG_CONTRIBUTIONS = 1
    };
    enum
    {
        LOG_UNRELATED_UPLOAD_SIZE = 0
    };
};

template <>
struct SampleTypeTraits<UploadRenderReadDurationWithUnrelatedUploadSize>
{
    typedef UploadRenderReadStatistics StatsType;

    enum
    {
        HAS_RENDER_STATS = 1
    };
    enum
    {
        HAS_READ_STATS = 1
    };
    enum
    {
        HAS_UPLOAD_STATS = 1
    };
    enum
    {
        HAS_TOTAL_STATS = 1
    };
    enum
    {
        HAS_FIRST_RENDER_STATS = 0
    };
    enum
    {
        HAS_SECOND_RENDER_STATS = 0
    };

    enum
    {
        LOG_CONTRIBUTIONS = 1
    };
    enum
    {
        LOG_UNRELATED_UPLOAD_SIZE = 1
    };
};

template <>
struct SampleTypeTraits<RenderUploadRenderReadDuration>
{
    typedef RenderUploadRenderReadStatistics StatsType;

    enum
    {
        HAS_RENDER_STATS = 0
    };
    enum
    {
        HAS_READ_STATS = 1
    };
    enum
    {
        HAS_UPLOAD_STATS = 1
    };
    enum
    {
        HAS_TOTAL_STATS = 1
    };
    enum
    {
        HAS_FIRST_RENDER_STATS = 1
    };
    enum
    {
        HAS_SECOND_RENDER_STATS = 1
    };

    enum
    {
        LOG_CONTRIBUTIONS = 1
    };
    enum
    {
        LOG_UNRELATED_UPLOAD_SIZE = 1
    };
};

struct UploadSampleAnalyzeResult
{
    float transferRateMedian;
    float transferRateAtRange;
    float transferRateAtInfinity;
};

struct RenderSampleAnalyzeResult
{
    float renderRateMedian;
    float renderRateAtRange;
    float renderRateAtInfinity;
};

class UnmapFailureError : public std::exception
{
public:
    UnmapFailureError(void) : std::exception()
    {
    }
};

static std::string getHumanReadableByteSize(int numBytes)
{
    std::ostringstream buf;

    if (numBytes < 1024)
        buf << numBytes << " byte(s)";
    else if (numBytes < 1024 * 1024)
        buf << de::floatToString((float)numBytes / 1024.0f, 1) << " KiB";
    else
        buf << de::floatToString((float)numBytes / 1024.0f / 1024.0f, 1) << " MiB";

    return buf.str();
}

static uint64_t medianTimeMemcpy(void *dst, const void *src, int numBytes)
{
    // Time used by memcpy is assumed to be asymptotically linear

    // With large numBytes, the probability of context switch or other random
    // event is high. Apply memcpy in parts and report how much time would
    // memcpy have used with the median transfer rate.

    // Less than 1MiB, no need to do anything special
    if (numBytes < 1048576)
    {
        uint64_t startTime;
        uint64_t endTime;

        deYield();

        startTime = deGetMicroseconds();
        deMemcpy(dst, src, numBytes);
        endTime = deGetMicroseconds();

        return endTime - startTime;
    }
    else
    {
        // Do memcpy in multiple parts

        const int numSections  = 5;
        const int sectionAlign = 16;

        int sectionStarts[numSections + 1];
        int sectionLens[numSections];
        uint64_t sectionTimes[numSections];
        uint64_t medianTime;
        uint64_t bestTime = 0;

        for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
            sectionStarts[sectionNdx] = deAlign32((numBytes * sectionNdx / numSections), sectionAlign);
        sectionStarts[numSections] = numBytes;

        for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
            sectionLens[sectionNdx] = sectionStarts[sectionNdx + 1] - sectionStarts[sectionNdx];

        // Memcpy is usually called after mapbuffer range which may take
        // a lot of time. To prevent power management from kicking in during
        // copy, warm up more.
        {
            deYield();
            tcu::warmupCPU();
            deYield();
        }

        for (int sectionNdx = 0; sectionNdx < numSections; ++sectionNdx)
        {
            uint64_t startTime;
            uint64_t endTime;

            startTime = deGetMicroseconds();
            deMemcpy((uint8_t *)dst + sectionStarts[sectionNdx], (const uint8_t *)src + sectionStarts[sectionNdx],
                     sectionLens[sectionNdx]);
            endTime = deGetMicroseconds();

            sectionTimes[sectionNdx] = endTime - startTime;

            if (!bestTime || sectionTimes[sectionNdx] < bestTime)
                bestTime = sectionTimes[sectionNdx];

            // Detect if write takes 50% longer than it should, and warm up if that happened
            if (sectionNdx != numSections - 1 && (float)sectionTimes[sectionNdx] > 1.5f * (float)bestTime)
            {
                deYield();
                tcu::warmupCPU();
                deYield();
            }
        }

        std::sort(sectionTimes, sectionTimes + numSections);

        if ((numSections % 2) == 0)
            medianTime = (sectionTimes[numSections / 2 - 1] + sectionTimes[numSections / 2]) / 2;
        else
            medianTime = sectionTimes[numSections / 2];

        return medianTime * numSections;
    }
}

static float busyworkCalculation(float initial, int workSize)
{
    float a = initial;
    int b   = 123;

    for (int ndx = 0; ndx < workSize; ++ndx)
    {
        a = deFloatCos(a + (float)b);
        b = (b + 63) % 107 + de::abs((int)(a * 10.0f));
    }

    return a + (float)b;
}

static void busyWait(int microseconds)
{
    const uint64_t maxSingleWaitTime = 1000; // 1ms
    const uint64_t endTime           = deGetMicroseconds() + microseconds;
    float unused                     = *tcu::warmupCPUInternal::g_unused.m_v;
    int workSize                     = 500;

    // exponentially increase work, cap to 1ms
    while (deGetMicroseconds() < endTime)
    {
        const uint64_t startTime = deGetMicroseconds();
        uint64_t totalTime;

        unused = busyworkCalculation(unused, workSize);

        totalTime = deGetMicroseconds() - startTime;

        if (totalTime >= maxSingleWaitTime)
            break;
        else
            workSize *= 2;
    }

    // "wait"
    while (deGetMicroseconds() < endTime)
        unused = busyworkCalculation(unused, workSize);

    *tcu::warmupCPUInternal::g_unused.m_v = unused;
}

// Sample from given values using linear interpolation at a given position as if values were laid to range [0, 1]
template <typename T>
static float linearSample(const std::vector<T> &values, float position)
{
    DE_ASSERT(position >= 0.0f);
    DE_ASSERT(position <= 1.0f);

    const float floatNdx            = (float)(values.size() - 1) * position;
    const int lowerNdx              = (int)deFloatFloor(floatNdx);
    const int higherNdx             = lowerNdx + 1;
    const float interpolationFactor = floatNdx - (float)lowerNdx;

    DE_ASSERT(lowerNdx >= 0 && lowerNdx < (int)values.size());
    DE_ASSERT(higherNdx >= 0 && higherNdx < (int)values.size());
    DE_ASSERT(interpolationFactor >= 0 && interpolationFactor < 1.0f);

    return tcu::mix((float)values[lowerNdx], (float)values[higherNdx], interpolationFactor);
}

template <typename T>
SingleOperationStatistics calculateSingleOperationStatistics(const std::vector<T> &samples,
                                                             uint64_t T::SampleType::*target)
{
    SingleOperationStatistics stats;
    std::vector<uint64_t> values(samples.size());

    for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
        values[ndx] = samples[ndx].duration.*target;

    std::sort(values.begin(), values.end());

    stats.minTime        = (float)values.front();
    stats.maxTime        = (float)values.back();
    stats.medianTime     = linearSample(values, 0.5f);
    stats.min2DecileTime = linearSample(values, 0.1f);
    stats.max9DecileTime = linearSample(values, 0.9f);

    return stats;
}

template <typename StatisticsType, typename SampleType>
void calculateBasicStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit,
                              const std::vector<SampleType> &samples, int SampleType::*predictor)
{
    std::vector<uint64_t> values(samples.size());

    for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
        values[ndx] = samples[ndx].duration.fitResponseDuration;

    // median rate
    {
        std::vector<float> processingRates(samples.size());

        for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
        {
            const float timeInSeconds = (float)values[ndx] / 1000.0f / 1000.0f;
            processingRates[ndx]      = (float)(samples[ndx].*predictor) / timeInSeconds;
        }

        std::sort(processingRates.begin(), processingRates.end());

        stats.medianRate = linearSample(processingRates, 0.5f);
    }

    // results compared to the approximation
    {
        std::vector<float> timeDiffs(samples.size());

        for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
        {
            const float prediction = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
            const float actual     = (float)values[ndx];
            timeDiffs[ndx]         = actual - prediction;
        }
        std::sort(timeDiffs.begin(), timeDiffs.end());

        stats.maxDiffTime        = timeDiffs.back();
        stats.maxDiff9DecileTime = linearSample(timeDiffs, 0.9f);
        stats.medianDiffTime     = linearSample(timeDiffs, 0.5f);
    }

    // relative comparison to the approximation
    {
        std::vector<float> relativeDiffs(samples.size());

        for (int ndx = 0; ndx < (int)samples.size(); ++ndx)
        {
            const float prediction = (float)(samples[ndx].*predictor) * fit.coefficient + fit.offset;
            const float actual     = (float)values[ndx];

            // Ignore cases where we predict negative times, or if
            // ratio would be (nearly) infinite: ignore if predicted
            // time is less than 1 microsecond
            if (prediction < 1.0f)
                relativeDiffs[ndx] = 0.0f;
            else
                relativeDiffs[ndx] = (actual - prediction) / prediction;
        }
        std::sort(relativeDiffs.begin(), relativeDiffs.end());

        stats.maxRelDiffTime        = relativeDiffs.back();
        stats.max9DecileRelDiffTime = linearSample(relativeDiffs, 0.9f);
        stats.medianRelDiffTime     = linearSample(relativeDiffs, 0.5f);
    }

    // values calculated using sorted timings

    std::sort(values.begin(), values.end());

    stats.result.minTime        = (float)values.front();
    stats.result.maxTime        = (float)values.back();
    stats.result.medianTime     = linearSample(values, 0.5f);
    stats.result.min2DecileTime = linearSample(values, 0.1f);
    stats.result.max9DecileTime = linearSample(values, 0.9f);
}

template <typename StatisticsType, typename SampleType>
void calculateBasicTransferStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit,
                                      const std::vector<SampleType> &samples)
{
    calculateBasicStatistics(stats, fit, samples, &SampleType::writtenSize);
}

template <typename StatisticsType, typename SampleType>
void calculateBasicRenderStatistics(StatisticsType &stats, const LineParametersWithConfidence &fit,
                                    const std::vector<SampleType> &samples)
{
    calculateBasicStatistics(stats, fit, samples, &SampleType::renderDataSize);
}

static SingleCallStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit, const std::vector<UploadSampleResult<SingleOperationDuration>> &samples)
{
    SingleCallStatistics stats;

    calculateBasicTransferStatistics(stats, fit, samples);

    return stats;
}

static MapCallStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit, const std::vector<UploadSampleResult<MapBufferRangeDuration>> &samples)
{
    MapCallStatistics stats;

    calculateBasicTransferStatistics(stats, fit, samples);

    stats.map   = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::mapDuration);
    stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::unmapDuration);
    stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::writeDuration);
    stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeDuration::allocDuration);

    return stats;
}

static MapFlushCallStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit,
    const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> &samples)
{
    MapFlushCallStatistics stats;

    calculateBasicTransferStatistics(stats, fit, samples);

    stats.map   = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::mapDuration);
    stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::unmapDuration);
    stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::writeDuration);
    stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::flushDuration);
    stats.alloc = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDuration::allocDuration);

    return stats;
}

static MapCallStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit,
    const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> &samples)
{
    MapCallStatistics stats;

    calculateBasicTransferStatistics(stats, fit, samples);

    stats.map   = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::mapDuration);
    stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::unmapDuration);
    stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeDurationNoAlloc::writeDuration);

    return stats;
}

static MapFlushCallStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit,
    const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> &samples)
{
    MapFlushCallStatistics stats;

    calculateBasicTransferStatistics(stats, fit, samples);

    stats.map   = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::mapDuration);
    stats.unmap = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::unmapDuration);
    stats.write = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::writeDuration);
    stats.flush = calculateSingleOperationStatistics(samples, &MapBufferRangeFlushDurationNoAlloc::flushDuration);

    return stats;
}

static RenderReadStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit, const std::vector<RenderSampleResult<RenderReadDuration>> &samples)
{
    RenderReadStatistics stats;

    calculateBasicRenderStatistics(stats, fit, samples);

    stats.render = calculateSingleOperationStatistics(samples, &RenderReadDuration::renderDuration);
    stats.read   = calculateSingleOperationStatistics(samples, &RenderReadDuration::readDuration);
    stats.total  = calculateSingleOperationStatistics(samples, &RenderReadDuration::totalDuration);

    return stats;
}

static RenderReadStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit,
    const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> &samples)
{
    RenderReadStatistics stats;

    calculateBasicRenderStatistics(stats, fit, samples);

    stats.render = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::renderDuration);
    stats.read   = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::readDuration);
    stats.total  = calculateSingleOperationStatistics(samples, &UnrelatedUploadRenderReadDuration::totalDuration);

    return stats;
}

static UploadRenderReadStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit, const std::vector<RenderSampleResult<UploadRenderReadDuration>> &samples)
{
    UploadRenderReadStatistics stats;

    calculateBasicRenderStatistics(stats, fit, samples);

    stats.upload = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::uploadDuration);
    stats.render = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::renderDuration);
    stats.read   = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::readDuration);
    stats.total  = calculateSingleOperationStatistics(samples, &UploadRenderReadDuration::totalDuration);

    return stats;
}

static UploadRenderReadStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit,
    const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> &samples)
{
    UploadRenderReadStatistics stats;

    calculateBasicRenderStatistics(stats, fit, samples);

    stats.upload =
        calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::uploadDuration);
    stats.render =
        calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::renderDuration);
    stats.read =
        calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::readDuration);
    stats.total =
        calculateSingleOperationStatistics(samples, &UploadRenderReadDurationWithUnrelatedUploadSize::totalDuration);

    return stats;
}

static RenderUploadRenderReadStatistics calculateSampleStatistics(
    const LineParametersWithConfidence &fit,
    const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> &samples)
{
    RenderUploadRenderReadStatistics stats;

    calculateBasicRenderStatistics(stats, fit, samples);

    stats.firstRender =
        calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::firstRenderDuration);
    stats.upload = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::uploadDuration);
    stats.secondRender =
        calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::secondRenderDuration);
    stats.read  = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::readDuration);
    stats.total = calculateSingleOperationStatistics(samples, &RenderUploadRenderReadDuration::totalDuration);

    return stats;
}

template <typename DurationType>
static LineParametersWithConfidence fitLineToSamples(
    const std::vector<UploadSampleResult<DurationType>> &samples, int beginNdx, int endNdx, int step,
    uint64_t DurationType::*target = &DurationType::fitResponseDuration)
{
    std::vector<tcu::Vec2> samplePoints;

    for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
    {
        tcu::Vec2 point;

        point.x() = (float)(samples[sampleNdx].writtenSize);
        point.y() = (float)(samples[sampleNdx].duration.*target);

        samplePoints.push_back(point);
    }

    return theilSenSiegelLinearRegression(samplePoints, 0.6f);
}

template <typename DurationType>
static LineParametersWithConfidence fitLineToSamples(
    const std::vector<RenderSampleResult<DurationType>> &samples, int beginNdx, int endNdx, int step,
    uint64_t DurationType::*target = &DurationType::fitResponseDuration)
{
    std::vector<tcu::Vec2> samplePoints;

    for (int sampleNdx = beginNdx; sampleNdx < endNdx; sampleNdx += step)
    {
        tcu::Vec2 point;

        point.x() = (float)(samples[sampleNdx].renderDataSize);
        point.y() = (float)(samples[sampleNdx].duration.*target);

        samplePoints.push_back(point);
    }

    return theilSenSiegelLinearRegression(samplePoints, 0.6f);
}

template <typename T>
static LineParametersWithConfidence fitLineToSamples(
    const std::vector<T> &samples, int beginNdx, int endNdx,
    uint64_t T::SampleType::*target = &T::SampleType::fitResponseDuration)
{
    return fitLineToSamples(samples, beginNdx, endNdx, 1, target);
}

template <typename T>
static LineParametersWithConfidence fitLineToSamples(
    const std::vector<T> &samples, uint64_t T::SampleType::*target = &T::SampleType::fitResponseDuration)
{
    return fitLineToSamples(samples, 0, (int)samples.size(), target);
}

static float getAreaBetweenLines(float xmin, float xmax, float lineAOffset, float lineACoefficient, float lineBOffset,
                                 float lineBCoefficient)
{
    const float lineAMin     = lineAOffset + lineACoefficient * xmin;
    const float lineAMax     = lineAOffset + lineACoefficient * xmax;
    const float lineBMin     = lineBOffset + lineBCoefficient * xmin;
    const float lineBMax     = lineBOffset + lineBCoefficient * xmax;
    const bool aOverBAtBegin = (lineAMin > lineBMin);
    const bool aOverBAtEnd   = (lineAMax > lineBMax);

    if (aOverBAtBegin == aOverBAtEnd)
    {
        // lines do not intersect

        const float midpoint = (xmin + xmax) / 2.0f;
        const float width    = (xmax - xmin);

        const float lineAHeight = lineAOffset + lineACoefficient * midpoint;
        const float lineBHeight = lineBOffset + lineBCoefficient * midpoint;

        return width * de::abs(lineAHeight - lineBHeight);
    }
    else
    {

        // lines intersect

        const float approachCoeffient = de::abs(lineACoefficient - lineBCoefficient);
        const float epsilon           = 0.0001f;
        const float leftHeight        = de::abs(lineAMin - lineBMin);
        const float rightHeight       = de::abs(lineAMax - lineBMax);

        if (approachCoeffient < epsilon)
            return 0.0f;

        return (0.5f * leftHeight * (leftHeight / approachCoeffient)) +
               (0.5f * rightHeight * (rightHeight / approachCoeffient));
    }
}

template <typename T>
static float calculateSampleFitLinearity(const std::vector<T> &samples, int T::*predictor)
{
    // Compare the fitted line of first half of the samples to the fitted line of
    // the second half of the samples. Calculate a AABB that fully contains every
    // sample's x component and both fit lines in this range. Calculate the ratio
    // of the area between the lines and the AABB.

    const float epsilon = 1.e-6f;
    const int midPoint  = (int)samples.size() / 2;
    const LineParametersWithConfidence startApproximation =
        fitLineToSamples(samples, 0, midPoint, &T::SampleType::fitResponseDuration);
    const LineParametersWithConfidence endApproximation =
        fitLineToSamples(samples, midPoint, (int)samples.size(), &T::SampleType::fitResponseDuration);

    const float aabbMinX = (float)(samples.front().*predictor);
    const float aabbMinY = de::min(startApproximation.offset + startApproximation.coefficient * aabbMinX,
                                   endApproximation.offset + endApproximation.coefficient * aabbMinX);
    const float aabbMaxX = (float)(samples.back().*predictor);
    const float aabbMaxY = de::max(startApproximation.offset + startApproximation.coefficient * aabbMaxX,
                                   endApproximation.offset + endApproximation.coefficient * aabbMaxX);

    const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
    const float areaBetweenLines =
        getAreaBetweenLines(aabbMinX, aabbMaxX, startApproximation.offset, startApproximation.coefficient,
                            endApproximation.offset, endApproximation.coefficient);
    const float errorAreaRatio = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);

    return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
}

template <typename DurationType>
static float calculateSampleFitLinearity(const std::vector<UploadSampleResult<DurationType>> &samples)
{
    return calculateSampleFitLinearity(samples, &UploadSampleResult<DurationType>::writtenSize);
}

template <typename DurationType>
static float calculateSampleFitLinearity(const std::vector<RenderSampleResult<DurationType>> &samples)
{
    return calculateSampleFitLinearity(samples, &RenderSampleResult<DurationType>::renderDataSize);
}

template <typename T>
static float calculateSampleTemporalStability(const std::vector<T> &samples, int T::*predictor)
{
    // Samples are sampled in the following order: 1) even samples (in random order) 2) odd samples (in random order)
    // Compare the fitted line of even samples to the fitted line of the odd samples. Calculate a AABB that fully
    // contains every sample's x component and both fit lines in this range. Calculate the ratio of the area between
    // the lines and the AABB.

    const float epsilon = 1.e-6f;
    const LineParametersWithConfidence evenApproximation =
        fitLineToSamples(samples, 0, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);
    const LineParametersWithConfidence oddApproximation =
        fitLineToSamples(samples, 1, (int)samples.size(), 2, &T::SampleType::fitResponseDuration);

    const float aabbMinX = (float)(samples.front().*predictor);
    const float aabbMinY = de::min(evenApproximation.offset + evenApproximation.coefficient * aabbMinX,
                                   oddApproximation.offset + oddApproximation.coefficient * aabbMinX);
    const float aabbMaxX = (float)(samples.back().*predictor);
    const float aabbMaxY = de::max(evenApproximation.offset + evenApproximation.coefficient * aabbMaxX,
                                   oddApproximation.offset + oddApproximation.coefficient * aabbMaxX);

    const float aabbArea = (aabbMaxX - aabbMinX) * (aabbMaxY - aabbMinY);
    const float areaBetweenLines =
        getAreaBetweenLines(aabbMinX, aabbMaxX, evenApproximation.offset, evenApproximation.coefficient,
                            oddApproximation.offset, oddApproximation.coefficient);
    const float errorAreaRatio = (aabbArea < epsilon) ? (1.0f) : (areaBetweenLines / aabbArea);

    return de::clamp(1.0f - errorAreaRatio, 0.0f, 1.0f);
}

template <typename DurationType>
static float calculateSampleTemporalStability(const std::vector<UploadSampleResult<DurationType>> &samples)
{
    return calculateSampleTemporalStability(samples, &UploadSampleResult<DurationType>::writtenSize);
}

template <typename DurationType>
static float calculateSampleTemporalStability(const std::vector<RenderSampleResult<DurationType>> &samples)
{
    return calculateSampleTemporalStability(samples, &RenderSampleResult<DurationType>::renderDataSize);
}

template <typename DurationType>
static void bucketizeSamplesUniformly(const std::vector<UploadSampleResult<DurationType>> &samples,
                                      std::vector<UploadSampleResult<DurationType>> *buckets, int numBuckets,
                                      int &minBufferSize, int &maxBufferSize)
{
    minBufferSize = 0;
    maxBufferSize = 0;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        DE_ASSERT(samples[sampleNdx].allocatedSize != 0);

        if (!minBufferSize || samples[sampleNdx].allocatedSize < minBufferSize)
            minBufferSize = samples[sampleNdx].allocatedSize;
        if (!maxBufferSize || samples[sampleNdx].allocatedSize > maxBufferSize)
            maxBufferSize = samples[sampleNdx].allocatedSize;
    }

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float bucketNdxFloat = (float)(samples[sampleNdx].allocatedSize - minBufferSize) /
                                     (float)(maxBufferSize - minBufferSize) * (float)numBuckets;
        const int bucketNdx = de::clamp((int)deFloatFloor(bucketNdxFloat), 0, numBuckets - 1);

        buckets[bucketNdx].push_back(samples[sampleNdx]);
    }
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapRangeStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    log << tcu::TestLog::Float("MapRangeMin", "MapRange: Min time", "us", QP_KEY_TAG_TIME, stats.map.minTime)
        << tcu::TestLog::Float("MapRangeMax", "MapRange: Max time", "us", QP_KEY_TAG_TIME, stats.map.maxTime)
        << tcu::TestLog::Float("MapRangeMin90", "MapRange: 90%-Min time", "us", QP_KEY_TAG_TIME,
                               stats.map.min2DecileTime)
        << tcu::TestLog::Float("MapRangeMax90", "MapRange: 90%-Max time", "us", QP_KEY_TAG_TIME,
                               stats.map.max9DecileTime)
        << tcu::TestLog::Float("MapRangeMedian", "MapRange: Median time", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    log << tcu::TestLog::Float("UnmapMin", "Unmap: Min time", "us", QP_KEY_TAG_TIME, stats.unmap.minTime)
        << tcu::TestLog::Float("UnmapMax", "Unmap: Max time", "us", QP_KEY_TAG_TIME, stats.unmap.maxTime)
        << tcu::TestLog::Float("UnmapMin90", "Unmap: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.unmap.min2DecileTime)
        << tcu::TestLog::Float("UnmapMax90", "Unmap: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.unmap.max9DecileTime)
        << tcu::TestLog::Float("UnmapMedian", "Unmap: Median time", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    log << tcu::TestLog::Float("WriteMin", "Write: Min time", "us", QP_KEY_TAG_TIME, stats.write.minTime)
        << tcu::TestLog::Float("WriteMax", "Write: Max time", "us", QP_KEY_TAG_TIME, stats.write.maxTime)
        << tcu::TestLog::Float("WriteMin90", "Write: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.write.min2DecileTime)
        << tcu::TestLog::Float("WriteMax90", "Write: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.write.max9DecileTime)
        << tcu::TestLog::Float("WriteMedian", "Write: Median time", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    log << tcu::TestLog::Float("FlushMin", "Flush: Min time", "us", QP_KEY_TAG_TIME, stats.flush.minTime)
        << tcu::TestLog::Float("FlushMax", "Flush: Max time", "us", QP_KEY_TAG_TIME, stats.flush.maxTime)
        << tcu::TestLog::Float("FlushMin90", "Flush: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.flush.min2DecileTime)
        << tcu::TestLog::Float("FlushMax90", "Flush: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.flush.max9DecileTime)
        << tcu::TestLog::Float("FlushMedian", "Flush: Median time", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    log << tcu::TestLog::Float("AllocMin", "Alloc: Min time", "us", QP_KEY_TAG_TIME, stats.alloc.minTime)
        << tcu::TestLog::Float("AllocMax", "Alloc: Max time", "us", QP_KEY_TAG_TIME, stats.alloc.maxTime)
        << tcu::TestLog::Float("AllocMin90", "Alloc: 90%-Min time", "us", QP_KEY_TAG_TIME, stats.alloc.min2DecileTime)
        << tcu::TestLog::Float("AllocMax90", "Alloc: 90%-Max time", "us", QP_KEY_TAG_TIME, stats.alloc.max9DecileTime)
        << tcu::TestLog::Float("AllocMedian", "Alloc: Median time", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapRangeStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocStats(
    tcu::TestLog &log, const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Type logMapContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::mapDuration);
    log << tcu::TestLog::Float("MapConstantCost", "Map: Approximated contant cost", "us", QP_KEY_TAG_TIME,
                               contributionFitting.offset)
        << tcu::TestLog::Float("MapLinearCost", "Map: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                               contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("MapMedianCost", "Map: Median cost", "us", QP_KEY_TAG_TIME, stats.map.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Type logUnmapContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::unmapDuration);
    log << tcu::TestLog::Float("UnmapConstantCost", "Unmap: Approximated contant cost", "us", QP_KEY_TAG_TIME,
                               contributionFitting.offset)
        << tcu::TestLog::Float("UnmapLinearCost", "Unmap: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                               contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("UnmapMedianCost", "Unmap: Median cost", "us", QP_KEY_TAG_TIME, stats.unmap.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Type logWriteContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::writeDuration);
    log << tcu::TestLog::Float("WriteConstantCost", "Write: Approximated contant cost", "us", QP_KEY_TAG_TIME,
                               contributionFitting.offset)
        << tcu::TestLog::Float("WriteLinearCost", "Write: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                               contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("WriteMedianCost", "Write: Median cost", "us", QP_KEY_TAG_TIME, stats.write.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Type logFlushContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::flushDuration);
    log << tcu::TestLog::Float("FlushConstantCost", "Flush: Approximated contant cost", "us", QP_KEY_TAG_TIME,
                               contributionFitting.offset)
        << tcu::TestLog::Float("FlushLinearCost", "Flush: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                               contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("FlushMedianCost", "Flush: Median cost", "us", QP_KEY_TAG_TIME, stats.flush.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Type logAllocContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::allocDuration);
    log << tcu::TestLog::Float("AllocConstantCost", "Alloc: Approximated contant cost", "us", QP_KEY_TAG_TIME,
                               contributionFitting.offset)
        << tcu::TestLog::Float("AllocLinearCost", "Alloc: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                               contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("AllocMedianCost", "Alloc: Median cost", "us", QP_KEY_TAG_TIME, stats.alloc.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Type logRenderContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::renderDuration);
    log << tcu::TestLog::Float("DrawCallConstantCost", "DrawCall: Approximated contant cost", "us", QP_KEY_TAG_TIME,
                               contributionFitting.offset)
        << tcu::TestLog::Float("DrawCallLinearCost", "DrawCall: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                               contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("DrawCallMedianCost", "DrawCall: Median cost", "us", QP_KEY_TAG_TIME,
                               stats.render.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_READ_STATS>::Type logReadContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::readDuration);
    log << tcu::TestLog::Float("ReadConstantCost", "Read: Approximated contant cost", "us", QP_KEY_TAG_TIME,
                               contributionFitting.offset)
        << tcu::TestLog::Float("ReadLinearCost", "Read: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                               contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("ReadMedianCost", "Read: Median cost", "us", QP_KEY_TAG_TIME, stats.read.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Type logUploadContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::uploadDuration);
    log << tcu::TestLog::Float("UploadConstantCost", "Upload: Approximated contant cost", "us", QP_KEY_TAG_TIME,
                               contributionFitting.offset)
        << tcu::TestLog::Float("UploadLinearCost", "Upload: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                               contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("UploadMedianCost", "Upload: Median cost", "us", QP_KEY_TAG_TIME,
                               stats.upload.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Type logTotalContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting = fitLineToSamples(samples, &SampleType::totalDuration);
    log << tcu::TestLog::Float("TotalConstantCost", "Total: Approximated contant cost", "us", QP_KEY_TAG_TIME,
                               contributionFitting.offset)
        << tcu::TestLog::Float("TotalLinearCost", "Total: Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                               contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("TotalMedianCost", "Total: Median cost", "us", QP_KEY_TAG_TIME, stats.total.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Type logFirstRenderContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting =
        fitLineToSamples(samples, &SampleType::firstRenderDuration);
    log << tcu::TestLog::Float("FirstDrawCallConstantCost", "First DrawCall: Approximated contant cost", "us",
                               QP_KEY_TAG_TIME, contributionFitting.offset)
        << tcu::TestLog::Float("FirstDrawCallLinearCost", "First DrawCall: Approximated linear cost", "us / MB",
                               QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("FirstDrawCallMedianCost", "First DrawCall: Median cost", "us", QP_KEY_TAG_TIME,
                               stats.firstRender.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Type logSecondRenderContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    const LineParametersWithConfidence contributionFitting =
        fitLineToSamples(samples, &SampleType::secondRenderDuration);
    log << tcu::TestLog::Float("SecondDrawCallConstantCost", "Second DrawCall: Approximated contant cost", "us",
                               QP_KEY_TAG_TIME, contributionFitting.offset)
        << tcu::TestLog::Float("SecondDrawCallLinearCost", "Second DrawCall: Approximated linear cost", "us / MB",
                               QP_KEY_TAG_TIME, contributionFitting.coefficient * 1024.0f * 1024.0f)
        << tcu::TestLog::Float("SecondDrawCallMedianCost", "Second DrawCall: Median cost", "us", QP_KEY_TAG_TIME,
                               stats.secondRender.medianTime);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_MAP_STATS>::Value>::Type logMapContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UNMAP_STATS>::Value>::Type logUnmapContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_WRITE_STATS>::Value>::Type logWriteContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FLUSH_STATS>::Value>::Type logFlushContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_ALLOC_STATS>::Value>::Type logAllocContribution(
    tcu::TestLog &log, const std::vector<UploadSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_RENDER_STATS>::Value>::Type logRenderContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_READ_STATS>::Value>::Type logReadContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_UPLOAD_STATS>::Value>::Type logUploadContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_TOTAL_STATS>::Value>::Type logTotalContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_FIRST_RENDER_STATS>::Value>::Type logFirstRenderContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

template <typename SampleType>
static typename EnableIf<void, Not<SampleTypeTraits<SampleType>::HAS_SECOND_RENDER_STATS>::Value>::Type logSecondRenderContribution(
    tcu::TestLog &log, const std::vector<RenderSampleResult<SampleType>> &samples,
    const typename SampleTypeTraits<SampleType>::StatsType &stats)
{
    DE_UNREF(log);
    DE_UNREF(samples);
    DE_UNREF(stats);
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<UploadSampleResult<SingleOperationDuration>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("UploadTime", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
            << (int)samples[sampleNdx].duration.totalDuration << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<UploadSampleResult<MapBufferRangeDuration>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
            << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.allocDuration
            << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration
            << (int)samples[sampleNdx].duration.writeDuration << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<UploadSampleResult<MapBufferRangeDurationNoAlloc>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
            << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.mapDuration
            << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration
            << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<UploadSampleResult<MapBufferRangeFlushDuration>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("AllocTime", "Alloc time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
            << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.allocDuration
            << (int)samples[sampleNdx].duration.mapDuration << (int)samples[sampleNdx].duration.unmapDuration
            << (int)samples[sampleNdx].duration.writeDuration << (int)samples[sampleNdx].duration.flushDuration
            << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<UploadSampleResult<MapBufferRangeFlushDurationNoAlloc>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("WrittenSize", "Written size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("BufferSize", "Buffer size", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("MapTime", "Map time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("UnmapTime", "Unmap time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("WriteTime", "Write time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FlushTime", "Flush time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].writtenSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].writtenSize << samples[sampleNdx].bufferSize
            << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.mapDuration
            << (int)samples[sampleNdx].duration.unmapDuration << (int)samples[sampleNdx].duration.writeDuration
            << (int)samples[sampleNdx].duration.flushDuration << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<RenderSampleResult<RenderReadDuration>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].numVertices
            << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.renderDuration
            << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<RenderSampleResult<UnrelatedUploadRenderReadDuration>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes",
                                   QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].numVertices
            << samples[sampleNdx].unrelatedDataSize << (int)samples[sampleNdx].duration.renderReadDuration
            << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration
            << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<RenderSampleResult<UploadRenderReadDuration>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize
            << samples[sampleNdx].numVertices << (int)samples[sampleNdx].duration.renderReadDuration
            << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.uploadDuration
            << (int)samples[sampleNdx].duration.renderDuration << (int)samples[sampleNdx].duration.readDuration
            << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<RenderSampleResult<UploadRenderReadDurationWithUnrelatedUploadSize>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("UnrelatedUploadSize", "Unrelated upload size", "bytes",
                                   QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize
            << samples[sampleNdx].numVertices << samples[sampleNdx].unrelatedDataSize
            << (int)samples[sampleNdx].duration.renderReadDuration << (int)samples[sampleNdx].duration.totalDuration
            << (int)samples[sampleNdx].duration.uploadDuration << (int)samples[sampleNdx].duration.renderDuration
            << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

void logSampleList(tcu::TestLog &log, const LineParametersWithConfidence &theilSenFitting,
                   const std::vector<RenderSampleResult<RenderUploadRenderReadDuration>> &samples)
{
    log << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
        << tcu::TestLog::ValueInfo("DataSize", "Data processed", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("UploadSize", "Data uploaded", "bytes", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("VertexCount", "Number of vertices", "vertices", QP_SAMPLE_VALUE_TAG_PREDICTOR)
        << tcu::TestLog::ValueInfo("DrawReadTime", "Second draw call and ReadPixels time", "us",
                                   QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FirstDrawCallTime", "First draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("SecondDrawCallTime", "Second draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::ValueInfo("FitResidual", "Fit residual", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
        << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)samples.size(); ++sampleNdx)
    {
        const float fitResidual =
            (float)samples[sampleNdx].duration.fitResponseDuration -
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)samples[sampleNdx].renderDataSize);
        log << tcu::TestLog::Sample << samples[sampleNdx].renderDataSize << samples[sampleNdx].uploadedDataSize
            << samples[sampleNdx].numVertices << (int)samples[sampleNdx].duration.renderReadDuration
            << (int)samples[sampleNdx].duration.totalDuration << (int)samples[sampleNdx].duration.firstRenderDuration
            << (int)samples[sampleNdx].duration.uploadDuration << (int)samples[sampleNdx].duration.secondRenderDuration
            << (int)samples[sampleNdx].duration.readDuration << fitResidual << tcu::TestLog::EndSample;
    }

    log << tcu::TestLog::EndSampleList;
}

template <typename SampleType>
static UploadSampleAnalyzeResult analyzeSampleResults(tcu::TestLog &log,
                                                      const std::vector<UploadSampleResult<SampleType>> &samples,
                                                      bool logBucketPerformance)
{
    // Assume data is linear with some outliers, fit a line
    const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples);
    const typename SampleTypeTraits<SampleType>::StatsType resultStats =
        calculateSampleStatistics(theilSenFitting, samples);
    float approximatedTransferRate;
    float approximatedTransferRateNoConstant;

    // Output raw samples
    {
        const tcu::ScopedLogSection section(log, "Samples", "Samples");
        logSampleList(log, theilSenFitting, samples);
    }

    // Calculate results for different ranges
    if (logBucketPerformance)
    {
        const int numBuckets = 4;
        int minBufferSize    = 0;
        int maxBufferSize    = 0;
        std::vector<UploadSampleResult<SampleType>> buckets[numBuckets];

        bucketizeSamplesUniformly(samples, &buckets[0], numBuckets, minBufferSize, maxBufferSize);

        for (int bucketNdx = 0; bucketNdx < numBuckets; ++bucketNdx)
        {
            if (buckets[bucketNdx].empty())
                continue;

            // Print a nice result summary

            const int bucketRangeMin =
                minBufferSize + (int)(((float)bucketNdx / (float)numBuckets) * (float)(maxBufferSize - minBufferSize));
            const int bucketRangeMax = minBufferSize + (int)(((float)(bucketNdx + 1) / (float)numBuckets) *
                                                             (float)(maxBufferSize - minBufferSize));
            const typename SampleTypeTraits<SampleType>::StatsType stats =
                calculateSampleStatistics(theilSenFitting, buckets[bucketNdx]);
            const tcu::ScopedLogSection section(
                log, "BufferSizeRange",
                std::string("Transfer performance with buffer size in range [")
                    .append(getHumanReadableByteSize(bucketRangeMin)
                                .append(", ")
                                .append(getHumanReadableByteSize(bucketRangeMax).append("]"))));

            logMapRangeStats<SampleType>(log, stats);
            logUnmapStats<SampleType>(log, stats);
            logWriteStats<SampleType>(log, stats);
            logFlushStats<SampleType>(log, stats);
            logAllocStats<SampleType>(log, stats);

            log << tcu::TestLog::Float("Min", "Total: Min time", "us", QP_KEY_TAG_TIME, stats.result.minTime)
                << tcu::TestLog::Float("Max", "Total: Max time", "us", QP_KEY_TAG_TIME, stats.result.maxTime)
                << tcu::TestLog::Float("Min90", "Total: 90%-Min time", "us", QP_KEY_TAG_TIME,
                                       stats.result.min2DecileTime)
                << tcu::TestLog::Float("Max90", "Total: 90%-Max time", "us", QP_KEY_TAG_TIME,
                                       stats.result.max9DecileTime)
                << tcu::TestLog::Float("Median", "Total: Median time", "us", QP_KEY_TAG_TIME, stats.result.medianTime)
                << tcu::TestLog::Float("MedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE,
                                       stats.medianRate / 1024.0f / 1024.0f)
                << tcu::TestLog::Float("MaxDiff", "Max difference to approximated", "us", QP_KEY_TAG_TIME,
                                       stats.maxDiffTime)
                << tcu::TestLog::Float("Max90Diff", "90%-Max difference to approximated", "us", QP_KEY_TAG_TIME,
                                       stats.maxDiff9DecileTime)
                << tcu::TestLog::Float("MedianDiff", "Median difference to approximated", "us", QP_KEY_TAG_TIME,
                                       stats.medianDiffTime)
                << tcu::TestLog::Float("MaxRelDiff", "Max relative difference to approximated", "%", QP_KEY_TAG_NONE,
                                       stats.maxRelDiffTime * 100.0f)
                << tcu::TestLog::Float("Max90RelDiff", "90%-Max relative difference to approximated", "%",
                                       QP_KEY_TAG_NONE, stats.max9DecileRelDiffTime * 100.0f)
                << tcu::TestLog::Float("MedianRelDiff", "Median relative difference to approximated", "%",
                                       QP_KEY_TAG_NONE, stats.medianRelDiffTime * 100.0f);
        }
    }

    // Contributions
    if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
    {
        const tcu::ScopedLogSection section(log, "Contribution", "Contributions");

        logMapContribution(log, samples, resultStats);
        logUnmapContribution(log, samples, resultStats);
        logWriteContribution(log, samples, resultStats);
        logFlushContribution(log, samples, resultStats);
        logAllocContribution(log, samples, resultStats);
    }

    // Print results
    {
        const tcu::ScopedLogSection section(log, "Results", "Results");

        const int medianBufferSize = (samples.front().bufferSize + samples.back().bufferSize) / 2;
        const float approximatedTransferTime =
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
        const float approximatedTransferTimeNoConstant =
            (theilSenFitting.coefficient * (float)medianBufferSize) / 1000.0f / 1000.0f;
        const float sampleLinearity         = calculateSampleFitLinearity(samples);
        const float sampleTemporalStability = calculateSampleTemporalStability(samples);

        approximatedTransferRateNoConstant = (float)medianBufferSize / approximatedTransferTimeNoConstant;
        approximatedTransferRate           = (float)medianBufferSize / approximatedTransferTime;

        log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY,
                                   sampleLinearity * 100.0f)
            << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY,
                                   sampleTemporalStability * 100.0f)
            << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME,
                                   theilSenFitting.offset)
            << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower",
                                   "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME,
                                   theilSenFitting.offsetConfidenceLower)
            << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper",
                                   "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME,
                                   theilSenFitting.offsetConfidenceUpper)
            << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                                   theilSenFitting.coefficient * 1024.0f * 1024.0f)
            << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower",
                                   "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME,
                                   theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
            << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper",
                                   "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME,
                                   theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
            << tcu::TestLog::Float("ApproximatedTransferRate", "Approximated transfer rate", "MB / s",
                                   QP_KEY_TAG_PERFORMANCE, approximatedTransferRate / 1024.0f / 1024.0f)
            << tcu::TestLog::Float("ApproximatedTransferRateNoConstant",
                                   "Approximated transfer rate without constant cost", "MB / s", QP_KEY_TAG_PERFORMANCE,
                                   approximatedTransferRateNoConstant / 1024.0f / 1024.0f)
            << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME,
                                   resultStats.result.medianTime)
            << tcu::TestLog::Float("SampleMedianTransfer", "Median transfer rate", "MB / s", QP_KEY_TAG_PERFORMANCE,
                                   resultStats.medianRate / 1024.0f / 1024.0f);
    }

    // return approximated transfer rate
    {
        UploadSampleAnalyzeResult result;

        result.transferRateMedian     = resultStats.medianRate;
        result.transferRateAtRange    = approximatedTransferRate;
        result.transferRateAtInfinity = approximatedTransferRateNoConstant;

        return result;
    }
}

template <typename SampleType>
static RenderSampleAnalyzeResult analyzeSampleResults(tcu::TestLog &log,
                                                      const std::vector<RenderSampleResult<SampleType>> &samples)
{
    // Assume data is linear with some outliers, fit a line
    const LineParametersWithConfidence theilSenFitting = fitLineToSamples(samples);
    const typename SampleTypeTraits<SampleType>::StatsType resultStats =
        calculateSampleStatistics(theilSenFitting, samples);
    float approximatedProcessingRate;
    float approximatedProcessingRateNoConstant;

    // output raw samples
    {
        const tcu::ScopedLogSection section(log, "Samples", "Samples");
        logSampleList(log, theilSenFitting, samples);
    }

    // Contributions
    if (SampleTypeTraits<SampleType>::LOG_CONTRIBUTIONS)
    {
        const tcu::ScopedLogSection section(log, "Contribution", "Contributions");

        logFirstRenderContribution(log, samples, resultStats);
        logUploadContribution(log, samples, resultStats);
        logRenderContribution(log, samples, resultStats);
        logSecondRenderContribution(log, samples, resultStats);
        logReadContribution(log, samples, resultStats);
        logTotalContribution(log, samples, resultStats);
    }

    // print results
    {
        const tcu::ScopedLogSection section(log, "Results", "Results");

        const int medianDataSize = (samples.front().renderDataSize + samples.back().renderDataSize) / 2;
        const float approximatedRenderTime =
            (theilSenFitting.offset + theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
        const float approximatedRenderTimeNoConstant =
            (theilSenFitting.coefficient * (float)medianDataSize) / 1000.0f / 1000.0f;
        const float sampleLinearity         = calculateSampleFitLinearity(samples);
        const float sampleTemporalStability = calculateSampleTemporalStability(samples);

        approximatedProcessingRateNoConstant = (float)medianDataSize / approximatedRenderTimeNoConstant;
        approximatedProcessingRate           = (float)medianDataSize / approximatedRenderTime;

        log << tcu::TestLog::Float("ResultLinearity", "Sample linearity", "%", QP_KEY_TAG_QUALITY,
                                   sampleLinearity * 100.0f)
            << tcu::TestLog::Float("SampleTemporalStability", "Sample temporal stability", "%", QP_KEY_TAG_QUALITY,
                                   sampleTemporalStability * 100.0f)
            << tcu::TestLog::Float("ApproximatedConstantCost", "Approximated contant cost", "us", QP_KEY_TAG_TIME,
                                   theilSenFitting.offset)
            << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Lower",
                                   "Approximated contant cost 60% confidence lower limit", "us", QP_KEY_TAG_TIME,
                                   theilSenFitting.offsetConfidenceLower)
            << tcu::TestLog::Float("ApproximatedConstantCostConfidence60Upper",
                                   "Approximated contant cost 60% confidence upper limit", "us", QP_KEY_TAG_TIME,
                                   theilSenFitting.offsetConfidenceUpper)
            << tcu::TestLog::Float("ApproximatedLinearCost", "Approximated linear cost", "us / MB", QP_KEY_TAG_TIME,
                                   theilSenFitting.coefficient * 1024.0f * 1024.0f)
            << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Lower",
                                   "Approximated linear cost 60% confidence lower limit", "us / MB", QP_KEY_TAG_TIME,
                                   theilSenFitting.coefficientConfidenceLower * 1024.0f * 1024.0f)
            << tcu::TestLog::Float("ApproximatedLinearCostConfidence60Upper",
                                   "Approximated linear cost 60% confidence upper limit", "us / MB", QP_KEY_TAG_TIME,
                                   theilSenFitting.coefficientConfidenceUpper * 1024.0f * 1024.0f)
            << tcu::TestLog::Float("ApproximatedProcessRate", "Approximated processing rate", "MB / s",
                                   QP_KEY_TAG_PERFORMANCE, approximatedProcessingRate / 1024.0f / 1024.0f)
            << tcu::TestLog::Float("ApproximatedProcessRateNoConstant",
                                   "Approximated processing rate without constant cost", "MB / s",
                                   QP_KEY_TAG_PERFORMANCE, approximatedProcessingRateNoConstant / 1024.0f / 1024.0f)
            << tcu::TestLog::Float("SampleMedianTime", "Median sample time", "us", QP_KEY_TAG_TIME,
                                   resultStats.result.medianTime)
            << tcu::TestLog::Float("SampleMedianProcess", "Median processing rate", "MB / s", QP_KEY_TAG_PERFORMANCE,
                                   resultStats.medianRate / 1024.0f / 1024.0f);
    }

    // return approximated render rate
    {
        RenderSampleAnalyzeResult result;

        result.renderRateMedian     = resultStats.medianRate;
        result.renderRateAtRange    = approximatedProcessingRate;
        result.renderRateAtInfinity = approximatedProcessingRateNoConstant;

        return result;
    }
    return RenderSampleAnalyzeResult();
}

static void generateTwoPassRandomIterationOrder(std::vector<int> &iterationOrder, int numSamples)
{
    de::Random rnd(0xabc);
    const int midPoint = (numSamples + 1) / 2; // !< ceil(m_numSamples / 2)

    DE_ASSERT((int)iterationOrder.size() == numSamples);

    // Two "passes" over range, randomize order in both passes
    // This allows to us detect if iterations are not independent
    // (first run and later run samples differ significantly?)

    for (int sampleNdx = 0; sampleNdx < midPoint; ++sampleNdx)
        iterationOrder[sampleNdx] = sampleNdx * 2;
    for (int sampleNdx = midPoint; sampleNdx < numSamples; ++sampleNdx)
        iterationOrder[sampleNdx] = (sampleNdx - midPoint) * 2 + 1;

    for (int ndx = 0; ndx < midPoint; ++ndx)
        std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(0, midPoint - 1)]);
    for (int ndx = midPoint; ndx < (int)iterationOrder.size(); ++ndx)
        std::swap(iterationOrder[ndx], iterationOrder[rnd.getInt(midPoint, (int)iterationOrder.size() - 1)]);
}

template <typename SampleType>
class BasicBufferCase : public TestCase
{
public:
    enum Flags
    {
        FLAG_ALLOCATE_LARGER_BUFFER = 0x01,
    };
    BasicBufferCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax,
                    int numSamples, int flags);
    ~BasicBufferCase(void);

    virtual void init(void);
    virtual void deinit(void);

protected:
    IterateResult iterate(void);

    virtual bool runSample(int iteration, UploadSampleResult<SampleType> &sample)                = 0;
    virtual void logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results) = 0;

    void disableGLWarmup(void);
    void waitGLResults(void);

    enum
    {
        UNUSED_RENDER_AREA_SIZE = 32
    };

    glu::ShaderProgram *m_minimalProgram;
    int32_t m_minimalProgramPosLoc;
    uint32_t m_bufferID;

    const int m_numSamples;
    const int m_bufferSizeMin;
    const int m_bufferSizeMax;
    const bool m_allocateLargerBuffer;

private:
    int m_iteration;
    std::vector<int> m_iterationOrder;
    std::vector<UploadSampleResult<SampleType>> m_results;

    bool m_useGL;
    int m_bufferRandomizerTimer;
};

template <typename SampleType>
BasicBufferCase<SampleType>::BasicBufferCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                             int bufferSizeMax, int numSamples, int flags)
    : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, desc)
    , m_minimalProgram(DE_NULL)
    , m_minimalProgramPosLoc(-1)
    , m_bufferID(0)
    , m_numSamples(numSamples)
    , m_bufferSizeMin(bufferSizeMin)
    , m_bufferSizeMax(bufferSizeMax)
    , m_allocateLargerBuffer((flags & FLAG_ALLOCATE_LARGER_BUFFER) != 0)
    , m_iteration(0)
    , m_iterationOrder(numSamples)
    , m_results(numSamples)
    , m_useGL(true)
    , m_bufferRandomizerTimer(0)
{
    // "randomize" iteration order. Deterministic, patternless
    generateTwoPassRandomIterationOrder(m_iterationOrder, m_numSamples);

    // choose buffer sizes
    for (int sampleNdx = 0; sampleNdx < m_numSamples; ++sampleNdx)
    {
        const int rawBufferSize =
            (int)deFloatFloor((float)bufferSizeMin +
                              (float)(bufferSizeMax - bufferSizeMin) * ((float)(sampleNdx + 1) / (float)m_numSamples));
        const int bufferSize = deAlign32(rawBufferSize, 16);
        const int allocatedBufferSize =
            deAlign32((m_allocateLargerBuffer) ? ((int)((float)bufferSize * 1.5f)) : (bufferSize), 16);

        m_results[sampleNdx].bufferSize    = bufferSize;
        m_results[sampleNdx].allocatedSize = allocatedBufferSize;
        m_results[sampleNdx].writtenSize   = -1;
    }
}

template <typename SampleType>
BasicBufferCase<SampleType>::~BasicBufferCase(void)
{
    deinit();
}

template <typename SampleType>
void BasicBufferCase<SampleType>::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    if (!m_useGL)
        return;

    // \note Viewport size is not checked, it won't matter if the render target actually is smaller than UNUSED_RENDER_AREA_SIZE

    // minimal shader

    m_minimalProgram = new glu::ShaderProgram(m_context.getRenderContext(),
                                              glu::ProgramSources() << glu::VertexSource(s_minimalVertexShader)
                                                                    << glu::FragmentSource(s_minimalFragnentShader));
    if (!m_minimalProgram->isOk())
    {
        m_testCtx.getLog() << *m_minimalProgram;
        throw tcu::TestError("failed to build shader program");
    }

    m_minimalProgramPosLoc = gl.getAttribLocation(m_minimalProgram->getProgram(), "a_position");
    if (m_minimalProgramPosLoc == -1)
        throw tcu::TestError("a_position location was -1");
}

template <typename SampleType>
void BasicBufferCase<SampleType>::deinit(void)
{
    if (m_bufferID)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
        m_bufferID = 0;
    }

    delete m_minimalProgram;
    m_minimalProgram = DE_NULL;
}

template <typename SampleType>
TestCase::IterateResult BasicBufferCase<SampleType>::iterate(void)
{
    const glw::Functions &gl    = m_context.getRenderContext().getFunctions();
    static bool buffersWarmedUp = false;

    static const uint32_t usages[] = {
        GL_STREAM_DRAW, GL_STREAM_READ,  GL_STREAM_COPY,  GL_STATIC_DRAW,  GL_STATIC_READ,
        GL_STATIC_COPY, GL_DYNAMIC_DRAW, GL_DYNAMIC_READ, GL_DYNAMIC_COPY,
    };

    // Allocate some random sized buffers and remove them to
    // make sure the first samples too have some buffers removed
    // just before their allocation. This is only needed by the
    // the first test.

    if (m_useGL && !buffersWarmedUp)
    {
        const int numRandomBuffers = 6;
        const int numRepeats       = 10;
        const int maxBufferSize    = 16777216;
        const std::vector<uint8_t> zeroData(maxBufferSize, 0x00);
        de::Random rnd(0x1234);
        uint32_t bufferIDs[numRandomBuffers] = {0};

        gl.useProgram(m_minimalProgram->getProgram());
        gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
        gl.enableVertexAttribArray(m_minimalProgramPosLoc);

        for (int ndx = 0; ndx < numRepeats; ++ndx)
        {
            // Create buffer and maybe draw from it
            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
            {
                const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
                const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];

                gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
                gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
                gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);

                if (rnd.getBool())
                {
                    gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
                    gl.drawArrays(GL_POINTS, 0, 1);
                    gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);
                }
            }

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
                gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);

            waitGLResults();
            GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");

            m_testCtx.touchWatchdog();
        }

        buffersWarmedUp = true;
        return CONTINUE;
    }
    else if (m_useGL && m_bufferRandomizerTimer++ % 8 == 0)
    {
        // Do some random buffer operations to every now and then
        // to make sure the previous test iterations won't affect
        // following test runs.

        const int numRandomBuffers = 3;
        const int maxBufferSize    = 16777216;
        const std::vector<uint8_t> zeroData(maxBufferSize, 0x00);
        de::Random rnd(0x1234 + 0xabc * m_bufferRandomizerTimer);

        // BufferData
        {
            uint32_t bufferIDs[numRandomBuffers] = {0};

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
            {
                const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
                const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];

                gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
                gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
                gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);
            }

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
                gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "buffer ops");

        // Do some memory mappings
        {
            uint32_t bufferIDs[numRandomBuffers] = {0};

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
            {
                const int randomSize = deAlign32(rnd.getInt(1, maxBufferSize), 4 * 4);
                const uint32_t usage = usages[rnd.getUint32() % (uint32_t)DE_LENGTH_OF_ARRAY(usages)];
                void *ptr;

                gl.genBuffers(1, &bufferIDs[randomBufferNdx]);
                gl.bindBuffer(GL_ARRAY_BUFFER, bufferIDs[randomBufferNdx]);
                gl.bufferData(GL_ARRAY_BUFFER, randomSize, &zeroData[0], usage);

                gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
                gl.drawArrays(GL_POINTS, 0, 1);
                gl.drawArrays(GL_POINTS, randomSize / (int)sizeof(float[4]) - 1, 1);

                if (rnd.getBool())
                    waitGLResults();

                ptr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, randomSize, GL_MAP_WRITE_BIT);
                if (ptr)
                {
                    medianTimeMemcpy(ptr, &zeroData[0], randomSize);
                    gl.unmapBuffer(GL_ARRAY_BUFFER);
                }
            }

            for (int randomBufferNdx = 0; randomBufferNdx < numRandomBuffers; ++randomBufferNdx)
                gl.deleteBuffers(1, &bufferIDs[randomBufferNdx]);

            waitGLResults();
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "buffer maps");
        return CONTINUE;
    }
    else
    {
        const int currentIteration     = m_iteration;
        const int sampleNdx            = m_iterationOrder[currentIteration];
        const bool sampleRunSuccessful = runSample(currentIteration, m_results[sampleNdx]);

        GLU_EXPECT_NO_ERROR(gl.getError(), "post runSample()");

        // Retry failed samples
        if (!sampleRunSuccessful)
            return CONTINUE;

        if (++m_iteration >= m_numSamples)
        {
            logAndSetTestResult(m_results);
            return STOP;
        }
        else
            return CONTINUE;
    }
}

template <typename SampleType>
void BasicBufferCase<SampleType>::disableGLWarmup(void)
{
    m_useGL = false;
}

template <typename SampleType>
void BasicBufferCase<SampleType>::waitGLResults(void)
{
    tcu::Surface unusedSurface(UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
    glu::readPixels(m_context.getRenderContext(), 0, 0, unusedSurface.getAccess());
}

template <typename SampleType>
class BasicUploadCase : public BasicBufferCase<SampleType>
{
public:
    enum CaseType
    {
        CASE_NO_BUFFERS = 0,
        CASE_NEW_BUFFER,
        CASE_UNSPECIFIED_BUFFER,
        CASE_SPECIFIED_BUFFER,
        CASE_USED_BUFFER,
        CASE_USED_LARGER_BUFFER,

        CASE_LAST
    };

    enum CaseFlags
    {
        FLAG_DONT_LOG_BUFFER_INFO              = 0x01,
        FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT = 0x02,
    };

    enum ResultType
    {
        RESULT_MEDIAN_TRANSFER_RATE = 0,
        RESULT_ASYMPTOTIC_TRANSFER_RATE,
    };

    BasicUploadCase(Context &context, const char *name, const char *desc, int bufferSizeMin, int bufferSizeMax,
                    int numSamples, uint32_t bufferUsage, CaseType caseType, ResultType resultType, int flags = 0);

    ~BasicUploadCase(void);

    virtual void init(void);
    virtual void deinit(void);

private:
    bool runSample(int iteration, UploadSampleResult<SampleType> &sample);
    void createBuffer(int bufferSize, int iteration);
    void deleteBuffer(int bufferSize);
    void useBuffer(int bufferSize);

    virtual void testBufferUpload(UploadSampleResult<SampleType> &result, int writeSize) = 0;
    void logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results);

    uint32_t m_unusedBufferID;

protected:
    const CaseType m_caseType;
    const ResultType m_resultType;
    const uint32_t m_bufferUsage;
    const bool m_logBufferInfo;
    const bool m_bufferUnspecifiedContent;
    std::vector<uint8_t> m_zeroData;

    using BasicBufferCase<SampleType>::m_testCtx;
    using BasicBufferCase<SampleType>::m_context;

    using BasicBufferCase<SampleType>::UNUSED_RENDER_AREA_SIZE;
    using BasicBufferCase<SampleType>::m_minimalProgram;
    using BasicBufferCase<SampleType>::m_minimalProgramPosLoc;
    using BasicBufferCase<SampleType>::m_bufferID;
    using BasicBufferCase<SampleType>::m_numSamples;
    using BasicBufferCase<SampleType>::m_bufferSizeMin;
    using BasicBufferCase<SampleType>::m_bufferSizeMax;
    using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
};

template <typename SampleType>
BasicUploadCase<SampleType>::BasicUploadCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                             int bufferSizeMax, int numSamples, uint32_t bufferUsage, CaseType caseType,
                                             ResultType resultType, int flags)
    : BasicBufferCase<SampleType>(
          context, name, desc, bufferSizeMin, bufferSizeMax, numSamples,
          (caseType == CASE_USED_LARGER_BUFFER) ? (BasicBufferCase<SampleType>::FLAG_ALLOCATE_LARGER_BUFFER) : (0))
    , m_unusedBufferID(0)
    , m_caseType(caseType)
    , m_resultType(resultType)
    , m_bufferUsage(bufferUsage)
    , m_logBufferInfo((flags & FLAG_DONT_LOG_BUFFER_INFO) == 0)
    , m_bufferUnspecifiedContent((flags & FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT) != 0)
    , m_zeroData()
{
    DE_ASSERT(m_caseType < CASE_LAST);
}

template <typename SampleType>
BasicUploadCase<SampleType>::~BasicUploadCase(void)
{
    deinit();
}

template <typename SampleType>
void BasicUploadCase<SampleType>::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    BasicBufferCase<SampleType>::init();

    // zero buffer as upload source
    m_zeroData.resize(m_bufferSizeMax, 0x00);

    // unused buffer

    gl.genBuffers(1, &m_unusedBufferID);
    GLU_EXPECT_NO_ERROR(gl.getError(), "Gen buf");

    // log basic info

    m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance with " << m_numSamples
                       << " test samples. Sample order is randomized. All samples at even positions (first = 0) are "
                          "tested before samples at odd positions.\n"
                       << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", "
                       << getHumanReadableByteSize(m_bufferSizeMax) << "]." << tcu::TestLog::EndMessage;

    if (m_logBufferInfo)
    {
        switch (m_caseType)
        {
        case CASE_NO_BUFFERS:
            break;

        case CASE_NEW_BUFFER:
            m_testCtx.getLog() << tcu::TestLog::Message
                               << "Target buffer is generated but not specified (i.e glBufferData() not called)."
                               << tcu::TestLog::EndMessage;
            break;

        case CASE_UNSPECIFIED_BUFFER:
            m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer is allocated with glBufferData(NULL)."
                               << tcu::TestLog::EndMessage;
            break;

        case CASE_SPECIFIED_BUFFER:
            m_testCtx.getLog() << tcu::TestLog::Message
                               << "Target buffer contents are specified prior testing with glBufferData(data)."
                               << tcu::TestLog::EndMessage;
            break;

        case CASE_USED_BUFFER:
            m_testCtx.getLog() << tcu::TestLog::Message << "Target buffer has been used in drawing before testing."
                               << tcu::TestLog::EndMessage;
            break;

        case CASE_USED_LARGER_BUFFER:
            m_testCtx.getLog() << tcu::TestLog::Message
                               << "Target buffer is larger and has been used in drawing before testing."
                               << tcu::TestLog::EndMessage;
            break;

        default:
            DE_ASSERT(false);
            break;
        }
    }

    if (m_resultType == RESULT_MEDIAN_TRANSFER_RATE)
        m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples."
                           << tcu::TestLog::EndMessage;
    else if (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE)
        m_testCtx.getLog() << tcu::TestLog::Message
                           << "Test result is the asymptotic transfer rate as the buffer size approaches infinity."
                           << tcu::TestLog::EndMessage;
    else
        DE_ASSERT(false);
}

template <typename SampleType>
void BasicUploadCase<SampleType>::deinit(void)
{
    if (m_unusedBufferID)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_unusedBufferID);
        m_unusedBufferID = 0;
    }

    m_zeroData = std::vector<uint8_t>();

    BasicBufferCase<SampleType>::deinit();
}

template <typename SampleType>
bool BasicUploadCase<SampleType>::runSample(int iteration, UploadSampleResult<SampleType> &sample)
{
    const glw::Functions &gl      = m_context.getRenderContext().getFunctions();
    const int allocatedBufferSize = sample.allocatedSize;
    const int bufferSize          = sample.bufferSize;

    if (m_caseType != CASE_NO_BUFFERS)
        createBuffer(iteration, allocatedBufferSize);

    // warmup CPU before the test to make sure the power management governor
    // keeps us in the "high performance" mode
    {
        deYield();
        tcu::warmupCPU();
        deYield();
    }

    testBufferUpload(sample, bufferSize);
    GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");

    if (m_caseType != CASE_NO_BUFFERS)
        deleteBuffer(bufferSize);

    return true;
}

template <typename SampleType>
void BasicUploadCase<SampleType>::createBuffer(int iteration, int bufferSize)
{
    DE_ASSERT(!m_bufferID);
    DE_ASSERT(m_caseType != CASE_NO_BUFFERS);

    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // create buffer

    if (m_caseType == CASE_NO_BUFFERS)
        return;

    // create empty buffer

    gl.genBuffers(1, &m_bufferID);
    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
    GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer gen");

    if (m_caseType == CASE_NEW_BUFFER)
    {
        // upload something else first, this should reduce noise in samples

        de::Random rng(0xbadc * iteration);
        const int sizeDelta = rng.getInt(0, 2097140);
        const int unusedUploadSize =
            deAlign32(1048576 + sizeDelta, 4 * 4); // Vary buffer size to make sure it is always reallocated
        const std::vector<uint8_t> unusedData(unusedUploadSize, 0x20);

        gl.bindBuffer(GL_ARRAY_BUFFER, m_unusedBufferID);
        gl.bufferData(GL_ARRAY_BUFFER, unusedUploadSize, &unusedData[0], m_bufferUsage);

        // make sure upload won't interfere with the test
        useBuffer(unusedUploadSize);

        // don't kill the buffer so that the following upload cannot potentially reuse the buffer

        return;
    }

    // specify it

    if (m_caseType == CASE_UNSPECIFIED_BUFFER)
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
    else
    {
        const std::vector<uint8_t> unusedData(bufferSize, 0x20);
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &unusedData[0], m_bufferUsage);
    }

    if (m_caseType == CASE_UNSPECIFIED_BUFFER || m_caseType == CASE_SPECIFIED_BUFFER)
        return;

    // use it and make sure it is uploaded

    useBuffer(bufferSize);
    DE_ASSERT(m_caseType == CASE_USED_BUFFER || m_caseType == CASE_USED_LARGER_BUFFER);
}

template <typename SampleType>
void BasicUploadCase<SampleType>::deleteBuffer(int bufferSize)
{
    DE_ASSERT(m_bufferID);
    DE_ASSERT(m_caseType != CASE_NO_BUFFERS);

    // render from the buffer to make sure it actually made it to the gpu. This is to
    // make sure that if the upload actually happens later or is happening right now in
    // the background, it will not interfere with further test runs

    // if buffer contains unspecified content, sourcing data from it results in undefined
    // results, possibly including program termination. Specify all data to prevent such
    // case from happening

    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

    if (m_bufferUnspecifiedContent)
    {
        const std::vector<uint8_t> unusedData(bufferSize, 0x20);
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &unusedData[0], m_bufferUsage);

        GLU_EXPECT_NO_ERROR(gl.getError(), "re-specify buffer");
    }

    useBuffer(bufferSize);

    gl.deleteBuffers(1, &m_bufferID);
    m_bufferID = 0;
}

template <typename SampleType>
void BasicUploadCase<SampleType>::useBuffer(int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.useProgram(m_minimalProgram->getProgram());

    gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
    gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
    gl.enableVertexAttribArray(m_minimalProgramPosLoc);

    // use whole buffer to make sure buffer is uploaded by drawing first and last
    DE_ASSERT(bufferSize % (int)sizeof(float[4]) == 0);
    gl.drawArrays(GL_POINTS, 0, 1);
    gl.drawArrays(GL_POINTS, bufferSize / (int)sizeof(float[4]) - 1, 1);

    BasicBufferCase<SampleType>::waitGLResults();
}

template <typename SampleType>
void BasicUploadCase<SampleType>::logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results)
{
    const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, true);

    // with small buffers, report the median transfer rate of the samples
    // with large buffers, report the expected preformance of infinitely large buffers
    const float rate = (m_resultType == RESULT_ASYMPTOTIC_TRANSFER_RATE) ? (analysis.transferRateAtInfinity) :
                                                                           (analysis.transferRateMedian);

    if (rate == std::numeric_limits<float>::infinity())
    {
        // sample times are 1) invalid or 2) timer resolution too low
        // report speed 0 bytes / s since real value cannot be determined
        m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
    }
    else
    {
        // report transfer rate in MB / s
        m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
    }
}

class ReferenceMemcpyCase : public BasicUploadCase<SingleOperationDuration>
{
public:
    ReferenceMemcpyCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                        int numSamples, bool largeBuffersCase);
    ~ReferenceMemcpyCase(void);

    void init(void);
    void deinit(void);

private:
    void testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);

    std::vector<uint8_t> m_dstBuf;
};

ReferenceMemcpyCase::ReferenceMemcpyCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                         int maxBufferSize, int numSamples, bool largeBuffersCase)
    : BasicUploadCase<SingleOperationDuration>(
          ctx, name, desc, minBufferSize, maxBufferSize, numSamples, 0, CASE_NO_BUFFERS,
          (largeBuffersCase) ? (RESULT_ASYMPTOTIC_TRANSFER_RATE) : (RESULT_MEDIAN_TRANSFER_RATE))
    , m_dstBuf()
{
    disableGLWarmup();
}

ReferenceMemcpyCase::~ReferenceMemcpyCase(void)
{
}

void ReferenceMemcpyCase::init(void)
{
    // Describe what the test tries to do
    m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance of memcpy()." << tcu::TestLog::EndMessage;

    m_dstBuf.resize(m_bufferSizeMax, 0x00);

    BasicUploadCase<SingleOperationDuration>::init();
}

void ReferenceMemcpyCase::deinit(void)
{
    m_dstBuf = std::vector<uint8_t>();
    BasicUploadCase<SingleOperationDuration>::deinit();
}

void ReferenceMemcpyCase::testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize)
{
    // write
    result.duration.totalDuration       = medianTimeMemcpy(&m_dstBuf[0], &m_zeroData[0], bufferSize);
    result.duration.fitResponseDuration = result.duration.totalDuration;

    result.writtenSize = bufferSize;
}

class BufferDataUploadCase : public BasicUploadCase<SingleOperationDuration>
{
public:
    BufferDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                         int numSamples, uint32_t bufferUsage, CaseType caseType);
    ~BufferDataUploadCase(void);

    void init(void);

private:
    void testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);
};

BufferDataUploadCase::BufferDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                           int maxBufferSize, int numSamples, uint32_t bufferUsage, CaseType caseType)
    : BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
                                               caseType, RESULT_MEDIAN_TRANSFER_RATE)
{
}

BufferDataUploadCase::~BufferDataUploadCase(void)
{
}

void BufferDataUploadCase::init(void)
{
    // Describe what the test tries to do
    m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferData() function." << tcu::TestLog::EndMessage;

    BasicUploadCase<SingleOperationDuration>::init();
}

void BufferDataUploadCase::testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

    // upload
    {
        uint64_t startTime;
        uint64_t endTime;

        startTime = deGetMicroseconds();
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
        endTime = deGetMicroseconds();

        result.duration.totalDuration       = endTime - startTime;
        result.duration.fitResponseDuration = result.duration.totalDuration;
        result.writtenSize                  = bufferSize;
    }
}

class BufferSubDataUploadCase : public BasicUploadCase<SingleOperationDuration>
{
public:
    enum Flags
    {
        FLAG_FULL_UPLOAD           = 0x01,
        FLAG_PARTIAL_UPLOAD        = 0x02,
        FLAG_INVALIDATE_BEFORE_USE = 0x04,
    };

    BufferSubDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                            int numSamples, uint32_t bufferUsage, CaseType parentCase, int flags);
    ~BufferSubDataUploadCase(void);

    void init(void);

private:
    void testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);

    const bool m_fullUpload;
    const bool m_invalidateBeforeUse;
};

BufferSubDataUploadCase::BufferSubDataUploadCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                                 int maxBufferSize, int numSamples, uint32_t bufferUsage,
                                                 CaseType parentCase, int flags)
    : BasicUploadCase<SingleOperationDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
                                               parentCase, RESULT_MEDIAN_TRANSFER_RATE)
    , m_fullUpload((flags & FLAG_FULL_UPLOAD) != 0)
    , m_invalidateBeforeUse((flags & FLAG_INVALIDATE_BEFORE_USE) != 0)
{
    DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != 0);
    DE_ASSERT((flags & (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD)) != (FLAG_FULL_UPLOAD | FLAG_PARTIAL_UPLOAD));
}

BufferSubDataUploadCase::~BufferSubDataUploadCase(void)
{
}

void BufferSubDataUploadCase::init(void)
{
    // Describe what the test tries to do
    m_testCtx.getLog() << tcu::TestLog::Message << "Testing glBufferSubData() function call performance. "
                       << ((m_fullUpload) ? ("The whole buffer is updated with glBufferSubData. ") :
                                            ("Half of the buffer data is updated with glBufferSubData. "))
                       << ((m_invalidateBeforeUse) ?
                               ("The buffer is cleared with glBufferData(..., NULL) before glBufferSubData upload.") :
                               (""))
                       << "\n"
                       << tcu::TestLog::EndMessage;

    BasicUploadCase<SingleOperationDuration>::init();
}

void BufferSubDataUploadCase::testBufferUpload(UploadSampleResult<SingleOperationDuration> &result, int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

    // "invalidate", upload null
    if (m_invalidateBeforeUse)
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);

    // upload
    {
        uint64_t startTime;
        uint64_t endTime;

        startTime = deGetMicroseconds();

        if (m_fullUpload)
            gl.bufferSubData(GL_ARRAY_BUFFER, 0, bufferSize, &m_zeroData[0]);
        else
        {
            // upload to buffer center
            gl.bufferSubData(GL_ARRAY_BUFFER, bufferSize / 4, bufferSize / 2, &m_zeroData[0]);
        }

        endTime = deGetMicroseconds();

        result.duration.totalDuration       = endTime - startTime;
        result.duration.fitResponseDuration = result.duration.totalDuration;

        if (m_fullUpload)
            result.writtenSize = bufferSize;
        else
            result.writtenSize = bufferSize / 2;
    }
}

class MapBufferRangeCase : public BasicUploadCase<MapBufferRangeDuration>
{
public:
    enum Flags
    {
        FLAG_PARTIAL                       = 0x01,
        FLAG_MANUAL_INVALIDATION           = 0x02,
        FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04,
        FLAG_USE_UNUSED_SPECIFIED_BUFFER   = 0x08,
    };

    MapBufferRangeCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                       int numSamples, uint32_t bufferUsage, uint32_t mapFlags, int caseFlags);
    ~MapBufferRangeCase(void);

    void init(void);

private:
    static CaseType getBaseCaseType(int caseFlags);
    static int getBaseFlags(uint32_t mapFlags, int caseFlags);

    void testBufferUpload(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize);
    void attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize);

    const bool m_manualInvalidation;
    const bool m_fullUpload;
    const bool m_useUnusedUnspecifiedBuffer;
    const bool m_useUnusedSpecifiedBuffer;
    const uint32_t m_mapFlags;
    int m_unmapFailures;
};

MapBufferRangeCase::MapBufferRangeCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                       int maxBufferSize, int numSamples, uint32_t bufferUsage, uint32_t mapFlags,
                                       int caseFlags)
    : BasicUploadCase<MapBufferRangeDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples, bufferUsage,
                                              getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE,
                                              getBaseFlags(mapFlags, caseFlags))
    , m_manualInvalidation((caseFlags & FLAG_MANUAL_INVALIDATION) != 0)
    , m_fullUpload((caseFlags & FLAG_PARTIAL) == 0)
    , m_useUnusedUnspecifiedBuffer((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
    , m_useUnusedSpecifiedBuffer((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
    , m_mapFlags(mapFlags)
    , m_unmapFailures(0)
{
    DE_ASSERT(!(m_useUnusedUnspecifiedBuffer && m_useUnusedSpecifiedBuffer));
    DE_ASSERT(!((m_useUnusedUnspecifiedBuffer || m_useUnusedSpecifiedBuffer) && m_manualInvalidation));
}

MapBufferRangeCase::~MapBufferRangeCase(void)
{
}

void MapBufferRangeCase::init(void)
{
    // Describe what the test tries to do
    m_testCtx.getLog()
        << tcu::TestLog::Message << "Testing glMapBufferRange() and glUnmapBuffer() function call performance.\n"
        << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
        << ((m_useUnusedUnspecifiedBuffer) ?
                ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") :
                (""))
        << ((m_useUnusedSpecifiedBuffer) ?
                ("The buffer has not been used before mapping and is allocated with specified contents.\n") :
                (""))
        << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ?
                ("The buffer has previously been used in a drawing operation.\n") :
                (""))
        << ((m_manualInvalidation) ? ("The buffer is cleared with glBufferData(..., NULL) before mapping.\n") : (""))
        << "Map bits:\n"
        << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
        << tcu::TestLog::EndMessage;

    BasicUploadCase<MapBufferRangeDuration>::init();
}

MapBufferRangeCase::CaseType MapBufferRangeCase::getBaseCaseType(int caseFlags)
{
    if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
        return CASE_USED_BUFFER;
    else
        return CASE_NEW_BUFFER;
}

int MapBufferRangeCase::getBaseFlags(uint32_t mapFlags, int caseFlags)
{
    int flags = FLAG_DONT_LOG_BUFFER_INFO;

    // If buffer contains unspecified data when it is sourced (i.e drawn)
    // results are undefined, and system errors may occur. Signal parent
    // class to take this into account
    if (caseFlags & FLAG_PARTIAL)
    {
        if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || (caseFlags & FLAG_MANUAL_INVALIDATION) != 0 ||
            (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
        {
            flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
        }
    }

    return flags;
}

void MapBufferRangeCase::testBufferUpload(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize)
{
    const int unmapFailureThreshold = 4;

    for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
    {
        try
        {
            attemptBufferMap(result, bufferSize);
            return;
        }
        catch (UnmapFailureError &)
        {
        }
    }

    throw tcu::TestError("Unmapping failures exceeded limit");
}

void MapBufferRangeCase::attemptBufferMap(UploadSampleResult<MapBufferRangeDuration> &result, int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

    if (m_fullUpload)
        result.writtenSize = bufferSize;
    else
        result.writtenSize = bufferSize / 2;

    // Create unused buffer

    if (m_manualInvalidation || m_useUnusedUnspecifiedBuffer)
    {
        uint64_t startTime;
        uint64_t endTime;

        // "invalidate" or allocate, upload null
        startTime = deGetMicroseconds();
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
        endTime = deGetMicroseconds();

        result.duration.allocDuration = endTime - startTime;
    }
    else if (m_useUnusedSpecifiedBuffer)
    {
        uint64_t startTime;
        uint64_t endTime;

        // Specify buffer contents
        startTime = deGetMicroseconds();
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
        endTime = deGetMicroseconds();

        result.duration.allocDuration = endTime - startTime;
    }
    else
    {
        // No alloc, no time
        result.duration.allocDuration = 0;
    }

    // upload
    {
        void *mapPtr;

        // Map
        {
            uint64_t startTime;
            uint64_t endTime;

            startTime = deGetMicroseconds();
            if (m_fullUpload)
                mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, result.writtenSize, m_mapFlags);
            else
            {
                // upload to buffer center
                mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, result.writtenSize, m_mapFlags);
            }
            endTime = deGetMicroseconds();

            if (!mapPtr)
                throw tcu::Exception("MapBufferRange returned NULL");

            result.duration.mapDuration = endTime - startTime;
        }

        // Write
        {
            result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
        }

        // Unmap
        {
            uint64_t startTime;
            uint64_t endTime;
            glw::GLboolean unmapSuccessful;

            startTime       = deGetMicroseconds();
            unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
            endTime         = deGetMicroseconds();

            // if unmapping fails, just try again later
            if (!unmapSuccessful)
                throw UnmapFailureError();

            result.duration.unmapDuration = endTime - startTime;
        }

        result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
                                        result.duration.unmapDuration + result.duration.allocDuration;
        result.duration.fitResponseDuration = result.duration.totalDuration;
    }
}

class MapBufferRangeFlushCase : public BasicUploadCase<MapBufferRangeFlushDuration>
{
public:
    enum Flags
    {
        FLAG_PARTIAL                       = 0x01,
        FLAG_FLUSH_IN_PARTS                = 0x02,
        FLAG_USE_UNUSED_UNSPECIFIED_BUFFER = 0x04,
        FLAG_USE_UNUSED_SPECIFIED_BUFFER   = 0x08,
        FLAG_FLUSH_PARTIAL                 = 0x10,
    };

    MapBufferRangeFlushCase(Context &ctx, const char *name, const char *desc, int minBufferSize, int maxBufferSize,
                            int numSamples, uint32_t bufferUsage, uint32_t mapFlags, int caseFlags);
    ~MapBufferRangeFlushCase(void);

    void init(void);

private:
    static CaseType getBaseCaseType(int caseFlags);
    static int getBaseFlags(uint32_t mapFlags, int caseFlags);

    void testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize);
    void attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize);

    const bool m_fullUpload;
    const bool m_flushInParts;
    const bool m_flushPartial;
    const bool m_useUnusedUnspecifiedBuffer;
    const bool m_useUnusedSpecifiedBuffer;
    const uint32_t m_mapFlags;
    int m_unmapFailures;
};

MapBufferRangeFlushCase::MapBufferRangeFlushCase(Context &ctx, const char *name, const char *desc, int minBufferSize,
                                                 int maxBufferSize, int numSamples, uint32_t bufferUsage,
                                                 uint32_t mapFlags, int caseFlags)
    : BasicUploadCase<MapBufferRangeFlushDuration>(ctx, name, desc, minBufferSize, maxBufferSize, numSamples,
                                                   bufferUsage, getBaseCaseType(caseFlags), RESULT_MEDIAN_TRANSFER_RATE,
                                                   getBaseFlags(mapFlags, caseFlags))
    , m_fullUpload((caseFlags & FLAG_PARTIAL) == 0)
    , m_flushInParts((caseFlags & FLAG_FLUSH_IN_PARTS) != 0)
    , m_flushPartial((caseFlags & FLAG_FLUSH_PARTIAL) != 0)
    , m_useUnusedUnspecifiedBuffer((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0)
    , m_useUnusedSpecifiedBuffer((caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) != 0)
    , m_mapFlags(mapFlags)
    , m_unmapFailures(0)
{
    DE_ASSERT(!(m_flushPartial && m_flushInParts));
    DE_ASSERT(!(m_flushPartial && !m_fullUpload));
}

MapBufferRangeFlushCase::~MapBufferRangeFlushCase(void)
{
}

void MapBufferRangeFlushCase::init(void)
{
    // Describe what the test tries to do
    m_testCtx.getLog()
        << tcu::TestLog::Message
        << "Testing glMapBufferRange(), glFlushMappedBufferRange() and glUnmapBuffer() function call performance.\n"
        << ((m_fullUpload) ? ("The whole buffer is mapped.") : ("Half of the buffer is mapped.")) << "\n"
        << ((m_flushInParts) ?
                ("The mapped range is partitioned to 4 subranges and each partition is flushed separately.") :
            (m_flushPartial) ? ("Half of the buffer range is flushed.") :
                               ("The whole mapped range is flushed in one flush call."))
        << "\n"
        << ((m_useUnusedUnspecifiedBuffer) ?
                ("The buffer has not been used before mapping and is allocated with unspecified contents.\n") :
                (""))
        << ((m_useUnusedSpecifiedBuffer) ?
                ("The buffer has not been used before mapping and is allocated with specified contents.\n") :
                (""))
        << ((!m_useUnusedSpecifiedBuffer && !m_useUnusedUnspecifiedBuffer) ?
                ("The buffer has previously been used in a drawing operation.\n") :
                (""))
        << "Map bits:\n"
        << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
        << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
        << tcu::TestLog::EndMessage;

    BasicUploadCase<MapBufferRangeFlushDuration>::init();
}

MapBufferRangeFlushCase::CaseType MapBufferRangeFlushCase::getBaseCaseType(int caseFlags)
{
    if ((caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) == 0 && (caseFlags & FLAG_USE_UNUSED_SPECIFIED_BUFFER) == 0)
        return CASE_USED_BUFFER;
    else
        return CASE_NEW_BUFFER;
}

int MapBufferRangeFlushCase::getBaseFlags(uint32_t mapFlags, int caseFlags)
{
    int flags = FLAG_DONT_LOG_BUFFER_INFO;

    // If buffer contains unspecified data when it is sourced (i.e drawn)
    // results are undefined, and system errors may occur. Signal parent
    // class to take this into account
    if (caseFlags & FLAG_PARTIAL)
    {
        if ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0 || (caseFlags & FLAG_USE_UNUSED_UNSPECIFIED_BUFFER) != 0 ||
            (caseFlags & FLAG_FLUSH_PARTIAL) != 0)
        {
            flags |= FLAG_RESULT_BUFFER_UNSPECIFIED_CONTENT;
        }
    }

    return flags;
}

void MapBufferRangeFlushCase::testBufferUpload(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize)
{
    const int unmapFailureThreshold = 4;

    for (; m_unmapFailures < unmapFailureThreshold; ++m_unmapFailures)
    {
        try
        {
            attemptBufferMap(result, bufferSize);
            return;
        }
        catch (UnmapFailureError &)
        {
        }
    }

    throw tcu::TestError("Unmapping failures exceeded limit");
}

void MapBufferRangeFlushCase::attemptBufferMap(UploadSampleResult<MapBufferRangeFlushDuration> &result, int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const int mappedSize     = (m_fullUpload) ? (bufferSize) : (bufferSize / 2);

    if (m_fullUpload && !m_flushPartial)
        result.writtenSize = bufferSize;
    else
        result.writtenSize = bufferSize / 2;

    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);

    // Create unused buffer

    if (m_useUnusedUnspecifiedBuffer)
    {
        uint64_t startTime;
        uint64_t endTime;

        // Don't specify contents
        startTime = deGetMicroseconds();
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, DE_NULL, m_bufferUsage);
        endTime = deGetMicroseconds();

        result.duration.allocDuration = endTime - startTime;
    }
    else if (m_useUnusedSpecifiedBuffer)
    {
        uint64_t startTime;
        uint64_t endTime;

        // Specify buffer contents
        startTime = deGetMicroseconds();
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);
        endTime = deGetMicroseconds();

        result.duration.allocDuration = endTime - startTime;
    }
    else
    {
        // No alloc, no time
        result.duration.allocDuration = 0;
    }

    // upload
    {
        void *mapPtr;

        // Map
        {
            uint64_t startTime;
            uint64_t endTime;

            startTime = deGetMicroseconds();
            if (m_fullUpload)
                mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, mappedSize, m_mapFlags);
            else
            {
                // upload to buffer center
                mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, bufferSize / 4, mappedSize, m_mapFlags);
            }
            endTime = deGetMicroseconds();

            if (!mapPtr)
                throw tcu::Exception("MapBufferRange returned NULL");

            result.duration.mapDuration = endTime - startTime;
        }

        // Write
        {
            if (!m_flushPartial)
                result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], result.writtenSize);
            else
                result.duration.writeDuration =
                    medianTimeMemcpy((uint8_t *)mapPtr + bufferSize / 4, &m_zeroData[0], result.writtenSize);
        }

        // Flush
        {
            uint64_t startTime;
            uint64_t endTime;

            startTime = deGetMicroseconds();

            if (m_flushPartial)
                gl.flushMappedBufferRange(GL_ARRAY_BUFFER, mappedSize / 4, mappedSize / 2);
            else if (!m_flushInParts)
                gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, mappedSize);
            else
            {
                const int p1 = 0;
                const int p2 = mappedSize / 3;
                const int p3 = mappedSize / 2;
                const int p4 = mappedSize * 2 / 4;
                const int p5 = mappedSize;

                // flush in mixed order
                gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p2, p3 - p2);
                gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p1, p2 - p1);
                gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p4, p5 - p4);
                gl.flushMappedBufferRange(GL_ARRAY_BUFFER, p3, p4 - p3);
            }

            endTime = deGetMicroseconds();

            result.duration.flushDuration = endTime - startTime;
        }

        // Unmap
        {
            uint64_t startTime;
            uint64_t endTime;
            glw::GLboolean unmapSuccessful;

            startTime       = deGetMicroseconds();
            unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
            endTime         = deGetMicroseconds();

            // if unmapping fails, just try again later
            if (!unmapSuccessful)
                throw UnmapFailureError();

            result.duration.unmapDuration = endTime - startTime;
        }

        result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
                                        result.duration.flushDuration + result.duration.unmapDuration +
                                        result.duration.allocDuration;
        result.duration.fitResponseDuration = result.duration.totalDuration;
    }
}

template <typename SampleType>
class ModifyAfterBasicCase : public BasicBufferCase<SampleType>
{
public:
    ModifyAfterBasicCase(Context &context, const char *name, const char *description, int bufferSizeMin,
                         int bufferSizeMax, uint32_t usage, bool bufferUnspecifiedAfterTest);
    ~ModifyAfterBasicCase(void);

    void init(void);
    void deinit(void);

protected:
    void drawBufferRange(int begin, int end);

private:
    enum
    {
        NUM_SAMPLES = 20,
    };

    bool runSample(int iteration, UploadSampleResult<SampleType> &sample);
    bool prepareAndRunTest(int iteration, UploadSampleResult<SampleType> &result, int bufferSize);
    void logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results);

    virtual void testWithBufferSize(UploadSampleResult<SampleType> &result, int bufferSize) = 0;

    int m_unmappingErrors;

protected:
    const bool m_bufferUnspecifiedAfterTest;
    const uint32_t m_bufferUsage;
    std::vector<uint8_t> m_zeroData;

    using BasicBufferCase<SampleType>::m_testCtx;
    using BasicBufferCase<SampleType>::m_context;

    using BasicBufferCase<SampleType>::UNUSED_RENDER_AREA_SIZE;
    using BasicBufferCase<SampleType>::m_minimalProgram;
    using BasicBufferCase<SampleType>::m_minimalProgramPosLoc;
    using BasicBufferCase<SampleType>::m_bufferID;
    using BasicBufferCase<SampleType>::m_numSamples;
    using BasicBufferCase<SampleType>::m_bufferSizeMin;
    using BasicBufferCase<SampleType>::m_bufferSizeMax;
    using BasicBufferCase<SampleType>::m_allocateLargerBuffer;
};

template <typename SampleType>
ModifyAfterBasicCase<SampleType>::ModifyAfterBasicCase(Context &context, const char *name, const char *description,
                                                       int bufferSizeMin, int bufferSizeMax, uint32_t usage,
                                                       bool bufferUnspecifiedAfterTest)
    : BasicBufferCase<SampleType>(context, name, description, bufferSizeMin, bufferSizeMax, NUM_SAMPLES, 0)
    , m_unmappingErrors(0)
    , m_bufferUnspecifiedAfterTest(bufferUnspecifiedAfterTest)
    , m_bufferUsage(usage)
    , m_zeroData()
{
}

template <typename SampleType>
ModifyAfterBasicCase<SampleType>::~ModifyAfterBasicCase(void)
{
    BasicBufferCase<SampleType>::deinit();
}

template <typename SampleType>
void ModifyAfterBasicCase<SampleType>::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // init parent

    BasicBufferCase<SampleType>::init();

    // upload source
    m_zeroData.resize(m_bufferSizeMax, 0x00);

    // log basic info

    m_testCtx.getLog() << tcu::TestLog::Message << "Testing performance with " << (int)NUM_SAMPLES
                       << " test samples. Sample order is randomized. All samples at even positions (first = 0) are "
                          "tested before samples at odd positions.\n"
                       << "Buffer sizes are in range [" << getHumanReadableByteSize(m_bufferSizeMin) << ", "
                       << getHumanReadableByteSize(m_bufferSizeMax) << "]." << tcu::TestLog::EndMessage;

    // log which transfer rate is the test result and buffer info

    m_testCtx.getLog() << tcu::TestLog::Message << "Test result is the median transfer rate of the test samples.\n"
                       << "Buffer usage = " << glu::getUsageName(m_bufferUsage) << tcu::TestLog::EndMessage;

    // Set state for drawing so that we don't have to change these during the iteration
    {
        gl.useProgram(m_minimalProgram->getProgram());
        gl.viewport(0, 0, UNUSED_RENDER_AREA_SIZE, UNUSED_RENDER_AREA_SIZE);
        gl.enableVertexAttribArray(m_minimalProgramPosLoc);
    }
}

template <typename SampleType>
void ModifyAfterBasicCase<SampleType>::deinit(void)
{
    m_zeroData = std::vector<uint8_t>();

    BasicBufferCase<SampleType>::deinit();
}

template <typename SampleType>
void ModifyAfterBasicCase<SampleType>::drawBufferRange(int begin, int end)
{
    DE_ASSERT(begin % (int)sizeof(float[4]) == 0);
    DE_ASSERT(end % (int)sizeof(float[4]) == 0);

    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // use given range
    gl.drawArrays(GL_POINTS, begin / (int)sizeof(float[4]), 1);
    gl.drawArrays(GL_POINTS, end / (int)sizeof(float[4]) - 1, 1);
}

template <typename SampleType>
bool ModifyAfterBasicCase<SampleType>::runSample(int iteration, UploadSampleResult<SampleType> &sample)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const int bufferSize     = sample.bufferSize;
    bool testOk;

    testOk = prepareAndRunTest(iteration, sample, bufferSize);
    GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer upload sample");

    if (!testOk)
    {
        const int unmapFailureThreshold = 4;

        // only unmapping error can cause iteration failure
        if (++m_unmappingErrors >= unmapFailureThreshold)
            throw tcu::TestError("Too many unmapping errors, cannot continue.");

        // just try again
        return false;
    }

    return true;
}

template <typename SampleType>
bool ModifyAfterBasicCase<SampleType>::prepareAndRunTest(int iteration, UploadSampleResult<SampleType> &result,
                                                         int bufferSize)
{
    DE_UNREF(iteration);

    DE_ASSERT(!m_bufferID);
    DE_ASSERT(deIsAligned32(bufferSize, 4 * 4)); // aligned to vec4

    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    bool testRunOk           = true;
    bool unmappingFailed     = false;

    // Upload initial buffer to the GPU...
    gl.genBuffers(1, &m_bufferID);
    gl.bindBuffer(GL_ARRAY_BUFFER, m_bufferID);
    gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);

    // ...use it...
    gl.vertexAttribPointer(m_minimalProgramPosLoc, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
    drawBufferRange(0, bufferSize);

    // ..and make sure it is uploaded
    BasicBufferCase<SampleType>::waitGLResults();

    // warmup CPU before the test to make sure the power management governor
    // keeps us in the "high performance" mode
    {
        deYield();
        tcu::warmupCPU();
        deYield();
    }

    // test
    try
    {
        // buffer is uploaded to the GPU. Draw from it.
        drawBufferRange(0, bufferSize);

        // and test upload
        testWithBufferSize(result, bufferSize);
    }
    catch (UnmapFailureError &)
    {
        testRunOk       = false;
        unmappingFailed = true;
    }

    // clean up: make sure buffer is not in upload queue and delete it

    // sourcing unspecified data causes undefined results, possibly program termination
    if (m_bufferUnspecifiedAfterTest || unmappingFailed)
        gl.bufferData(GL_ARRAY_BUFFER, bufferSize, &m_zeroData[0], m_bufferUsage);

    drawBufferRange(0, bufferSize);
    BasicBufferCase<SampleType>::waitGLResults();

    gl.deleteBuffers(1, &m_bufferID);
    m_bufferID = 0;

    return testRunOk;
}

template <typename SampleType>
void ModifyAfterBasicCase<SampleType>::logAndSetTestResult(const std::vector<UploadSampleResult<SampleType>> &results)
{
    const UploadSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), results, false);

    // Return median transfer rate of the samples

    if (analysis.transferRateMedian == std::numeric_limits<float>::infinity())
    {
        // sample times are 1) invalid or 2) timer resolution too low
        // report speed 0 bytes / s since real value cannot be determined
        m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
    }
    else
    {
        // report transfer rate in MB / s
        m_testCtx.setTestResult(QP_TEST_RESULT_PASS,
                                de::floatToString(analysis.transferRateMedian / 1024.0f / 1024.0f, 2).c_str());
    }
}

class ModifyAfterWithBufferDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
{
public:
    enum CaseFlags
    {
        FLAG_RESPECIFY_SIZE  = 0x1,
        FLAG_UPLOAD_REPEATED = 0x2,
    };

    ModifyAfterWithBufferDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                  int bufferSizeMax, uint32_t usage, int flags);
    ~ModifyAfterWithBufferDataCase(void);

    void init(void);
    void deinit(void);

private:
    void testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);

    enum
    {
        NUM_REPEATS = 2
    };

    const bool m_respecifySize;
    const bool m_repeatedUpload;
    const float m_sizeDifferenceFactor;
};

ModifyAfterWithBufferDataCase::ModifyAfterWithBufferDataCase(Context &context, const char *name, const char *desc,
                                                             int bufferSizeMin, int bufferSizeMax, uint32_t usage,
                                                             int flags)
    : ModifyAfterBasicCase<SingleOperationDuration>(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
    , m_respecifySize((flags & FLAG_RESPECIFY_SIZE) != 0)
    , m_repeatedUpload((flags & FLAG_UPLOAD_REPEATED) != 0)
    , m_sizeDifferenceFactor(1.3f)
{
    DE_ASSERT(!(m_repeatedUpload && m_respecifySize));
}

ModifyAfterWithBufferDataCase::~ModifyAfterWithBufferDataCase(void)
{
    deinit();
}

void ModifyAfterWithBufferDataCase::init(void)
{
    // Log the purpose of the test

    if (m_repeatedUpload)
        m_testCtx.getLog() << tcu::TestLog::Message
                           << "Testing performance of BufferData() command after \"specify buffer contents - draw "
                              "buffer\" command pair is repeated "
                           << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
    else
        m_testCtx.getLog() << tcu::TestLog::Message
                           << "Testing performance of BufferData() command after a draw command that sources data from "
                              "the target buffer."
                           << tcu::TestLog::EndMessage;

    m_testCtx.getLog() << tcu::TestLog::Message
                       << ((m_respecifySize) ?
                               ("Buffer size is increased and contents are modified with BufferData().\n") :
                               ("Buffer contents are modified with BufferData().\n"))
                       << tcu::TestLog::EndMessage;

    // init parent
    ModifyAfterBasicCase<SingleOperationDuration>::init();

    // make sure our zeroBuffer is large enough
    if (m_respecifySize)
    {
        const int largerBufferSize = deAlign32((int)((float)m_bufferSizeMax * m_sizeDifferenceFactor), 4 * 4);
        m_zeroData.resize(largerBufferSize, 0x00);
    }
}

void ModifyAfterWithBufferDataCase::deinit(void)
{
    ModifyAfterBasicCase<SingleOperationDuration>::deinit();
}

void ModifyAfterWithBufferDataCase::testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result,
                                                       int bufferSize)
{
    // always draw the same amount to make compares between cases sensible
    const int drawStart = deAlign32(bufferSize / 4, 4 * 4);
    const int drawEnd   = deAlign32(bufferSize * 3 / 4, 4 * 4);

    const glw::Functions &gl   = m_context.getRenderContext().getFunctions();
    const int largerBufferSize = deAlign32((int)((float)bufferSize * m_sizeDifferenceFactor), 4 * 4);
    const int newBufferSize    = (m_respecifySize) ? (largerBufferSize) : (bufferSize);
    uint64_t startTime;
    uint64_t endTime;

    // repeat upload-draw
    if (m_repeatedUpload)
    {
        for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
        {
            gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
            drawBufferRange(drawStart, drawEnd);
        }
    }

    // test upload
    startTime = deGetMicroseconds();
    gl.bufferData(GL_ARRAY_BUFFER, newBufferSize, &m_zeroData[0], m_bufferUsage);
    endTime = deGetMicroseconds();

    result.duration.totalDuration       = endTime - startTime;
    result.duration.fitResponseDuration = result.duration.totalDuration;
    result.writtenSize                  = newBufferSize;
}

class ModifyAfterWithBufferSubDataCase : public ModifyAfterBasicCase<SingleOperationDuration>
{
public:
    enum CaseFlags
    {
        FLAG_PARTIAL         = 0x1,
        FLAG_UPLOAD_REPEATED = 0x2,
    };

    ModifyAfterWithBufferSubDataCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                     int bufferSizeMax, uint32_t usage, int flags);
    ~ModifyAfterWithBufferSubDataCase(void);

    void init(void);
    void deinit(void);

private:
    void testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result, int bufferSize);

    enum
    {
        NUM_REPEATS = 2
    };

    const bool m_partialUpload;
    const bool m_repeatedUpload;
};

ModifyAfterWithBufferSubDataCase::ModifyAfterWithBufferSubDataCase(Context &context, const char *name, const char *desc,
                                                                   int bufferSizeMin, int bufferSizeMax, uint32_t usage,
                                                                   int flags)
    : ModifyAfterBasicCase<SingleOperationDuration>(context, name, desc, bufferSizeMin, bufferSizeMax, usage, false)
    , m_partialUpload((flags & FLAG_PARTIAL) != 0)
    , m_repeatedUpload((flags & FLAG_UPLOAD_REPEATED) != 0)
{
}

ModifyAfterWithBufferSubDataCase::~ModifyAfterWithBufferSubDataCase(void)
{
    deinit();
}

void ModifyAfterWithBufferSubDataCase::init(void)
{
    // Log the purpose of the test

    if (m_repeatedUpload)
        m_testCtx.getLog() << tcu::TestLog::Message
                           << "Testing performance of BufferSubData() command after \"specify buffer contents - draw "
                              "buffer\" command pair is repeated "
                           << (int)NUM_REPEATS << " times." << tcu::TestLog::EndMessage;
    else
        m_testCtx.getLog() << tcu::TestLog::Message
                           << "Testing performance of BufferSubData() command after a draw command that sources data "
                              "from the target buffer."
                           << tcu::TestLog::EndMessage;

    m_testCtx.getLog() << tcu::TestLog::Message
                       << ((m_partialUpload) ? ("Half of the buffer contents are modified.\n") :
                                               ("Buffer contents are fully respecified.\n"))
                       << tcu::TestLog::EndMessage;

    ModifyAfterBasicCase<SingleOperationDuration>::init();
}

void ModifyAfterWithBufferSubDataCase::deinit(void)
{
    ModifyAfterBasicCase<SingleOperationDuration>::deinit();
}

void ModifyAfterWithBufferSubDataCase::testWithBufferSize(UploadSampleResult<SingleOperationDuration> &result,
                                                          int bufferSize)
{
    // always draw the same amount to make compares between cases sensible
    const int drawStart = deAlign32(bufferSize / 4, 4 * 4);
    const int drawEnd   = deAlign32(bufferSize * 3 / 4, 4 * 4);

    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const int subdataOffset  = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
    const int subdataSize    = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4 * 4);
    uint64_t startTime;
    uint64_t endTime;

    // make upload-draw stream
    if (m_repeatedUpload)
    {
        for (int repeatNdx = 0; repeatNdx < NUM_REPEATS; ++repeatNdx)
        {
            gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
            drawBufferRange(drawStart, drawEnd);
        }
    }

    // test upload
    startTime = deGetMicroseconds();
    gl.bufferSubData(GL_ARRAY_BUFFER, subdataOffset, subdataSize, &m_zeroData[0]);
    endTime = deGetMicroseconds();

    result.duration.totalDuration       = endTime - startTime;
    result.duration.fitResponseDuration = result.duration.totalDuration;
    result.writtenSize                  = subdataSize;
}

class ModifyAfterWithMapBufferRangeCase : public ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>
{
public:
    enum CaseFlags
    {
        FLAG_PARTIAL = 0x1,
    };

    ModifyAfterWithMapBufferRangeCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                      int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags);
    ~ModifyAfterWithMapBufferRangeCase(void);

    void init(void);
    void deinit(void);

private:
    static bool isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags);
    void testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> &result, int bufferSize);

    const bool m_partialUpload;
    const uint32_t m_mapFlags;
};

ModifyAfterWithMapBufferRangeCase::ModifyAfterWithMapBufferRangeCase(Context &context, const char *name,
                                                                     const char *desc, int bufferSizeMin,
                                                                     int bufferSizeMax, uint32_t usage, int flags,
                                                                     uint32_t glMapFlags)
    : ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>(context, name, desc, bufferSizeMin, bufferSizeMax, usage,
                                                          isBufferUnspecifiedAfterUpload(flags, glMapFlags))
    , m_partialUpload((flags & FLAG_PARTIAL) != 0)
    , m_mapFlags(glMapFlags)
{
}

ModifyAfterWithMapBufferRangeCase::~ModifyAfterWithMapBufferRangeCase(void)
{
    deinit();
}

void ModifyAfterWithMapBufferRangeCase::init(void)
{
    // Log the purpose of the test

    m_testCtx.getLog() << tcu::TestLog::Message
                       << "Testing performance of MapBufferRange() command after a draw command that sources data from "
                          "the target buffer.\n"
                       << ((m_partialUpload) ? ("Half of the buffer is mapped.\n") : ("Whole buffer is mapped.\n"))
                       << "Map bits:\n"
                       << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
                       << tcu::TestLog::EndMessage;

    ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::init();
}

void ModifyAfterWithMapBufferRangeCase::deinit(void)
{
    ModifyAfterBasicCase<MapBufferRangeDurationNoAlloc>::deinit();
}

bool ModifyAfterWithMapBufferRangeCase::isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags)
{
    if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
        return true;

    return false;
}

void ModifyAfterWithMapBufferRangeCase::testWithBufferSize(UploadSampleResult<MapBufferRangeDurationNoAlloc> &result,
                                                           int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const int subdataOffset  = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
    const int subdataSize    = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4 * 4);
    void *mapPtr;

    // map
    {
        uint64_t startTime;
        uint64_t endTime;

        startTime = deGetMicroseconds();
        mapPtr    = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
        endTime   = deGetMicroseconds();

        if (!mapPtr)
            throw tcu::TestError("mapBufferRange returned null");

        result.duration.mapDuration = endTime - startTime;
    }

    // write
    {
        result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
    }

    // unmap
    {
        uint64_t startTime;
        uint64_t endTime;
        glw::GLboolean unmapSucceeded;

        startTime      = deGetMicroseconds();
        unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
        endTime        = deGetMicroseconds();

        if (unmapSucceeded != GL_TRUE)
            throw UnmapFailureError();

        result.duration.unmapDuration = endTime - startTime;
    }

    result.duration.totalDuration =
        result.duration.mapDuration + result.duration.writeDuration + result.duration.unmapDuration;
    result.duration.fitResponseDuration = result.duration.totalDuration;
    result.writtenSize                  = subdataSize;
}

class ModifyAfterWithMapBufferFlushCase : public ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>
{
public:
    enum CaseFlags
    {
        FLAG_PARTIAL = 0x1,
    };

    ModifyAfterWithMapBufferFlushCase(Context &context, const char *name, const char *desc, int bufferSizeMin,
                                      int bufferSizeMax, uint32_t usage, int flags, uint32_t glMapFlags);
    ~ModifyAfterWithMapBufferFlushCase(void);

    void init(void);
    void deinit(void);

private:
    static bool isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags);
    void testWithBufferSize(UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> &result, int bufferSize);

    const bool m_partialUpload;
    const uint32_t m_mapFlags;
};

ModifyAfterWithMapBufferFlushCase::ModifyAfterWithMapBufferFlushCase(Context &context, const char *name,
                                                                     const char *desc, int bufferSizeMin,
                                                                     int bufferSizeMax, uint32_t usage, int flags,
                                                                     uint32_t glMapFlags)
    : ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>(context, name, desc, bufferSizeMin, bufferSizeMax, usage,
                                                               isBufferUnspecifiedAfterUpload(flags, glMapFlags))
    , m_partialUpload((flags & FLAG_PARTIAL) != 0)
    , m_mapFlags(glMapFlags)
{
}

ModifyAfterWithMapBufferFlushCase::~ModifyAfterWithMapBufferFlushCase(void)
{
    deinit();
}

void ModifyAfterWithMapBufferFlushCase::init(void)
{
    // Log the purpose of the test

    m_testCtx.getLog() << tcu::TestLog::Message
                       << "Testing performance of MapBufferRange() command after a draw command that sources data from "
                          "the target buffer.\n"
                       << ((m_partialUpload) ? ("Half of the buffer is mapped.\n") : ("Whole buffer is mapped.\n"))
                       << "Map bits:\n"
                       << ((m_mapFlags & GL_MAP_WRITE_BIT) ? ("\tGL_MAP_WRITE_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_READ_BIT) ? ("\tGL_MAP_READ_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_INVALIDATE_RANGE_BIT) ? ("\tGL_MAP_INVALIDATE_RANGE_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) ? ("\tGL_MAP_INVALIDATE_BUFFER_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_UNSYNCHRONIZED_BIT) ? ("\tGL_MAP_UNSYNCHRONIZED_BIT\n") : (""))
                       << ((m_mapFlags & GL_MAP_FLUSH_EXPLICIT_BIT) ? ("\tGL_MAP_FLUSH_EXPLICIT_BIT\n") : (""))
                       << tcu::TestLog::EndMessage;

    ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::init();
}

void ModifyAfterWithMapBufferFlushCase::deinit(void)
{
    ModifyAfterBasicCase<MapBufferRangeFlushDurationNoAlloc>::deinit();
}

bool ModifyAfterWithMapBufferFlushCase::isBufferUnspecifiedAfterUpload(int flags, uint32_t mapFlags)
{
    if ((flags & FLAG_PARTIAL) != 0 && ((mapFlags & GL_MAP_INVALIDATE_BUFFER_BIT) != 0))
        return true;

    return false;
}

void ModifyAfterWithMapBufferFlushCase::testWithBufferSize(
    UploadSampleResult<MapBufferRangeFlushDurationNoAlloc> &result, int bufferSize)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const int subdataOffset  = deAlign32((m_partialUpload) ? (bufferSize / 4) : (0), 4 * 4);
    const int subdataSize    = deAlign32((m_partialUpload) ? (bufferSize / 2) : (bufferSize), 4 * 4);
    void *mapPtr;

    // map
    {
        uint64_t startTime;
        uint64_t endTime;

        startTime = deGetMicroseconds();
        mapPtr    = gl.mapBufferRange(GL_ARRAY_BUFFER, subdataOffset, subdataSize, m_mapFlags);
        endTime   = deGetMicroseconds();

        if (!mapPtr)
            throw tcu::TestError("mapBufferRange returned null");

        result.duration.mapDuration = endTime - startTime;
    }

    // write
    {
        result.duration.writeDuration = medianTimeMemcpy(mapPtr, &m_zeroData[0], subdataSize);
    }

    // flush
    {
        uint64_t startTime;
        uint64_t endTime;

        startTime = deGetMicroseconds();
        gl.flushMappedBufferRange(GL_ARRAY_BUFFER, 0, subdataSize);
        endTime = deGetMicroseconds();

        result.duration.flushDuration = endTime - startTime;
    }

    // unmap
    {
        uint64_t startTime;
        uint64_t endTime;
        glw::GLboolean unmapSucceeded;

        startTime      = deGetMicroseconds();
        unmapSucceeded = gl.unmapBuffer(GL_ARRAY_BUFFER);
        endTime        = deGetMicroseconds();

        if (unmapSucceeded != GL_TRUE)
            throw UnmapFailureError();

        result.duration.unmapDuration = endTime - startTime;
    }

    result.duration.totalDuration = result.duration.mapDuration + result.duration.writeDuration +
                                    result.duration.unmapDuration + result.duration.flushDuration;
    result.duration.fitResponseDuration = result.duration.totalDuration;
    result.writtenSize                  = subdataSize;
}

enum DrawMethod
{
    DRAWMETHOD_DRAW_ARRAYS = 0,
    DRAWMETHOD_DRAW_ELEMENTS,

    DRAWMETHOD_LAST
};

enum TargetBuffer
{
    TARGETBUFFER_VERTEX = 0,
    TARGETBUFFER_INDEX,

    TARGETBUFFER_LAST
};

enum BufferState
{
    BUFFERSTATE_NEW = 0,
    BUFFERSTATE_EXISTING,

    BUFFERSTATE_LAST
};

enum UploadMethod
{
    UPLOADMETHOD_BUFFER_DATA = 0,
    UPLOADMETHOD_BUFFER_SUB_DATA,
    UPLOADMETHOD_MAP_BUFFER_RANGE,

    UPLOADMETHOD_LAST
};

enum UnrelatedBufferType
{
    UNRELATEDBUFFERTYPE_NONE = 0,
    UNRELATEDBUFFERTYPE_VERTEX,

    UNRELATEDBUFFERTYPE_LAST
};

enum UploadRange
{
    UPLOADRANGE_FULL = 0,
    UPLOADRANGE_PARTIAL,

    UPLOADRANGE_LAST
};

struct LayeredGridSpec
{
    int gridWidth;
    int gridHeight;
    int gridLayers;
};

static int getLayeredGridNumVertices(const LayeredGridSpec &scene)
{
    return scene.gridWidth * scene.gridHeight * scene.gridLayers * 6;
}

static void generateLayeredGridVertexAttribData4C4V(std::vector<tcu::Vec4> &vertexData, const LayeredGridSpec &scene)
{
    // interleave color & vertex data
    const tcu::Vec4 green(0.0f, 1.0f, 0.0f, 0.7f);
    const tcu::Vec4 yellow(1.0f, 1.0f, 0.0f, 0.8f);

    vertexData.resize(getLayeredGridNumVertices(scene) * 2);

    for (int cellY = 0; cellY < scene.gridHeight; ++cellY)
        for (int cellX = 0; cellX < scene.gridWidth; ++cellX)
            for (int cellZ = 0; cellZ < scene.gridLayers; ++cellZ)
            {
                const tcu::Vec4 color  = (((cellX + cellY + cellZ) % 2) == 0) ? (green) : (yellow);
                const float cellLeft   = (float(cellX) / (float)scene.gridWidth - 0.5f) * 2.0f;
                const float cellRight  = (float(cellX + 1) / (float)scene.gridWidth - 0.5f) * 2.0f;
                const float cellTop    = (float(cellY + 1) / (float)scene.gridHeight - 0.5f) * 2.0f;
                const float cellBottom = (float(cellY) / (float)scene.gridHeight - 0.5f) * 2.0f;

                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 0] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 1] =
                    tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);

                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 2] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 3] =
                    tcu::Vec4(cellLeft, cellBottom, 0.0f, 1.0f);

                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 4] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 5] =
                    tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);

                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 6] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 7] =
                    tcu::Vec4(cellLeft, cellTop, 0.0f, 1.0f);

                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 8] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 9] =
                    tcu::Vec4(cellRight, cellBottom, 0.0f, 1.0f);

                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 10] =
                    color;
                vertexData[(cellY * scene.gridWidth * scene.gridLayers + cellX * scene.gridLayers + cellZ) * 12 + 11] =
                    tcu::Vec4(cellRight, cellTop, 0.0f, 1.0f);
            }
}

static void generateLayeredGridIndexData(std::vector<uint32_t> &indexData, const LayeredGridSpec &scene)
{
    indexData.resize(getLayeredGridNumVertices(scene) * 2);

    for (int ndx = 0; ndx < scene.gridLayers * scene.gridHeight * scene.gridWidth * 6; ++ndx)
        indexData[ndx] = ndx;
}

class RenderPerformanceTestBase : public TestCase
{
public:
    RenderPerformanceTestBase(Context &context, const char *name, const char *description);
    ~RenderPerformanceTestBase(void);

protected:
    void init(void);
    void deinit(void);

    void waitGLResults(void) const;
    void setupVertexAttribs(void) const;

    enum
    {
        RENDER_AREA_SIZE = 128
    };

private:
    glu::ShaderProgram *m_renderProgram;
    int m_colorLoc;
    int m_positionLoc;
};

RenderPerformanceTestBase::RenderPerformanceTestBase(Context &context, const char *name, const char *description)
    : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
    , m_renderProgram(DE_NULL)
    , m_colorLoc(0)
    , m_positionLoc(0)
{
}

RenderPerformanceTestBase::~RenderPerformanceTestBase(void)
{
    deinit();
}

void RenderPerformanceTestBase::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    m_renderProgram = new glu::ShaderProgram(m_context.getRenderContext(),
                                             glu::ProgramSources() << glu::VertexSource(s_colorVertexShader)
                                                                   << glu::FragmentSource(s_colorFragmentShader));
    if (!m_renderProgram->isOk())
    {
        m_testCtx.getLog() << *m_renderProgram;
        throw tcu::TestError("could not build program");
    }

    m_colorLoc    = gl.getAttribLocation(m_renderProgram->getProgram(), "a_color");
    m_positionLoc = gl.getAttribLocation(m_renderProgram->getProgram(), "a_position");

    if (m_colorLoc == -1)
        throw tcu::TestError("Location of attribute a_color was -1");
    if (m_positionLoc == -1)
        throw tcu::TestError("Location of attribute a_position was -1");
}

void RenderPerformanceTestBase::deinit(void)
{
    delete m_renderProgram;
    m_renderProgram = DE_NULL;
}

void RenderPerformanceTestBase::setupVertexAttribs(void) const
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // buffers are bound

    gl.enableVertexAttribArray(m_colorLoc);
    gl.enableVertexAttribArray(m_positionLoc);

    gl.vertexAttribPointer(m_colorLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)),
                           glu::BufferOffsetAsPointer(0 * sizeof(tcu::Vec4)));
    gl.vertexAttribPointer(m_positionLoc, 4, GL_FLOAT, GL_FALSE, (glw::GLsizei)(8 * sizeof(float)),
                           glu::BufferOffsetAsPointer(1 * sizeof(tcu::Vec4)));

    gl.useProgram(m_renderProgram->getProgram());

    GLU_EXPECT_NO_ERROR(gl.getError(), "set up rendering");
}

void RenderPerformanceTestBase::waitGLResults(void) const
{
    tcu::Surface unusedSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    glu::readPixels(m_context.getRenderContext(), 0, 0, unusedSurface.getAccess());
}

template <typename SampleType>
class RenderCase : public RenderPerformanceTestBase
{
public:
    RenderCase(Context &context, const char *name, const char *description, DrawMethod drawMethod);
    ~RenderCase(void);

protected:
    void init(void);
    void deinit(void);

private:
    IterateResult iterate(void);

protected:
    struct SampleResult
    {
        LayeredGridSpec scene;
        RenderSampleResult<SampleType> result;
    };

    int getMinWorkloadSize(void) const;
    int getMaxWorkloadSize(void) const;
    int getMinWorkloadDataSize(void) const;
    int getMaxWorkloadDataSize(void) const;
    int getVertexDataSize(void) const;
    int getNumSamples(void) const;
    void uploadScene(const LayeredGridSpec &scene);

    virtual void runSample(SampleResult &sample) = 0;
    virtual void logAndSetTestResult(const std::vector<SampleResult> &results);

    void mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> &dst,
                                      const std::vector<SampleResult> &src) const;

    const DrawMethod m_drawMethod;

private:
    glw::GLuint m_attributeBufferID;
    glw::GLuint m_indexBufferID;
    int m_iterationNdx;
    std::vector<int> m_iterationOrder;
    std::vector<SampleResult> m_results;
    int m_numUnmapFailures;
};

template <typename SampleType>
RenderCase<SampleType>::RenderCase(Context &context, const char *name, const char *description, DrawMethod drawMethod)
    : RenderPerformanceTestBase(context, name, description)
    , m_drawMethod(drawMethod)
    , m_attributeBufferID(0)
    , m_indexBufferID(0)
    , m_iterationNdx(0)
    , m_numUnmapFailures(0)
{
    DE_ASSERT(drawMethod < DRAWMETHOD_LAST);
}

template <typename SampleType>
RenderCase<SampleType>::~RenderCase(void)
{
    deinit();
}

template <typename SampleType>
void RenderCase<SampleType>::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    RenderPerformanceTestBase::init();

    // requirements

    if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
        m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
        throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" +
                                     de::toString<int>(RENDER_AREA_SIZE) + " render target");

    // gl state

    gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);

    // enable bleding to prevent grid layers from being discarded
    gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
    gl.blendEquation(GL_FUNC_ADD);
    gl.enable(GL_BLEND);

    // generate iterations

    {
        const int gridSizes[] = {20, 26, 32, 38, 44, 50, 56, 62, 68, 74, 80, 86, 92, 98, 104, 110, 116, 122, 128};

        for (int gridNdx = 0; gridNdx < DE_LENGTH_OF_ARRAY(gridSizes); ++gridNdx)
        {
            m_results.push_back(SampleResult());

            m_results.back().scene.gridHeight = gridSizes[gridNdx];
            m_results.back().scene.gridWidth  = gridSizes[gridNdx];
            m_results.back().scene.gridLayers = 5;

            m_results.back().result.numVertices = getLayeredGridNumVertices(m_results.back().scene);

            // test cases set these, initialize to unused values
            m_results.back().result.renderDataSize    = -1;
            m_results.back().result.uploadedDataSize  = -1;
            m_results.back().result.unrelatedDataSize = -1;
        }
    }

    // randomize iteration order
    {
        m_iterationOrder.resize(m_results.size());
        generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_iterationOrder.size());
    }
}

template <typename SampleType>
void RenderCase<SampleType>::deinit(void)
{
    RenderPerformanceTestBase::deinit();

    if (m_attributeBufferID)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_attributeBufferID);
        m_attributeBufferID = 0;
    }

    if (m_indexBufferID)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBufferID);
        m_indexBufferID = 0;
    }
}

template <typename SampleType>
typename RenderCase<SampleType>::IterateResult RenderCase<SampleType>::iterate(void)
{
    const int unmapFailureThreshold = 3;
    const int currentIteration      = m_iterationNdx;
    const int currentConfigNdx      = m_iterationOrder[currentIteration];
    SampleResult &currentSample     = m_results[currentConfigNdx];

    try
    {
        runSample(currentSample);
        ++m_iterationNdx;
    }
    catch (const UnmapFailureError &ex)
    {
        DE_UNREF(ex);
        ++m_numUnmapFailures;
    }

    if (m_numUnmapFailures > unmapFailureThreshold)
        throw tcu::TestError("Got too many unmap errors");

    if (m_iterationNdx < (int)m_iterationOrder.size())
        return CONTINUE;

    logAndSetTestResult(m_results);
    return STOP;
}

template <typename SampleType>
int RenderCase<SampleType>::getMinWorkloadSize(void) const
{
    int result = getLayeredGridNumVertices(m_results[0].scene);

    for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
    {
        const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
        result                 = de::min(result, workloadSize);
    }

    return result;
}

template <typename SampleType>
int RenderCase<SampleType>::getMaxWorkloadSize(void) const
{
    int result = getLayeredGridNumVertices(m_results[0].scene);

    for (int ndx = 1; ndx < (int)m_results.size(); ++ndx)
    {
        const int workloadSize = getLayeredGridNumVertices(m_results[ndx].scene);
        result                 = de::max(result, workloadSize);
    }

    return result;
}

template <typename SampleType>
int RenderCase<SampleType>::getMinWorkloadDataSize(void) const
{
    return getMinWorkloadSize() * getVertexDataSize();
}

template <typename SampleType>
int RenderCase<SampleType>::getMaxWorkloadDataSize(void) const
{
    return getMaxWorkloadSize() * getVertexDataSize();
}

template <typename SampleType>
int RenderCase<SampleType>::getVertexDataSize(void) const
{
    const int numVectors = 2;
    const int vec4Size   = 4 * sizeof(float);

    return numVectors * vec4Size;
}

template <typename SampleType>
int RenderCase<SampleType>::getNumSamples(void) const
{
    return (int)m_results.size();
}

template <typename SampleType>
void RenderCase<SampleType>::uploadScene(const LayeredGridSpec &scene)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // vertex buffer
    {
        std::vector<tcu::Vec4> vertexData;

        generateLayeredGridVertexAttribData4C4V(vertexData, scene);

        if (m_attributeBufferID == 0)
            gl.genBuffers(1, &m_attributeBufferID);
        gl.bindBuffer(GL_ARRAY_BUFFER, m_attributeBufferID);
        gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);
    }

    // index buffer
    if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
    {
        std::vector<uint32_t> indexData;

        generateLayeredGridIndexData(indexData, scene);

        if (m_indexBufferID == 0)
            gl.genBuffers(1, &m_indexBufferID);
        gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBufferID);
        gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
                      GL_STATIC_DRAW);
    }

    GLU_EXPECT_NO_ERROR(gl.getError(), "create buffers");
}

template <typename SampleType>
void RenderCase<SampleType>::logAndSetTestResult(const std::vector<SampleResult> &results)
{
    std::vector<RenderSampleResult<SampleType>> mappedResults;

    mapResultsToRenderRateFormat(mappedResults, results);

    {
        const RenderSampleAnalyzeResult analysis = analyzeSampleResults(m_testCtx.getLog(), mappedResults);
        const float rate                         = analysis.renderRateAtRange;

        if (rate == std::numeric_limits<float>::infinity())
        {
            // sample times are 1) invalid or 2) timer resolution too low
            m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(0.0f, 2).c_str());
        }
        else
        {
            // report transfer rate in millions of MiB/s
            m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(rate / 1024.0f / 1024.0f, 2).c_str());
        }
    }
}

template <typename SampleType>
void RenderCase<SampleType>::mapResultsToRenderRateFormat(std::vector<RenderSampleResult<SampleType>> &dst,
                                                          const std::vector<SampleResult> &src) const
{
    dst.resize(src.size());

    for (int ndx = 0; ndx < (int)src.size(); ++ndx)
        dst[ndx] = src[ndx].result;
}

class ReferenceRenderTimeCase : public RenderCase<RenderReadDuration>
{
public:
    ReferenceRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod drawMethod);

private:
    void init(void);
    void runSample(SampleResult &sample);
};

ReferenceRenderTimeCase::ReferenceRenderTimeCase(Context &context, const char *name, const char *description,
                                                 DrawMethod drawMethod)
    : RenderCase<RenderReadDuration>(context, name, description, drawMethod)
{
}

void ReferenceRenderTimeCase::init(void)
{
    const char *const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");

    // init parent
    RenderCase<RenderReadDuration>::init();

    // log
    m_testCtx.getLog() << tcu::TestLog::Message << "Measuring the time used in " << targetFunctionName
                       << " and readPixels call with different rendering workloads.\n"
                       << getNumSamples() << " test samples. Sample order is randomized.\n"
                       << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
                       << "Generated workload is multiple viewport-covering grids with varying number of cells, each "
                          "cell is two separate triangles.\n"
                       << "Workload sizes are in the range [" << getMinWorkloadSize() << ",  " << getMaxWorkloadSize()
                       << "] vertices ([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
                       << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
                       << "Test result is the approximated total processing rate in MiB / s.\n"
                       << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
                               ("Note that index array size is not included in the processed size.\n") :
                               (""))
                       << "Note! Test result should only be used as a baseline reference result for "
                          "buffer.data_upload.* test group results."
                       << tcu::TestLog::EndMessage;
}

void ReferenceRenderTimeCase::runSample(SampleResult &sample)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    const int numVertices = getLayeredGridNumVertices(sample.scene);
    const glu::Buffer arrayBuffer(m_context.getRenderContext());
    const glu::Buffer indexBuffer(m_context.getRenderContext());
    std::vector<tcu::Vec4> vertexData;
    std::vector<uint32_t> indexData;
    uint64_t startTime;
    uint64_t endTime;

    // generate and upload buffers

    generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
    gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
    gl.bufferData(GL_ARRAY_BUFFER, (int)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0], GL_STATIC_DRAW);

    if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
    {
        generateLayeredGridIndexData(indexData, sample.scene);
        gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
        gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
                      GL_STATIC_DRAW);
    }

    setupVertexAttribs();

    // make sure data is uploaded

    if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
        gl.drawArrays(GL_TRIANGLES, 0, numVertices);
    else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
        gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
    else
        DE_ASSERT(false);
    waitGLResults();

    gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
    gl.clear(GL_COLOR_BUFFER_BIT);
    waitGLResults();

    tcu::warmupCPU();

    // Measure both draw and associated readpixels
    {
        startTime = deGetMicroseconds();

        if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
            gl.drawArrays(GL_TRIANGLES, 0, numVertices);
        else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        sample.result.duration.renderDuration = endTime - startTime;
    }

    {
        startTime = deGetMicroseconds();
        glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
        endTime = deGetMicroseconds();

        sample.result.duration.readDuration = endTime - startTime;
    }

    sample.result.renderDataSize    = getVertexDataSize() * sample.result.numVertices;
    sample.result.uploadedDataSize  = 0;
    sample.result.unrelatedDataSize = 0;
    sample.result.duration.renderReadDuration =
        sample.result.duration.renderDuration + sample.result.duration.readDuration;
    sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
    sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
}

class UnrelatedUploadRenderTimeCase : public RenderCase<UnrelatedUploadRenderReadDuration>
{
public:
    UnrelatedUploadRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod drawMethod,
                                  UploadMethod unrelatedUploadMethod);

private:
    void init(void);
    void runSample(SampleResult &sample);

    const UploadMethod m_unrelatedUploadMethod;
};

UnrelatedUploadRenderTimeCase::UnrelatedUploadRenderTimeCase(Context &context, const char *name,
                                                             const char *description, DrawMethod drawMethod,
                                                             UploadMethod unrelatedUploadMethod)
    : RenderCase<UnrelatedUploadRenderReadDuration>(context, name, description, drawMethod)
    , m_unrelatedUploadMethod(unrelatedUploadMethod)
{
    DE_ASSERT(m_unrelatedUploadMethod < UPLOADMETHOD_LAST);
}

void UnrelatedUploadRenderTimeCase::init(void)
{
    const char *const targetFunctionName = (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
    tcu::MessageBuilder message(&m_testCtx.getLog());

    // init parent
    RenderCase<UnrelatedUploadRenderReadDuration>::init();

    // log

    message << "Measuring the time used in " << targetFunctionName
            << " and readPixels call with different rendering workloads.\n"
            << "Uploading an unrelated buffer just before issuing the rendering command with "
            << ((m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_DATA)      ? ("bufferData") :
                (m_unrelatedUploadMethod != UPLOADMETHOD_BUFFER_SUB_DATA)  ? ("bufferSubData") :
                (m_unrelatedUploadMethod != UPLOADMETHOD_MAP_BUFFER_RANGE) ? ("mapBufferRange") :
                                                                             ((const char *)DE_NULL))
            << ".\n"
            << getNumSamples() << " test samples. Sample order is randomized.\n"
            << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
            << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two "
               "separate triangles.\n"
            << "Workload sizes are in the range [" << getMinWorkloadSize() << ",  " << getMaxWorkloadSize()
            << "] vertices ([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
            << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
            << "Unrelated upload sizes are in the range [" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ", "
            << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "]\n"
            << "Test result is the approximated total processing rate in MiB / s.\n"
            << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
                    ("Note that index array size is not included in the processed size.\n") :
                    (""))
            << "Note that the data size and the time used in the unrelated upload is not included in the results.\n"
            << "Note! Test result may not be useful as is but instead should be compared against the reference.* group "
               "and upload_and_draw.*_and_unrelated_upload group results.\n"
            << tcu::TestLog::EndMessage;
}

void UnrelatedUploadRenderTimeCase::runSample(SampleResult &sample)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    const int numVertices = getLayeredGridNumVertices(sample.scene);
    const glu::Buffer arrayBuffer(m_context.getRenderContext());
    const glu::Buffer indexBuffer(m_context.getRenderContext());
    const glu::Buffer unrelatedBuffer(m_context.getRenderContext());
    int unrelatedUploadSize = -1;
    int renderUploadSize;
    std::vector<tcu::Vec4> vertexData;
    std::vector<uint32_t> indexData;
    uint64_t startTime;
    uint64_t endTime;

    // generate and upload buffers

    generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
    renderUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

    gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
    gl.bufferData(GL_ARRAY_BUFFER, renderUploadSize, &vertexData[0], GL_STATIC_DRAW);

    if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
    {
        generateLayeredGridIndexData(indexData, sample.scene);
        gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
        gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (int)(indexData.size() * sizeof(uint32_t)), &indexData[0],
                      GL_STATIC_DRAW);
    }

    setupVertexAttribs();

    // make sure data is uploaded

    if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
        gl.drawArrays(GL_TRIANGLES, 0, numVertices);
    else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
        gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
    else
        DE_ASSERT(false);
    waitGLResults();

    gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
    gl.clear(GL_COLOR_BUFFER_BIT);
    waitGLResults();

    tcu::warmupCPU();

    // Unrelated upload
    if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_DATA)
    {
        unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

        gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
        gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
    }
    else if (m_unrelatedUploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
    {
        unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

        gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
        gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);
        gl.bufferSubData(GL_ARRAY_BUFFER, 0, unrelatedUploadSize, &vertexData[0]);
    }
    else if (m_unrelatedUploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
    {
        void *mapPtr;
        glw::GLboolean unmapSuccessful;

        unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

        gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
        gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, DE_NULL, GL_STATIC_DRAW);

        mapPtr = gl.mapBufferRange(GL_ARRAY_BUFFER, 0, unrelatedUploadSize,
                                   GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT |
                                       GL_MAP_UNSYNCHRONIZED_BIT);
        if (!mapPtr)
            throw tcu::Exception("MapBufferRange returned NULL");

        deMemcpy(mapPtr, &vertexData[0], unrelatedUploadSize);

        // if unmapping fails, just try again later
        unmapSuccessful = gl.unmapBuffer(GL_ARRAY_BUFFER);
        if (!unmapSuccessful)
            throw UnmapFailureError();
    }
    else
        DE_ASSERT(false);

    DE_ASSERT(unrelatedUploadSize != -1);

    // Measure both draw and associated readpixels
    {
        startTime = deGetMicroseconds();

        if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
            gl.drawArrays(GL_TRIANGLES, 0, numVertices);
        else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        sample.result.duration.renderDuration = endTime - startTime;
    }

    {
        startTime = deGetMicroseconds();
        glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
        endTime = deGetMicroseconds();

        sample.result.duration.readDuration = endTime - startTime;
    }

    sample.result.renderDataSize    = getVertexDataSize() * sample.result.numVertices;
    sample.result.uploadedDataSize  = renderUploadSize;
    sample.result.unrelatedDataSize = unrelatedUploadSize;
    sample.result.duration.renderReadDuration =
        sample.result.duration.renderDuration + sample.result.duration.readDuration;
    sample.result.duration.totalDuration = sample.result.duration.renderDuration + sample.result.duration.readDuration;
    sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
}

class ReferenceReadPixelsTimeCase : public TestCase
{
public:
    ReferenceReadPixelsTimeCase(Context &context, const char *name, const char *description);

private:
    void init(void);
    IterateResult iterate(void);
    void logAndSetTestResult(void);

    enum
    {
        RENDER_AREA_SIZE = 128
    };

    const int m_numSamples;
    int m_sampleNdx;
    std::vector<int> m_samples;
};

ReferenceReadPixelsTimeCase::ReferenceReadPixelsTimeCase(Context &context, const char *name, const char *description)
    : TestCase(context, tcu::NODETYPE_PERFORMANCE, name, description)
    , m_numSamples(20)
    , m_sampleNdx(0)
    , m_samples(m_numSamples)
{
}

void ReferenceReadPixelsTimeCase::init(void)
{
    m_testCtx.getLog() << tcu::TestLog::Message << "Measuring the time used in a single readPixels call with "
                       << m_numSamples << " test samples.\n"
                       << "Test result is the median of the samples in microseconds.\n"
                       << "Note! Test result should only be used as a baseline reference result for "
                          "buffer.data_upload.* test group results."
                       << tcu::TestLog::EndMessage;
}

ReferenceReadPixelsTimeCase::IterateResult ReferenceReadPixelsTimeCase::iterate(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    uint64_t startTime;
    uint64_t endTime;

    deYield();
    tcu::warmupCPU();
    deYield();

    // "Render" something and wait for it
    gl.clearColor(0.0f, 1.0f, float(m_sampleNdx) / float(m_numSamples), 1.0f);
    gl.clear(GL_COLOR_BUFFER_BIT);

    // wait for results
    glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());

    // measure time used in readPixels
    startTime = deGetMicroseconds();
    glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
    endTime = deGetMicroseconds();

    m_samples[m_sampleNdx] = (int)(endTime - startTime);

    if (++m_sampleNdx < m_numSamples)
        return CONTINUE;

    logAndSetTestResult();
    return STOP;
}

void ReferenceReadPixelsTimeCase::logAndSetTestResult(void)
{
    // Log sample list
    {
        m_testCtx.getLog() << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
                           << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
                           << tcu::TestLog::EndSampleInfo;

        for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
            m_testCtx.getLog() << tcu::TestLog::Sample << m_samples[sampleNdx] << tcu::TestLog::EndSample;

        m_testCtx.getLog() << tcu::TestLog::EndSampleList;
    }

    // Log median
    {
        float median;
        float limit60Low;
        float limit60Up;

        std::sort(m_samples.begin(), m_samples.end());
        median     = linearSample(m_samples, 0.5f);
        limit60Low = linearSample(m_samples, 0.2f);
        limit60Up  = linearSample(m_samples, 0.8f);

        m_testCtx.getLog() << tcu::TestLog::Float("Median", "Median", "us", QP_KEY_TAG_TIME, median)
                           << tcu::TestLog::Message << "60 % of samples within range:\n"
                           << tcu::TestLog::EndMessage
                           << tcu::TestLog::Float("Low60Range", "Lower", "us", QP_KEY_TAG_TIME, limit60Low)
                           << tcu::TestLog::Float("High60Range", "Upper", "us", QP_KEY_TAG_TIME, limit60Up);

        m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(median, 2).c_str());
    }
}

template <typename SampleType>
class GenericUploadRenderTimeCase : public RenderCase<SampleType>
{
public:
    typedef typename RenderCase<SampleType>::SampleResult SampleResult;

    GenericUploadRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod method,
                                TargetBuffer targetBuffer, UploadMethod uploadMethod, BufferState bufferState,
                                UploadRange uploadRange, UnrelatedBufferType unrelatedBufferType);

private:
    void init(void);
    void runSample(SampleResult &sample);

    using RenderCase<SampleType>::RENDER_AREA_SIZE;

    const TargetBuffer m_targetBuffer;
    const BufferState m_bufferState;
    const UploadMethod m_uploadMethod;
    const UnrelatedBufferType m_unrelatedBufferType;
    const UploadRange m_uploadRange;

    using RenderCase<SampleType>::m_context;
    using RenderCase<SampleType>::m_testCtx;
    using RenderCase<SampleType>::m_drawMethod;
};

template <typename SampleType>
GenericUploadRenderTimeCase<SampleType>::GenericUploadRenderTimeCase(Context &context, const char *name,
                                                                     const char *description, DrawMethod method,
                                                                     TargetBuffer targetBuffer,
                                                                     UploadMethod uploadMethod, BufferState bufferState,
                                                                     UploadRange uploadRange,
                                                                     UnrelatedBufferType unrelatedBufferType)
    : RenderCase<SampleType>(context, name, description, method)
    , m_targetBuffer(targetBuffer)
    , m_bufferState(bufferState)
    , m_uploadMethod(uploadMethod)
    , m_unrelatedBufferType(unrelatedBufferType)
    , m_uploadRange(uploadRange)
{
    DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
    DE_ASSERT(m_bufferState < BUFFERSTATE_LAST);
    DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
    DE_ASSERT(m_unrelatedBufferType < UNRELATEDBUFFERTYPE_LAST);
    DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
}

template <typename SampleType>
void GenericUploadRenderTimeCase<SampleType>::init(void)
{
    // init parent
    RenderCase<SampleType>::init();

    // log
    {
        const char *const targetFunctionName =
            (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
        const int perVertexSize =
            (m_targetBuffer == TARGETBUFFER_INDEX) ? ((int)sizeof(uint32_t)) : ((int)sizeof(tcu::Vec4[2]));
        const int fullMinUploadSize = RenderCase<SampleType>::getMinWorkloadSize() * perVertexSize;
        const int fullMaxUploadSize = RenderCase<SampleType>::getMaxWorkloadSize() * perVertexSize;
        const int minUploadSize =
            (m_uploadRange == UPLOADRANGE_FULL) ? (fullMinUploadSize) : (deAlign32(fullMinUploadSize / 2, 4));
        const int maxUploadSize =
            (m_uploadRange == UPLOADRANGE_FULL) ? (fullMaxUploadSize) : (deAlign32(fullMaxUploadSize / 2, 4));
        const int minUnrelatedUploadSize = RenderCase<SampleType>::getMinWorkloadSize() * (int)sizeof(tcu::Vec4[2]);
        const int maxUnrelatedUploadSize = RenderCase<SampleType>::getMaxWorkloadSize() * (int)sizeof(tcu::Vec4[2]);

        m_testCtx.getLog()
            << tcu::TestLog::Message << "Measuring the time used in " << targetFunctionName
            << " and readPixels call with different rendering workloads.\n"
            << "The " << ((m_targetBuffer == TARGETBUFFER_INDEX) ? ("index") : ("vertex attrib")) << " buffer "
            << ((m_bufferState == BUFFERSTATE_NEW) ? ("") : ("contents ")) << "sourced by the rendering command "
            << ((m_bufferState == BUFFERSTATE_NEW)     ? ("is uploaded ") :
                (m_uploadRange == UPLOADRANGE_FULL)    ? ("are specified ") :
                (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("are updated (partial upload) ") :
                                                         ((const char *)DE_NULL))
            << "just before issuing the rendering command.\n"
            << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("The buffer has been used in rendering.\n") :
                                                          ("The buffer is generated just before uploading.\n"))
            << "Buffer "
            << ((m_bufferState == BUFFERSTATE_NEW)     ? ("is uploaded") :
                (m_uploadRange == UPLOADRANGE_FULL)    ? ("contents are specified") :
                (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("contents are partially updated") :
                                                         ((const char *)DE_NULL))
            << " with "
            << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)     ? ("bufferData") :
                (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") :
                                                                   ("mapBufferRange"))
            << " command. Usage of the target buffer is DYNAMIC_DRAW.\n"
            << ((m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ?
                    ("Mapping buffer with bits MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT | MAP_INVALIDATE_BUFFER_BIT | "
                     "MAP_UNSYNCHRONIZED_BIT\n") :
                    (""))
            << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ?
                    ("Uploading an unrelated buffer just before issuing the rendering command with bufferData.\n") :
                    (""))
            << RenderCase<SampleType>::getNumSamples() << " test samples. Sample order is randomized.\n"
            << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
            << "Generated workload is multiple viewport-covering grids with varying number of cells, each cell is two "
               "separate triangles.\n"
            << "Workload sizes are in the range [" << RenderCase<SampleType>::getMinWorkloadSize() << ",  "
            << RenderCase<SampleType>::getMaxWorkloadSize() << "] vertices "
            << "([" << getHumanReadableByteSize(RenderCase<SampleType>::getMinWorkloadDataSize()) << ","
            << getHumanReadableByteSize(RenderCase<SampleType>::getMaxWorkloadDataSize()) << "] to be processed).\n"
            << "Upload sizes are in the range [" << getHumanReadableByteSize(minUploadSize) << ","
            << getHumanReadableByteSize(maxUploadSize) << "].\n"
            << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
                    ("Unrelated upload sizes are in the range [" + getHumanReadableByteSize(minUnrelatedUploadSize) +
                     ", " + getHumanReadableByteSize(maxUnrelatedUploadSize) + "]\n") :
                    (""))
            << "Test result is the approximated processing rate in MiB / s.\n"
            << "Note that while upload time is measured, the time used is not included in the results.\n"
            << ((m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX) ?
                    ("Note that the data size and the time used in the unrelated upload is not included in the "
                     "results.\n") :
                    (""))
            << ((m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS) ?
                    ("Note that index array size is not included in the processed size.\n") :
                    (""))
            << "Note! Test result may not be useful as is but instead should be compared against the reference.* group "
               "and other upload_and_draw.* group results.\n"
            << tcu::TestLog::EndMessage;
    }
}

template <typename SampleType>
void GenericUploadRenderTimeCase<SampleType>::runSample(SampleResult &sample)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const glu::Buffer arrayBuffer(m_context.getRenderContext());
    const glu::Buffer indexBuffer(m_context.getRenderContext());
    const glu::Buffer unrelatedBuffer(m_context.getRenderContext());
    const int numVertices = getLayeredGridNumVertices(sample.scene);
    tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    uint64_t startTime;
    uint64_t endTime;
    std::vector<tcu::Vec4> vertexData;
    std::vector<uint32_t> indexData;

    // create data

    generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
    if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
        generateLayeredGridIndexData(indexData, sample.scene);

    gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
    gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
    RenderCase<SampleType>::setupVertexAttribs();

    // target should be an exisiting buffer? Draw from it once to make sure it exists on the gpu

    if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_EXISTING)
    {
        gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
                      GL_DYNAMIC_DRAW);
        gl.drawArrays(GL_TRIANGLES, 0, numVertices);
    }
    else if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS && m_bufferState == BUFFERSTATE_NEW)
    {
        // do not touch the vertex buffer
    }
    else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_EXISTING)
    {
        // hint that the target buffer will be modified soon
        const glw::GLenum vertexDataUsage =
            (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);
        const glw::GLenum indexDataUsage =
            (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_DYNAMIC_DRAW) : (GL_STATIC_DRAW);

        gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
                      vertexDataUsage);
        gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0],
                      indexDataUsage);
        gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
    }
    else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS && m_bufferState == BUFFERSTATE_NEW)
    {
        if (m_targetBuffer == TARGETBUFFER_VERTEX)
        {
            // make the index buffer present on the gpu
            // use another vertex buffer to keep original buffer in unused state
            const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());

            gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
            RenderCase<SampleType>::setupVertexAttribs();

            gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
                          GL_STATIC_DRAW);
            gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)),
                          &indexData[0], GL_STATIC_DRAW);
            gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);

            // restore original state
            gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
            RenderCase<SampleType>::setupVertexAttribs();
        }
        else if (m_targetBuffer == TARGETBUFFER_INDEX)
        {
            // make the vertex buffer present on the gpu
            gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
                          GL_STATIC_DRAW);
            gl.drawArrays(GL_TRIANGLES, 0, numVertices);
        }
        else
            DE_ASSERT(false);
    }
    else
        DE_ASSERT(false);

    RenderCase<SampleType>::waitGLResults();
    GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");

    gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
    gl.clear(GL_COLOR_BUFFER_BIT);
    RenderCase<SampleType>::waitGLResults();

    tcu::warmupCPU();

    // upload

    {
        glw::GLenum target;
        glw::GLsizeiptr size;
        glw::GLintptr offset = 0;
        const void *source;

        if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
        {
            target = GL_ARRAY_BUFFER;
            size   = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
            source = &vertexData[0];
        }
        else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
        {
            target = GL_ELEMENT_ARRAY_BUFFER;
            size   = (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t));
            source = &indexData[0];
        }
        else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
        {
            DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);

            target = GL_ARRAY_BUFFER;
            size   = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
            offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
            source = (const uint8_t *)&vertexData[0] + offset;
        }
        else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
        {
            DE_ASSERT(m_bufferState == BUFFERSTATE_EXISTING);

            // upload to 25% - 75% range
            target = GL_ELEMENT_ARRAY_BUFFER;
            size   = (glw::GLsizeiptr)deAlign32((int32_t)(indexData.size() * sizeof(uint32_t)) / 2, 4);
            offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
            source = (const uint8_t *)&indexData[0] + offset;
        }
        else
        {
            DE_ASSERT(false);
            return;
        }

        startTime = deGetMicroseconds();

        if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
            gl.bufferData(target, size, source, GL_DYNAMIC_DRAW);
        else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
        {
            // create buffer storage
            if (m_bufferState == BUFFERSTATE_NEW)
                gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);
            gl.bufferSubData(target, offset, size, source);
        }
        else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
        {
            void *mapPtr;
            glw::GLboolean unmapSuccessful;

            // create buffer storage
            if (m_bufferState == BUFFERSTATE_NEW)
                gl.bufferData(target, size, DE_NULL, GL_DYNAMIC_DRAW);

            mapPtr = gl.mapBufferRange(target, offset, size,
                                       GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT |
                                           GL_MAP_UNSYNCHRONIZED_BIT);
            if (!mapPtr)
                throw tcu::Exception("MapBufferRange returned NULL");

            deMemcpy(mapPtr, source, (int)size);

            // if unmapping fails, just try again later
            unmapSuccessful = gl.unmapBuffer(target);
            if (!unmapSuccessful)
                throw UnmapFailureError();
        }
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        sample.result.uploadedDataSize        = (int)size;
        sample.result.duration.uploadDuration = endTime - startTime;
    }

    // unrelated
    if (m_unrelatedBufferType == UNRELATEDBUFFERTYPE_VERTEX)
    {
        const int unrelatedUploadSize = (int)(vertexData.size() * sizeof(tcu::Vec4));

        gl.bindBuffer(GL_ARRAY_BUFFER, *unrelatedBuffer);
        gl.bufferData(GL_ARRAY_BUFFER, unrelatedUploadSize, &vertexData[0], GL_STATIC_DRAW);
        // Attibute pointers are not modified, no need restore state

        sample.result.unrelatedDataSize = unrelatedUploadSize;
    }

    // draw
    {
        startTime = deGetMicroseconds();

        if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
            gl.drawArrays(GL_TRIANGLES, 0, numVertices);
        else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        sample.result.duration.renderDuration = endTime - startTime;
    }

    // read
    {
        startTime = deGetMicroseconds();
        glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
        endTime = deGetMicroseconds();

        sample.result.duration.readDuration = endTime - startTime;
    }

    // set results

    sample.result.renderDataSize = RenderCase<SampleType>::getVertexDataSize() * sample.result.numVertices;

    sample.result.duration.renderReadDuration =
        sample.result.duration.renderDuration + sample.result.duration.readDuration;
    sample.result.duration.totalDuration = sample.result.duration.uploadDuration +
                                           sample.result.duration.renderDuration + sample.result.duration.readDuration;
    sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
}

class BufferInUseRenderTimeCase : public RenderCase<RenderUploadRenderReadDuration>
{
public:
    enum MapFlags
    {
        MAPFLAG_NONE = 0,
        MAPFLAG_INVALIDATE_BUFFER,
        MAPFLAG_INVALIDATE_RANGE,

        MAPFLAG_LAST
    };
    enum UploadBufferTarget
    {
        UPLOADBUFFERTARGET_DIFFERENT_BUFFER = 0,
        UPLOADBUFFERTARGET_SAME_BUFFER,

        UPLOADBUFFERTARGET_LAST
    };
    BufferInUseRenderTimeCase(Context &context, const char *name, const char *description, DrawMethod method,
                              MapFlags mapFlags, TargetBuffer targetBuffer, UploadMethod uploadMethod,
                              UploadRange uploadRange, UploadBufferTarget uploadTarget);

private:
    void init(void);
    void runSample(SampleResult &sample);

    const TargetBuffer m_targetBuffer;
    const UploadMethod m_uploadMethod;
    const UploadRange m_uploadRange;
    const MapFlags m_mapFlags;
    const UploadBufferTarget m_uploadBufferTarget;
};

BufferInUseRenderTimeCase::BufferInUseRenderTimeCase(Context &context, const char *name, const char *description,
                                                     DrawMethod method, MapFlags mapFlags, TargetBuffer targetBuffer,
                                                     UploadMethod uploadMethod, UploadRange uploadRange,
                                                     UploadBufferTarget uploadTarget)
    : RenderCase<RenderUploadRenderReadDuration>(context, name, description, method)
    , m_targetBuffer(targetBuffer)
    , m_uploadMethod(uploadMethod)
    , m_uploadRange(uploadRange)
    , m_mapFlags(mapFlags)
    , m_uploadBufferTarget(uploadTarget)
{
    DE_ASSERT(m_targetBuffer < TARGETBUFFER_LAST);
    DE_ASSERT(m_uploadMethod < UPLOADMETHOD_LAST);
    DE_ASSERT(m_uploadRange < UPLOADRANGE_LAST);
    DE_ASSERT(m_mapFlags < MAPFLAG_LAST);
    DE_ASSERT(m_uploadBufferTarget < UPLOADBUFFERTARGET_LAST);
}

void BufferInUseRenderTimeCase::init(void)
{
    RenderCase<RenderUploadRenderReadDuration>::init();

    // log
    {
        const char *const targetFunctionName =
            (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements");
        const char *const uploadFunctionName = (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)     ? ("bufferData") :
                                               (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ? ("bufferSubData") :
                                                                                                  ("mapBufferRange");
        const bool isReferenceCase           = (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER);
        tcu::MessageBuilder message(&m_testCtx.getLog());

        message << "Measuring the time used in " << targetFunctionName << " call, a buffer upload, "
                << targetFunctionName
                << " call using the uploaded buffer and readPixels call with different upload sizes.\n";

        if (isReferenceCase)
            message << "Rendering:\n"
                    << "    before test: create and use buffers B and C\n"
                    << "    first draw: render using buffer B\n"
                    << ((m_uploadRange == UPLOADRANGE_FULL)    ? ("    upload: respecify buffer C contents\n") :
                        (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("    upload: modify buffer C contents\n") :
                                                                 ((const char *)DE_NULL))
                    << "    second draw: render using buffer C\n"
                    << "    read: readPixels\n";
        else
            message << "Rendering:\n"
                    << "    before test: create and use buffer B\n"
                    << "    first draw: render using buffer B\n"
                    << ((m_uploadRange == UPLOADRANGE_FULL)    ? ("    upload: respecify buffer B contents\n") :
                        (m_uploadRange == UPLOADRANGE_PARTIAL) ? ("    upload: modify buffer B contents\n") :
                                                                 ((const char *)DE_NULL))
                    << "    second draw: render using buffer B\n"
                    << "    read: readPixels\n";

        message << "Uploading using " << uploadFunctionName
                << ((m_mapFlags == MAPFLAG_INVALIDATE_RANGE) ? (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_RANGE_BIT") :
                    (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ?
                                                               (", flags = MAP_WRITE_BIT | MAP_INVALIDATE_BUFFER_BIT") :
                    (m_mapFlags == MAPFLAG_NONE) ? ("") :
                                                   ((const char *)DE_NULL))
                << "\n"
                << getNumSamples() << " test samples. Sample order is randomized.\n"
                << "All samples at even positions (first = 0) are tested before samples at odd positions.\n"
                << "Workload sizes are in the range [" << getMinWorkloadSize() << ",  " << getMaxWorkloadSize()
                << "] vertices "
                << "([" << getHumanReadableByteSize(getMinWorkloadDataSize()) << ","
                << getHumanReadableByteSize(getMaxWorkloadDataSize()) << "] to be processed).\n"
                << "Test result is the approximated processing rate in MiB / s of the second draw call and the "
                   "readPixels call.\n";

        if (isReferenceCase)
            message << "Note! Test result should only be used as a baseline reference result for "
                       "buffer.render_after_upload.draw_modify_draw test group results.";
        else
            message << "Note! Test result may not be useful as is but instead should be compared against the "
                       "buffer.render_after_upload.reference.draw_upload_draw group results.\n";

        message << tcu::TestLog::EndMessage;
    }
}

void BufferInUseRenderTimeCase::runSample(SampleResult &sample)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const glu::Buffer arrayBuffer(m_context.getRenderContext());
    const glu::Buffer indexBuffer(m_context.getRenderContext());
    const glu::Buffer alternativeUploadBuffer(m_context.getRenderContext());
    const int numVertices = getLayeredGridNumVertices(sample.scene);
    tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    uint64_t startTime;
    uint64_t endTime;
    std::vector<tcu::Vec4> vertexData;
    std::vector<uint32_t> indexData;

    // create data

    generateLayeredGridVertexAttribData4C4V(vertexData, sample.scene);
    if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
        generateLayeredGridIndexData(indexData, sample.scene);

    // make buffers used

    gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
    gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
    setupVertexAttribs();

    if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
    {
        gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
                      GL_STREAM_DRAW);
        gl.drawArrays(GL_TRIANGLES, 0, numVertices);
    }
    else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
    {
        gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
                      GL_STREAM_DRAW);
        gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)), &indexData[0],
                      GL_STREAM_DRAW);
        gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
    }
    else
        DE_ASSERT(false);

    // another pair of buffers for reference case
    if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
    {
        if (m_targetBuffer == TARGETBUFFER_VERTEX)
        {
            gl.bindBuffer(GL_ARRAY_BUFFER, *alternativeUploadBuffer);
            gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4)), &vertexData[0],
                          GL_STREAM_DRAW);

            setupVertexAttribs();
            gl.drawArrays(GL_TRIANGLES, 0, numVertices);
        }
        else if (m_targetBuffer == TARGETBUFFER_INDEX)
        {
            gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *alternativeUploadBuffer);
            gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t)),
                          &indexData[0], GL_STREAM_DRAW);
            gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
        }
        else
            DE_ASSERT(false);

        // restore state
        gl.bindBuffer(GL_ARRAY_BUFFER, *arrayBuffer);
        gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, *indexBuffer);
        setupVertexAttribs();
    }

    waitGLResults();
    GLU_EXPECT_NO_ERROR(gl.getError(), "post buffer prepare");

    gl.clearColor(0.0f, 0.0f, 0.0f, 1.0f);
    gl.clear(GL_COLOR_BUFFER_BIT);
    waitGLResults();

    tcu::warmupCPU();

    // first draw
    {
        startTime = deGetMicroseconds();

        if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
            gl.drawArrays(GL_TRIANGLES, 0, numVertices);
        else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        sample.result.duration.firstRenderDuration = endTime - startTime;
    }

    // upload
    {
        glw::GLenum target;
        glw::GLsizeiptr size;
        glw::GLintptr offset = 0;
        const void *source;

        if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_FULL)
        {
            target = GL_ARRAY_BUFFER;
            size   = (glw::GLsizeiptr)(vertexData.size() * sizeof(tcu::Vec4));
            source = &vertexData[0];
        }
        else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_FULL)
        {
            target = GL_ELEMENT_ARRAY_BUFFER;
            size   = (glw::GLsizeiptr)(indexData.size() * sizeof(uint32_t));
            source = &indexData[0];
        }
        else if (m_targetBuffer == TARGETBUFFER_VERTEX && m_uploadRange == UPLOADRANGE_PARTIAL)
        {
            target = GL_ARRAY_BUFFER;
            size   = (glw::GLsizeiptr)deAlign32((int)(vertexData.size() * sizeof(tcu::Vec4)) / 2, 4);
            offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
            source = (const uint8_t *)&vertexData[0] + offset;
        }
        else if (m_targetBuffer == TARGETBUFFER_INDEX && m_uploadRange == UPLOADRANGE_PARTIAL)
        {
            // upload to 25% - 75% range
            target = GL_ELEMENT_ARRAY_BUFFER;
            size   = (glw::GLsizeiptr)deAlign32((int32_t)(indexData.size() * sizeof(uint32_t)) / 2, 4);
            offset = (glw::GLintptr)deAlign32((int)size / 2, 4);
            source = (const uint8_t *)&indexData[0] + offset;
        }
        else
        {
            DE_ASSERT(false);
            return;
        }

        // reference case? don't modify the buffer in use
        if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER)
            gl.bindBuffer(target, *alternativeUploadBuffer);

        startTime = deGetMicroseconds();

        if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
            gl.bufferData(target, size, source, GL_STREAM_DRAW);
        else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
            gl.bufferSubData(target, offset, size, source);
        else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
        {
            const int mapFlags =
                (m_mapFlags == MAPFLAG_INVALIDATE_BUFFER) ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT) :
                (m_mapFlags == MAPFLAG_INVALIDATE_RANGE)  ? (GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT) :
                                                            (-1);
            void *mapPtr;
            glw::GLboolean unmapSuccessful;

            mapPtr = gl.mapBufferRange(target, offset, size, mapFlags);
            if (!mapPtr)
                throw tcu::Exception("MapBufferRange returned NULL");

            deMemcpy(mapPtr, source, (int)size);

            // if unmapping fails, just try again later
            unmapSuccessful = gl.unmapBuffer(target);
            if (!unmapSuccessful)
                throw UnmapFailureError();
        }
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        sample.result.uploadedDataSize        = (int)size;
        sample.result.duration.uploadDuration = endTime - startTime;
    }

    // second draw
    {
        // Source vertex data from alternative buffer in refernce case
        if (m_uploadBufferTarget == UPLOADBUFFERTARGET_DIFFERENT_BUFFER && m_targetBuffer == TARGETBUFFER_VERTEX)
            setupVertexAttribs();

        startTime = deGetMicroseconds();

        if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
            gl.drawArrays(GL_TRIANGLES, 0, numVertices);
        else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.drawElements(GL_TRIANGLES, numVertices, GL_UNSIGNED_INT, DE_NULL);
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        sample.result.duration.secondRenderDuration = endTime - startTime;
    }

    // read
    {
        startTime = deGetMicroseconds();
        glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
        endTime = deGetMicroseconds();

        sample.result.duration.readDuration = endTime - startTime;
    }

    // set results

    sample.result.renderDataSize = getVertexDataSize() * sample.result.numVertices;

    sample.result.duration.renderReadDuration =
        sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
    sample.result.duration.totalDuration =
        sample.result.duration.firstRenderDuration + sample.result.duration.uploadDuration +
        sample.result.duration.secondRenderDuration + sample.result.duration.readDuration;
    sample.result.duration.fitResponseDuration = sample.result.duration.renderReadDuration;
}

class UploadWaitDrawCase : public RenderPerformanceTestBase
{
public:
    struct Sample
    {
        int numFrames;
        uint64_t uploadCallEndTime;
    };
    struct Result
    {
        uint64_t uploadDuration;
        uint64_t renderDuration;
        uint64_t readDuration;
        uint64_t renderReadDuration;

        uint64_t timeBeforeUse;
    };

    UploadWaitDrawCase(Context &context, const char *name, const char *description, DrawMethod drawMethod,
                       TargetBuffer targetBuffer, UploadMethod uploadMethod, BufferState bufferState);
    ~UploadWaitDrawCase(void);

private:
    void init(void);
    void deinit(void);
    IterateResult iterate(void);

    void uploadBuffer(Sample &sample, Result &result);
    void drawFromBuffer(Sample &sample, Result &result);
    void reuseAndDeleteBuffer(void);
    void logAndSetTestResult(void);
    void logSamples(void);
    void drawMisc(void);
    int findStabilizationSample(uint64_t Result::*target, const char *description);
    bool checkSampleTemporalStability(uint64_t Result::*target, const char *description);

    const DrawMethod m_drawMethod;
    const TargetBuffer m_targetBuffer;
    const UploadMethod m_uploadMethod;
    const BufferState m_bufferState;

    const int m_numSamplesPerSwap;
    const int m_numMaxSwaps;

    int m_frameNdx;
    int m_sampleNdx;
    int m_numVertices;

    std::vector<tcu::Vec4> m_vertexData;
    std::vector<uint32_t> m_indexData;
    std::vector<Sample> m_samples;
    std::vector<Result> m_results;
    std::vector<int> m_iterationOrder;

    uint32_t m_vertexBuffer;
    uint32_t m_indexBuffer;
    uint32_t m_miscBuffer;
    int m_numMiscVertices;
};

UploadWaitDrawCase::UploadWaitDrawCase(Context &context, const char *name, const char *description,
                                       DrawMethod drawMethod, TargetBuffer targetBuffer, UploadMethod uploadMethod,
                                       BufferState bufferState)
    : RenderPerformanceTestBase(context, name, description)
    , m_drawMethod(drawMethod)
    , m_targetBuffer(targetBuffer)
    , m_uploadMethod(uploadMethod)
    , m_bufferState(bufferState)
    , m_numSamplesPerSwap(10)
    , m_numMaxSwaps(4)
    , m_frameNdx(0)
    , m_sampleNdx(0)
    , m_numVertices(-1)
    , m_vertexBuffer(0)
    , m_indexBuffer(0)
    , m_miscBuffer(0)
    , m_numMiscVertices(-1)
{
}

UploadWaitDrawCase::~UploadWaitDrawCase(void)
{
    deinit();
}

void UploadWaitDrawCase::init(void)
{
    const glw::Functions &gl       = m_context.getRenderContext().getFunctions();
    const int vertexAttribSize     = (int)sizeof(tcu::Vec4) * 2; // color4, position4
    const int vertexIndexSize      = (int)sizeof(uint32_t);
    const int vertexUploadDataSize = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (vertexAttribSize) : (vertexIndexSize);

    RenderPerformanceTestBase::init();

    // requirements

    if (m_context.getRenderTarget().getWidth() < RENDER_AREA_SIZE ||
        m_context.getRenderTarget().getHeight() < RENDER_AREA_SIZE)
        throw tcu::NotSupportedError("Test case requires " + de::toString<int>(RENDER_AREA_SIZE) + "x" +
                                     de::toString<int>(RENDER_AREA_SIZE) + " render target");

    // gl state

    gl.viewport(0, 0, RENDER_AREA_SIZE, RENDER_AREA_SIZE);

    // enable bleding to prevent grid layers from being discarded

    gl.blendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
    gl.blendEquation(GL_FUNC_ADD);
    gl.enable(GL_BLEND);

    // scene

    {
        LayeredGridSpec scene;

        // create ~8MB workload with similar characteristics as in the other test
        // => makes comparison to other results more straightforward
        scene.gridWidth  = 93;
        scene.gridHeight = 93;
        scene.gridLayers = 5;

        generateLayeredGridVertexAttribData4C4V(m_vertexData, scene);
        generateLayeredGridIndexData(m_indexData, scene);
        m_numVertices = getLayeredGridNumVertices(scene);
    }

    // buffers

    if (m_bufferState == BUFFERSTATE_NEW)
    {
        if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
        {
            // reads from two buffers, prepare the static buffer

            if (m_targetBuffer == TARGETBUFFER_VERTEX)
            {
                // index buffer is static, use another vertex buffer to keep original buffer in unused state
                const glu::Buffer vertexCopyBuffer(m_context.getRenderContext());

                gl.genBuffers(1, &m_indexBuffer);
                gl.bindBuffer(GL_ARRAY_BUFFER, *vertexCopyBuffer);
                gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
                gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)),
                              &m_vertexData[0], GL_STATIC_DRAW);
                gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t)),
                              &m_indexData[0], GL_STATIC_DRAW);

                setupVertexAttribs();
                gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
            }
            else if (m_targetBuffer == TARGETBUFFER_INDEX)
            {
                // vertex buffer is static
                gl.genBuffers(1, &m_vertexBuffer);
                gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
                gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)),
                              &m_vertexData[0], GL_STATIC_DRAW);

                setupVertexAttribs();
                gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
            }
            else
                DE_ASSERT(false);
        }
    }
    else if (m_bufferState == BUFFERSTATE_EXISTING)
    {
        const glw::GLenum vertexUsage = (m_targetBuffer == TARGETBUFFER_VERTEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);
        const glw::GLenum indexUsage  = (m_targetBuffer == TARGETBUFFER_INDEX) ? (GL_STATIC_DRAW) : (GL_STATIC_DRAW);

        gl.genBuffers(1, &m_vertexBuffer);
        gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
        gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4)), &m_vertexData[0],
                      vertexUsage);

        if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
        {
            gl.genBuffers(1, &m_indexBuffer);
            gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
            gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t)),
                          &m_indexData[0], indexUsage);
        }

        setupVertexAttribs();

        if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
            gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
        else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
        else
            DE_ASSERT(false);
    }
    else
        DE_ASSERT(false);

    // misc draw buffer
    {
        std::vector<tcu::Vec4> vertexData;
        LayeredGridSpec scene;

        // create ~1.5MB workload with similar characteristics
        scene.gridWidth  = 40;
        scene.gridHeight = 40;
        scene.gridLayers = 5;

        generateLayeredGridVertexAttribData4C4V(vertexData, scene);

        gl.genBuffers(1, &m_miscBuffer);
        gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
        gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(sizeof(tcu::Vec4) * vertexData.size()), &vertexData[0],
                      GL_STATIC_DRAW);

        m_numMiscVertices = getLayeredGridNumVertices(scene);
    }

    // iterations
    {
        m_samples.resize((m_numMaxSwaps + 1) * m_numSamplesPerSwap);
        m_results.resize((m_numMaxSwaps + 1) * m_numSamplesPerSwap);

        for (int numSwaps = 0; numSwaps <= m_numMaxSwaps; ++numSwaps)
            for (int sampleNdx = 0; sampleNdx < m_numSamplesPerSwap; ++sampleNdx)
            {
                const int index = numSwaps * m_numSamplesPerSwap + sampleNdx;

                m_samples[index].numFrames = numSwaps;
            }

        m_iterationOrder.resize(m_samples.size());
        generateTwoPassRandomIterationOrder(m_iterationOrder, (int)m_samples.size());
    }

    // log
    m_testCtx.getLog()
        << tcu::TestLog::Message << "Measuring time used in "
        << ((m_drawMethod == DRAWMETHOD_DRAW_ARRAYS) ? ("drawArrays") : ("drawElements")) << " and readPixels call.\n"
        << "Drawing using a buffer that has been uploaded N frames ago. Testing with N within range [0, "
        << m_numMaxSwaps << "].\n"
        << "Uploaded buffer is a " << ((m_targetBuffer == TARGETBUFFER_VERTEX) ? ("vertex attribute") : ("index"))
        << " buffer.\n"
        << "Uploading using "
        << ((m_uploadMethod == UPLOADMETHOD_BUFFER_DATA) ?
                ("bufferData") :
            (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA) ?
                ("bufferSubData") :
            (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE) ?
                ("mapBufferRange, flags = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | "
                 "GL_MAP_UNSYNCHRONIZED_BIT") :
                ((const char *)DE_NULL))
        << "\n"
        << "Upload size is " << getHumanReadableByteSize(m_numVertices * vertexUploadDataSize) << ".\n"
        << ((m_bufferState == BUFFERSTATE_EXISTING) ? ("All test samples use the same buffer object.\n") : (""))
        << "Test result is the number of frames (swaps) required for the render time to stabilize.\n"
        << "Assuming combined time used in the draw call and readPixels call is stabilizes to a constant value.\n"
        << tcu::TestLog::EndMessage;
}

void UploadWaitDrawCase::deinit(void)
{
    RenderPerformanceTestBase::deinit();

    if (m_vertexBuffer)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_vertexBuffer);
        m_vertexBuffer = 0;
    }
    if (m_indexBuffer)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_indexBuffer);
        m_indexBuffer = 0;
    }
    if (m_miscBuffer)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_miscBuffer);
        m_miscBuffer = 0;
    }
}

UploadWaitDrawCase::IterateResult UploadWaitDrawCase::iterate(void)
{
    const glw::Functions &gl             = m_context.getRenderContext().getFunctions();
    const int betweenIterationFrameCount = 5; // draw misc between test samples
    const int frameNdx                   = m_frameNdx++;
    const int currentSampleNdx           = m_iterationOrder[m_sampleNdx];

    // Simulate work for about 8ms
    busyWait(8000);

    // Busywork rendering during unused frames
    if (frameNdx != m_samples[currentSampleNdx].numFrames)
    {
        // draw similar from another buffer
        drawMisc();
    }

    if (frameNdx == 0)
    {
        // upload and start the clock
        uploadBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);
    }

    if (frameNdx ==
        m_samples[currentSampleNdx].numFrames) // \note: not else if, m_samples[currentSampleNdx].numFrames can be 0
    {
        // draw using the uploaded buffer
        drawFromBuffer(m_samples[currentSampleNdx], m_results[currentSampleNdx]);

        // re-use buffer for something else to make sure test iteration do not affect each other
        if (m_bufferState == BUFFERSTATE_NEW)
            reuseAndDeleteBuffer();
    }
    else if (frameNdx == m_samples[currentSampleNdx].numFrames + betweenIterationFrameCount)
    {
        // next sample
        ++m_sampleNdx;
        m_frameNdx = 0;
    }

    GLU_EXPECT_NO_ERROR(gl.getError(), "post-iterate");

    if (m_sampleNdx < (int)m_samples.size())
        return CONTINUE;

    logAndSetTestResult();
    return STOP;
}

void UploadWaitDrawCase::uploadBuffer(Sample &sample, Result &result)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    uint64_t startTime;
    uint64_t endTime;
    glw::GLenum target;
    glw::GLsizeiptr size;
    const void *source;

    // data source

    if (m_targetBuffer == TARGETBUFFER_VERTEX)
    {
        DE_ASSERT((m_vertexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));

        target = GL_ARRAY_BUFFER;
        size   = (glw::GLsizeiptr)(m_vertexData.size() * sizeof(tcu::Vec4));
        source = &m_vertexData[0];
    }
    else if (m_targetBuffer == TARGETBUFFER_INDEX)
    {
        DE_ASSERT((m_indexBuffer == 0) == (m_bufferState == BUFFERSTATE_NEW));

        target = GL_ELEMENT_ARRAY_BUFFER;
        size   = (glw::GLsizeiptr)(m_indexData.size() * sizeof(uint32_t));
        source = &m_indexData[0];
    }
    else
    {
        DE_ASSERT(false);
        return;
    }

    // gen buffer

    if (m_bufferState == BUFFERSTATE_NEW)
    {
        if (m_targetBuffer == TARGETBUFFER_VERTEX)
        {
            gl.genBuffers(1, &m_vertexBuffer);
            gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
        }
        else if (m_targetBuffer == TARGETBUFFER_INDEX)
        {
            gl.genBuffers(1, &m_indexBuffer);
            gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
        }
        else
            DE_ASSERT(false);

        if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA || m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
        {
            gl.bufferData(target, size, DE_NULL, GL_STATIC_DRAW);
        }
    }
    else if (m_bufferState == BUFFERSTATE_EXISTING)
    {
        if (m_targetBuffer == TARGETBUFFER_VERTEX)
            gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
        else if (m_targetBuffer == TARGETBUFFER_INDEX)
            gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);
        else
            DE_ASSERT(false);
    }
    else
        DE_ASSERT(false);

    // upload

    startTime = deGetMicroseconds();

    if (m_uploadMethod == UPLOADMETHOD_BUFFER_DATA)
        gl.bufferData(target, size, source, GL_STATIC_DRAW);
    else if (m_uploadMethod == UPLOADMETHOD_BUFFER_SUB_DATA)
        gl.bufferSubData(target, 0, size, source);
    else if (m_uploadMethod == UPLOADMETHOD_MAP_BUFFER_RANGE)
    {
        void *mapPtr;
        glw::GLboolean unmapSuccessful;

        mapPtr = gl.mapBufferRange(target, 0, size,
                                   GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_UNSYNCHRONIZED_BIT);
        if (!mapPtr)
            throw tcu::Exception("MapBufferRange returned NULL");

        deMemcpy(mapPtr, source, (int)size);

        // if unmapping fails, just try again later
        unmapSuccessful = gl.unmapBuffer(target);
        if (!unmapSuccessful)
            throw UnmapFailureError();
    }
    else
        DE_ASSERT(false);

    endTime = deGetMicroseconds();

    sample.uploadCallEndTime = endTime;
    result.uploadDuration    = endTime - startTime;
}

void UploadWaitDrawCase::drawFromBuffer(Sample &sample, Result &result)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    tcu::Surface resultSurface(RENDER_AREA_SIZE, RENDER_AREA_SIZE);
    uint64_t startTime;
    uint64_t endTime;

    DE_ASSERT(m_vertexBuffer != 0);
    if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
        DE_ASSERT(m_indexBuffer == 0);
    else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
        DE_ASSERT(m_indexBuffer != 0);
    else
        DE_ASSERT(false);

    // draw
    {
        gl.bindBuffer(GL_ARRAY_BUFFER, m_vertexBuffer);
        if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.bindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_indexBuffer);

        setupVertexAttribs();

        // microseconds passed since return from upload call
        result.timeBeforeUse = deGetMicroseconds() - sample.uploadCallEndTime;

        startTime = deGetMicroseconds();

        if (m_drawMethod == DRAWMETHOD_DRAW_ARRAYS)
            gl.drawArrays(GL_TRIANGLES, 0, m_numVertices);
        else if (m_drawMethod == DRAWMETHOD_DRAW_ELEMENTS)
            gl.drawElements(GL_TRIANGLES, m_numVertices, GL_UNSIGNED_INT, DE_NULL);
        else
            DE_ASSERT(false);

        endTime = deGetMicroseconds();

        result.renderDuration = endTime - startTime;
    }

    // read
    {
        startTime = deGetMicroseconds();
        glu::readPixels(m_context.getRenderContext(), 0, 0, resultSurface.getAccess());
        endTime = deGetMicroseconds();

        result.readDuration = endTime - startTime;
    }

    result.renderReadDuration = result.renderDuration + result.readDuration;
}

void UploadWaitDrawCase::reuseAndDeleteBuffer(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    if (m_targetBuffer == TARGETBUFFER_INDEX)
    {
        // respecify and delete index buffer
        static const uint32_t indices[3] = {1, 3, 8};

        DE_ASSERT(m_indexBuffer != 0);

        gl.bufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(indices), indices, GL_STATIC_DRAW);
        gl.drawElements(GL_TRIANGLES, 3, GL_UNSIGNED_INT, DE_NULL);
        gl.deleteBuffers(1, &m_indexBuffer);
        m_indexBuffer = 0;
    }
    else if (m_targetBuffer == TARGETBUFFER_VERTEX)
    {
        // respecify and delete vertex buffer
        static const tcu::Vec4 coloredTriangle[6] = {
            tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f),  tcu::Vec4(-0.4f, -0.4f, 0.0f, 1.0f), tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f),
            tcu::Vec4(-0.2f, 0.4f, 0.0f, 1.0f), tcu::Vec4(1.0f, 0.0f, 0.0f, 1.0f),   tcu::Vec4(0.8f, -0.1f, 0.0f, 1.0f),
        };

        DE_ASSERT(m_vertexBuffer != 0);

        gl.bufferData(GL_ARRAY_BUFFER, sizeof(coloredTriangle), coloredTriangle, GL_STATIC_DRAW);
        gl.drawArrays(GL_TRIANGLES, 0, 3);
        gl.deleteBuffers(1, &m_vertexBuffer);
        m_vertexBuffer = 0;
    }

    waitGLResults();
}

void UploadWaitDrawCase::logAndSetTestResult(void)
{
    int uploadStabilization;
    int renderReadStabilization;
    int renderStabilization;
    int readStabilization;
    bool temporallyStable;

    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "Samples", "Result samples");
        logSamples();
    }

    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "Stabilization", "Sample stability");

        // log stabilization points
        renderReadStabilization = findStabilizationSample(&Result::renderReadDuration, "Combined draw and read");
        uploadStabilization     = findStabilizationSample(&Result::uploadDuration, "Upload time");
        renderStabilization     = findStabilizationSample(&Result::renderDuration, "Draw call time");
        readStabilization       = findStabilizationSample(&Result::readDuration, "ReadPixels time");

        temporallyStable = true;
        temporallyStable &= checkSampleTemporalStability(&Result::renderReadDuration, "Combined draw and read");
        temporallyStable &= checkSampleTemporalStability(&Result::uploadDuration, "Upload time");
        temporallyStable &= checkSampleTemporalStability(&Result::renderDuration, "Draw call time");
        temporallyStable &= checkSampleTemporalStability(&Result::readDuration, "ReadPixels time");
    }

    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "Results", "Results");

        // Check result sanily
        if (uploadStabilization != 0)
            m_testCtx.getLog() << tcu::TestLog::Message
                               << "Warning! Upload times are not stable, test result may not be accurate."
                               << tcu::TestLog::EndMessage;
        if (!temporallyStable)
            m_testCtx.getLog() << tcu::TestLog::Message
                               << "Warning! Time samples do not seem to be temporally stable, sample times seem to "
                                  "drift to one direction during test execution."
                               << tcu::TestLog::EndMessage;

        // render & read
        if (renderReadStabilization == -1)
            m_testCtx.getLog() << tcu::TestLog::Message
                               << "Combined time used in draw call and ReadPixels did not stabilize."
                               << tcu::TestLog::EndMessage;
        else
            m_testCtx.getLog() << tcu::TestLog::Integer(
                "RenderReadStabilizationPoint", "Combined draw call and ReadPixels call time stabilization time",
                "frames", QP_KEY_TAG_TIME, renderReadStabilization);

        // draw call
        if (renderStabilization == -1)
            m_testCtx.getLog() << tcu::TestLog::Message << "Time used in draw call did not stabilize."
                               << tcu::TestLog::EndMessage;
        else
            m_testCtx.getLog() << tcu::TestLog::Integer("DrawCallStabilizationPoint",
                                                        "Draw call time stabilization time", "frames", QP_KEY_TAG_TIME,
                                                        renderStabilization);

        // readpixels
        if (readStabilization == -1)
            m_testCtx.getLog() << tcu::TestLog::Message << "Time used in ReadPixels did not stabilize."
                               << tcu::TestLog::EndMessage;
        else
            m_testCtx.getLog() << tcu::TestLog::Integer("ReadPixelsStabilizationPoint",
                                                        "ReadPixels call time stabilization time", "frames",
                                                        QP_KEY_TAG_TIME, readStabilization);

        // Report renderReadStabilization
        if (renderReadStabilization != -1)
            m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(renderReadStabilization).c_str());
        else
            m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::toString(m_numMaxSwaps).c_str()); // don't report -1
    }
}

void UploadWaitDrawCase::logSamples(void)
{
    // Inverse m_iterationOrder

    std::vector<int> runOrder(m_iterationOrder.size());
    for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
        runOrder[m_iterationOrder[ndx]] = ndx;

    // Log samples

    m_testCtx.getLog() << tcu::TestLog::SampleList("Samples", "Samples") << tcu::TestLog::SampleInfo
                       << tcu::TestLog::ValueInfo("NumSwaps", "SwapBuffers before use", "",
                                                  QP_SAMPLE_VALUE_TAG_PREDICTOR)
                       << tcu::TestLog::ValueInfo("Delay", "Time before use", "us", QP_SAMPLE_VALUE_TAG_PREDICTOR)
                       << tcu::TestLog::ValueInfo("RunOrder", "Sample run order", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
                       << tcu::TestLog::ValueInfo("DrawReadTime", "Draw call and ReadPixels time", "us",
                                                  QP_SAMPLE_VALUE_TAG_RESPONSE)
                       << tcu::TestLog::ValueInfo("TotalTime", "Total time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
                       << tcu::TestLog::ValueInfo("Upload time", "Upload time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
                       << tcu::TestLog::ValueInfo("DrawCallTime", "Draw call time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
                       << tcu::TestLog::ValueInfo("ReadTime", "ReadPixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
                       << tcu::TestLog::EndSampleInfo;

    for (int sampleNdx = 0; sampleNdx < (int)m_samples.size(); ++sampleNdx)
        m_testCtx.getLog() << tcu::TestLog::Sample << m_samples[sampleNdx].numFrames
                           << (int)m_results[sampleNdx].timeBeforeUse << runOrder[sampleNdx]
                           << (int)m_results[sampleNdx].renderReadDuration
                           << (int)(m_results[sampleNdx].renderReadDuration + m_results[sampleNdx].uploadDuration)
                           << (int)m_results[sampleNdx].uploadDuration << (int)m_results[sampleNdx].renderDuration
                           << (int)m_results[sampleNdx].readDuration << tcu::TestLog::EndSample;

    m_testCtx.getLog() << tcu::TestLog::EndSampleList;
}

void UploadWaitDrawCase::drawMisc(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    gl.bindBuffer(GL_ARRAY_BUFFER, m_miscBuffer);
    setupVertexAttribs();
    gl.drawArrays(GL_TRIANGLES, 0, m_numMiscVertices);
}

struct DistributionCompareResult
{
    bool equal;
    float standardDeviations;
};

template <typename Comparer>
static float sumOfRanks(const std::vector<uint64_t> &testSamples, const std::vector<uint64_t> &allSamples,
                        const Comparer &comparer)
{
    float sum = 0;

    for (int sampleNdx = 0; sampleNdx < (int)testSamples.size(); ++sampleNdx)
    {
        const uint64_t testSample = testSamples[sampleNdx];
        const int lowerIndex =
            (int)(std::lower_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
        const int upperIndex =
            (int)(std::upper_bound(allSamples.begin(), allSamples.end(), testSample, comparer) - allSamples.begin());
        const int lowerRank      = lowerIndex + 1; // convert zero-indexed to rank
        const int upperRank      = upperIndex;     // convert zero-indexed to rank, upperIndex is last equal + 1
        const float rankMidpoint = (float)(lowerRank + upperRank) / 2.0f;

        sum += rankMidpoint;
    }

    return sum;
}

template <typename Comparer>
static DistributionCompareResult distributionCompare(const std::vector<uint64_t> &orderedObservationsA,
                                                     const std::vector<uint64_t> &orderedObservationsB,
                                                     const Comparer &comparer)
{
    // Mann-Whitney U test

    const int n1 = (int)orderedObservationsA.size();
    const int n2 = (int)orderedObservationsB.size();
    std::vector<uint64_t> allSamples(n1 + n2);

    std::copy(orderedObservationsA.begin(), orderedObservationsA.end(), allSamples.begin());
    std::copy(orderedObservationsB.begin(), orderedObservationsB.end(), allSamples.begin() + n1);
    std::sort(allSamples.begin(), allSamples.end());

    {
        const float R1 = sumOfRanks(orderedObservationsA, allSamples, comparer);

        const float U1 = (float)(n1 * n2 + n1 * (n1 + 1) / 2) - R1;
        const float U2 = (float)(n1 * n2) - U1;
        const float U  = de::min(U1, U2);

        // \note: sample sizes might not be large enough to expect normal distribution but we do it anyway

        const float mU     = (float)(n1 * n2) / 2.0f;
        const float sigmaU = deFloatSqrt((float)(n1 * n2 * (n1 + n2 + 1)) / 12.0f);
        const float z      = (U - mU) / sigmaU;

        DistributionCompareResult result;

        result.equal              = (de::abs(z) <= 1.96f); // accept within 95% confidence interval
        result.standardDeviations = z;

        return result;
    }
}

template <typename T>
struct ThresholdComparer
{
    float relativeThreshold;
    T absoluteThreshold;

    bool operator()(const T &a, const T &b) const
    {
        const float diff = de::abs((float)a - (float)b);

        // thresholds
        if (diff <= (float)absoluteThreshold)
            return false;
        if (diff <= float(a) * relativeThreshold || diff <= float(b) * relativeThreshold)
            return false;

        // cmp
        return a < b;
    }
};

int UploadWaitDrawCase::findStabilizationSample(uint64_t UploadWaitDrawCase::Result::*target, const char *description)
{
    std::vector<std::vector<uint64_t>> sampleObservations(m_numMaxSwaps + 1);
    ThresholdComparer<uint64_t> comparer;

    comparer.relativeThreshold = 0.15f; // 15%
    comparer.absoluteThreshold = 100;   // (us), assumed sampling precision

    // get observations and order them

    for (int swapNdx = 0; swapNdx <= m_numMaxSwaps; ++swapNdx)
    {
        int insertNdx = 0;

        sampleObservations[swapNdx].resize(m_numSamplesPerSwap);

        for (int ndx = 0; ndx < (int)m_samples.size(); ++ndx)
            if (m_samples[ndx].numFrames == swapNdx)
                sampleObservations[swapNdx][insertNdx++] = m_results[ndx].*target;

        DE_ASSERT(insertNdx == m_numSamplesPerSwap);

        std::sort(sampleObservations[swapNdx].begin(), sampleObservations[swapNdx].end());
    }

    // find stabilization point

    for (int sampleNdx = m_numMaxSwaps - 1; sampleNdx != -1; --sampleNdx)
    {
        // Distribution is equal to all following distributions
        for (int cmpTargetDistribution = sampleNdx + 1; cmpTargetDistribution <= m_numMaxSwaps; ++cmpTargetDistribution)
        {
            // Stable section ends here?
            const DistributionCompareResult result =
                distributionCompare(sampleObservations[sampleNdx], sampleObservations[cmpTargetDistribution], comparer);
            if (!result.equal)
            {
                // Last two samples are not equal? Samples never stabilized
                if (sampleNdx == m_numMaxSwaps - 1)
                {
                    m_testCtx.getLog() << tcu::TestLog::Message << description << ": Samples with swap count "
                                       << sampleNdx << " and " << cmpTargetDistribution
                                       << " do not seem to have the same distribution:\n"
                                       << "\tDifference in standard deviations: " << result.standardDeviations << "\n"
                                       << "\tSwap count " << sampleNdx
                                       << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
                                       << "\tSwap count " << cmpTargetDistribution
                                       << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f)
                                       << "\n"
                                       << tcu::TestLog::EndMessage;
                    return -1;
                }
                else
                {
                    m_testCtx.getLog() << tcu::TestLog::Message << description << ": Samples with swap count "
                                       << sampleNdx << " and " << cmpTargetDistribution
                                       << " do not seem to have the same distribution:\n"
                                       << "\tSamples with swap count " << sampleNdx
                                       << " are not part of the tail of stable results.\n"
                                       << "\tDifference in standard deviations: " << result.standardDeviations << "\n"
                                       << "\tSwap count " << sampleNdx
                                       << " median: " << linearSample(sampleObservations[sampleNdx], 0.5f) << "\n"
                                       << "\tSwap count " << cmpTargetDistribution
                                       << " median: " << linearSample(sampleObservations[cmpTargetDistribution], 0.5f)
                                       << "\n"
                                       << tcu::TestLog::EndMessage;

                    return sampleNdx + 1;
                }
            }
        }
    }

    m_testCtx.getLog() << tcu::TestLog::Message << description << ": All samples seem to have the same distribution"
                       << tcu::TestLog::EndMessage;

    // all distributions equal
    return 0;
}

bool UploadWaitDrawCase::checkSampleTemporalStability(uint64_t UploadWaitDrawCase::Result::*target,
                                                      const char *description)
{
    // Try to find correlation with sample order and sample times

    const int numDataPoints = (int)m_iterationOrder.size();
    std::vector<tcu::Vec2> dataPoints(m_iterationOrder.size());
    LineParametersWithConfidence lineFit;

    for (int ndx = 0; ndx < (int)m_iterationOrder.size(); ++ndx)
    {
        dataPoints[m_iterationOrder[ndx]].x() = (float)ndx;
        dataPoints[m_iterationOrder[ndx]].y() = (float)(m_results[m_iterationOrder[ndx]].*target);
    }

    lineFit = theilSenSiegelLinearRegression(dataPoints, 0.6f);

    // Difference of more than 25% of the offset along the whole sample range
    if (de::abs(lineFit.coefficient) * (float)numDataPoints > de::abs(lineFit.offset) * 0.25f)
    {
        m_testCtx.getLog() << tcu::TestLog::Message << description
                           << ": Correlation with data point observation order and result time. Results are not "
                              "temporally stable, observations are not independent.\n"
                           << "\tCoefficient: " << lineFit.coefficient << " (us / observation)\n"
                           << tcu::TestLog::EndMessage;

        return false;
    }
    else
        return true;
}

} // namespace

BufferDataUploadTests::BufferDataUploadTests(Context &context)
    : TestCaseGroup(context, "data_upload", "Buffer data upload performance tests")
{
}

BufferDataUploadTests::~BufferDataUploadTests(void)
{
}

void BufferDataUploadTests::init(void)
{
    static const struct BufferUsage
    {
        const char *name;
        uint32_t usage;
        bool primaryUsage;
    } bufferUsages[] = {
        {"stream_draw", GL_STREAM_DRAW, true},    {"stream_read", GL_STREAM_READ, false},
        {"stream_copy", GL_STREAM_COPY, false},   {"static_draw", GL_STATIC_DRAW, true},
        {"static_read", GL_STATIC_READ, false},   {"static_copy", GL_STATIC_COPY, false},
        {"dynamic_draw", GL_DYNAMIC_DRAW, true},  {"dynamic_read", GL_DYNAMIC_READ, false},
        {"dynamic_copy", GL_DYNAMIC_COPY, false},
    };

    tcu::TestCaseGroup *const referenceGroup = new tcu::TestCaseGroup(m_testCtx, "reference", "Reference functions");
    tcu::TestCaseGroup *const functionCallGroup =
        new tcu::TestCaseGroup(m_testCtx, "function_call", "Function call timing");
    tcu::TestCaseGroup *const modifyAfterUseGroup =
        new tcu::TestCaseGroup(m_testCtx, "modify_after_use", "Function call time after buffer has been used");
    tcu::TestCaseGroup *const renderAfterUploadGroup = new tcu::TestCaseGroup(
        m_testCtx, "render_after_upload", "Function call time of draw commands after buffer has been modified");

    addChild(referenceGroup);
    addChild(functionCallGroup);
    addChild(modifyAfterUseGroup);
    addChild(renderAfterUploadGroup);

    // .reference
    {
        static const struct BufferSizeRange
        {
            const char *name;
            int minBufferSize;
            int maxBufferSize;
            int numSamples;
            bool largeBuffersCase;
        } sizeRanges[] = {
            {"small_buffers", 0, 1 << 18, 64, false},      // !< 0kB - 256kB
            {"large_buffers", 1 << 18, 1 << 24, 32, true}, // !< 256kB - 16MB
        };

        for (int bufferSizeRangeNdx = 0; bufferSizeRangeNdx < DE_LENGTH_OF_ARRAY(sizeRanges); ++bufferSizeRangeNdx)
        {
            referenceGroup->addChild(new ReferenceMemcpyCase(
                m_context, std::string("memcpy_").append(sizeRanges[bufferSizeRangeNdx].name).c_str(),
                "Test memcpy performance", sizeRanges[bufferSizeRangeNdx].minBufferSize,
                sizeRanges[bufferSizeRangeNdx].maxBufferSize, sizeRanges[bufferSizeRangeNdx].numSamples,
                sizeRanges[bufferSizeRangeNdx].largeBuffersCase));
        }
    }

    // .function_call
    {
        const int minBufferSize  = 0;       // !< 0kiB
        const int maxBufferSize  = 1 << 24; // !< 16MiB
        const int numDataSamples = 25;
        const int numMapSamples  = 25;

        tcu::TestCaseGroup *const bufferDataMethodGroup =
            new tcu::TestCaseGroup(m_testCtx, "buffer_data", "Use glBufferData");
        tcu::TestCaseGroup *const bufferSubDataMethodGroup =
            new tcu::TestCaseGroup(m_testCtx, "buffer_sub_data", "Use glBufferSubData");
        tcu::TestCaseGroup *const mapBufferRangeMethodGroup =
            new tcu::TestCaseGroup(m_testCtx, "map_buffer_range", "Use glMapBufferRange");

        functionCallGroup->addChild(bufferDataMethodGroup);
        functionCallGroup->addChild(bufferSubDataMethodGroup);
        functionCallGroup->addChild(mapBufferRangeMethodGroup);

        // .buffer_data
        {
            static const struct TargetCase
            {
                tcu::TestCaseGroup *group;
                BufferDataUploadCase::CaseType caseType;
                bool allUsages;
            } targetCases[] = {
                {new tcu::TestCaseGroup(m_testCtx, "new_buffer", "Target new buffer"),
                 BufferDataUploadCase::CASE_NEW_BUFFER, true},
                {new tcu::TestCaseGroup(m_testCtx, "unspecified_buffer", "Target new unspecified buffer"),
                 BufferDataUploadCase::CASE_UNSPECIFIED_BUFFER, true},
                {new tcu::TestCaseGroup(m_testCtx, "specified_buffer", "Target new specified buffer"),
                 BufferDataUploadCase::CASE_SPECIFIED_BUFFER, true},
                {new tcu::TestCaseGroup(m_testCtx, "used_buffer", "Target buffer that was used in draw"),
                 BufferDataUploadCase::CASE_USED_BUFFER, true},
                {new tcu::TestCaseGroup(m_testCtx, "larger_used_buffer", "Target larger buffer that was used in draw"),
                 BufferDataUploadCase::CASE_USED_LARGER_BUFFER, false},
            };

            for (int targetNdx = 0; targetNdx < DE_LENGTH_OF_ARRAY(targetCases); ++targetNdx)
            {
                bufferDataMethodGroup->addChild(targetCases[targetNdx].group);

                for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
                    if (bufferUsages[usageNdx].primaryUsage || targetCases[targetNdx].allUsages)
                        targetCases[targetNdx].group->addChild(new BufferDataUploadCase(
                            m_context, std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
                            std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
                            minBufferSize, maxBufferSize, numDataSamples, bufferUsages[usageNdx].usage,
                            targetCases[targetNdx].caseType));
            }
        }

        // .buffer_sub_data
        {
            static const struct FlagCase
            {
                tcu::TestCaseGroup *group;
                BufferSubDataUploadCase::CaseType parentCase;
                bool allUsages;
                int flags;
            } flagCases[] = {
                {new tcu::TestCaseGroup(m_testCtx, "used_buffer_full_upload", ""),
                 BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_FULL_UPLOAD},
                {new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_full_upload",
                                        "Clear buffer with bufferData(...,NULL) before sub data call"),
                 BufferSubDataUploadCase::CASE_USED_BUFFER, false,
                 BufferSubDataUploadCase::FLAG_FULL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE},
                {new tcu::TestCaseGroup(m_testCtx, "used_buffer_partial_upload", ""),
                 BufferSubDataUploadCase::CASE_USED_BUFFER, true, BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD},
                {new tcu::TestCaseGroup(m_testCtx, "used_buffer_invalidate_before_partial_upload",
                                        "Clear buffer with bufferData(...,NULL) before sub data call"),
                 BufferSubDataUploadCase::CASE_USED_BUFFER, false,
                 BufferSubDataUploadCase::FLAG_PARTIAL_UPLOAD | BufferSubDataUploadCase::FLAG_INVALIDATE_BEFORE_USE},
            };

            for (int flagNdx = 0; flagNdx < DE_LENGTH_OF_ARRAY(flagCases); ++flagNdx)
            {
                bufferSubDataMethodGroup->addChild(flagCases[flagNdx].group);

                for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
                    if (bufferUsages[usageNdx].primaryUsage || flagCases[flagNdx].allUsages)
                        flagCases[flagNdx].group->addChild(new BufferSubDataUploadCase(
                            m_context, std::string("usage_").append(bufferUsages[usageNdx].name).c_str(),
                            std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
                            minBufferSize, maxBufferSize, numDataSamples, bufferUsages[usageNdx].usage,
                            flagCases[flagNdx].parentCase, flagCases[flagNdx].flags));
            }
        }

        // .map_buffer_range
        {
            static const struct FlagCase
            {
                const char *name;
                bool usefulForUnusedBuffers;
                bool allUsages;
                int glFlags;
                int caseFlags;
            } flagCases[] = {
                {"flag_write_full", true, true, GL_MAP_WRITE_BIT, 0},
                {"flag_write_partial", true, true, GL_MAP_WRITE_BIT, MapBufferRangeCase::FLAG_PARTIAL},
                {"flag_read_write_full", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT, 0},
                {"flag_read_write_partial", true, true, GL_MAP_WRITE_BIT | GL_MAP_READ_BIT,
                 MapBufferRangeCase::FLAG_PARTIAL},
                {"flag_invalidate_range_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, 0},
                {"flag_invalidate_range_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
                 MapBufferRangeCase::FLAG_PARTIAL},
                {"flag_invalidate_buffer_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0},
                {"flag_invalidate_buffer_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,
                 MapBufferRangeCase::FLAG_PARTIAL},
                {"flag_write_full_manual_invalidate_buffer", false, false,
                 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT, MapBufferRangeCase::FLAG_MANUAL_INVALIDATION},
                {"flag_write_partial_manual_invalidate_buffer", false, false,
                 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
                 MapBufferRangeCase::FLAG_PARTIAL | MapBufferRangeCase::FLAG_MANUAL_INVALIDATION},
                {"flag_unsynchronized_full", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT, 0},
                {"flag_unsynchronized_partial", true, false, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT,
                 MapBufferRangeCase::FLAG_PARTIAL},
                {"flag_unsynchronized_and_invalidate_buffer_full", true, false,
                 GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT, 0},
                {"flag_unsynchronized_and_invalidate_buffer_partial", true, false,
                 GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT | GL_MAP_INVALIDATE_BUFFER_BIT,
                 MapBufferRangeCase::FLAG_PARTIAL},
            };
            static const struct FlushCases
            {
                const char *name;
                int glFlags;
                int caseFlags;
            } flushCases[] = {
                {"flag_flush_explicit_map_full", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT, 0},
                {"flag_flush_explicit_map_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,
                 MapBufferRangeFlushCase::FLAG_PARTIAL},
                {"flag_flush_explicit_map_full_flush_in_parts", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,
                 MapBufferRangeFlushCase::FLAG_FLUSH_IN_PARTS},
                {"flag_flush_explicit_map_full_flush_partial", GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT,
                 MapBufferRangeFlushCase::FLAG_FLUSH_PARTIAL},
            };
            static const struct MapTestGroup
            {
                int flags;
                bool unusedBufferCase;
                tcu::TestCaseGroup *group;
            } groups[] = {
                {
                    MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER,
                    true,
                    new tcu::TestCaseGroup(m_testCtx, "new_unspecified_buffer",
                                           "Test with unused, unspecified buffers"),
                },
                {
                    MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER,
                    true,
                    new tcu::TestCaseGroup(m_testCtx, "new_specified_buffer", "Test with unused, specified buffers"),
                },
                {0, false,
                 new tcu::TestCaseGroup(m_testCtx, "used_buffer",
                                        "Test with used (data has been sourced from a buffer) buffers")},
            };

            // we OR same flags to both range and flushRange cases, make sure it is legal
            DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER ==
                             (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_SPECIFIED_BUFFER);
            DE_STATIC_ASSERT((int)MapBufferRangeCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER ==
                             (int)MapBufferRangeFlushCase::FLAG_USE_UNUSED_UNSPECIFIED_BUFFER);

            for (int groupNdx = 0; groupNdx < DE_LENGTH_OF_ARRAY(groups); ++groupNdx)
            {
                tcu::TestCaseGroup *const bufferTypeGroup = groups[groupNdx].group;

                mapBufferRangeMethodGroup->addChild(bufferTypeGroup);

                for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flagCases); ++caseNdx)
                {
                    if (groups[groupNdx].unusedBufferCase && !flagCases[caseNdx].usefulForUnusedBuffers)
                        continue;

                    tcu::TestCaseGroup *const bufferUsageGroup =
                        new tcu::TestCaseGroup(m_testCtx, flagCases[caseNdx].name, "");
                    bufferTypeGroup->addChild(bufferUsageGroup);

                    for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
                        if (bufferUsages[usageNdx].primaryUsage || flagCases[caseNdx].allUsages)
                            bufferUsageGroup->addChild(new MapBufferRangeCase(
                                m_context, bufferUsages[usageNdx].name,
                                std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
                                minBufferSize, maxBufferSize, numMapSamples, bufferUsages[usageNdx].usage,
                                flagCases[caseNdx].glFlags, flagCases[caseNdx].caseFlags | groups[groupNdx].flags));
                }

                for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(flushCases); ++caseNdx)
                {
                    tcu::TestCaseGroup *const bufferUsageGroup =
                        new tcu::TestCaseGroup(m_testCtx, flushCases[caseNdx].name, "");
                    bufferTypeGroup->addChild(bufferUsageGroup);

                    for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(bufferUsages); ++usageNdx)
                        if (bufferUsages[usageNdx].primaryUsage)
                            bufferUsageGroup->addChild(new MapBufferRangeFlushCase(
                                m_context, bufferUsages[usageNdx].name,
                                std::string("Test with usage = ").append(bufferUsages[usageNdx].name).c_str(),
                                minBufferSize, maxBufferSize, numMapSamples, bufferUsages[usageNdx].usage,
                                flushCases[caseNdx].glFlags, flushCases[caseNdx].caseFlags | groups[groupNdx].flags));
                }
            }
        }
    }

    // .modify_after_use
    {
        const int minBufferSize = 0;       // !< 0kiB
        const int maxBufferSize = 1 << 24; // !< 16MiB

        static const struct Usage
        {
            const char *name;
            const char *description;
            uint32_t usage;
        } usages[] = {
            {"static_draw", "Test with GL_STATIC_DRAW", GL_STATIC_DRAW},
            {"dynamic_draw", "Test with GL_DYNAMIC_DRAW", GL_DYNAMIC_DRAW},
            {"stream_draw", "Test with GL_STREAM_DRAW", GL_STREAM_DRAW},

        };

        for (int usageNdx = 0; usageNdx < DE_LENGTH_OF_ARRAY(usages); ++usageNdx)
        {
            tcu::TestCaseGroup *const usageGroup =
                new tcu::TestCaseGroup(m_testCtx, usages[usageNdx].name, usages[usageNdx].description);
            modifyAfterUseGroup->addChild(usageGroup);

            usageGroup->addChild(new ModifyAfterWithBufferDataCase(m_context, "buffer_data",
                                                                   "Respecify buffer contents after use", minBufferSize,
                                                                   maxBufferSize, usages[usageNdx].usage, 0));
            usageGroup->addChild(new ModifyAfterWithBufferDataCase(
                m_context, "buffer_data_different_size", "Respecify buffer contents and size after use", minBufferSize,
                maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_RESPECIFY_SIZE));
            usageGroup->addChild(new ModifyAfterWithBufferDataCase(
                m_context, "buffer_data_repeated", "Respecify buffer contents after upload and use", minBufferSize,
                maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferDataCase::FLAG_UPLOAD_REPEATED));

            usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
                m_context, "buffer_sub_data_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize,
                usages[usageNdx].usage, 0));
            usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
                m_context, "buffer_sub_data_partial", "Respecify buffer contents partially use", minBufferSize,
                maxBufferSize, usages[usageNdx].usage, ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));
            usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
                m_context, "buffer_sub_data_full_repeated", "Respecify buffer contents after upload and use",
                minBufferSize, maxBufferSize, usages[usageNdx].usage,
                ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED));
            usageGroup->addChild(new ModifyAfterWithBufferSubDataCase(
                m_context, "buffer_sub_data_partial_repeated", "Respecify buffer contents partially upload and use",
                minBufferSize, maxBufferSize, usages[usageNdx].usage,
                ModifyAfterWithBufferSubDataCase::FLAG_UPLOAD_REPEATED |
                    ModifyAfterWithBufferSubDataCase::FLAG_PARTIAL));

            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_write_full", "Respecify buffer contents after use", minBufferSize, maxBufferSize,
                usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_write_partial", "Respecify buffer contents partially after use", minBufferSize,
                maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
                GL_MAP_WRITE_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_read_write_full", "Respecify buffer contents after use", minBufferSize,
                maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_read_write_partial", "Respecify buffer contents partially after use",
                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
                GL_MAP_READ_BIT | GL_MAP_WRITE_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_invalidate_range_full", "Respecify buffer contents after use", minBufferSize,
                maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_invalidate_range_partial", "Respecify buffer contents partially after use",
                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
                GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_invalidate_buffer_full", "Respecify buffer contents after use", minBufferSize,
                maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_invalidate_buffer_partial", "Respecify buffer contents partially after use",
                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
                GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_unsynchronized_full", "Respecify buffer contents after use", minBufferSize,
                maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferRangeCase(
                m_context, "map_flag_unsynchronized_partial", "Respecify buffer contents partially after use",
                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferRangeCase::FLAG_PARTIAL,
                GL_MAP_WRITE_BIT | GL_MAP_UNSYNCHRONIZED_BIT));

            usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase(
                m_context, "map_flag_flush_explicit_full", "Respecify buffer contents after use", minBufferSize,
                maxBufferSize, usages[usageNdx].usage, 0, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
            usageGroup->addChild(new ModifyAfterWithMapBufferFlushCase(
                m_context, "map_flag_flush_explicit_partial", "Respecify buffer contents partially after use",
                minBufferSize, maxBufferSize, usages[usageNdx].usage, ModifyAfterWithMapBufferFlushCase::FLAG_PARTIAL,
                GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT));
        }
    }

    // .render_after_upload
    {
        // .reference
        {
            tcu::TestCaseGroup *const renderReferenceGroup =
                new tcu::TestCaseGroup(m_testCtx, "reference", "Baseline results");
            renderAfterUploadGroup->addChild(renderReferenceGroup);

            // .draw
            {
                tcu::TestCaseGroup *const drawGroup =
                    new tcu::TestCaseGroup(m_testCtx, "draw", "Time usage of functions with non-modified buffers");
                renderReferenceGroup->addChild(drawGroup);

                // Time consumed by readPixels
                drawGroup->addChild(new ReferenceReadPixelsTimeCase(
                    m_context, "read_pixels", "Measure time consumed by readPixels() function call"));

                // Time consumed by rendering
                drawGroup->addChild(new ReferenceRenderTimeCase(m_context, "draw_arrays",
                                                                "Measure time consumed by drawArrays() function call",
                                                                DRAWMETHOD_DRAW_ARRAYS));
                drawGroup->addChild(new ReferenceRenderTimeCase(m_context, "draw_elements",
                                                                "Measure time consumed by drawElements() function call",
                                                                DRAWMETHOD_DRAW_ELEMENTS));
            }

            // .draw_upload_draw
            {
                static const struct
                {
                    const char *name;
                    const char *description;
                    DrawMethod drawMethod;
                    TargetBuffer targetBuffer;
                    bool partial;
                } uploadTargets[] = {
                    {"draw_arrays_upload_vertices",
                     "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels "
                     "function calls.",
                     DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false},
                    {"draw_arrays_upload_vertices_partial",
                     "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and "
                     "readPixels function calls.",
                     DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true},
                    {"draw_elements_upload_vertices",
                     "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and "
                     "readPixels function calls.",
                     DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false},
                    {"draw_elements_upload_indices",
                     "Measure time consumed by drawElements, index upload, another drawElements, and readPixels "
                     "function calls.",
                     DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false},
                    {"draw_elements_upload_indices_partial",
                     "Measure time consumed by drawElements, partial index upload, another drawElements, and "
                     "readPixels function calls.",
                     DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true},
                };
                static const struct
                {
                    const char *name;
                    const char *description;
                    UploadMethod uploadMethod;
                    BufferInUseRenderTimeCase::MapFlags mapFlags;
                    bool supportsPartialUpload;
                } uploadMethods[] = {
                    {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE,
                     false},
                    {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA,
                     BufferInUseRenderTimeCase::MAPFLAG_NONE, true},
                    {"map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
                     BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true},
                    {"map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
                     BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false},
                };

                tcu::TestCaseGroup *const drawUploadDrawGroup = new tcu::TestCaseGroup(
                    m_testCtx, "draw_upload_draw", "Time usage of functions draw, upload and another draw");
                renderReferenceGroup->addChild(drawUploadDrawGroup);

                for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
                    for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods);
                         ++uploadMethodNdx)
                    {
                        const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
                                                 uploadMethods[uploadMethodNdx].name;

                        if (uploadTargets[uploadTargetNdx].partial &&
                            !uploadMethods[uploadMethodNdx].supportsPartialUpload)
                            continue;

                        drawUploadDrawGroup->addChild(new BufferInUseRenderTimeCase(
                            m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
                            uploadTargets[uploadTargetNdx].drawMethod, uploadMethods[uploadMethodNdx].mapFlags,
                            uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod,
                            (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
                            BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_DIFFERENT_BUFFER));
                    }
            }
        }

        // .upload_unrelated_and_draw
        {
            static const struct
            {
                const char *name;
                const char *description;
                DrawMethod drawMethod;
            } drawMethods[] = {
                {"draw_arrays", "drawArrays", DRAWMETHOD_DRAW_ARRAYS},
                {"draw_elements", "drawElements", DRAWMETHOD_DRAW_ELEMENTS},
            };

            static const struct
            {
                const char *name;
                UploadMethod uploadMethod;
            } uploadMethods[] = {
                {"buffer_data", UPLOADMETHOD_BUFFER_DATA},
                {"buffer_sub_data", UPLOADMETHOD_BUFFER_SUB_DATA},
                {"map_buffer_range", UPLOADMETHOD_MAP_BUFFER_RANGE},
            };

            tcu::TestCaseGroup *const uploadUnrelatedGroup = new tcu::TestCaseGroup(
                m_testCtx, "upload_unrelated_and_draw", "Time usage of functions after an unrelated upload");
            renderAfterUploadGroup->addChild(uploadUnrelatedGroup);

            for (int drawMethodNdx = 0; drawMethodNdx < DE_LENGTH_OF_ARRAY(drawMethods); ++drawMethodNdx)
                for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
                {
                    const std::string name = std::string() + drawMethods[drawMethodNdx].name +
                                             "_upload_unrelated_with_" + uploadMethods[uploadMethodNdx].name;
                    const std::string desc = std::string() + "Measure time consumed by " +
                                             drawMethods[drawMethodNdx].description +
                                             " function call after an unrelated upload";

                    // Time consumed by rendering command after an unrelated upload

                    uploadUnrelatedGroup->addChild(new UnrelatedUploadRenderTimeCase(
                        m_context, name.c_str(), desc.c_str(), drawMethods[drawMethodNdx].drawMethod,
                        uploadMethods[uploadMethodNdx].uploadMethod));
                }
        }

        // .upload_and_draw
        {
            static const struct
            {
                const char *name;
                const char *description;
                BufferState bufferState;
                UnrelatedBufferType unrelatedBuffer;
                bool supportsPartialUpload;
            } bufferConfigs[] = {
                {"used_buffer", "Upload to an used buffer", BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_NONE, true},
                {"new_buffer", "Upload to a new buffer", BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_NONE, false},
                {"used_buffer_and_unrelated_upload", "Upload to an used buffer and an unrelated buffer and then draw",
                 BUFFERSTATE_EXISTING, UNRELATEDBUFFERTYPE_VERTEX, true},
                {"new_buffer_and_unrelated_upload", "Upload to a new buffer and an unrelated buffer and then draw",
                 BUFFERSTATE_NEW, UNRELATEDBUFFERTYPE_VERTEX, false},
            };

            tcu::TestCaseGroup *const uploadAndDrawGroup = new tcu::TestCaseGroup(
                m_testCtx, "upload_and_draw", "Time usage of rendering functions with modified buffers");
            renderAfterUploadGroup->addChild(uploadAndDrawGroup);

            // .used_buffer
            // .new_buffer
            // .used_buffer_and_unrelated_upload
            // .new_buffer_and_unrelated_upload
            for (int stateNdx = 0; stateNdx < DE_LENGTH_OF_ARRAY(bufferConfigs); ++stateNdx)
            {
                static const struct
                {
                    const char *name;
                    const char *description;
                    DrawMethod drawMethod;
                    TargetBuffer targetBuffer;
                    bool partial;
                } uploadTargets[] = {
                    {"draw_arrays_upload_vertices",
                     "Measure time consumed by vertex attribute upload, drawArrays, and readPixels function calls",
                     DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false},
                    {"draw_arrays_upload_vertices_partial",
                     "Measure time consumed by partial vertex attribute upload, drawArrays, and readPixels function "
                     "calls",
                     DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true},
                    {"draw_elements_upload_vertices",
                     "Measure time consumed by vertex attribute upload, drawElements, and readPixels function calls",
                     DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false},
                    {"draw_elements_upload_indices",
                     "Measure time consumed by index upload, drawElements, and readPixels function calls",
                     DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false},
                    {"draw_elements_upload_indices_partial",
                     "Measure time consumed by partial index upload, drawElements, and readPixels function calls",
                     DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true},
                };
                static const struct
                {
                    const char *name;
                    const char *description;
                    UploadMethod uploadMethod;
                    bool supportsPartialUpload;
                } uploadMethods[] = {
                    {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, false},
                    {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA, true},
                    {"map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE, true},
                };

                tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, bufferConfigs[stateNdx].name,
                                                                         bufferConfigs[stateNdx].description);
                uploadAndDrawGroup->addChild(group);

                for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
                    for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods);
                         ++uploadMethodNdx)
                    {
                        const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
                                                 uploadMethods[uploadMethodNdx].name;

                        if (uploadTargets[uploadTargetNdx].partial &&
                            !uploadMethods[uploadMethodNdx].supportsPartialUpload)
                            continue;
                        if (uploadTargets[uploadTargetNdx].partial && !bufferConfigs[stateNdx].supportsPartialUpload)
                            continue;

                        // Don't log unrelated buffer information to samples if there is no such buffer

                        if (bufferConfigs[stateNdx].unrelatedBuffer == UNRELATEDBUFFERTYPE_NONE)
                        {
                            typedef UploadRenderReadDuration SampleType;
                            typedef GenericUploadRenderTimeCase<SampleType> TestType;

                            group->addChild(new TestType(
                                m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
                                uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer,
                                uploadMethods[uploadMethodNdx].uploadMethod, bufferConfigs[stateNdx].bufferState,
                                (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
                                bufferConfigs[stateNdx].unrelatedBuffer));
                        }
                        else
                        {
                            typedef UploadRenderReadDurationWithUnrelatedUploadSize SampleType;
                            typedef GenericUploadRenderTimeCase<SampleType> TestType;

                            group->addChild(new TestType(
                                m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
                                uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer,
                                uploadMethods[uploadMethodNdx].uploadMethod, bufferConfigs[stateNdx].bufferState,
                                (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
                                bufferConfigs[stateNdx].unrelatedBuffer));
                        }
                    }
            }
        }

        // .draw_modify_draw
        {
            static const struct
            {
                const char *name;
                const char *description;
                DrawMethod drawMethod;
                TargetBuffer targetBuffer;
                bool partial;
            } uploadTargets[] = {
                {"draw_arrays_upload_vertices",
                 "Measure time consumed by drawArrays, vertex attribute upload, another drawArrays, and readPixels "
                 "function calls.",
                 DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, false},
                {"draw_arrays_upload_vertices_partial",
                 "Measure time consumed by drawArrays, partial vertex attribute upload, another drawArrays, and "
                 "readPixels function calls.",
                 DRAWMETHOD_DRAW_ARRAYS, TARGETBUFFER_VERTEX, true},
                {"draw_elements_upload_vertices",
                 "Measure time consumed by drawElements, vertex attribute upload, another drawElements, and readPixels "
                 "function calls.",
                 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_VERTEX, false},
                {"draw_elements_upload_indices",
                 "Measure time consumed by drawElements, index upload, another drawElements, and readPixels function "
                 "calls.",
                 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, false},
                {"draw_elements_upload_indices_partial",
                 "Measure time consumed by drawElements, partial index upload, another drawElements, and readPixels "
                 "function calls.",
                 DRAWMETHOD_DRAW_ELEMENTS, TARGETBUFFER_INDEX, true},
            };
            static const struct
            {
                const char *name;
                const char *description;
                UploadMethod uploadMethod;
                BufferInUseRenderTimeCase::MapFlags mapFlags;
                bool supportsPartialUpload;
            } uploadMethods[] = {
                {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA, BufferInUseRenderTimeCase::MAPFLAG_NONE, false},
                {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA,
                 BufferInUseRenderTimeCase::MAPFLAG_NONE, true},
                {"map_buffer_range_invalidate_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
                 BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_RANGE, true},
                {"map_buffer_range_invalidate_buffer", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE,
                 BufferInUseRenderTimeCase::MAPFLAG_INVALIDATE_BUFFER, false},
            };

            tcu::TestCaseGroup *const drawModifyDrawGroup = new tcu::TestCaseGroup(
                m_testCtx, "draw_modify_draw",
                "Time used in rendering functions with modified buffers while original buffer is still in use");
            renderAfterUploadGroup->addChild(drawModifyDrawGroup);

            for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
                for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods); ++uploadMethodNdx)
                {
                    const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
                                             uploadMethods[uploadMethodNdx].name;

                    if (uploadTargets[uploadTargetNdx].partial && !uploadMethods[uploadMethodNdx].supportsPartialUpload)
                        continue;

                    drawModifyDrawGroup->addChild(new BufferInUseRenderTimeCase(
                        m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
                        uploadTargets[uploadTargetNdx].drawMethod, uploadMethods[uploadMethodNdx].mapFlags,
                        uploadTargets[uploadTargetNdx].targetBuffer, uploadMethods[uploadMethodNdx].uploadMethod,
                        (uploadTargets[uploadTargetNdx].partial) ? (UPLOADRANGE_PARTIAL) : (UPLOADRANGE_FULL),
                        BufferInUseRenderTimeCase::UPLOADBUFFERTARGET_SAME_BUFFER));
                }
        }

        // .upload_wait_draw
        {
            static const struct
            {
                const char *name;
                const char *description;
                BufferState bufferState;
            } bufferStates[] = {
                {"new_buffer", "Uploading to just generated name", BUFFERSTATE_NEW},
                {"used_buffer", "Uploading to a used buffer", BUFFERSTATE_EXISTING},
            };
            static const struct
            {
                const char *name;
                const char *description;
                DrawMethod drawMethod;
                TargetBuffer targetBuffer;
            } uploadTargets[] = {
                {"draw_arrays_vertices", "Upload vertex data, draw with drawArrays", DRAWMETHOD_DRAW_ARRAYS,
                 TARGETBUFFER_VERTEX},
                {"draw_elements_vertices", "Upload vertex data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS,
                 TARGETBUFFER_VERTEX},
                {"draw_elements_indices", "Upload index data, draw with drawElements", DRAWMETHOD_DRAW_ELEMENTS,
                 TARGETBUFFER_INDEX},
            };
            static const struct
            {
                const char *name;
                const char *description;
                UploadMethod uploadMethod;
            } uploadMethods[] = {
                {"buffer_data", "bufferData", UPLOADMETHOD_BUFFER_DATA},
                {"buffer_sub_data", "bufferSubData", UPLOADMETHOD_BUFFER_SUB_DATA},
                {"map_buffer_range", "mapBufferRange", UPLOADMETHOD_MAP_BUFFER_RANGE},
            };

            tcu::TestCaseGroup *const uploadSwapDrawGroup = new tcu::TestCaseGroup(
                m_testCtx, "upload_wait_draw", "Time used in rendering functions after a buffer upload N frames ago");
            renderAfterUploadGroup->addChild(uploadSwapDrawGroup);

            for (int bufferStateNdx = 0; bufferStateNdx < DE_LENGTH_OF_ARRAY(bufferStates); ++bufferStateNdx)
            {
                tcu::TestCaseGroup *const bufferGroup = new tcu::TestCaseGroup(
                    m_testCtx, bufferStates[bufferStateNdx].name, bufferStates[bufferStateNdx].description);
                uploadSwapDrawGroup->addChild(bufferGroup);

                for (int uploadTargetNdx = 0; uploadTargetNdx < DE_LENGTH_OF_ARRAY(uploadTargets); ++uploadTargetNdx)
                    for (int uploadMethodNdx = 0; uploadMethodNdx < DE_LENGTH_OF_ARRAY(uploadMethods);
                         ++uploadMethodNdx)
                    {
                        const std::string name = std::string() + uploadTargets[uploadTargetNdx].name + "_with_" +
                                                 uploadMethods[uploadMethodNdx].name;

                        bufferGroup->addChild(new UploadWaitDrawCase(
                            m_context, name.c_str(), uploadTargets[uploadTargetNdx].description,
                            uploadTargets[uploadTargetNdx].drawMethod, uploadTargets[uploadTargetNdx].targetBuffer,
                            uploadMethods[uploadMethodNdx].uploadMethod, bufferStates[bufferStateNdx].bufferState));
                    }
            }
        }
    }
}

} // namespace Performance
} // namespace gles3
} // namespace deqp
