/*-------------------------------------------------------------------------
 * drawElements Quality Program OpenGL ES 3.1 Module
 * -------------------------------------------------
 *
 * Copyright 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Synchronization Tests
 *//*--------------------------------------------------------------------*/

#include "es31fSynchronizationTests.hpp"
#include "tcuTestLog.hpp"
#include "tcuStringTemplate.hpp"
#include "tcuSurface.hpp"
#include "tcuRenderTarget.hpp"
#include "gluRenderContext.hpp"
#include "gluShaderProgram.hpp"
#include "gluObjectWrapper.hpp"
#include "gluPixelTransfer.hpp"
#include "gluContextInfo.hpp"
#include "glwFunctions.hpp"
#include "glwEnums.hpp"
#include "deStringUtil.hpp"
#include "deSharedPtr.hpp"
#include "deMemory.h"
#include "deRandom.hpp"

#include <map>

namespace deqp
{
namespace gles31
{
namespace Functional
{
namespace
{

static bool checkSupport(Context &ctx)
{
    auto ctxType = ctx.getRenderContext().getType();
    return contextSupports(ctxType, glu::ApiType::es(3, 2)) || contextSupports(ctxType, glu::ApiType::core(4, 5)) ||
           ctx.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic");
}

static bool validateSortedAtomicRampAdditionValueChain(const std::vector<uint32_t> &valueChain, uint32_t sumValue,
                                                       int &invalidOperationNdx, uint32_t &errorDelta,
                                                       uint32_t &errorExpected)
{
    std::vector<uint32_t> chainDelta(valueChain.size());

    for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
        chainDelta[callNdx] =
            ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx + 1])) - valueChain[callNdx];

    // chainDelta contains now the actual additions applied to the value
    // check there exists an addition ramp form 1 to ...
    std::sort(chainDelta.begin(), chainDelta.end());

    for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
    {
        if ((int)chainDelta[callNdx] != callNdx + 1)
        {
            invalidOperationNdx = callNdx;
            errorDelta          = chainDelta[callNdx];
            errorExpected       = callNdx + 1;

            return false;
        }
    }

    return true;
}

static void readBuffer(const glw::Functions &gl, uint32_t target, int numElements, std::vector<uint32_t> &result)
{
    const void *ptr = gl.mapBufferRange(target, 0, (int)(sizeof(uint32_t) * numElements), GL_MAP_READ_BIT);
    GLU_EXPECT_NO_ERROR(gl.getError(), "map");

    if (!ptr)
        throw tcu::TestError("mapBufferRange returned NULL");

    result.resize(numElements);
    memcpy(&result[0], ptr, sizeof(uint32_t) * numElements);

    if (gl.unmapBuffer(target) == GL_FALSE)
        throw tcu::TestError("unmapBuffer returned false");
}

static uint32_t readBufferUint32(const glw::Functions &gl, uint32_t target)
{
    std::vector<uint32_t> vec;

    readBuffer(gl, target, 1, vec);

    return vec[0];
}

//! Generate a ramp of values from 1 to numElements, and shuffle it
void generateShuffledRamp(int numElements, std::vector<int> &ramp)
{
    de::Random rng(0xabcd);

    // some positive (non-zero) unique values
    ramp.resize(numElements);
    for (int callNdx = 0; callNdx < numElements; ++callNdx)
        ramp[callNdx] = callNdx + 1;

    rng.shuffle(ramp.begin(), ramp.end());
}

static std::string specializeShader(Context &context, const char *code)
{
    auto ctxType            = context.getRenderContext().getType();
    const bool isES32orGL45 = glu::contextSupports(ctxType, glu::ApiType::es(3, 2)) ||
                              glu::contextSupports(ctxType, glu::ApiType::core(4, 5));
    const glu::GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(ctxType);

    std::map<std::string, std::string> specializationMap;
    specializationMap["GLSL_VERSION_DECL"] = glu::getGLSLVersionDeclaration(glslVersion);
    specializationMap["SHADER_IMAGE_ATOMIC_REQUIRE"] =
        isES32orGL45 ? "" : "#extension GL_OES_shader_image_atomic : require";

    return tcu::StringTemplate(code).specialize(specializationMap);
}

class InterInvocationTestCase : public TestCase
{
public:
    enum StorageType
    {
        STORAGE_BUFFER = 0,
        STORAGE_IMAGE,

        STORAGE_LAST
    };
    enum CaseFlags
    {
        FLAG_ATOMIC            = 0x1,
        FLAG_ALIASING_STORAGES = 0x2,
        FLAG_IN_GROUP          = 0x4,
    };

    InterInvocationTestCase(Context &context, const char *name, const char *desc, StorageType storage, int flags = 0);
    ~InterInvocationTestCase(void);

private:
    void init(void);
    void deinit(void);
    IterateResult iterate(void);

    void runCompute(void);
    bool verifyResults(void);
    virtual std::string genShaderSource(void) const = 0;

protected:
    std::string genBarrierSource(void) const;

    const StorageType m_storage;
    const bool m_useAtomic;
    const bool m_aliasingStorages;
    const bool m_syncWithGroup;
    const int m_workWidth;             // !< total work width
    const int m_workHeight;            // !<     ...    height
    const int m_localWidth;            // !< group width
    const int m_localHeight;           // !< group height
    const int m_elementsPerInvocation; // !< elements accessed by a single invocation

private:
    glw::GLuint m_storageBuf;
    glw::GLuint m_storageTex;
    glw::GLuint m_resultBuf;
    glu::ShaderProgram *m_program;
};

InterInvocationTestCase::InterInvocationTestCase(Context &context, const char *name, const char *desc,
                                                 StorageType storage, int flags)
    : TestCase(context, name, desc)
    , m_storage(storage)
    , m_useAtomic((flags & FLAG_ATOMIC) != 0)
    , m_aliasingStorages((flags & FLAG_ALIASING_STORAGES) != 0)
    , m_syncWithGroup((flags & FLAG_IN_GROUP) != 0)
    , m_workWidth(256)
    , m_workHeight(256)
    , m_localWidth(16)
    , m_localHeight(8)
    , m_elementsPerInvocation(8)
    , m_storageBuf(0)
    , m_storageTex(0)
    , m_resultBuf(0)
    , m_program(DE_NULL)
{
    DE_ASSERT(m_storage < STORAGE_LAST);
    DE_ASSERT(m_localWidth * m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
}

InterInvocationTestCase::~InterInvocationTestCase(void)
{
    deinit();
}

void InterInvocationTestCase::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // requirements

    if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
        throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");

    // program

    m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources()
                                                                         << glu::ComputeSource(genShaderSource()));
    m_testCtx.getLog() << *m_program;
    if (!m_program->isOk())
        throw tcu::TestError("could not build program");

    // source

    if (m_storage == STORAGE_BUFFER)
    {
        const int bufferElements = m_workWidth * m_workHeight * m_elementsPerInvocation;
        const int bufferSize     = bufferElements * (int)sizeof(uint32_t);
        std::vector<uint32_t> zeroBuffer(bufferElements, 0);

        m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size "
                           << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

        gl.genBuffers(1, &m_storageBuf);
        gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
        gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
        GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        const int bufferElements = m_workWidth * m_workHeight * m_elementsPerInvocation;
        const int bufferSize     = bufferElements * (int)sizeof(uint32_t);

        m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x"
                           << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes."
                           << tcu::TestLog::EndMessage;

        gl.genTextures(1, &m_storageTex);
        gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
        gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
        gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");

        // Zero-fill
        m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;

        {
            const std::vector<int32_t> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
            gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation,
                             GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
            GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
        }
    }
    else
        DE_ASSERT(false);

    // destination

    {
        const int bufferElements = m_workWidth * m_workHeight;
        const int bufferSize     = bufferElements * (int)sizeof(uint32_t);
        std::vector<int32_t> negativeBuffer(bufferElements, -1);

        m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size "
                           << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;

        gl.genBuffers(1, &m_resultBuf);
        gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
        gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
        GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
    }
}

void InterInvocationTestCase::deinit(void)
{
    if (m_storageBuf)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
        m_storageBuf = DE_NULL;
    }

    if (m_storageTex)
    {
        m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
        m_storageTex = DE_NULL;
    }

    if (m_resultBuf)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
        m_resultBuf = DE_NULL;
    }

    delete m_program;
    m_program = DE_NULL;
}

InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate(void)
{
    // Dispatch
    runCompute();

    // Verify buffer contents
    if (verifyResults())
        m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    else
        m_testCtx.setTestResult(
            QP_TEST_RESULT_FAIL,
            (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed")
                .c_str());

    return STOP;
}

void InterInvocationTestCase::runCompute(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const int groupsX        = m_workWidth / m_localWidth;
    const int groupsY        = m_workHeight / m_localHeight;

    DE_ASSERT((m_workWidth % m_localWidth) == 0);
    DE_ASSERT((m_workHeight % m_localHeight) == 0);

    m_testCtx.getLog() << tcu::TestLog::Message << "Dispatching compute.\n"
                       << "    group size: " << m_localWidth << "x" << m_localHeight << "\n"
                       << "    dispatch size: " << groupsX << "x" << groupsY << "\n"
                       << "    total work size: " << m_workWidth << "x" << m_workHeight << "\n"
                       << tcu::TestLog::EndMessage;

    gl.useProgram(m_program->getProgram());

    // source
    if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
    {
        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
    }
    else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
    {
        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");

        m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages."
                           << tcu::TestLog::EndMessage;
    }
    else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
    {
        gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
    }
    else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
    {
        gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
        gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);

        GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");

        m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages."
                           << tcu::TestLog::EndMessage;
    }
    else
        DE_ASSERT(false);

    // destination
    gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
    GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");

    // dispatch
    gl.dispatchCompute(groupsX, groupsY, 1);
    GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
}

bool InterInvocationTestCase::verifyResults(void)
{
    const glw::Functions &gl      = m_context.getRenderContext().getFunctions();
    const int errorFloodThreshold = 5;
    int numErrorsLogged           = 0;
    const void *mapped            = DE_NULL;
    std::vector<int32_t> results(m_workWidth * m_workHeight);
    bool error = false;

    gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
    gl.memoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
    mapped =
        gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(int32_t), GL_MAP_READ_BIT);
    GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");

    // copy to properly aligned array
    deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(uint32_t));

    if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
        throw tcu::TestError("memory map store corrupted");

    // check the results
    for (int ndx = 0; ndx < (int)results.size(); ++ndx)
    {
        if (results[ndx] != 1)
        {
            error = true;

            if (numErrorsLogged == 0)
                m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n"
                                   << tcu::TestLog::EndMessage;
            if (numErrorsLogged++ < errorFloodThreshold)
                m_testCtx.getLog() << tcu::TestLog::Message << "    Error at index " << ndx << ": expected 1, got "
                                   << results[ndx] << ".\n"
                                   << tcu::TestLog::EndMessage;
            else
            {
                // after N errors, no point continuing verification
                m_testCtx.getLog() << tcu::TestLog::Message << "    -- too many errors, skipping verification --\n"
                                   << tcu::TestLog::EndMessage;
                break;
            }
        }
    }

    if (!error)
        m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
    return !error;
}

std::string InterInvocationTestCase::genBarrierSource(void) const
{
    std::ostringstream buf;

    if (m_syncWithGroup)
    {
        // Wait until all invocations in this work group have their texture/buffer read/write operations complete
        // \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
        //       we only require intra-workgroup synchronization.
        buf << "\n"
            << "    groupMemoryBarrier();\n"
            << "    barrier();\n"
            << "\n";
    }
    else if (m_storage == STORAGE_BUFFER)
    {
        DE_ASSERT(!m_syncWithGroup);

        // Waiting only for data written by this invocation. Since all buffer reads and writes are
        // processed in order (within a single invocation), we don't have to do anything.
        buf << "\n";
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        DE_ASSERT(!m_syncWithGroup);

        // Waiting only for data written by this invocation. But since operations complete in undefined
        // order, we have to wait for them to complete.
        buf << "\n"
            << "    memoryBarrierImage();\n"
            << "\n";
    }
    else
        DE_ASSERT(false);

    return buf.str();
}

class InvocationBasicCase : public InterInvocationTestCase
{
public:
    InvocationBasicCase(Context &context, const char *name, const char *desc, StorageType storage, int flags);

private:
    std::string genShaderSource(void) const;
    virtual std::string genShaderMainBlock(void) const = 0;
};

InvocationBasicCase::InvocationBasicCase(Context &context, const char *name, const char *desc, StorageType storage,
                                         int flags)
    : InterInvocationTestCase(context, name, desc, storage, flags)
{
}

std::string InvocationBasicCase::genShaderSource(void) const
{
    const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : ("")) << "layout (local_size_x=" << m_localWidth
        << ", local_size_y=" << m_localHeight << ") in;\n"
        << "layout(binding=0, std430) buffer Output\n"
        << "{\n"
        << "    highp int values[];\n"
        << "} sb_result;\n";

    if (m_storage == STORAGE_BUFFER)
        buf << "layout(binding=1, std430) coherent buffer Storage\n"
            << "{\n"
            << "    highp int values[];\n"
            << "} sb_store;\n"
            << "\n"
            << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
            << "{\n"
            << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
            << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx "
               "* gl_WorkGroupSize.x) + localID.x) * "
            << m_elementsPerInvocation << " + element;\n"
            << "}\n";
    else if (m_storage == STORAGE_IMAGE)
        buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
            << "\n"
            << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
            << "{\n"
            << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * "
               "gl_WorkGroupSize.y + localID.y) + element * "
            << m_workHeight << ");\n"
            << "}\n";
    else
        DE_ASSERT(false);

    buf << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + "
           "gl_GlobalInvocationID.x);\n"
        << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
        << "    bool allOk      = true;\n"
        << "\n"
        << genShaderMainBlock() << "\n"
        << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
        << "}\n";

    return specializeShader(m_context, buf.str().c_str());
}

class InvocationWriteReadCase : public InvocationBasicCase
{
public:
    InvocationWriteReadCase(Context &context, const char *name, const char *desc, StorageType storage, int flags);

private:
    std::string genShaderMainBlock(void) const;
};

InvocationWriteReadCase::InvocationWriteReadCase(Context &context, const char *name, const char *desc,
                                                 StorageType storage, int flags)
    : InvocationBasicCase(context, name, desc, storage, flags)
{
}

std::string InvocationWriteReadCase::genShaderMainBlock(void) const
{
    std::ostringstream buf;

    // write

    for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    {
        if (m_storage == STORAGE_BUFFER && m_useAtomic)
            buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
        else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
            buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
        else if (m_storage == STORAGE_IMAGE && m_useAtomic)
            buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
        else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
            buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx
                << "), ivec4(int(groupNdx), 0, 0, 0));\n";
        else
            DE_ASSERT(false);
    }

    // barrier

    buf << genBarrierSource();

    // read

    for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    {
        const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx + 1) +
                                                         ", " + de::toString(2 * ndx) + ")) % gl_WorkGroupSize.xy") :
                                                        ("gl_LocalInvocationID.xy");

        if (m_storage == STORAGE_BUFFER && m_useAtomic)
            buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx
                << ")], 0) == groupNdx);\n";
        else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
            buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
        else if (m_storage == STORAGE_IMAGE && m_useAtomic)
            buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx
                << "), 0) == groupNdx);\n";
        else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
            buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx
                << ")).x == groupNdx);\n";
        else
            DE_ASSERT(false);
    }

    return buf.str();
}

class InvocationReadWriteCase : public InvocationBasicCase
{
public:
    InvocationReadWriteCase(Context &context, const char *name, const char *desc, StorageType storage, int flags);

private:
    std::string genShaderMainBlock(void) const;
};

InvocationReadWriteCase::InvocationReadWriteCase(Context &context, const char *name, const char *desc,
                                                 StorageType storage, int flags)
    : InvocationBasicCase(context, name, desc, storage, flags)
{
}

std::string InvocationReadWriteCase::genShaderMainBlock(void) const
{
    std::ostringstream buf;

    // read

    for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    {
        const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx + 1) +
                                                         ", " + de::toString(2 * ndx) + ")) % gl_WorkGroupSize.xy") :
                                                        ("gl_LocalInvocationID.xy");

        if (m_storage == STORAGE_BUFFER && m_useAtomic)
            buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx
                << ")], 123) == 0);\n";
        else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
            buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
        else if (m_storage == STORAGE_IMAGE && m_useAtomic)
            buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx
                << "), 123) == 0);\n";
        else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
            buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
        else
            DE_ASSERT(false);
    }

    // barrier

    buf << genBarrierSource();

    // write

    for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    {
        if (m_storage == STORAGE_BUFFER && m_useAtomic)
            buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
        else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
            buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
        else if (m_storage == STORAGE_IMAGE && m_useAtomic)
            buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
        else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
            buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx
                << "), ivec4(int(groupNdx), 0, 0, 0));\n";
        else
            DE_ASSERT(false);
    }

    return buf.str();
}

class InvocationOverWriteCase : public InvocationBasicCase
{
public:
    InvocationOverWriteCase(Context &context, const char *name, const char *desc, StorageType storage, int flags);

private:
    std::string genShaderMainBlock(void) const;
};

InvocationOverWriteCase::InvocationOverWriteCase(Context &context, const char *name, const char *desc,
                                                 StorageType storage, int flags)
    : InvocationBasicCase(context, name, desc, storage, flags)
{
}

std::string InvocationOverWriteCase::genShaderMainBlock(void) const
{
    std::ostringstream buf;

    // write

    for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    {
        if (m_storage == STORAGE_BUFFER && m_useAtomic)
            buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
        else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
            buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
        else if (m_storage == STORAGE_IMAGE && m_useAtomic)
            buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
        else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
            buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
        else
            DE_ASSERT(false);
    }

    // barrier

    buf << genBarrierSource();

    // write over

    for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    {
        // write another invocation's value or our own value depending on test type
        const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx + 4) +
                                                         ", " + de::toString(3 * ndx) + ")) % gl_WorkGroupSize.xy") :
                                                        ("gl_LocalInvocationID.xy");

        if (m_storage == STORAGE_BUFFER && m_useAtomic)
            buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
        else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
            buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
        else if (m_storage == STORAGE_IMAGE && m_useAtomic)
            buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
        else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
            buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
        else
            DE_ASSERT(false);
    }

    // barrier

    buf << genBarrierSource();

    // read

    for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    {
        // check another invocation's value or our own value depending on test type
        const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx + 1) +
                                                         ", " + de::toString(2 * ndx) + ")) % gl_WorkGroupSize.xy") :
                                                        ("gl_LocalInvocationID.xy");

        if (m_storage == STORAGE_BUFFER && m_useAtomic)
            buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx
                << ")], 123) == groupNdx);\n";
        else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
            buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
        else if (m_storage == STORAGE_IMAGE && m_useAtomic)
            buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx
                << "), 123) == groupNdx);\n";
        else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
            buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx
                << ")).x == groupNdx);\n";
        else
            DE_ASSERT(false);
    }

    return buf.str();
}

class InvocationAliasWriteCase : public InterInvocationTestCase
{
public:
    enum TestType
    {
        TYPE_WRITE = 0,
        TYPE_OVERWRITE,

        TYPE_LAST
    };

    InvocationAliasWriteCase(Context &context, const char *name, const char *desc, TestType type, StorageType storage,
                             int flags);

private:
    std::string genShaderSource(void) const;

    const TestType m_type;
};

InvocationAliasWriteCase::InvocationAliasWriteCase(Context &context, const char *name, const char *desc, TestType type,
                                                   StorageType storage, int flags)
    : InterInvocationTestCase(context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
    , m_type(type)
{
    DE_ASSERT(type < TYPE_LAST);
}

std::string InvocationAliasWriteCase::genShaderSource(void) const
{
    const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : ("")) << "layout (local_size_x=" << m_localWidth
        << ", local_size_y=" << m_localHeight << ") in;\n"
        << "layout(binding=0, std430) buffer Output\n"
        << "{\n"
        << "    highp int values[];\n"
        << "} sb_result;\n";

    if (m_storage == STORAGE_BUFFER)
        buf << "layout(binding=1, std430) coherent buffer Storage0\n"
            << "{\n"
            << "    highp int values[];\n"
            << "} sb_store0;\n"
            << "layout(binding=2, std430) coherent buffer Storage1\n"
            << "{\n"
            << "    highp int values[];\n"
            << "} sb_store1;\n"
            << "\n"
            << "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
            << "{\n"
            << "    highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
            << "    return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx "
               "* gl_WorkGroupSize.x) + localID.x) * "
            << m_elementsPerInvocation << " + element;\n"
            << "}\n";
    else if (m_storage == STORAGE_IMAGE)
        buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
            << "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
            << "\n"
            << "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
            << "{\n"
            << "    return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * "
               "gl_WorkGroupSize.y + localID.y) + element * "
            << m_workHeight << ");\n"
            << "}\n";
    else
        DE_ASSERT(false);

    buf << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + "
           "gl_GlobalInvocationID.x);\n"
        << "    int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
        << "    bool allOk      = true;\n"
        << "\n";

    if (m_type == TYPE_OVERWRITE)
    {
        // write

        for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
        {
            if (m_storage == STORAGE_BUFFER && m_useAtomic)
                buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
            else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
                buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
            else if (m_storage == STORAGE_IMAGE && m_useAtomic)
                buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
            else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
                buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx
                    << "), ivec4(456, 0, 0, 0));\n";
            else
                DE_ASSERT(false);
        }

        // barrier

        buf << genBarrierSource();
    }
    else
        DE_ASSERT(m_type == TYPE_WRITE);

    // write (again)

    for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    {
        const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx + 2) +
                                                         ", " + de::toString(2 * ndx) + ")) % gl_WorkGroupSize.xy") :
                                                        ("gl_LocalInvocationID.xy");

        if (m_storage == STORAGE_BUFFER && m_useAtomic)
            buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
        else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
            buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
        else if (m_storage == STORAGE_IMAGE && m_useAtomic)
            buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
        else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
            buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
        else
            DE_ASSERT(false);
    }

    // barrier

    buf << genBarrierSource();

    // read

    for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    {
        if (m_storage == STORAGE_BUFFER && m_useAtomic)
            buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx
                << ")], 123) == groupNdx);\n";
        else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
            buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx
                << ")] == groupNdx);\n";
        else if (m_storage == STORAGE_IMAGE && m_useAtomic)
            buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx
                << "), 123) == groupNdx);\n";
        else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
            buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx
                << ")).x == groupNdx);\n";
        else
            DE_ASSERT(false);
    }

    // return result

    buf << "\n"
        << "    sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
        << "}\n";

    return specializeShader(m_context, buf.str().c_str());
}

namespace op
{

struct WriteData
{
    int targetHandle;
    int seed;

    static WriteData Generate(int targetHandle, int seed)
    {
        WriteData retVal;

        retVal.targetHandle = targetHandle;
        retVal.seed         = seed;

        return retVal;
    }
};

struct ReadData
{
    int targetHandle;
    int seed;

    static ReadData Generate(int targetHandle, int seed)
    {
        ReadData retVal;

        retVal.targetHandle = targetHandle;
        retVal.seed         = seed;

        return retVal;
    }
};

struct Barrier
{
};

struct WriteDataInterleaved
{
    int targetHandle;
    int seed;
    bool evenOdd;

    static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
    {
        WriteDataInterleaved retVal;

        retVal.targetHandle = targetHandle;
        retVal.seed         = seed;
        retVal.evenOdd      = evenOdd;

        return retVal;
    }
};

struct ReadDataInterleaved
{
    int targetHandle;
    int seed0;
    int seed1;

    static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
    {
        ReadDataInterleaved retVal;

        retVal.targetHandle = targetHandle;
        retVal.seed0        = seed0;
        retVal.seed1        = seed1;

        return retVal;
    }
};

struct ReadMultipleData
{
    int targetHandle0;
    int seed0;
    int targetHandle1;
    int seed1;

    static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
    {
        ReadMultipleData retVal;

        retVal.targetHandle0 = targetHandle0;
        retVal.seed0         = seed0;
        retVal.targetHandle1 = targetHandle1;
        retVal.seed1         = seed1;

        return retVal;
    }
};

struct ReadZeroData
{
    int targetHandle;

    static ReadZeroData Generate(int targetHandle)
    {
        ReadZeroData retVal;

        retVal.targetHandle = targetHandle;

        return retVal;
    }
};

} // namespace op

class InterCallTestCase;

class InterCallOperations
{
public:
    InterCallOperations &operator<<(const op::WriteData &);
    InterCallOperations &operator<<(const op::ReadData &);
    InterCallOperations &operator<<(const op::Barrier &);
    InterCallOperations &operator<<(const op::ReadMultipleData &);
    InterCallOperations &operator<<(const op::WriteDataInterleaved &);
    InterCallOperations &operator<<(const op::ReadDataInterleaved &);
    InterCallOperations &operator<<(const op::ReadZeroData &);

private:
    struct Command
    {
        enum CommandType
        {
            TYPE_WRITE = 0,
            TYPE_READ,
            TYPE_BARRIER,
            TYPE_READ_MULTIPLE,
            TYPE_WRITE_INTERLEAVE,
            TYPE_READ_INTERLEAVE,
            TYPE_READ_ZERO,

            TYPE_LAST
        };

        CommandType type;

        union CommandUnion
        {
            op::WriteData write;
            op::ReadData read;
            op::Barrier barrier;
            op::ReadMultipleData readMulti;
            op::WriteDataInterleaved writeInterleave;
            op::ReadDataInterleaved readInterleave;
            op::ReadZeroData readZero;
        } u_cmd;
    };

    friend class InterCallTestCase;

    std::vector<Command> m_cmds;
};

InterCallOperations &InterCallOperations::operator<<(const op::WriteData &cmd)
{
    m_cmds.push_back(Command());
    m_cmds.back().type        = Command::TYPE_WRITE;
    m_cmds.back().u_cmd.write = cmd;

    return *this;
}

InterCallOperations &InterCallOperations::operator<<(const op::ReadData &cmd)
{
    m_cmds.push_back(Command());
    m_cmds.back().type       = Command::TYPE_READ;
    m_cmds.back().u_cmd.read = cmd;

    return *this;
}

InterCallOperations &InterCallOperations::operator<<(const op::Barrier &cmd)
{
    m_cmds.push_back(Command());
    m_cmds.back().type          = Command::TYPE_BARRIER;
    m_cmds.back().u_cmd.barrier = cmd;

    return *this;
}

InterCallOperations &InterCallOperations::operator<<(const op::ReadMultipleData &cmd)
{
    m_cmds.push_back(Command());
    m_cmds.back().type            = Command::TYPE_READ_MULTIPLE;
    m_cmds.back().u_cmd.readMulti = cmd;

    return *this;
}

InterCallOperations &InterCallOperations::operator<<(const op::WriteDataInterleaved &cmd)
{
    m_cmds.push_back(Command());
    m_cmds.back().type                  = Command::TYPE_WRITE_INTERLEAVE;
    m_cmds.back().u_cmd.writeInterleave = cmd;

    return *this;
}

InterCallOperations &InterCallOperations::operator<<(const op::ReadDataInterleaved &cmd)
{
    m_cmds.push_back(Command());
    m_cmds.back().type                 = Command::TYPE_READ_INTERLEAVE;
    m_cmds.back().u_cmd.readInterleave = cmd;

    return *this;
}

InterCallOperations &InterCallOperations::operator<<(const op::ReadZeroData &cmd)
{
    m_cmds.push_back(Command());
    m_cmds.back().type           = Command::TYPE_READ_ZERO;
    m_cmds.back().u_cmd.readZero = cmd;

    return *this;
}

class InterCallTestCase : public TestCase
{
public:
    enum StorageType
    {
        STORAGE_BUFFER = 0,
        STORAGE_IMAGE,

        STORAGE_LAST
    };
    enum Flags
    {
        FLAG_USE_ATOMIC = 1,
        FLAG_USE_INT    = 2,
    };
    InterCallTestCase(Context &context, const char *name, const char *desc, StorageType storage, int flags,
                      const InterCallOperations &ops);
    ~InterCallTestCase(void);

private:
    void init(void);
    void deinit(void);
    IterateResult iterate(void);
    bool verifyResults(void);

    void runCommand(const op::WriteData &cmd, int stepNdx, int &programFriendlyName);
    void runCommand(const op::ReadData &cmd, int stepNdx, int &programFriendlyName, int &resultStorageFriendlyName);
    void runCommand(const op::Barrier &);
    void runCommand(const op::ReadMultipleData &cmd, int stepNdx, int &programFriendlyName,
                    int &resultStorageFriendlyName);
    void runCommand(const op::WriteDataInterleaved &cmd, int stepNdx, int &programFriendlyName);
    void runCommand(const op::ReadDataInterleaved &cmd, int stepNdx, int &programFriendlyName,
                    int &resultStorageFriendlyName);
    void runCommand(const op::ReadZeroData &cmd, int stepNdx, int &programFriendlyName, int &resultStorageFriendlyName);
    void runSingleRead(int targetHandle, int stepNdx, int &programFriendlyName, int &resultStorageFriendlyName);

    glw::GLuint genStorage(int friendlyName);
    glw::GLuint genResultStorage(void);
    glu::ShaderProgram *genWriteProgram(int seed);
    glu::ShaderProgram *genReadProgram(int seed);
    glu::ShaderProgram *genReadMultipleProgram(int seed0, int seed1);
    glu::ShaderProgram *genWriteInterleavedProgram(int seed, bool evenOdd);
    glu::ShaderProgram *genReadInterleavedProgram(int seed0, int seed1);
    glu::ShaderProgram *genReadZeroProgram(void);

    const StorageType m_storage;
    const int m_invocationGridSize; // !< width and height of the two dimensional work dispatch
    const int m_perInvocationSize;  // !< number of elements accessed in single invocation
    const std::vector<InterCallOperations::Command> m_cmds;
    const bool m_useAtomic;
    const bool m_formatInteger;

    std::vector<glu::ShaderProgram *> m_operationPrograms;
    std::vector<glw::GLuint> m_operationResultStorages;
    std::map<int, glw::GLuint> m_storageIDs;
};

InterCallTestCase::InterCallTestCase(Context &context, const char *name, const char *desc, StorageType storage,
                                     int flags, const InterCallOperations &ops)
    : TestCase(context, name, desc)
    , m_storage(storage)
    , m_invocationGridSize(512)
    , m_perInvocationSize(2)
    , m_cmds(ops.m_cmds)
    , m_useAtomic((flags & FLAG_USE_ATOMIC) != 0)
    , m_formatInteger((flags & FLAG_USE_INT) != 0)
{
}

InterCallTestCase::~InterCallTestCase(void)
{
    deinit();
}

void InterCallTestCase::init(void)
{
    int programFriendlyName = 0;

    // requirements

    if (m_useAtomic && m_storage == STORAGE_IMAGE && !checkSupport(m_context))
        throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");

    // generate resources and validate command list

    m_operationPrograms.resize(m_cmds.size(), DE_NULL);
    m_operationResultStorages.resize(m_cmds.size(), 0);

    for (int step = 0; step < (int)m_cmds.size(); ++step)
    {
        switch (m_cmds[step].type)
        {
        case InterCallOperations::Command::TYPE_WRITE:
        {
            const op::WriteData &cmd = m_cmds[step].u_cmd.write;

            // new storage handle?
            if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
                m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);

            // program
            {
                glu::ShaderProgram *program = genWriteProgram(cmd.seed);

                m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName
                                   << tcu::TestLog::EndMessage;
                m_testCtx.getLog() << *program;

                if (!program->isOk())
                    throw tcu::TestError("could not build program");

                m_operationPrograms[step] = program;
            }
            break;
        }

        case InterCallOperations::Command::TYPE_READ:
        {
            const op::ReadData &cmd = m_cmds[step].u_cmd.read;
            DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());

            // program and result storage
            {
                glu::ShaderProgram *program = genReadProgram(cmd.seed);

                m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName
                                   << tcu::TestLog::EndMessage;
                m_testCtx.getLog() << *program;

                if (!program->isOk())
                    throw tcu::TestError("could not build program");

                m_operationPrograms[step]       = program;
                m_operationResultStorages[step] = genResultStorage();
            }
            break;
        }

        case InterCallOperations::Command::TYPE_BARRIER:
        {
            break;
        }

        case InterCallOperations::Command::TYPE_READ_MULTIPLE:
        {
            const op::ReadMultipleData &cmd = m_cmds[step].u_cmd.readMulti;
            DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
            DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());

            // program
            {
                glu::ShaderProgram *program = genReadMultipleProgram(cmd.seed0, cmd.seed1);

                m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName
                                   << tcu::TestLog::EndMessage;
                m_testCtx.getLog() << *program;

                if (!program->isOk())
                    throw tcu::TestError("could not build program");

                m_operationPrograms[step]       = program;
                m_operationResultStorages[step] = genResultStorage();
            }
            break;
        }

        case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
        {
            const op::WriteDataInterleaved &cmd = m_cmds[step].u_cmd.writeInterleave;

            // new storage handle?
            if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
                m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);

            // program
            {
                glu::ShaderProgram *program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);

                m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName
                                   << tcu::TestLog::EndMessage;
                m_testCtx.getLog() << *program;

                if (!program->isOk())
                    throw tcu::TestError("could not build program");

                m_operationPrograms[step] = program;
            }
            break;
        }

        case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
        {
            const op::ReadDataInterleaved &cmd = m_cmds[step].u_cmd.readInterleave;
            DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());

            // program
            {
                glu::ShaderProgram *program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);

                m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName
                                   << tcu::TestLog::EndMessage;
                m_testCtx.getLog() << *program;

                if (!program->isOk())
                    throw tcu::TestError("could not build program");

                m_operationPrograms[step]       = program;
                m_operationResultStorages[step] = genResultStorage();
            }
            break;
        }

        case InterCallOperations::Command::TYPE_READ_ZERO:
        {
            const op::ReadZeroData &cmd = m_cmds[step].u_cmd.readZero;

            // new storage handle?
            if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
                m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);

            // program
            {
                glu::ShaderProgram *program = genReadZeroProgram();

                m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName
                                   << tcu::TestLog::EndMessage;
                m_testCtx.getLog() << *program;

                if (!program->isOk())
                    throw tcu::TestError("could not build program");

                m_operationPrograms[step]       = program;
                m_operationResultStorages[step] = genResultStorage();
            }
            break;
        }

        default:
            DE_ASSERT(false);
        }
    }
}

void InterCallTestCase::deinit(void)
{
    // programs
    for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
        delete m_operationPrograms[ndx];
    m_operationPrograms.clear();

    // result storages
    for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
    {
        if (m_operationResultStorages[ndx])
            m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
    }
    m_operationResultStorages.clear();

    // storage
    for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
    {
        const glw::Functions &gl = m_context.getRenderContext().getFunctions();

        if (m_storage == STORAGE_BUFFER)
            gl.deleteBuffers(1, &it->second);
        else if (m_storage == STORAGE_IMAGE)
            gl.deleteTextures(1, &it->second);
        else
            DE_ASSERT(false);
    }
    m_storageIDs.clear();
}

InterCallTestCase::IterateResult InterCallTestCase::iterate(void)
{
    int programFriendlyName       = 0;
    int resultStorageFriendlyName = 0;

    m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;

    // run steps

    for (int step = 0; step < (int)m_cmds.size(); ++step)
    {
        switch (m_cmds[step].type)
        {
        case InterCallOperations::Command::TYPE_WRITE:
            runCommand(m_cmds[step].u_cmd.write, step, programFriendlyName);
            break;
        case InterCallOperations::Command::TYPE_READ:
            runCommand(m_cmds[step].u_cmd.read, step, programFriendlyName, resultStorageFriendlyName);
            break;
        case InterCallOperations::Command::TYPE_BARRIER:
            runCommand(m_cmds[step].u_cmd.barrier);
            break;
        case InterCallOperations::Command::TYPE_READ_MULTIPLE:
            runCommand(m_cmds[step].u_cmd.readMulti, step, programFriendlyName, resultStorageFriendlyName);
            break;
        case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
            runCommand(m_cmds[step].u_cmd.writeInterleave, step, programFriendlyName);
            break;
        case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
            runCommand(m_cmds[step].u_cmd.readInterleave, step, programFriendlyName, resultStorageFriendlyName);
            break;
        case InterCallOperations::Command::TYPE_READ_ZERO:
            runCommand(m_cmds[step].u_cmd.readZero, step, programFriendlyName, resultStorageFriendlyName);
            break;
        default:
            DE_ASSERT(false);
        }
    }

    // read results from result buffers
    if (verifyResults())
        m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    else
        m_testCtx.setTestResult(
            QP_TEST_RESULT_FAIL,
            (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed")
                .c_str());

    return STOP;
}

bool InterCallTestCase::verifyResults(void)
{
    int resultBufferFriendlyName = 0;
    bool allResultsOk            = true;
    bool anyResult               = false;

    m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;

    for (int step = 0; step < (int)m_cmds.size(); ++step)
    {
        const int errorFloodThreshold = 5;
        int numErrorsLogged           = 0;

        if (m_operationResultStorages[step])
        {
            const glw::Functions &gl = m_context.getRenderContext().getFunctions();
            const void *mapped       = DE_NULL;
            std::vector<int32_t> results(m_invocationGridSize * m_invocationGridSize);
            bool error = false;

            anyResult = true;

            gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
            mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0,
                                       m_invocationGridSize * m_invocationGridSize * sizeof(uint32_t), GL_MAP_READ_BIT);
            GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");

            // copy to properly aligned array
            deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(uint32_t));

            if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
                throw tcu::TestError("memory map store corrupted");

            // check the results
            for (int ndx = 0; ndx < (int)results.size(); ++ndx)
            {
                if (results[ndx] != 1)
                {
                    error = true;

                    if (numErrorsLogged == 0)
                        m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName
                                           << " failed, got unexpected values.\n"
                                           << tcu::TestLog::EndMessage;
                    if (numErrorsLogged++ < errorFloodThreshold)
                        m_testCtx.getLog() << tcu::TestLog::Message << "    Error at index " << ndx
                                           << ": expected 1, got " << results[ndx] << ".\n"
                                           << tcu::TestLog::EndMessage;
                    else
                    {
                        // after N errors, no point continuing verification
                        m_testCtx.getLog()
                            << tcu::TestLog::Message << "    -- too many errors, skipping verification --\n"
                            << tcu::TestLog::EndMessage;
                        break;
                    }
                }
            }

            if (error)
            {
                allResultsOk = false;
            }
            else
                m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName
                                   << " ok." << tcu::TestLog::EndMessage;
        }
    }

    DE_ASSERT(anyResult);
    DE_UNREF(anyResult);

    return allResultsOk;
}

void InterCallTestCase::runCommand(const op::WriteData &cmd, int stepNdx, int &programFriendlyName)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    m_testCtx.getLog() << tcu::TestLog::Message << "Running program #" << ++programFriendlyName << " to write "
                       << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
                       << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
                       << tcu::TestLog::EndMessage;

    gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

    // set destination
    if (m_storage == STORAGE_BUFFER)
    {
        DE_ASSERT(m_storageIDs[cmd.targetHandle]);

        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        DE_ASSERT(m_storageIDs[cmd.targetHandle]);

        gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0,
                            (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY),
                            (m_formatInteger) ? (GL_R32I) : (GL_R32F));
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
    }
    else
        DE_ASSERT(false);

    // calc
    gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
    GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
}

void InterCallTestCase::runCommand(const op::ReadData &cmd, int stepNdx, int &programFriendlyName,
                                   int &resultStorageFriendlyName)
{
    runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}

void InterCallTestCase::runCommand(const op::Barrier &cmd)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    DE_UNREF(cmd);

    if (m_storage == STORAGE_BUFFER)
    {
        m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT"
                           << tcu::TestLog::EndMessage;
        gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT"
                           << tcu::TestLog::EndMessage;
        gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
    }
    else
        DE_ASSERT(false);
}

void InterCallTestCase::runCommand(const op::ReadMultipleData &cmd, int stepNdx, int &programFriendlyName,
                                   int &resultStorageFriendlyName)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    m_testCtx.getLog() << tcu::TestLog::Message << "Running program #" << ++programFriendlyName << " to verify "
                       << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0
                       << " and #" << cmd.targetHandle1 << ".\n"
                       << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
                       << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
                       << tcu::TestLog::EndMessage;

    gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

    // set sources
    if (m_storage == STORAGE_BUFFER)
    {
        DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
        DE_ASSERT(m_storageIDs[cmd.targetHandle1]);

        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
        DE_ASSERT(m_storageIDs[cmd.targetHandle1]);

        gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0,
                            (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY),
                            (m_formatInteger) ? (GL_R32I) : (GL_R32F));
        gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0,
                            (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY),
                            (m_formatInteger) ? (GL_R32I) : (GL_R32F));
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
    }
    else
        DE_ASSERT(false);

    // set destination
    DE_ASSERT(m_operationResultStorages[stepNdx]);
    gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
    GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");

    // calc
    gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
    GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
}

void InterCallTestCase::runCommand(const op::WriteDataInterleaved &cmd, int stepNdx, int &programFriendlyName)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    m_testCtx.getLog() << tcu::TestLog::Message << "Running program #" << ++programFriendlyName << " to write "
                       << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
                       << "    Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " "
                       << ((m_storage == STORAGE_BUFFER) ? ("element") : ("column")) << ".\n"
                       << "    Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
                       << tcu::TestLog::EndMessage;

    gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

    // set destination
    if (m_storage == STORAGE_BUFFER)
    {
        DE_ASSERT(m_storageIDs[cmd.targetHandle]);

        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        DE_ASSERT(m_storageIDs[cmd.targetHandle]);

        gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0,
                            (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY),
                            (m_formatInteger) ? (GL_R32I) : (GL_R32F));
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
    }
    else
        DE_ASSERT(false);

    // calc
    gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
    GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
}

void InterCallTestCase::runCommand(const op::ReadDataInterleaved &cmd, int stepNdx, int &programFriendlyName,
                                   int &resultStorageFriendlyName)
{
    runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}

void InterCallTestCase::runCommand(const op::ReadZeroData &cmd, int stepNdx, int &programFriendlyName,
                                   int &resultStorageFriendlyName)
{
    runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
}

void InterCallTestCase::runSingleRead(int targetHandle, int stepNdx, int &programFriendlyName,
                                      int &resultStorageFriendlyName)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    m_testCtx.getLog() << tcu::TestLog::Message << "Running program #" << ++programFriendlyName << " to verify "
                       << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
                       << "    Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
                       << "    Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
                       << tcu::TestLog::EndMessage;

    gl.useProgram(m_operationPrograms[stepNdx]->getProgram());

    // set source
    if (m_storage == STORAGE_BUFFER)
    {
        DE_ASSERT(m_storageIDs[targetHandle]);

        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[targetHandle]);
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        DE_ASSERT(m_storageIDs[targetHandle]);

        gl.bindImageTexture(1, m_storageIDs[targetHandle], 0, GL_FALSE, 0,
                            (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY),
                            (m_formatInteger) ? (GL_R32I) : (GL_R32F));
        GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
    }
    else
        DE_ASSERT(false);

    // set destination
    DE_ASSERT(m_operationResultStorages[stepNdx]);
    gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
    GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");

    // calc
    gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
    GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
}

glw::GLuint InterCallTestCase::genStorage(int friendlyName)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    if (m_storage == STORAGE_BUFFER)
    {
        const int numElements = m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
        const int bufferSize  = numElements * (int)((m_formatInteger) ? (sizeof(int32_t)) : (sizeof(glw::GLfloat)));
        glw::GLuint retVal    = 0;

        m_testCtx.getLog() << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize
                           << " bytes." << tcu::TestLog::EndMessage;

        gl.genBuffers(1, &retVal);
        gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);

        if (m_formatInteger)
        {
            const std::vector<uint32_t> zeroBuffer(numElements, 0);
            gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
        }
        else
        {
            const std::vector<float> zeroBuffer(numElements, 0.0f);
            gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
        }
        GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");

        return retVal;
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        const int imageWidth  = m_invocationGridSize;
        const int imageHeight = m_invocationGridSize * m_perInvocationSize;
        glw::GLuint retVal    = 0;

        m_testCtx.getLog() << tcu::TestLog::Message << "Creating image #" << friendlyName << ", size " << imageWidth
                           << "x" << imageHeight << ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
                           << ", size = " << (imageWidth * imageHeight * sizeof(uint32_t)) << " bytes."
                           << tcu::TestLog::EndMessage;

        gl.genTextures(1, &retVal);
        gl.bindTexture(GL_TEXTURE_2D, retVal);

        if (m_formatInteger)
            gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, imageWidth, imageHeight);
        else
            gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32F, imageWidth, imageHeight);

        gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");

        m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0" << tcu::TestLog::EndMessage;

        if (m_formatInteger)
        {
            const std::vector<int32_t> zeroBuffer(imageWidth * imageHeight, 0);
            gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
        }
        else
        {
            const std::vector<float> zeroBuffer(imageWidth * imageHeight, 0.0f);
            gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageWidth, imageHeight, GL_RED, GL_FLOAT, &zeroBuffer[0]);
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");

        return retVal;
    }
    else
    {
        DE_ASSERT(false);
        return 0;
    }
}

glw::GLuint InterCallTestCase::genResultStorage(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    glw::GLuint retVal       = 0;

    gl.genBuffers(1, &retVal);
    gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, retVal);
    gl.bufferData(GL_SHADER_STORAGE_BUFFER, m_invocationGridSize * m_invocationGridSize * sizeof(uint32_t), DE_NULL,
                  GL_STATIC_DRAW);
    GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");

    return retVal;
}

glu::ShaderProgram *InterCallTestCase::genWriteProgram(int seed)
{
    const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
        << "layout (local_size_x = 1, local_size_y = 1) in;\n";

    if (m_storage == STORAGE_BUFFER)
        buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
            << "{\n"
            << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
            << "} sb_out;\n";
    else if (m_storage == STORAGE_IMAGE)
        buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) "
            << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp "
            << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
    else
        DE_ASSERT(false);

    buf << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
           "gl_GlobalInvocationID.x);\n"
        << "\n";

    // Write to buffer/image m_perInvocationSize elements
    if (m_storage == STORAGE_BUFFER)
    {
        for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
        {
            if (m_useAtomic)
                buf << "    atomicExchange(";
            else
                buf << "    ";

            buf << "sb_out.values[(groupNdx + " << seed + writeNdx * m_invocationGridSize * m_invocationGridSize
                << ") % " << m_invocationGridSize * m_invocationGridSize * m_perInvocationSize << "]";

            if (m_useAtomic)
                buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
            else
                buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
        }
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
        {
            if (m_useAtomic)
                buf << "    imageAtomicExchange";
            else
                buf << "    imageStore";

            buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx * 100) << ") % "
                << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx * m_invocationGridSize
                << "), ";

            if (m_useAtomic)
                buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
            else
                buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") :
                                            ("vec4(float(groupNdx), 0.0, 0.0, 0.0)"))
                    << ");\n";
        }
    }
    else
        DE_ASSERT(false);

    buf << "}\n";

    return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(
                                                                    specializeShader(m_context, buf.str().c_str())));
}

glu::ShaderProgram *InterCallTestCase::genReadProgram(int seed)
{
    const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
        << "layout (local_size_x = 1, local_size_y = 1) in;\n";

    if (m_storage == STORAGE_BUFFER)
        buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
            << "{\n"
            << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
            << "} sb_in;\n";
    else if (m_storage == STORAGE_IMAGE)
        buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) "
            << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp "
            << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
    else
        DE_ASSERT(false);

    buf << "layout(binding=0, std430) buffer ResultBuffer\n"
        << "{\n"
        << "    highp int resultOk[];\n"
        << "} sb_result;\n"
        << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
           "gl_GlobalInvocationID.x);\n"
        << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0"))
        << ";\n"
        << "    bool allOk = true;\n"
        << "\n";

    // Verify data

    if (m_storage == STORAGE_BUFFER)
    {
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
        {
            if (!m_useAtomic)
                buf << "    allOk = allOk && (sb_in.values[(groupNdx + "
                    << seed + readNdx * m_invocationGridSize * m_invocationGridSize << ") % "
                    << m_invocationGridSize * m_invocationGridSize * m_perInvocationSize
                    << "] == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
            else
                buf << "    allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
                    << seed + readNdx * m_invocationGridSize * m_invocationGridSize << ") % "
                    << m_invocationGridSize * m_invocationGridSize * m_perInvocationSize
                    << "], zero) == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
        }
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
        {
            if (!m_useAtomic)
                buf << "    allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
                    << (seed + readNdx * 100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + "
                    << readNdx * m_invocationGridSize << "u)).x == " << ((m_formatInteger) ? ("int") : ("float"))
                    << "(groupNdx));\n";
            else
                buf << "    allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
                    << (seed + readNdx * 100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + "
                    << readNdx * m_invocationGridSize << "u), zero) == " << ((m_formatInteger) ? ("int") : ("float"))
                    << "(groupNdx));\n";
        }
    }
    else
        DE_ASSERT(false);

    buf << "    sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
        << "}\n";

    return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(
                                                                    specializeShader(m_context, buf.str().c_str())));
}

glu::ShaderProgram *InterCallTestCase::genReadMultipleProgram(int seed0, int seed1)
{
    const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
        << "layout (local_size_x = 1, local_size_y = 1) in;\n";

    if (m_storage == STORAGE_BUFFER)
        buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
            << "{\n"
            << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
            << "} sb_in0;\n"
            << "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
            << "{\n"
            << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
            << "} sb_in1;\n";
    else if (m_storage == STORAGE_IMAGE)
        buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) "
            << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp "
            << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
            << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) "
            << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp "
            << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
    else
        DE_ASSERT(false);

    buf << "layout(binding=0, std430) buffer ResultBuffer\n"
        << "{\n"
        << "    highp int resultOk[];\n"
        << "} sb_result;\n"
        << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
           "gl_GlobalInvocationID.x);\n"
        << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0"))
        << ";\n"
        << "    bool allOk = true;\n"
        << "\n";

    // Verify data

    if (m_storage == STORAGE_BUFFER)
    {
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
            buf << "    allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : (""))
                << "sb_in0.values[(groupNdx + " << seed0 + readNdx * m_invocationGridSize * m_invocationGridSize
                << ") % " << m_invocationGridSize * m_invocationGridSize * m_perInvocationSize << "]"
                << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float"))
                << "(groupNdx));\n"
                << "    allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : (""))
                << "sb_in1.values[(groupNdx + " << seed1 + readNdx * m_invocationGridSize * m_invocationGridSize
                << ") % " << m_invocationGridSize * m_invocationGridSize * m_perInvocationSize << "]"
                << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float"))
                << "(groupNdx));\n";
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
            buf << "    allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
                << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx * 100) << "u) % "
                << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx * m_invocationGridSize << "u)"
                << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float"))
                << "(groupNdx));\n"
                << "    allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
                << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx * 100) << "u) % "
                << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx * m_invocationGridSize << "u)"
                << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float"))
                << "(groupNdx));\n";
    }
    else
        DE_ASSERT(false);

    buf << "    sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
        << "}\n";

    return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(
                                                                    specializeShader(m_context, buf.str().c_str())));
}

glu::ShaderProgram *InterCallTestCase::genWriteInterleavedProgram(int seed, bool evenOdd)
{
    const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
        << "layout (local_size_x = 1, local_size_y = 1) in;\n";

    if (m_storage == STORAGE_BUFFER)
        buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
            << "{\n"
            << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
            << "} sb_out;\n";
    else if (m_storage == STORAGE_IMAGE)
        buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) "
            << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp "
            << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
    else
        DE_ASSERT(false);

    buf << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << "    int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
           "gl_GlobalInvocationID.x);\n"
        << "\n";

    // Write to buffer/image m_perInvocationSize elements
    if (m_storage == STORAGE_BUFFER)
    {
        for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
        {
            if (m_useAtomic)
                buf << "    atomicExchange(";
            else
                buf << "    ";

            buf << "sb_out.values[((groupNdx + " << seed + writeNdx * m_invocationGridSize * m_invocationGridSize / 2
                << ") % " << m_invocationGridSize * m_invocationGridSize / 2 * m_perInvocationSize << ") * 2 + "
                << ((evenOdd) ? (0) : (1)) << "]";

            if (m_useAtomic)
                buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
            else
                buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
        }
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
        {
            if (m_useAtomic)
                buf << "    imageAtomicExchange";
            else
                buf << "    imageStore";

            buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx * 100) << ") % "
                << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1))
                << ", int(gl_GlobalInvocationID.y) + " << writeNdx * m_invocationGridSize << "), ";

            if (m_useAtomic)
                buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
            else
                buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") :
                                            ("vec4(float(groupNdx), 0.0, 0.0, 0.0)"))
                    << ");\n";
        }
    }
    else
        DE_ASSERT(false);

    buf << "}\n";

    return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(
                                                                    specializeShader(m_context, buf.str().c_str())));
}

glu::ShaderProgram *InterCallTestCase::genReadInterleavedProgram(int seed0, int seed1)
{
    const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
        << "layout (local_size_x = 1, local_size_y = 1) in;\n";

    if (m_storage == STORAGE_BUFFER)
        buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
            << "{\n"
            << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
            << "} sb_in;\n";
    else if (m_storage == STORAGE_IMAGE)
        buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) "
            << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp "
            << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
    else
        DE_ASSERT(false);

    buf << "layout(binding=0, std430) buffer ResultBuffer\n"
        << "{\n"
        << "    highp int resultOk[];\n"
        << "} sb_result;\n"
        << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
           "gl_GlobalInvocationID.x);\n"
        << "    int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * "
           "gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
        << "    " << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0"))
        << ";\n"
        << "    bool allOk = true;\n"
        << "\n";

    // Verify data

    if (m_storage == STORAGE_BUFFER)
    {
        buf << "    if (groupNdx % 2 == 0)\n"
            << "    {\n";
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
            buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : (""))
                << "sb_in.values[((interleavedGroupNdx + "
                << seed0 + readNdx * m_invocationGridSize * m_invocationGridSize / 2 << ") % "
                << m_invocationGridSize * m_invocationGridSize * m_perInvocationSize / 2 << ") * 2 + 0]"
                << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float"))
                << "(interleavedGroupNdx));\n";
        buf << "    }\n"
            << "    else\n"
            << "    {\n";
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
            buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : (""))
                << "sb_in.values[((interleavedGroupNdx + "
                << seed1 + readNdx * m_invocationGridSize * m_invocationGridSize / 2 << ") % "
                << m_invocationGridSize * m_invocationGridSize * m_perInvocationSize / 2 << ") * 2 + 1]"
                << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float"))
                << "(interleavedGroupNdx));\n";
        buf << "    }\n";
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        buf << "    if (groupNdx % 2 == 0)\n"
            << "    {\n";
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
            buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
                << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx * 100) << ") % "
                << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + "
                << readNdx * m_invocationGridSize << ")" << ((m_useAtomic) ? (", zero)") : (").x"))
                << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
        buf << "    }\n"
            << "    else\n"
            << "    {\n";
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
            buf << "        allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
                << "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx * 100) << ") % "
                << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + "
                << readNdx * m_invocationGridSize << ")" << ((m_useAtomic) ? (", zero)") : (").x"))
                << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
        buf << "    }\n";
    }
    else
        DE_ASSERT(false);

    buf << "    sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
        << "}\n";

    return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(
                                                                    specializeShader(m_context, buf.str().c_str())));
}

glu::ShaderProgram *InterCallTestCase::genReadZeroProgram(void)
{
    const bool useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << ((useImageAtomics) ? ("${SHADER_IMAGE_ATOMIC_REQUIRE}\n") : (""))
        << "layout (local_size_x = 1, local_size_y = 1) in;\n";

    if (m_storage == STORAGE_BUFFER)
        buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
            << "{\n"
            << "    highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
            << "} sb_in;\n";
    else if (m_storage == STORAGE_IMAGE)
        buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) "
            << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp "
            << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
    else
        DE_ASSERT(false);

    buf << "layout(binding=0, std430) buffer ResultBuffer\n"
        << "{\n"
        << "    highp int resultOk[];\n"
        << "} sb_result;\n"
        << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
        << "    int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + "
           "gl_GlobalInvocationID.x);\n"
        << "    " << ((m_formatInteger) ? ("int") : ("float"))
        << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
        << "    bool allOk = true;\n"
        << "\n";

    // Verify data

    if (m_storage == STORAGE_BUFFER)
    {
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
            buf << "    allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : (""))
                << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
                << ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0"))
                << ");\n";
    }
    else if (m_storage == STORAGE_IMAGE)
    {
        for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
            buf << "    allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
                << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + "
                << (readNdx * m_invocationGridSize) << "u)" << ((m_useAtomic) ? (", anything)") : (").x"))
                << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
    }
    else
        DE_ASSERT(false);

    buf << "    sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
        << "}\n";

    return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(
                                                                    specializeShader(m_context, buf.str().c_str())));
}

class SSBOConcurrentAtomicCase : public TestCase
{
public:
    SSBOConcurrentAtomicCase(Context &context, const char *name, const char *description, int numCalls, int workSize);
    ~SSBOConcurrentAtomicCase(void);

    void init(void);
    void deinit(void);
    IterateResult iterate(void);

private:
    std::string genComputeSource(void) const;

    const int m_numCalls;
    const int m_workSize;
    glu::ShaderProgram *m_program;
    uint32_t m_bufferID;
    std::vector<uint32_t> m_intermediateResultBuffers;
};

SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase(Context &context, const char *name, const char *description,
                                                   int numCalls, int workSize)
    : TestCase(context, name, description)
    , m_numCalls(numCalls)
    , m_workSize(workSize)
    , m_program(DE_NULL)
    , m_bufferID(DE_NULL)
{
}

SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase(void)
{
    deinit();
}

void SSBOConcurrentAtomicCase::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    std::vector<uint32_t> zeroData(m_workSize, 0);

    // gen buffers

    gl.genBuffers(1, &m_bufferID);
    gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
    gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uint32_t) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);

    for (int ndx = 0; ndx < m_numCalls; ++ndx)
    {
        uint32_t buffer = 0;

        gl.genBuffers(1, &buffer);
        gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
        gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uint32_t) * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);

        m_intermediateResultBuffers.push_back(buffer);
        GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
    }

    // gen program

    m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources()
                                                                         << glu::ComputeSource(genComputeSource()));
    m_testCtx.getLog() << *m_program;
    if (!m_program->isOk())
        throw tcu::TestError("could not build program");
}

void SSBOConcurrentAtomicCase::deinit(void)
{
    if (m_bufferID)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
        m_bufferID = 0;
    }

    for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
    m_intermediateResultBuffers.clear();

    delete m_program;
    m_program = DE_NULL;
}

TestCase::IterateResult SSBOConcurrentAtomicCase::iterate(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const uint32_t sumValue  = (uint32_t)(m_numCalls * (m_numCalls + 1) / 2);
    std::vector<int> deltas;

    // generate unique deltas
    generateShuffledRamp(m_numCalls, deltas);

    // invoke program N times, each with a different delta
    {
        const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");

        m_testCtx.getLog() << tcu::TestLog::Message << "Running shader " << m_numCalls << " times.\n"
                           << "Num groups = (" << m_workSize << ", 1, 1)\n"
                           << "Setting u_atomicDelta to a unique value for each call.\n"
                           << tcu::TestLog::EndMessage;

        if (deltaLocation == -1)
            throw tcu::TestError("u_atomicDelta location was -1");

        gl.useProgram(m_program->getProgram());
        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);

        for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
        {
            m_testCtx.getLog() << tcu::TestLog::Message << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
                               << tcu::TestLog::EndMessage;

            gl.uniform1ui(deltaLocation, deltas[callNdx]);
            gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
            gl.dispatchCompute(m_workSize, 1, 1);
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
    }

    // Verify result
    {
        std::vector<uint32_t> result;

        m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value "
                           << sumValue << tcu::TestLog::EndMessage;

        gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
        readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);

        for (int ndx = 0; ndx < m_workSize; ++ndx)
        {
            if (result[ndx] != sumValue)
            {
                m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer error, at index " << ndx
                                   << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
                                   << "Work buffer contains invalid values." << tcu::TestLog::EndMessage;

                m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
                return STOP;
            }
        }

        m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
    }

    // verify steps
    {
        std::vector<std::vector<uint32_t>> intermediateResults(m_numCalls);
        std::vector<uint32_t> valueChain(m_numCalls);

        m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

        // collect results

        for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
        {
            gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
            readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
        }

        // verify values

        for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
        {
            int invalidOperationNdx;
            uint32_t errorDelta;
            uint32_t errorExpected;

            // collect result chain for each element
            for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
                valueChain[callNdx] = intermediateResults[callNdx][valueNdx];

            // check there exists a path from 0 to sumValue using each addition once
            // decompose cumulative results to addition operations (all additions positive => this works)

            std::sort(valueChain.begin(), valueChain.end());

            // validate chain
            if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta,
                                                            errorExpected))
            {
                m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffer error, at value index " << valueNdx
                                   << ", applied operation index " << invalidOperationNdx << ", value was increased by "
                                   << errorDelta << ", but expected " << errorExpected << ".\n"
                                   << "Intermediate buffer contains invalid values. Values at index " << valueNdx
                                   << "\n"
                                   << tcu::TestLog::EndMessage;

                for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
                    m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx
                                       << "] = " << intermediateResults[logCallNdx][valueNdx]
                                       << tcu::TestLog::EndMessage;
                m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;

                m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
                return STOP;
            }
        }

        m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
    }

    m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    return STOP;
}

std::string SSBOConcurrentAtomicCase::genComputeSource(void) const
{
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
        << "{\n"
        << "    highp uint values[" << m_workSize << "];\n"
        << "} sb_ires;\n"
        << "\n"
        << "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
        << "{\n"
        << "    highp uint values[" << m_workSize << "];\n"
        << "} sb_work;\n"
        << "uniform highp uint u_atomicDelta;\n"
        << "\n"
        << "void main ()\n"
        << "{\n"
        << "    highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
        << "    sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
        << "}";

    return specializeShader(m_context, buf.str().c_str());
}

class ConcurrentAtomicCounterCase : public TestCase
{
public:
    ConcurrentAtomicCounterCase(Context &context, const char *name, const char *description, int numCalls,
                                int workSize);
    ~ConcurrentAtomicCounterCase(void);

    void init(void);
    void deinit(void);
    IterateResult iterate(void);

private:
    std::string genComputeSource(bool evenOdd) const;

    const int m_numCalls;
    const int m_workSize;
    glu::ShaderProgram *m_evenProgram;
    glu::ShaderProgram *m_oddProgram;
    uint32_t m_counterBuffer;
    uint32_t m_intermediateResultBuffer;
};

ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase(Context &context, const char *name, const char *description,
                                                         int numCalls, int workSize)
    : TestCase(context, name, description)
    , m_numCalls(numCalls)
    , m_workSize(workSize)
    , m_evenProgram(DE_NULL)
    , m_oddProgram(DE_NULL)
    , m_counterBuffer(DE_NULL)
    , m_intermediateResultBuffer(DE_NULL)
{
}

ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase(void)
{
    deinit();
}

void ConcurrentAtomicCounterCase::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const std::vector<uint32_t> zeroData(m_numCalls * m_workSize, 0);

    // gen buffer

    gl.genBuffers(1, &m_counterBuffer);
    gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
    gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uint32_t), &zeroData[0], GL_DYNAMIC_COPY);

    gl.genBuffers(1, &m_intermediateResultBuffer);
    gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
    gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uint32_t) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);

    GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");

    // gen programs

    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");

        m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(),
                                               glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
        m_testCtx.getLog() << *m_evenProgram;
        if (!m_evenProgram->isOk())
            throw tcu::TestError("could not build program");
    }
    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");

        m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(),
                                              glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
        m_testCtx.getLog() << *m_oddProgram;
        if (!m_oddProgram->isOk())
            throw tcu::TestError("could not build program");
    }
}

void ConcurrentAtomicCounterCase::deinit(void)
{
    if (m_counterBuffer)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
        m_counterBuffer = 0;
    }
    if (m_intermediateResultBuffer)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
        m_intermediateResultBuffer = 0;
    }

    delete m_evenProgram;
    m_evenProgram = DE_NULL;

    delete m_oddProgram;
    m_oddProgram = DE_NULL;
}

TestCase::IterateResult ConcurrentAtomicCounterCase::iterate(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    // invoke program N times, each with a different delta
    {
        const int evenCallNdxLocation = gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
        const int oddCallNdxLocation  = gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");

        m_testCtx.getLog() << tcu::TestLog::Message << "Running shader pair (even & odd) " << m_numCalls << " times.\n"
                           << "Num groups = (" << m_workSize << ", 1, 1)\n"
                           << tcu::TestLog::EndMessage;

        if (evenCallNdxLocation == -1)
            throw tcu::TestError("u_callNdx location was -1");
        if (oddCallNdxLocation == -1)
            throw tcu::TestError("u_callNdx location was -1");

        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
        gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_counterBuffer);

        for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
        {
            gl.useProgram(m_evenProgram->getProgram());
            gl.uniform1ui(evenCallNdxLocation, (uint32_t)callNdx);
            gl.dispatchCompute(m_workSize, 1, 1);

            gl.useProgram(m_oddProgram->getProgram());
            gl.uniform1ui(oddCallNdxLocation, (uint32_t)callNdx);
            gl.dispatchCompute(m_workSize, 1, 1);
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
    }

    // Verify result
    {
        uint32_t result;

        m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls * m_workSize
                           << tcu::TestLog::EndMessage;

        gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
        result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);

        if ((int)result != m_numCalls * m_workSize)
        {
            m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer error, expected value "
                               << (m_numCalls * m_workSize) << ", got " << result << "\n"
                               << tcu::TestLog::EndMessage;

            m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
            return STOP;
        }

        m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
    }

    // verify steps
    {
        std::vector<uint32_t> intermediateResults;

        m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

        // collect results

        gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
        readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);

        // verify values

        std::sort(intermediateResults.begin(), intermediateResults.end());

        for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
        {
            if ((int)intermediateResults[valueNdx] != valueNdx)
            {
                m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffer error, at value index " << valueNdx
                                   << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
                                   << "Intermediate buffer contains invalid values. Intermediate results:\n"
                                   << tcu::TestLog::EndMessage;

                for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
                    m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx
                                       << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;

                m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
                return STOP;
            }
        }

        m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
    }

    m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    return STOP;
}

std::string ConcurrentAtomicCounterCase::genComputeSource(bool evenOdd) const
{
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
        << "{\n"
        << "    highp uint values[" << m_workSize * m_numCalls << "];\n"
        << "} sb_ires;\n"
        << "\n"
        << "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
        << "uniform highp uint u_callNdx;\n"
        << "\n"
        << "void main ()\n"
        << "{\n"
        << "    highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
        << "    if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
        << "        sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
        << "}";

    return specializeShader(m_context, buf.str().c_str());
}

class ConcurrentImageAtomicCase : public TestCase
{
public:
    ConcurrentImageAtomicCase(Context &context, const char *name, const char *description, int numCalls, int workSize);
    ~ConcurrentImageAtomicCase(void);

    void init(void);
    void deinit(void);
    IterateResult iterate(void);

private:
    void readWorkImage(std::vector<uint32_t> &result);

    std::string genComputeSource(void) const;
    std::string genImageReadSource(void) const;
    std::string genImageClearSource(void) const;

    const int m_numCalls;
    const int m_workSize;
    glu::ShaderProgram *m_program;
    glu::ShaderProgram *m_imageReadProgram;
    glu::ShaderProgram *m_imageClearProgram;
    uint32_t m_imageID;
    std::vector<uint32_t> m_intermediateResultBuffers;
};

ConcurrentImageAtomicCase::ConcurrentImageAtomicCase(Context &context, const char *name, const char *description,
                                                     int numCalls, int workSize)
    : TestCase(context, name, description)
    , m_numCalls(numCalls)
    , m_workSize(workSize)
    , m_program(DE_NULL)
    , m_imageReadProgram(DE_NULL)
    , m_imageClearProgram(DE_NULL)
    , m_imageID(DE_NULL)
{
}

ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase(void)
{
    deinit();
}

void ConcurrentImageAtomicCase::init(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    std::vector<uint32_t> zeroData(m_workSize * m_workSize, 0);

    if (!checkSupport(m_context))
        throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");

    // gen image

    gl.genTextures(1, &m_imageID);
    gl.bindTexture(GL_TEXTURE_2D, m_imageID);
    gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
    gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");

    // gen buffers

    for (int ndx = 0; ndx < m_numCalls; ++ndx)
    {
        uint32_t buffer = 0;

        gl.genBuffers(1, &buffer);
        gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
        gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(uint32_t) * m_workSize * m_workSize, &zeroData[0],
                      GL_DYNAMIC_COPY);

        m_intermediateResultBuffers.push_back(buffer);
        GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
    }

    // gen programs

    m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources()
                                                                         << glu::ComputeSource(genComputeSource()));
    m_testCtx.getLog() << *m_program;
    if (!m_program->isOk())
        throw tcu::TestError("could not build program");

    m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(),
                                                glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
    if (!m_imageReadProgram->isOk())
    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");

        m_testCtx.getLog() << *m_imageReadProgram;
        throw tcu::TestError("could not build program");
    }

    m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(),
                                                 glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
    if (!m_imageClearProgram->isOk())
    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");

        m_testCtx.getLog() << *m_imageClearProgram;
        throw tcu::TestError("could not build program");
    }
}

void ConcurrentImageAtomicCase::deinit(void)
{
    if (m_imageID)
    {
        m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
        m_imageID = 0;
    }

    for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
    m_intermediateResultBuffers.clear();

    delete m_program;
    m_program = DE_NULL;

    delete m_imageReadProgram;
    m_imageReadProgram = DE_NULL;

    delete m_imageClearProgram;
    m_imageClearProgram = DE_NULL;
}

TestCase::IterateResult ConcurrentImageAtomicCase::iterate(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    const uint32_t sumValue  = (uint32_t)(m_numCalls * (m_numCalls + 1) / 2);
    std::vector<int> deltas;

    // generate unique deltas
    generateShuffledRamp(m_numCalls, deltas);

    // clear image
    {
        m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;

        gl.useProgram(m_imageClearProgram->getProgram());
        gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
        gl.dispatchCompute(m_workSize, m_workSize, 1);
        gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);

        GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
    }

    // invoke program N times, each with a different delta
    {
        const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");

        m_testCtx.getLog() << tcu::TestLog::Message << "Running shader " << m_numCalls << " times.\n"
                           << "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
                           << "Setting u_atomicDelta to a unique value for each call.\n"
                           << tcu::TestLog::EndMessage;

        if (deltaLocation == -1)
            throw tcu::TestError("u_atomicDelta location was -1");

        gl.useProgram(m_program->getProgram());
        gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);

        for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
        {
            m_testCtx.getLog() << tcu::TestLog::Message << "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
                               << tcu::TestLog::EndMessage;

            gl.uniform1ui(deltaLocation, deltas[callNdx]);
            gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
            gl.dispatchCompute(m_workSize, m_workSize, 1);
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
    }

    // Verify result
    {
        std::vector<uint32_t> result;

        m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value "
                           << sumValue << tcu::TestLog::EndMessage;

        readWorkImage(result);

        for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
        {
            if (result[ndx] != sumValue)
            {
                m_testCtx.getLog() << tcu::TestLog::Message << "Work image error, at index (" << ndx % m_workSize
                                   << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got "
                                   << result[ndx] << "\n"
                                   << "Work image contains invalid values." << tcu::TestLog::EndMessage;

                m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
                return STOP;
            }
        }

        m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
    }

    // verify steps
    {
        std::vector<std::vector<uint32_t>> intermediateResults(m_numCalls);
        std::vector<uint32_t> valueChain(m_numCalls);
        std::vector<uint32_t> chainDelta(m_numCalls);

        m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;

        // collect results

        for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
        {
            gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
            readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
        }

        // verify values

        for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
        {
            int invalidOperationNdx;
            uint32_t errorDelta;
            uint32_t errorExpected;

            // collect result chain for each element
            for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
                valueChain[callNdx] = intermediateResults[callNdx][valueNdx];

            // check there exists a path from 0 to sumValue using each addition once
            // decompose cumulative results to addition operations (all additions positive => this works)

            std::sort(valueChain.begin(), valueChain.end());

            for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
                chainDelta[callNdx] =
                    ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx + 1])) - valueChain[callNdx];

            // chainDelta contains now the actual additions applied to the value
            std::sort(chainDelta.begin(), chainDelta.end());

            // validate chain
            if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta,
                                                            errorExpected))
            {
                m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffer error, at index ("
                                   << valueNdx % m_workSize << ", " << valueNdx / m_workSize
                                   << "), applied operation index " << invalidOperationNdx
                                   << ", value was increased by " << errorDelta << ", but expected " << errorExpected
                                   << ".\n"
                                   << "Intermediate buffer contains invalid values. Values at index ("
                                   << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
                                   << tcu::TestLog::EndMessage;

                for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
                    m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx
                                       << "] = " << intermediateResults[logCallNdx][valueNdx]
                                       << tcu::TestLog::EndMessage;
                m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;

                m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
                return STOP;
            }
        }

        m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
    }

    m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    return STOP;
}

void ConcurrentImageAtomicCase::readWorkImage(std::vector<uint32_t> &result)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();
    glu::Buffer resultBuffer(m_context.getRenderContext());

    // Read image to an ssbo

    {
        const std::vector<uint32_t> zeroData(m_workSize * m_workSize, 0);

        gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
        gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(uint32_t) * m_workSize * m_workSize), &zeroData[0],
                      GL_DYNAMIC_COPY);

        gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
        gl.useProgram(m_imageReadProgram->getProgram());

        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
        gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
        gl.dispatchCompute(m_workSize, m_workSize, 1);

        GLU_EXPECT_NO_ERROR(gl.getError(), "read");
    }

    // Read ssbo
    {
        const void *ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0,
                                            (int)(sizeof(uint32_t) * m_workSize * m_workSize), GL_MAP_READ_BIT);
        GLU_EXPECT_NO_ERROR(gl.getError(), "map");

        if (!ptr)
            throw tcu::TestError("mapBufferRange returned NULL");

        result.resize(m_workSize * m_workSize);
        memcpy(&result[0], ptr, sizeof(uint32_t) * m_workSize * m_workSize);

        if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
            throw tcu::TestError("unmapBuffer returned false");
    }
}

std::string ConcurrentImageAtomicCase::genComputeSource(void) const
{
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << "${SHADER_IMAGE_ATOMIC_REQUIRE}\n"
        << "\n"
        << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
        << "{\n"
        << "    highp uint values[" << m_workSize * m_workSize << "];\n"
        << "} sb_ires;\n"
        << "\n"
        << "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
        << "uniform highp uint u_atomicDelta;\n"
        << "\n"
        << "void main ()\n"
        << "{\n"
        << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize
        << ");\n"
        << "    sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), "
           "u_atomicDelta);\n"
        << "}";

    return specializeShader(m_context, buf.str().c_str());
}

std::string ConcurrentImageAtomicCase::genImageReadSource(void) const
{
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << "\n"
        << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout (binding = 1, std430) writeonly buffer ImageValues\n"
        << "{\n"
        << "    highp uint values[" << m_workSize * m_workSize << "];\n"
        << "} sb_res;\n"
        << "\n"
        << "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
        << "\n"
        << "void main ()\n"
        << "{\n"
        << "    highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize
        << ");\n"
        << "    sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
        << "}";

    return specializeShader(m_context, buf.str().c_str());
}

std::string ConcurrentImageAtomicCase::genImageClearSource(void) const
{
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << "\n"
        << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
        << "\n"
        << "void main ()\n"
        << "{\n"
        << "    imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
        << "}";

    return specializeShader(m_context, buf.str().c_str());
}

class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
{
public:
    ConcurrentSSBOAtomicCounterMixedCase(Context &context, const char *name, const char *description, int numCalls,
                                         int workSize);
    ~ConcurrentSSBOAtomicCounterMixedCase(void);

    void init(void);
    void deinit(void);
    IterateResult iterate(void);

private:
    std::string genSSBOComputeSource(void) const;
    std::string genAtomicCounterComputeSource(void) const;

    const int m_numCalls;
    const int m_workSize;
    uint32_t m_bufferID;
    glu::ShaderProgram *m_ssboAtomicProgram;
    glu::ShaderProgram *m_atomicCounterProgram;
};

ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase(Context &context, const char *name,
                                                                           const char *description, int numCalls,
                                                                           int workSize)
    : TestCase(context, name, description)
    , m_numCalls(numCalls)
    , m_workSize(workSize)
    , m_bufferID(DE_NULL)
    , m_ssboAtomicProgram(DE_NULL)
    , m_atomicCounterProgram(DE_NULL)
{
    // SSBO atomic XORs cancel out
    DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
}

ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase(void)
{
    deinit();
}

void ConcurrentSSBOAtomicCounterMixedCase::init(void)
{
    const glw::Functions &gl  = m_context.getRenderContext().getFunctions();
    const uint32_t zeroBuf[2] = {0, 0};

    // gen buffer

    gl.genBuffers(1, &m_bufferID);
    gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
    gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(uint32_t) * 2), zeroBuf, GL_DYNAMIC_COPY);

    GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");

    // gen programs

    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");

        m_ssboAtomicProgram = new glu::ShaderProgram(
            m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
        m_testCtx.getLog() << *m_ssboAtomicProgram;
        if (!m_ssboAtomicProgram->isOk())
            throw tcu::TestError("could not build program");
    }
    {
        const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");

        m_atomicCounterProgram = new glu::ShaderProgram(
            m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
        m_testCtx.getLog() << *m_atomicCounterProgram;
        if (!m_atomicCounterProgram->isOk())
            throw tcu::TestError("could not build program");
    }
}

void ConcurrentSSBOAtomicCounterMixedCase::deinit(void)
{
    if (m_bufferID)
    {
        m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
        m_bufferID = 0;
    }

    delete m_ssboAtomicProgram;
    m_ssboAtomicProgram = DE_NULL;

    delete m_atomicCounterProgram;
    m_atomicCounterProgram = DE_NULL;
}

TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate(void)
{
    const glw::Functions &gl = m_context.getRenderContext().getFunctions();

    m_testCtx.getLog() << tcu::TestLog::Message
                       << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer."
                       << tcu::TestLog::EndMessage;

    // invoke programs N times
    {
        m_testCtx.getLog() << tcu::TestLog::Message << "Running SSBO atomic program and atomic counter program "
                           << m_numCalls << " times. (interleaved)\n"
                           << "Num groups = (" << m_workSize << ", 1, 1)\n"
                           << tcu::TestLog::EndMessage;

        gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
        gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, m_bufferID);

        for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
        {
            gl.useProgram(m_atomicCounterProgram->getProgram());
            gl.dispatchCompute(m_workSize, 1, 1);

            gl.useProgram(m_ssboAtomicProgram->getProgram());
            gl.dispatchCompute(m_workSize, 1, 1);
        }

        GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
    }

    // Verify result
    {
        uint32_t result;

        // XORs cancel out, only addition is left
        m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls * m_workSize
                           << tcu::TestLog::EndMessage;

        gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
        result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);

        if ((int)result != m_numCalls * m_workSize)
        {
            m_testCtx.getLog() << tcu::TestLog::Message << "Buffer value error, expected value "
                               << (m_numCalls * m_workSize) << ", got " << result << "\n"
                               << tcu::TestLog::EndMessage;

            m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
            return STOP;
        }

        m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
    }

    m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    return STOP;
}

std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource(void) const
{
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
        << "{\n"
        << "    highp uint targetValue;\n"
        << "    highp uint unused;\n"
        << "} sb_work;\n"
        << "\n"
        << "void main ()\n"
        << "{\n"
        << "    // flip high bits\n"
        << "    highp uint mask = uint(1) << (24u + (gl_GlobalInvocationID.x % 8u));\n"
        << "    sb_work.unused = atomicXor(sb_work.targetValue, mask);\n"
        << "}";

    return specializeShader(m_context, buf.str().c_str());
}

std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource(void) const
{
    std::ostringstream buf;

    buf << "${GLSL_VERSION_DECL}\n"
        << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
        << "\n"
        << "layout (binding = 0, offset = 0) uniform atomic_uint u_counter;\n"
        << "\n"
        << "void main ()\n"
        << "{\n"
        << "    atomicCounterIncrement(u_counter);\n"
        << "}";

    return specializeShader(m_context, buf.str().c_str());
}

} // namespace

SynchronizationTests::SynchronizationTests(Context &context)
    : TestCaseGroup(context, "synchronization", "Synchronization tests")
{
}

SynchronizationTests::~SynchronizationTests(void)
{
}

void SynchronizationTests::init(void)
{
    tcu::TestCaseGroup *const inInvocationGroup =
        new tcu::TestCaseGroup(m_testCtx, "in_invocation", "Test intra-invocation synchronization");
    tcu::TestCaseGroup *const interInvocationGroup =
        new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
    tcu::TestCaseGroup *const interCallGroup =
        new tcu::TestCaseGroup(m_testCtx, "inter_call", "Test inter-call synchronization");

    addChild(inInvocationGroup);
    addChild(interInvocationGroup);
    addChild(interCallGroup);

    // .in_invocation & .inter_invocation
    {
        static const struct CaseConfig
        {
            const char *namePrefix;
            const InterInvocationTestCase::StorageType storage;
            const int flags;
        } configs[] = {
            {"image", InterInvocationTestCase::STORAGE_IMAGE, 0},
            {"image_atomic", InterInvocationTestCase::STORAGE_IMAGE, InterInvocationTestCase::FLAG_ATOMIC},
            {"ssbo", InterInvocationTestCase::STORAGE_BUFFER, 0},
            {"ssbo_atomic", InterInvocationTestCase::STORAGE_BUFFER, InterInvocationTestCase::FLAG_ATOMIC},
        };

        for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
        {
            tcu::TestCaseGroup *const targetGroup = (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
            const int extraFlags                  = (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);

            for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
            {
                const char *const target =
                    (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");

                targetGroup->addChild(new InvocationWriteReadCase(
                    m_context, (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
                    (std::string("Write to ") + target + " and read it").c_str(), configs[configNdx].storage,
                    configs[configNdx].flags | extraFlags));

                targetGroup->addChild(new InvocationReadWriteCase(
                    m_context, (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
                    (std::string("Read form ") + target + " and then write to it").c_str(), configs[configNdx].storage,
                    configs[configNdx].flags | extraFlags));

                targetGroup->addChild(new InvocationOverWriteCase(
                    m_context, (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
                    (std::string("Write to ") + target + " twice and read it").c_str(), configs[configNdx].storage,
                    configs[configNdx].flags | extraFlags));

                targetGroup->addChild(new InvocationAliasWriteCase(
                    m_context, (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
                    (std::string("Write to aliasing ") + target + " and read it").c_str(),
                    InvocationAliasWriteCase::TYPE_WRITE, configs[configNdx].storage,
                    configs[configNdx].flags | extraFlags));

                targetGroup->addChild(new InvocationAliasWriteCase(
                    m_context, (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
                    (std::string("Write to aliasing ") + target + "s and read it").c_str(),
                    InvocationAliasWriteCase::TYPE_OVERWRITE, configs[configNdx].storage,
                    configs[configNdx].flags | extraFlags));
            }
        }
    }

    // .inter_call
    {
        tcu::TestCaseGroup *const withBarrierGroup =
            new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
        tcu::TestCaseGroup *const withoutBarrierGroup =
            new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");

        interCallGroup->addChild(withBarrierGroup);
        interCallGroup->addChild(withoutBarrierGroup);

        // .with_memory_barrier
        {
            static const struct CaseConfig
            {
                const char *namePrefix;
                const InterCallTestCase::StorageType storage;
                const int flags;
            } configs[] = {
                {"image", InterCallTestCase::STORAGE_IMAGE, 0},
                {"image_atomic", InterCallTestCase::STORAGE_IMAGE,
                 InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT},
                {"ssbo", InterCallTestCase::STORAGE_BUFFER, 0},
                {"ssbo_atomic", InterCallTestCase::STORAGE_BUFFER,
                 InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT},
            };

            const int seed0 = 123;
            const int seed1 = 457;

            for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
            {
                const char *const target =
                    (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");

                withBarrierGroup->addChild(new InterCallTestCase(
                    m_context, (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
                    (std::string("Write to ") + target + " and read it").c_str(), configs[configNdx].storage,
                    configs[configNdx].flags,
                    InterCallOperations()
                        << op::WriteData::Generate(1, seed0) << op::Barrier() << op::ReadData::Generate(1, seed0)));

                withBarrierGroup->addChild(new InterCallTestCase(
                    m_context, (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
                    (std::string("Read from ") + target + " and then write to it").c_str(), configs[configNdx].storage,
                    configs[configNdx].flags,
                    InterCallOperations()
                        << op::ReadZeroData::Generate(1) << op::Barrier() << op::WriteData::Generate(1, seed0)));

                withBarrierGroup->addChild(new InterCallTestCase(
                    m_context, (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
                    (std::string("Write to ") + target + " twice and read it").c_str(), configs[configNdx].storage,
                    configs[configNdx].flags,
                    InterCallOperations()
                        << op::WriteData::Generate(1, seed0) << op::Barrier() << op::WriteData::Generate(1, seed1)
                        << op::Barrier() << op::ReadData::Generate(1, seed1)));

                withBarrierGroup->addChild(new InterCallTestCase(
                    m_context, (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
                    (std::string("Write to multiple ") + target + "s and read them").c_str(),
                    configs[configNdx].storage, configs[configNdx].flags,
                    InterCallOperations() << op::WriteData::Generate(1, seed0) << op::WriteData::Generate(2, seed1)
                                          << op::Barrier() << op::ReadMultipleData::Generate(1, seed0, 2, seed1)));

                withBarrierGroup->addChild(new InterCallTestCase(
                    m_context,
                    (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
                    (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
                    configs[configNdx].storage, configs[configNdx].flags,
                    InterCallOperations() << op::WriteDataInterleaved::Generate(1, seed0, true)
                                          << op::WriteDataInterleaved::Generate(1, seed1, false) << op::Barrier()
                                          << op::ReadDataInterleaved::Generate(1, seed0, seed1)));

                withBarrierGroup->addChild(new InterCallTestCase(
                    m_context,
                    (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
                    (std::string("Two unrelated ") + target + " write-reads").c_str(), configs[configNdx].storage,
                    configs[configNdx].flags,
                    InterCallOperations()
                        << op::WriteData::Generate(1, seed0) << op::WriteData::Generate(2, seed1) << op::Barrier()
                        << op::ReadData::Generate(1, seed0) << op::ReadData::Generate(2, seed1)));

                withBarrierGroup->addChild(new InterCallTestCase(
                    m_context,
                    (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
                    (std::string("Two unrelated ") + target + " write-reads").c_str(), configs[configNdx].storage,
                    configs[configNdx].flags,
                    InterCallOperations()
                        << op::WriteData::Generate(1, seed0) << op::WriteData::Generate(2, seed1) << op::Barrier()
                        << op::ReadData::Generate(2, seed1) << op::ReadData::Generate(1, seed0)));
            }

            // .without_memory_barrier
            {
                struct InvocationConfig
                {
                    const char *name;
                    int count;
                };

                static const InvocationConfig ssboInvocations[] = {
                    {"1k", 1024},
                    {"4k", 4096},
                    {"32k", 32768},
                };
                static const InvocationConfig imageInvocations[] = {
                    {"8x8", 8},
                    {"32x32", 32},
                    {"128x128", 128},
                };
                static const InvocationConfig counterInvocations[] = {
                    {"32", 32},
                    {"128", 128},
                    {"1k", 1024},
                };
                static const int callCounts[] = {2, 5, 100};

                for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
                    for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
                        withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(
                            m_context,
                            (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" +
                             ssboInvocations[invocationNdx].name + "_invocations")
                                .c_str(),
                            "", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));

                for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
                    for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
                        withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(
                            m_context,
                            (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) +
                             "_calls_" + imageInvocations[invocationNdx].name + "_invocations")
                                .c_str(),
                            "", callCounts[callCountNdx], imageInvocations[invocationNdx].count));

                for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
                    for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
                        withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(
                            m_context,
                            (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) +
                             "_calls_" + counterInvocations[invocationNdx].name + "_invocations")
                                .c_str(),
                            "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));

                for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
                    for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
                        withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(
                            m_context,
                            (std::string("ssbo_atomic_counter_mixed_dispatch_") +
                             de::toString(callCounts[callCountNdx]) + "_calls_" +
                             counterInvocations[invocationNdx].name + "_invocations")
                                .c_str(),
                            "", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
            }
        }
    }
}

} // namespace Functional
} // namespace gles31
} // namespace deqp
