/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/pdf/SkPDFTag.h"

#include "include/core/SkPoint.h"
#include "include/core/SkScalar.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkTo.h"
#include "src/pdf/SkPDFDocumentPriv.h"

#include <algorithm>
#include <memory>
#include <utility>

using namespace skia_private;

// The struct parent tree consists of one entry per page, followed by
// entries for individual struct tree nodes corresponding to
// annotations.  Each entry is a key/value pair with an integer key
// and an indirect reference key.
//
// The page entries get consecutive keys starting at 0. Since we don't
// know the total number of pages in the document at the time we start
// processing annotations, start the key for annotations with a large
// number, which effectively becomes the maximum number of pages in a
// PDF we can handle.
const int kFirstAnnotationStructParentKey = 100000;

namespace {
struct Location {
    SkPoint fPoint{SK_ScalarNaN, SK_ScalarNaN};
    unsigned fPageIndex{0};

    void accumulate(Location const& child) {
        if (!child.fPoint.isFinite()) {
            return;
        }
        if (!fPoint.isFinite()) {
            *this = child;
            return;
        }
        if (child.fPageIndex < fPageIndex) {
            *this = child;
            return;
        }
        if (child.fPageIndex == fPageIndex) {
            fPoint.fX = std::min(child.fPoint.fX, fPoint.fX);
            fPoint.fY = std::max(child.fPoint.fY, fPoint.fY); // PDF y-up
            return;
        }
    }
};
} // namespace

struct SkPDFTagNode {
    // Structure element nodes need a unique alphanumeric ID,
    // and we need to be able to output them sorted in lexicographic
    // order. This helper function takes one of our node IDs and
    // builds an ID string that zero-pads the digits so that lexicographic
    // order matches numeric order.
    static SkString nodeIdToString(int nodeId) {
        SkString idString;
        idString.printf("node%08d", nodeId);
        return idString;
    }

    SkPDFTagNode* fChildren = nullptr;
    size_t fChildCount = 0;
    struct MarkedContentInfo {
        Location fLocation;
        int fMarkId;
    };
    TArray<MarkedContentInfo> fMarkedContent;
    int fNodeId;
    bool fWantTitle;
    SkString fTypeString;
    SkString fTitle;
    SkString fAlt;
    SkString fLang;
    SkPDFIndirectReference fRef;
    enum State {
        kUnknown,
        kYes,
        kNo,
    } fCanDiscard = kUnknown;
    std::unique_ptr<SkPDFArray> fAttributes;
    struct AnnotationInfo {
        unsigned fPageIndex;
        SkPDFIndirectReference fAnnotationRef;
    };
    std::vector<AnnotationInfo> fAnnotations;
};

SkPDF::AttributeList::AttributeList() = default;

SkPDF::AttributeList::~AttributeList() = default;

void SkPDF::AttributeList::appendInt(
        const char* owner, const char* name, int value) {
    if (!fAttrs)
        fAttrs = SkPDFMakeArray();
    std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
    attrDict->insertName("O", owner);
    attrDict->insertInt(name, value);
    fAttrs->appendObject(std::move(attrDict));
}

void SkPDF::AttributeList::appendFloat(
        const char* owner, const char* name, float value) {
    if (!fAttrs)
        fAttrs = SkPDFMakeArray();
    std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
    attrDict->insertName("O", owner);
    attrDict->insertScalar(name, value);
    fAttrs->appendObject(std::move(attrDict));
}

void SkPDF::AttributeList::appendName(
        const char* owner, const char* name, const char* value) {
    if (!fAttrs)
        fAttrs = SkPDFMakeArray();
    std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
    attrDict->insertName("O", owner);
    attrDict->insertName(name, value);
    fAttrs->appendObject(std::move(attrDict));
}

void SkPDF::AttributeList::appendFloatArray(
        const char* owner, const char* name, const std::vector<float>& value) {
    if (!fAttrs)
        fAttrs = SkPDFMakeArray();
    std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
    attrDict->insertName("O", owner);
    std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
    for (float element : value) {
        pdfArray->appendScalar(element);
    }
    attrDict->insertObject(name, std::move(pdfArray));
    fAttrs->appendObject(std::move(attrDict));
}

void SkPDF::AttributeList::appendNodeIdArray(
        const char* owner,
        const char* name,
        const std::vector<int>& nodeIds) {
    if (!fAttrs)
        fAttrs = SkPDFMakeArray();
    std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
    attrDict->insertName("O", owner);
    std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
    for (int nodeId : nodeIds) {
        SkString idString = SkPDFTagNode::nodeIdToString(nodeId);
        pdfArray->appendByteString(idString);
    }
    attrDict->insertObject(name, std::move(pdfArray));
    fAttrs->appendObject(std::move(attrDict));
}

SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}

SkPDFTagTree::~SkPDFTagTree() = default;

// static
void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
                        SkPDFTagNode* dst,
                        SkArenaAlloc* arena,
                        THashMap<int, SkPDFTagNode*>* nodeMap,
                        bool wantTitle) {
    nodeMap->set(node.fNodeId, dst);
    for (int nodeId : node.fAdditionalNodeIds) {
        SkASSERT(!nodeMap->find(nodeId));
        nodeMap->set(nodeId, dst);
    }
    dst->fNodeId = node.fNodeId;

    // Accumulate title text, need to be in sync with create_outline_from_headers
    const char* type = node.fTypeString.c_str();
    wantTitle |= fOutline == SkPDF::Metadata::Outline::StructureElementHeaders &&
                 type[0] == 'H' && '1' <= type[1] && type[1] <= '6';
    dst->fWantTitle = wantTitle;

    dst->fTypeString = node.fTypeString;
    dst->fAlt = node.fAlt;
    dst->fLang = node.fLang;

    size_t childCount = node.fChildVector.size();
    SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
    dst->fChildCount = childCount;
    dst->fChildren = children;
    for (size_t i = 0; i < childCount; ++i) {
        Copy(*node.fChildVector[i], &children[i], arena, nodeMap, wantTitle);
    }

    dst->fAttributes = std::move(node.fAttributes.fAttrs);
}

void SkPDFTagTree::init(SkPDF::StructureElementNode* node, SkPDF::Metadata::Outline outline) {
    if (node) {
        fRoot = fArena.make<SkPDFTagNode>();
        fOutline = outline;
        Copy(*node, fRoot, &fArena, &fNodeMap, false);
    }
}

int SkPDFTagTree::Mark::id() {
    return fNode ? fNode->fMarkedContent[fMarkIndex].fMarkId : -1;
}

SkPoint& SkPDFTagTree::Mark::point() {
    return fNode->fMarkedContent[fMarkIndex].fLocation.fPoint;
}

auto SkPDFTagTree::createMarkIdForNodeId(int nodeId, unsigned pageIndex, SkPoint point) -> Mark {
    if (!fRoot) {
        return Mark();
    }
    SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
    if (!tagPtr) {
        return Mark();
    }
    SkPDFTagNode* tag = *tagPtr;
    SkASSERT(tag);
    while (SkToUInt(fMarksPerPage.size()) < pageIndex + 1) {
        fMarksPerPage.push_back();
    }
    TArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[pageIndex];
    int markId = pageMarks.size();
    tag->fMarkedContent.push_back({{point, pageIndex}, markId});
    pageMarks.push_back(tag);
    return Mark(tag, tag->fMarkedContent.size() - 1);
}

int SkPDFTagTree::createStructParentKeyForNodeId(int nodeId, unsigned pageIndex) {
    if (!fRoot) {
        return -1;
    }
    SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
    if (!tagPtr) {
        return -1;
    }
    SkPDFTagNode* tag = *tagPtr;
    SkASSERT(tag);

    tag->fCanDiscard = SkPDFTagNode::kNo;

    int nextStructParentKey = kFirstAnnotationStructParentKey +
        static_cast<int>(fParentTreeAnnotationNodeIds.size());
    fParentTreeAnnotationNodeIds.push_back(nodeId);
    return nextStructParentKey;
}

static bool can_discard(SkPDFTagNode* node) {
    if (node->fCanDiscard == SkPDFTagNode::kYes) {
        return true;
    }
    if (node->fCanDiscard == SkPDFTagNode::kNo) {
        return false;
    }
    if (!node->fMarkedContent.empty()) {
        node->fCanDiscard = SkPDFTagNode::kNo;
        return false;
    }
    for (size_t i = 0; i < node->fChildCount; ++i) {
        if (!can_discard(&node->fChildren[i])) {
            node->fCanDiscard = SkPDFTagNode::kNo;
            return false;
        }
    }
    node->fCanDiscard = SkPDFTagNode::kYes;
    return true;
}

SkPDFIndirectReference SkPDFTagTree::PrepareTagTreeToEmit(SkPDFIndirectReference parent,
                                                          SkPDFTagNode* node,
                                                          SkPDFDocument* doc) {
    SkPDFIndirectReference ref = doc->reserveRef();
    std::unique_ptr<SkPDFArray> kids = SkPDFMakeArray();
    SkPDFTagNode* children = node->fChildren;
    size_t childCount = node->fChildCount;
    for (size_t i = 0; i < childCount; ++i) {
        SkPDFTagNode* child = &children[i];
        if (!(can_discard(child))) {
            kids->appendRef(PrepareTagTreeToEmit(ref, child, doc));
        }
    }
    for (const SkPDFTagNode::MarkedContentInfo& info : node->fMarkedContent) {
        std::unique_ptr<SkPDFDict> mcr = SkPDFMakeDict("MCR");
        mcr->insertRef("Pg", doc->getPage(info.fLocation.fPageIndex));
        mcr->insertInt("MCID", info.fMarkId);
        kids->appendObject(std::move(mcr));
    }
    for (const SkPDFTagNode::AnnotationInfo& annotationInfo : node->fAnnotations) {
        std::unique_ptr<SkPDFDict> annotationDict = SkPDFMakeDict("OBJR");
        annotationDict->insertRef("Obj", annotationInfo.fAnnotationRef);
        annotationDict->insertRef("Pg", doc->getPage(annotationInfo.fPageIndex));
        kids->appendObject(std::move(annotationDict));
    }
    node->fRef = ref;
    SkPDFDict dict("StructElem");
    dict.insertName("S", node->fTypeString.isEmpty() ? "NonStruct" : node->fTypeString.c_str());
    if (!node->fAlt.isEmpty()) {
        dict.insertTextString("Alt", node->fAlt);
    }
    if (!node->fLang.isEmpty()) {
        dict.insertTextString("Lang", node->fLang);
    }
    dict.insertRef("P", parent);
    dict.insertObject("K", std::move(kids));
    if (node->fAttributes) {
        dict.insertObject("A", std::move(node->fAttributes));
    }

    // Each node has a unique ID that also needs to be referenced
    // in a separate IDTree node, along with the lowest and highest
    // unique ID string.
    SkString idString = SkPDFTagNode::nodeIdToString(node->fNodeId);
    dict.insertByteString("ID", idString.c_str());
    IDTreeEntry idTreeEntry = {node->fNodeId, ref};
    fIdTreeEntries.push_back(idTreeEntry);

    return doc->emit(dict, ref);
}

void SkPDFTagTree::addNodeAnnotation(int nodeId, SkPDFIndirectReference annotationRef, unsigned pageIndex) {
    if (!fRoot) {
        return;
    }
    SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
    if (!tagPtr) {
        return;
    }
    SkPDFTagNode* tag = *tagPtr;
    SkASSERT(tag);

    SkPDFTagNode::AnnotationInfo annotationInfo = {pageIndex, annotationRef};
    tag->fAnnotations.push_back(annotationInfo);
}

void SkPDFTagTree::addNodeTitle(int nodeId, SkSpan<const char> title) {
    if (!fRoot) {
        return;
    }
    SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
    if (!tagPtr) {
        return;
    }
    SkPDFTagNode* tag = *tagPtr;
    SkASSERT(tag);

    if (tag->fWantTitle) {
        tag->fTitle.append(title.data(), title.size());
        // Arbitrary cutoff for size.
        if (tag->fTitle.size() > 1023) {
            tag->fWantTitle = false;
        }
    }
}

SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
    if (!fRoot || can_discard(fRoot)) {
        return SkPDFIndirectReference();
    }

    SkPDFIndirectReference ref = doc->reserveRef();

    unsigned pageCount = SkToUInt(doc->pageCount());

    // Build the StructTreeRoot.
    SkPDFDict structTreeRoot("StructTreeRoot");
    structTreeRoot.insertRef("K", PrepareTagTreeToEmit(ref, fRoot, doc));
    structTreeRoot.insertInt("ParentTreeNextKey", SkToInt(pageCount));

    // Build the parent tree, which consists of two things:
    // (1) For each page, a mapping from the marked content IDs on
    // each page to their corresponding tags
    // (2) For each annotation, an indirect reference to that
    // annotation's struct tree element.
    SkPDFDict parentTree("ParentTree");
    auto parentTreeNums = SkPDFMakeArray();

    // First, one entry per page.
    SkASSERT(SkToUInt(fMarksPerPage.size()) <= pageCount);
    for (int j = 0; j < fMarksPerPage.size(); ++j) {
        const TArray<SkPDFTagNode*>& pageMarks = fMarksPerPage[j];
        SkPDFArray markToTagArray;
        for (SkPDFTagNode* mark : pageMarks) {
            SkASSERT(mark->fRef);
            markToTagArray.appendRef(mark->fRef);
        }
        parentTreeNums->appendInt(j);
        parentTreeNums->appendRef(doc->emit(markToTagArray));
    }

    // Then, one entry per annotation.
    for (size_t j = 0; j < fParentTreeAnnotationNodeIds.size(); ++j) {
        int nodeId = fParentTreeAnnotationNodeIds[j];
        int structParentKey = kFirstAnnotationStructParentKey + static_cast<int>(j);

        SkPDFTagNode** tagPtr = fNodeMap.find(nodeId);
        if (!tagPtr) {
            continue;
        }
        SkPDFTagNode* tag = *tagPtr;
        parentTreeNums->appendInt(structParentKey);
        parentTreeNums->appendRef(tag->fRef);
    }

    parentTree.insertObject("Nums", std::move(parentTreeNums));
    structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));

    // Build the IDTree, a mapping from every unique ID string to
    // a reference to its corresponding structure element node.
    if (!fIdTreeEntries.empty()) {
        std::sort(fIdTreeEntries.begin(), fIdTreeEntries.end(),
                  [](const IDTreeEntry& a, const IDTreeEntry& b) {
                    return a.nodeId < b.nodeId;
                  });

        SkPDFDict idTree;
        SkPDFDict idTreeLeaf;
        auto limits = SkPDFMakeArray();
        SkString lowestNodeIdString = SkPDFTagNode::nodeIdToString(
            fIdTreeEntries.begin()->nodeId);
        limits->appendByteString(lowestNodeIdString);
        SkString highestNodeIdString = SkPDFTagNode::nodeIdToString(
            fIdTreeEntries.rbegin()->nodeId);
        limits->appendByteString(highestNodeIdString);
        idTreeLeaf.insertObject("Limits", std::move(limits));
        auto names = SkPDFMakeArray();
        for (const IDTreeEntry& entry : fIdTreeEntries) {
          SkString idString = SkPDFTagNode::nodeIdToString(entry.nodeId);
            names->appendByteString(idString);
            names->appendRef(entry.ref);
        }
        idTreeLeaf.insertObject("Names", std::move(names));
        auto idTreeKids = SkPDFMakeArray();
        idTreeKids->appendRef(doc->emit(idTreeLeaf));
        idTree.insertObject("Kids", std::move(idTreeKids));
        structTreeRoot.insertRef("IDTree", doc->emit(idTree));
    }

    return doc->emit(structTreeRoot, ref);
}

namespace {
struct OutlineEntry {
    struct Content {
        SkString fText;
        Location fLocation;
        void accumulate(Content const& child) {
            fText += child.fText;
            fLocation.accumulate(child.fLocation);
        }
    };

    Content fContent;
    int fHeaderLevel;
    SkPDFIndirectReference fRef;
    SkPDFIndirectReference fStructureRef;
    std::vector<OutlineEntry> fChildren = {};
    size_t fDescendentsEmitted = 0;

    void emitDescendents(SkPDFDocument* const doc) {
        fDescendentsEmitted = fChildren.size();
        for (size_t i = 0; i < fChildren.size(); ++i) {
            auto&& child = fChildren[i];
            child.emitDescendents(doc);
            fDescendentsEmitted += child.fDescendentsEmitted;

            SkPDFDict entry;
            entry.insertTextString("Title", child.fContent.fText);

            auto destination = SkPDFMakeArray();
            destination->appendRef(doc->getPage(child.fContent.fLocation.fPageIndex));
            destination->appendName("XYZ");
            destination->appendScalar(child.fContent.fLocation.fPoint.x());
            destination->appendScalar(child.fContent.fLocation.fPoint.y());
            destination->appendInt(0);
            entry.insertObject("Dest", std::move(destination));

            entry.insertRef("Parent", child.fRef);
            if (child.fStructureRef) {
                entry.insertRef("SE", child.fStructureRef);
            }
            if (0 < i) {
                entry.insertRef("Prev", fChildren[i-1].fRef);
            }
            if (i < fChildren.size()-1) {
                entry.insertRef("Next", fChildren[i+1].fRef);
            }
            if (!child.fChildren.empty()) {
                entry.insertRef("First", child.fChildren.front().fRef);
                entry.insertRef("Last", child.fChildren.back().fRef);
                entry.insertInt("Count", child.fDescendentsEmitted);
            }
            doc->emit(entry, child.fRef);
        }
    }
};

OutlineEntry::Content create_outline_entry_content(SkPDFTagNode* const node) {
    SkString text;
    if (!node->fTitle.isEmpty()) {
        text = node->fTitle;
    } else if (!node->fAlt.isEmpty()) {
        text = node->fAlt;
    }

    // The uppermost/leftmost point on the earliest page of this node's marks.
    Location markPoint;
    for (auto&& mark : node->fMarkedContent) {
        markPoint.accumulate(mark.fLocation);
    }

    OutlineEntry::Content content{std::move(text), std::move(markPoint)};

    // Accumulate children
    SkSpan<SkPDFTagNode> children(node->fChildren, node->fChildCount);
    for (auto&& child : children) {
        if (can_discard(&child)) {
            continue;
        }
        content.accumulate(create_outline_entry_content(&child));
    }
    return content;
}
void create_outline_from_headers(SkPDFDocument* const doc, SkPDFTagNode* const node,
                                 STArray<7, OutlineEntry*>& stack) {
    char const *type = node->fTypeString.c_str();
    if (type[0] == 'H' && '1' <= type[1] && type[1] <= '6') {
        int level = type[1] - '0';
        while (level <= stack.back()->fHeaderLevel) {
            stack.pop_back();
        }
        OutlineEntry::Content content = create_outline_entry_content(node);
        if (!content.fText.isEmpty()) {
            OutlineEntry e{std::move(content), level, doc->reserveRef(), node->fRef};
            stack.push_back(&stack.back()->fChildren.emplace_back(std::move(e)));
            return;
        }
    }

    SkSpan<SkPDFTagNode> children(node->fChildren, node->fChildCount);
    for (auto&& child : children) {
        if (can_discard(&child)) {
            continue;
        }
        create_outline_from_headers(doc, &child, stack);
    }
}

} // namespace

SkPDFIndirectReference SkPDFTagTree::makeOutline(SkPDFDocument* doc) {
    if (!fRoot || can_discard(fRoot) ||
        fOutline != SkPDF::Metadata::Outline::StructureElementHeaders)
    {
        return SkPDFIndirectReference();
    }

    STArray<7, OutlineEntry*> stack;
    OutlineEntry top{{SkString(), Location()}, 0, {}, {}};
    stack.push_back(&top);
    create_outline_from_headers(doc, fRoot, stack);
    if (top.fChildren.empty()) {
        return SkPDFIndirectReference();
    }
    top.emitDescendents(doc);
    SkPDFIndirectReference outlineRef = doc->reserveRef();
    SkPDFDict outline("Outlines");
    outline.insertRef("First", top.fChildren.front().fRef);
    outline.insertRef("Last", top.fChildren.back().fRef);
    outline.insertInt("Count", top.fDescendentsEmitted);

    return doc->emit(outline, outlineRef);
}

SkString SkPDFTagTree::getRootLanguage() {
    return fRoot ? fRoot->fLang : SkString();
}
