// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
#define COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_

#include <stddef.h>

#include <deque>
#include <limits>
#include <vector>

#include "components/zucchini/image_index.h"
#include "components/zucchini/image_utils.h"
#include "components/zucchini/targets_affinity.h"

namespace zucchini {

constexpr double kMismatchFatal = -std::numeric_limits<double>::infinity();

class EncodedView;
class EquivalenceSource;

// Returns similarity score between a token (raw byte or first byte of a
// reference) in |old_image_index| at |src| and a token in |new_image_index|
// at |dst|. |targets_affinities| describes affinities for each target pool and
// is used to evaluate similarity between references, hence it's size must be
// equal to the number of pools in both |old_image_index| and |new_image_index|.
// Both |src| and |dst| must refer to tokens in |old_image_index| and
// |new_image_index|.
double GetTokenSimilarity(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    offset_t src,
    offset_t dst);

// Returns a similarity score between content in |old_image_index| and
// |new_image_index| at regions described by |equivalence|, using
// |targets_affinities| to evaluate similarity between references.
double GetEquivalenceSimilarity(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    const Equivalence& equivalence);

// Extends |equivalence| forward and returns the result. This is related to
// VisitEquivalenceSeed().
EquivalenceCandidate ExtendEquivalenceForward(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    const EquivalenceCandidate& equivalence,
    double min_similarity);

// Extends |equivalence| backward and returns the result. This is related to
// VisitEquivalenceSeed().
EquivalenceCandidate ExtendEquivalenceBackward(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    const EquivalenceCandidate& equivalence,
    double min_similarity);

// Creates an equivalence, starting with |src| and |dst| as offset hint, and
// extends it both forward and backward, trying to maximise similarity between
// |old_image_index| and |new_image_index|, and returns the result.
// |targets_affinities| is used to evaluate similarity between references.
// |min_similarity| describes the minimum acceptable similarity score and is
// used as threshold to discard bad equivalences.
EquivalenceCandidate VisitEquivalenceSeed(
    const ImageIndex& old_image_index,
    const ImageIndex& new_image_index,
    const std::vector<TargetsAffinity>& targets_affinities,
    offset_t src,
    offset_t dst,
    double min_similarity);

// Container of pruned equivalences used to map offsets from |old_image| to
// offsets in |new_image|. Equivalences are pruned by cropping smaller
// equivalences to avoid overlaps, to make the equivalence map (for covered
// bytes in |old_image| and |new_image|) one-to-one.
class OffsetMapper {
 public:
  using const_iterator = std::deque<Equivalence>::const_iterator;

  // Constructors for various data sources. "Old" and "new" image sizes are
  // needed for bounds checks and to handle dangling targets.
  // - From a list of |equivalences|, already sorted (by |src_offset|) and
  //   pruned, useful for tests.
  OffsetMapper(std::deque<Equivalence>&& equivalences,
               offset_t old_image_size,
               offset_t new_image_size);
  // - From a generator, useful for Zucchini-apply.
  OffsetMapper(EquivalenceSource&& equivalence_source,
               offset_t old_image_size,
               offset_t new_image_size);
  // - From an EquivalenceMap that needs to be processed, useful for
  //   Zucchini-gen.
  OffsetMapper(const EquivalenceMap& equivalence_map,
               offset_t old_image_size,
               offset_t new_image_size);
  ~OffsetMapper();

  size_t size() const { return equivalences_.size(); }
  const_iterator begin() const { return equivalences_.begin(); }
  const_iterator end() const { return equivalences_.end(); }

  // Returns naive extended forward-projection of "old" |offset| that follows
  // |eq|'s delta. |eq| needs not cover |offset|.
  // - Averts underflow / overflow by clamping to |[0, new_image_size_)|.
  // - However, |offset| is *not* restricted to |[0, old_image_size_)|; the
  //   caller must to make the check (hence "naive").
  offset_t NaiveExtendedForwardProject(const Equivalence& unit,
                                       offset_t offset) const;

  // Returns an offset in |new_image| corresponding to |offset| in |old_image|.
  // Assumes |equivalences_| to be non-empty. Cases:
  // - If |offset| is covered (i.e., in an "old" block), then use the delta of
  //   the (unique) equivalence unit that covers |offset|.
  // - If |offset| is non-covered, but in |[0, old_image_size_)|, then find the
  //   nearest "old" block, use its delta, and avert underflow / overflow by
  //   clamping the result to |[0, new_image_size_)|.
  // - If |offset| is >= |new_image_size_| (a "fake offset"), then use
  //   |new_image_size_ - old_image_size_| as the delta.
  offset_t ExtendedForwardProject(offset_t offset) const;

  // Given sorted |offsets|, applies a projection in-place of all offsets that
  // are part of a pruned equivalence from |old_image| to |new_image|. Other
  // offsets are removed from |offsets|.
  void ForwardProjectAll(std::deque<offset_t>* offsets) const;

  // Accessor for testing.
  const std::deque<Equivalence> equivalences() const { return equivalences_; }

  // Sorts |equivalences| by |src_offset| and removes all source overlaps; so a
  // source location that was covered by some Equivalence would become covered
  // by exactly one Equivalence. Moreover, for the offset, the equivalence
  // corresponds to the largest (pre-pruning) covering Equivalence, and in case
  // of a tie, the Equivalence with minimal |src_offset|. |equivalences| may
  // change in size since empty Equivalences are removed.
  static void PruneEquivalencesAndSortBySource(
      std::deque<Equivalence>* equivalences);

 private:
  // |equivalences_| is pruned, i.e., no "old" blocks overlap (and no "new"
  // block overlaps). Also, it is sorted by "old" offsets.
  std::deque<Equivalence> equivalences_;
  const offset_t old_image_size_;
  const offset_t new_image_size_;
};

// Container of equivalences between |old_image_index| and |new_image_index|,
// sorted by |Equivalence::dst_offset|, only used during patch generation.
class EquivalenceMap {
 public:
  using const_iterator = std::vector<EquivalenceCandidate>::const_iterator;

  EquivalenceMap();
  // Initializes the object with |equivalences|.
  explicit EquivalenceMap(std::vector<EquivalenceCandidate>&& candidates);
  EquivalenceMap(EquivalenceMap&&);
  EquivalenceMap(const EquivalenceMap&) = delete;
  ~EquivalenceMap();

  // Finds relevant equivalences between |old_view| and |new_view|, using
  // suffix array |old_sa| computed from |old_view| and using
  // |targets_affinities| to evaluate similarity between references. This
  // function is not symmetric. Equivalences might overlap in |old_view|, but
  // not in |new_view|. It tries to maximize accumulated similarity within each
  // equivalence, while maximizing |new_view| coverage. The minimum similarity
  // of an equivalence is given by |min_similarity|.
  void Build(const std::vector<offset_t>& old_sa,
             const EncodedView& old_view,
             const EncodedView& new_view,
             const std::vector<TargetsAffinity>& targets_affinities,
             double min_similarity);

  size_t size() const { return candidates_.size(); }
  const_iterator begin() const { return candidates_.begin(); }
  const_iterator end() const { return candidates_.end(); }

 private:
  // Discovers equivalence candidates between |old_view| and |new_view| and
  // stores them in the object. Note that resulting candidates are not sorted
  // and might be overlapping in new image.
  void CreateCandidates(const std::vector<offset_t>& old_sa,
                        const EncodedView& old_view,
                        const EncodedView& new_view,
                        const std::vector<TargetsAffinity>& targets_affinities,
                        double min_similarity);
  // Sorts candidates by their offset in new image.
  void SortByDestination();
  // Visits |candidates_| (sorted by |dst_offset|) and remove all destination
  // overlaps. Candidates with low similarity scores are more likely to be
  // shrunken. Unfit candidates may be removed.
  void Prune(const EncodedView& old_view,
             const EncodedView& new_view,
             const std::vector<TargetsAffinity>& targets_affinities,
             double min_similarity);

  std::vector<EquivalenceCandidate> candidates_;
};

}  // namespace zucchini

#endif  // COMPONENTS_ZUCCHINI_EQUIVALENCE_MAP_H_
