/* * Copyright (C) 2023 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include "src/trace_processor/db/column/arrangement_overlay.h" #include #include #include #include #include #include #include #include "perfetto/base/logging.h" #include "perfetto/trace_processor/basic_types.h" #include "src/trace_processor/containers/bit_vector.h" #include "src/trace_processor/db/column/data_layer.h" #include "src/trace_processor/db/column/types.h" #include "src/trace_processor/tp_metatrace.h" #include "protos/perfetto/trace_processor/metatrace_categories.pbzero.h" namespace perfetto::trace_processor::column { void ArrangementOverlay::Flatten(uint32_t* start, const uint32_t* end, uint32_t stride) { for (uint32_t* it = start; it < end; it += stride) { *it = (*arrangement_)[*it]; } } ArrangementOverlay::ChainImpl::ChainImpl( std::unique_ptr inner, const std::vector* arrangement, Indices::State arrangement_state, bool does_arrangement_order_storage) : inner_(std::move(inner)), arrangement_(arrangement), arrangement_state_(arrangement_state), does_arrangement_order_storage_(does_arrangement_order_storage) {} SingleSearchResult ArrangementOverlay::ChainImpl::SingleSearch( FilterOp op, SqlValue sql_val, uint32_t index) const { return inner_->SingleSearch(op, sql_val, (*arrangement_)[index]); } SearchValidationResult ArrangementOverlay::ChainImpl::ValidateSearchConstraints( FilterOp op, SqlValue value) const { return inner_->ValidateSearchConstraints(op, value); } RangeOrBitVector ArrangementOverlay::ChainImpl::SearchValidated( FilterOp op, SqlValue sql_val, Range in) const { PERFETTO_TP_TRACE(metatrace::Category::DB, "ArrangementOverlay::ChainImpl::Search"); if (does_arrangement_order_storage_ && op != FilterOp::kGlob && op != FilterOp::kRegex) { OrderedIndices indices{arrangement_->data() + in.start, in.size(), arrangement_state_}; if (op == FilterOp::kNe) { // Do an equality search and "invert" the range. Range inner_res = inner_->OrderedIndexSearchValidated(FilterOp::kEq, sql_val, indices); BitVector bv(in.start); bv.Resize(in.start + inner_res.start, true); bv.Resize(in.start + inner_res.end, false); bv.Resize(in.end, true); return RangeOrBitVector(std::move(bv)); } Range inner_res = inner_->OrderedIndexSearchValidated(op, sql_val, indices); return RangeOrBitVector( Range(in.start + inner_res.start, in.start + inner_res.end)); } const auto& arrangement = *arrangement_; PERFETTO_DCHECK(in.end <= arrangement.size()); const auto [min_i, max_i] = std::minmax_element(arrangement.begin() + static_cast(in.start), arrangement.begin() + static_cast(in.end)); auto storage_result = inner_->SearchValidated(op, sql_val, Range(*min_i, *max_i + 1)); BitVector::Builder builder(in.end, in.start); if (storage_result.IsRange()) { Range storage_range = std::move(storage_result).TakeIfRange(); for (uint32_t i = in.start; i < in.end; ++i) { builder.Append(storage_range.Contains(arrangement[i])); } } else { BitVector storage_bitvector = std::move(storage_result).TakeIfBitVector(); PERFETTO_DCHECK(storage_bitvector.size() == *max_i + 1); // After benchmarking, it turns out this complexity *is* actually worthwhile // and has a noticable impact on the performance of this function in real // world tables. // Fast path: we compare as many groups of 64 elements as we can. // This should be very easy for the compiler to auto-vectorize. const uint32_t* arrangement_idx = arrangement.data() + in.start; uint32_t fast_path_elements = builder.BitsInCompleteWordsUntilFull(); for (uint32_t i = 0; i < fast_path_elements; i += BitVector::kBitsInWord) { uint64_t word = 0; // This part should be optimised by SIMD and is expected to be fast. for (uint32_t k = 0; k < BitVector::kBitsInWord; ++k, ++arrangement_idx) { bool comp_result = storage_bitvector.IsSet(*arrangement_idx); word |= static_cast(comp_result) << k; } builder.AppendWord(word); } // Slow path: we compare <64 elements and append to fill the Builder. uint32_t back_elements = builder.BitsUntilFull(); for (uint32_t i = 0; i < back_elements; ++i, ++arrangement_idx) { builder.Append(storage_bitvector.IsSet(*arrangement_idx)); } } return RangeOrBitVector(std::move(builder).Build()); } void ArrangementOverlay::ChainImpl::IndexSearchValidated( FilterOp op, SqlValue sql_val, Indices& indices) const { PERFETTO_TP_TRACE(metatrace::Category::DB, "ArrangementOverlay::ChainImpl::IndexSearch"); for (auto& i : indices.tokens) { i.index = (*arrangement_)[i.index]; } // If the indices state is monotonic, we can just pass the arrangement's // state. indices.state = indices.state == Indices::State::kMonotonic ? arrangement_state_ : Indices::State::kNonmonotonic; return inner_->IndexSearchValidated(op, sql_val, indices); } void ArrangementOverlay::ChainImpl::StableSort(Token* start, Token* end, SortDirection direction) const { for (Token* it = start; it != end; ++it) { it->index = (*arrangement_)[it->index]; } inner_->StableSort(start, end, direction); } void ArrangementOverlay::ChainImpl::Distinct(Indices& indices) const { PERFETTO_TP_TRACE(metatrace::Category::DB, "ArrangementOverlay::ChainImpl::Distinct"); // TODO(mayzner): Utilize `does_arrangmeent_order_storage_`. std::unordered_set s; indices.tokens.erase( std::remove_if(indices.tokens.begin(), indices.tokens.end(), [this, &s](Token& idx) { if (s.insert(idx.index).second) { idx.index = (*arrangement_)[idx.index]; return false; } return true; }), indices.tokens.end()); inner_->Distinct(indices); } std::optional ArrangementOverlay::ChainImpl::MaxElement( Indices& indices) const { PERFETTO_TP_TRACE(metatrace::Category::DB, "ArrangementOverlay::ChainImpl::MaxElement"); for (auto& i : indices.tokens) { i.index = (*arrangement_)[i.index]; } // If the indices state is monotonic, we can just pass the arrangement's // state. indices.state = indices.state == Indices::State::kMonotonic ? arrangement_state_ : Indices::State::kNonmonotonic; return inner_->MaxElement(indices); } std::optional ArrangementOverlay::ChainImpl::MinElement( Indices& indices) const { PERFETTO_TP_TRACE(metatrace::Category::DB, "ArrangementOverlay::ChainImpl::MinElement"); for (auto& i : indices.tokens) { i.index = (*arrangement_)[i.index]; } // If the indices state is monotonic, we can just pass the arrangement's // state. indices.state = indices.state == Indices::State::kMonotonic ? arrangement_state_ : Indices::State::kNonmonotonic; return inner_->MinElement(indices); } SqlValue ArrangementOverlay::ChainImpl::Get_AvoidUsingBecauseSlow( uint32_t index) const { return inner_->Get_AvoidUsingBecauseSlow((*arrangement_)[index]); } } // namespace perfetto::trace_processor::column