#!/usr/bin/env python3
# Copyright 2020 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
"""Generates test data for hash_test.cc."""

import datetime
import os
import random

from pw_tokenizer import tokens

HASH_LENGTHS = (80, 96, 128)
HASH_MACRO = 'PW_TOKENIZER_65599_FIXED_LENGTH_{}_HASH'
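# Test data is generated for each fixed hash length above; each length has a
# corresponding C macro, e.g. PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH.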

SHARED_HEADER = """\
// Copyright {year} The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// AUTOGENERATED - DO NOT EDIT
//
// This file was generated by {script}.
// To make changes, update the script and run it to generate new files.
"""

CPP_HEADER = """\
#pragma once

#include <cstddef>
#include <cstdint>
#include <string_view>

{includes}

namespace pw::tokenizer {{

// Generated test cases comparing Python- and macro-calculated hashes.
inline constexpr struct {{
  std::string_view string;
  size_t hash_length;
  uint32_t python_calculated_hash;
  uint32_t macro_calculated_hash;  // clang-format off
}} kHashTests[] = {{

"""

CPP_FOOTER = """
};  // kHashTests

// clang-format on

}  // namespace pw::tokenizer
"""

_CPP_TEST_CASE = """{{
  std::string_view("{str}", {string_length}u),  // NOLINT(bugprone-string-constructor)
  {hash_length}u,  // fixed hash length
  UINT32_C({hash}),  // Python-calculated hash
  {macro}("{str}"),  // macro-calculated hash
}},
"""

RUST_HEADER = """
fn test_cases() -> Vec<TestCase> {{
    vec![
"""

RUST_FOOTER = """
    ]
}
"""

_RUST_TEST_CASE = """        TestCase{{
            string: b"{str}",
            hash_length: {hash_length},
            hash: {hash},
        }},
"""


def _include_paths(lengths):
    """Returns sorted #include lines for each hash length's macro header."""
    return '\n'.join(
        sorted(
            '#include "pw_tokenizer/internal/'
            'pw_tokenizer_65599_fixed_length_{}_hash_macro.h"'.format(length)
            for length in lengths
        )
    )
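
# For example, _include_paths((96, 80)) produces:
#   #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_80_hash_macro.h"
#   #include "pw_tokenizer/internal/pw_tokenizer_65599_fixed_length_96_hash_macro.h"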


def _test_case_at_length(test_case_template, data, hash_length):
    """Generates a test case for a particular hash length."""

    if isinstance(data, str):
        data = data.encode()

    if all(ord(' ') <= b <= ord('~') for b in data):
        # Printable ASCII: escape backslashes first, then double quotes, so
        # the literal round-trips in both C++ and Rust source.
        escaped_str = data.decode().replace('\\', r'\\').replace('"', r'\"')
    else:
        # Otherwise hex-escape every byte; \xNN is valid in both languages.
        escaped_str = ''.join(r'\x{:02x}'.format(b) for b in data)

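    # tokens.c_hash is the Python implementation of the 65599 fixed-length
    # hash; each generated entry cross-checks it against the C macro's result.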
    return test_case_template.format(
        str=escaped_str,
        string_length=len(data),
        hash_length=hash_length,
        hash=tokens.c_hash(data, hash_length),
        macro=HASH_MACRO.format(hash_length),
    )

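# A sketch of one generated C++ entry for the string "a" at hash length 80
# (the UINT32_C value comes from tokens.c_hash and is elided here):
#
#   {
#     std::string_view("a", 1u),  // NOLINT(bugprone-string-constructor)
#     80u,  // fixed hash length
#     UINT32_C(...),  // Python-calculated hash
#     PW_TOKENIZER_65599_FIXED_LENGTH_80_HASH("a"),  // macro-calculated hash
#   },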

def test_case(test_case_template, data):
    """Generates test case entries for the data at every hash length."""
    return ''.join(
        _test_case_at_length(test_case_template, data, length)
        for length in HASH_LENGTHS
    )


def generate_test_cases(test_case_template):
    """Yields test cases: hand-picked edge cases, then seeded random data."""
    yield test_case(test_case_template, '')
    yield test_case(test_case_template, b'\xa1')
    yield test_case(test_case_template, b'\xff')
    yield test_case(test_case_template, '\0')
    yield test_case(test_case_template, '\0\0')
    yield test_case(test_case_template, 'a')
    yield test_case(test_case_template, 'A')
    yield test_case(test_case_template, 'hello, "world"')
    yield test_case(test_case_template, 'YO' * 100)

    # Fixed seed so the generated files are identical from run to run.
    random.seed(600613)

    def random_string(size):
        return bytes(random.randrange(256) for _ in range(size))

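    # Two random strings at each length from 1 through 15 bytes.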
    for i in range(1, 16):
        yield test_case(test_case_template, random_string(i))
        yield test_case(test_case_template, random_string(i))

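    # Random strings that straddle each fixed hash length boundary.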
    for length in HASH_LENGTHS:
        yield test_case(test_case_template, random_string(length - 1))
        yield test_case(test_case_template, random_string(length))
        yield test_case(test_case_template, random_string(length + 1))


def generate_file(
    path_array, header_template, footer_template, test_case_template
):
    """Writes a generated test data file relative to this script's directory."""
    path = os.path.realpath(
        os.path.join(os.path.dirname(__file__), *path_array)
    )

    with open(path, 'w') as output:
        output.write(
            SHARED_HEADER.format(
                year=datetime.date.today().year,
                script=os.path.basename(__file__),
            )
        )
        output.write(
            header_template.format(
                includes=_include_paths(HASH_LENGTHS),
            )
        )

        for case in generate_test_cases(test_case_template):
            output.write(case)

        output.write(footer_template)
        print('Wrote test data to', path)


if __name__ == '__main__':
    generate_file(
        [
            '..',
            'pw_tokenizer_private',
            'generated_hash_test_cases.h',
        ],
        CPP_HEADER,
        CPP_FOOTER,
        _CPP_TEST_CASE,
    )
    generate_file(
        [
            '..',
            'rust',
            'pw_tokenizer_core_test_cases.rs',
        ],
        RUST_HEADER,
        RUST_FOOTER,
        _RUST_TEST_CASE,
    )
