/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.android.tradefed.util;

import com.android.tradefed.error.HarnessRuntimeException;
import com.android.tradefed.log.LogUtil.CLog;
import com.android.tradefed.result.error.InfraErrorIdentifier;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Utility for splitting a command line into tokens while honoring double-quoted sections, and for
 * joining tokens back into a single line.
 */
public class QuotationAwareTokenizer {

    /**
     * Matches one "character" of the input: either a backslash followed by any character (an
     * escape sequence, two chars long) or any single character. The escape alternative is listed
     * first so it takes precedence. Compiled once since {@link Pattern} is immutable.
     */
    private static final Pattern CHAR_PATTERN = Pattern.compile("\\\\.|.");

    /** Matches any single whitespace character; used to decide whether a token needs quoting. */
    private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s");

    /** Textual stand-in for an empty token, since an empty token cannot be written literally. */
    private static final String EMPTY_TOKEN_PLACEHOLDER = "''";

    /** An unescaped double-quote "character". */
    private static final String QUOTE = "\"";

    /** A lone backslash "character", which can only be produced at the very end of the input. */
    private static final String ESCAPE = "\\";

    /**
     * Tokenizes the string, splitting on specified delimiter. Does not split between consecutive,
     * unquoted double-quote marks.
     *
     * <p>How the tokenizer works:
     *
     * <ol>
     *   <li> Split the string into "characters" where each "character" is either an escaped
     *       character like \" (that is, "\\\"") or a single real character like f (just "f").
     *   <li> For each "character"
     *       <ol>
     *         <li> If it's the delimiter, finish a token unless we're being quoted
     *         <li> If it's a quotation mark, flip the "we're being quoted" bit
     *         <li> Otherwise, add it to the token being built
     *       </ol>
     *
     *   <li> At EOL, we typically haven't added the final token to the (tokens) {@link ArrayList}
     *       <ol>
     *         <li> If the last "character" is an escape character, throw an exception; that's not
     *             valid
     *         <li> If we're in the middle of a quotation, throw an exception; that's not valid
     *         <li> Otherwise, add the final token to (tokens)
     *       </ol>
     *
     *   <li> Return a String[] version of (tokens)
     * </ol>
     *
     * <p>Notes on the resulting tokens:
     *
     * <ul>
     *   <li> Escape sequences are kept verbatim in the token, backslash included.
     *   <li> Runs of delimiters outside of a quotation never produce empty tokens, and an empty
     *       quotation ("") on its own produces no token either. A token that is exactly
     *       <code>''</code> is returned as the empty string instead.
     *   <li> The delimiter is compared against each whole "character", so it is expected to be a
     *       single character.
     *   <li> The character pattern does not match line terminators, so any line terminator in the
     *       input is skipped and does not appear in any token.
     * </ul>
     *
     * @param line A {@link String} to be tokenized
     * @param delim the delimiter to split on
     * @param logging whether or not to log operations
     * @return A tokenized version of the string
     * @throws IllegalArgumentException if the line or the delimiter is null
     * @throws HarnessRuntimeException if the line ends inside a quotation or right after a lone
     *     escape character
     */
    public static String[] tokenizeLine(String line, String delim, boolean logging)
            throws IllegalArgumentException {
        if (line == null) {
            throw new IllegalArgumentException("line is null");
        }
        if (delim == null) {
            // Fail fast and consistently instead of hitting a NullPointerException mid-parse.
            throw new IllegalArgumentException("delim is null");
        }

        ArrayList<String> tokens = new ArrayList<String>();
        StringBuilder token = new StringBuilder();
        final Matcher charMatcher = CHAR_PATTERN.matcher(line);
        String aChar = "";
        boolean quotation = false;

        log(logging, "Trying to tokenize the line '%s'", line);
        while (charMatcher.find()) {
            aChar = charMatcher.group();

            if (delim.equals(aChar)) {
                if (quotation) {
                    // inside a quotation; treat the delimiter as part of the token
                    token.append(aChar);
                } else {
                    // end of a token; a no-op when this is a repeated delimiter
                    finishToken(token, tokens, logging, "Finished token '%s'");
                }
            } else if (QUOTE.equals(aChar)) {
                // unescaped quotation mark; flip quotation state
                log(logging, "Flipped quotation state");
                quotation = !quotation;
            } else {
                // default case: add the character (or escape sequence) to the token being built
                token.append(aChar);
            }
        }

        if (quotation || ESCAPE.equals(aChar)) {
            // We ended in a quotation or with an escape character; this is not valid
            throw new HarnessRuntimeException(
                    "Unexpected EOL in a quotation or after an escape character",
                    InfraErrorIdentifier.OPTION_CONFIGURATION_ERROR);
        }

        // Add the final token, if any, to the tokens list.
        finishToken(token, tokens, logging, "Finished final token '%s'");

        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted
     * double-quote marks. Operations are logged.
     *
     * <p>See also {@link #tokenizeLine(String, String, boolean)}
     */
    public static String[] tokenizeLine(String line) {
        return tokenizeLine(line, " ", true);
    }

    /**
     * Tokenizes the string, splitting on the given delimiter. Does not split between consecutive,
     * unquoted double-quote marks. Operations are logged.
     *
     * <p>See also {@link #tokenizeLine(String, String, boolean)}
     */
    public static String[] tokenizeLine(String line, String delim) {
        return tokenizeLine(line, delim, true);
    }

    /**
     * Tokenizes the string, splitting on spaces. Does not split between consecutive, unquoted
     * double-quote marks.
     *
     * <p>See also {@link #tokenizeLine(String, String, boolean)}
     *
     * @param line A {@link String} to be tokenized
     * @param logging whether or not to log operations
     */
    public static String[] tokenizeLine(String line, boolean logging) {
        return tokenizeLine(line, " ", logging);
    }

    /**
     * Perform the reverse of {@link #tokenizeLine(String)}.
     *
     * <p>Given array of tokens, combine them into a single space-separated line. Tokens containing
     * whitespace are wrapped in double quotes, and empty tokens are written as <code>''</code> so
     * that {@link #tokenizeLine(String)} restores them instead of dropping them.
     *
     * <p>Double-quote characters inside a token are not escaped, so a token that itself contains
     * a double quote will not survive a round trip through {@link #tokenizeLine(String)}.
     *
     * @param tokens the tokens to combine
     * @return A {@link String} created from all the tokens.
     */
    public static String combineTokens(String... tokens) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < tokens.length; i++) {
            if (i > 0) {
                // separator goes between tokens only; nothing after the last one
                sb.append(' ');
            }
            final String token = tokens[i];
            if (token.isEmpty()) {
                sb.append(EMPTY_TOKEN_PLACEHOLDER);
            } else if (WHITESPACE_PATTERN.matcher(token).find()) {
                sb.append('"').append(token).append('"');
            } else {
                sb.append(token);
            }
        }
        return sb.toString();
    }

    /**
     * Moves the token being built into the list of finished tokens and clears the builder. Does
     * nothing if the builder is empty. A token equal to <code>''</code> is stored as the empty
     * string.
     *
     * @param token the token being built; emptied when a token is added
     * @param tokens the list receiving the finished token
     * @param logging whether or not to log the operation
     * @param logFormat format of the log message, taking the finished token as its argument
     */
    private static void finishToken(
            StringBuilder token, ArrayList<String> tokens, boolean logging, String logFormat) {
        if (token.length() == 0) {
            return;
        }
        final String finished = token.toString();
        log(logging, logFormat, finished);
        // Handle escaped empty string by '' to restore an empty string
        tokens.add(EMPTY_TOKEN_PLACEHOLDER.equals(finished) ? "" : finished);
        token.setLength(0);
    }

    /**
     * Logs a verbose message when requested. The message is only formatted if it will actually be
     * logged.
     */
    private static void log(boolean display, String format, Object... args) {
        if (display) {
            CLog.v(String.format(format, args));
        }
    }
}
