// Copyright 2024 The Pigweed Authors // // Licensed under the Apache License, Version 2.0 (the "License"); you may not // use this file except in compliance with the License. You may obtain a copy of // the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. // Program hpack_gen generates a C++ file to help parse and emit HPACK. package main import ( "fmt" ) func main() { var statsBefore, statsAfter Stats buildTree() validate(&rootNode) computeStats(&statsBefore, &rootNode) optimize(&rootNode) validate(&rootNode) computeStats(&statsAfter, &rootNode) assignTableIndices(&rootNode, 0) fmt.Println("// Copyright 2024 The Pigweed Authors") fmt.Println("//") fmt.Println("// Licensed under the Apache License, Version 2.0 (the \"License\"); you may not") fmt.Println("// use this file except in compliance with the License. You may obtain a copy of") fmt.Println("// the License at") fmt.Println("//") fmt.Println("// https://www.apache.org/licenses/LICENSE-2.0") fmt.Println("//") fmt.Println("// Unless required by applicable law or agreed to in writing, software") fmt.Println("// distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT") fmt.Println("// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the") fmt.Println("// License for the specific language governing permissions and limitations under") fmt.Println("// the License.") fmt.Println("//") fmt.Println("// AUTOGENERATED FILE, DO NOT EDIT") fmt.Println("//") fmt.Println("// To regenerate, run `go run gen/hpack_gen.go > hpack.autogen.inc`.") fmt.Println("// This file should be included in exactly one *.cc file.") fmt.Println("//") fmt.Println() fmt.Println("// clang-format off") printDecoderTable(&statsAfter) fmt.Println() fmt.Printf("// Decoder table stats:\n") fmt.Printf("// before optimization = %+v\n", statsBefore) fmt.Printf("// after optimization = %+v\n", statsAfter) fmt.Println() printStaticResponses() } type NodeType int const ( BranchNode NodeType = iota OutputNode UnprintableNode EOSNode ) type Node struct { t NodeType output int // if t == OutputNode, byte to output tableIndex int // if t == BranchNode, index in the decoder table child [2]*Node } var rootNode = Node{t: BranchNode} type Stats struct { numBranchNodes int numOutputNodes int numUnprintableNodes int numInvalidNodes int } func buildTree() { for out, code := range huffmanTable { updateTree(out, code) } } func updateTree(out int, code string) { if len(code) == 0 { panic(fmt.Sprintf("code for byte %v is empty", out)) } curr := &rootNode for i := 0; i < len(code); i++ { if code[i] != '0' && code[i] != '1' { panic(fmt.Sprintf("code for byte %v has invalid bit '%v'", out, code[i])) } bit := code[i] - '0' // Middle of the code: create a branch node if needed. if i < len(code)-1 { next := curr.child[bit] if next == nil { next = &Node{t: BranchNode} curr.child[bit] = next } else if next.t != BranchNode { panic(fmt.Sprintf("code for byte %v found non-branch node at bit number %v", out, i)) } curr = next continue } // End of the code: create a leaf node. // Each code should lead to a unique leaf node. if curr.child[bit] != nil { panic(fmt.Sprintf("code for byte %v reached a duplicate leaf node", out)) } switch { case out == EOS: curr.child[bit] = &Node{t: EOSNode} case out < 32 || 127 < out: // Unprintable characters are allowed by the Huffman code but not in gRPC. // See: https://github.com/grpc/grpc/blob/v1.60.x/doc/PROTOCOL-HTTP2.md#requests. curr.child[bit] = &Node{t: UnprintableNode} default: curr.child[bit] = &Node{t: OutputNode, output: out} } } } func validate(node *Node) { // The binary tree should be complete: there should not be missing leaf nodes. if node == nil { panic("unexpected nil in binary tree") } if node.t != BranchNode { if node.child[0] != nil || node.child[1] != nil { panic(fmt.Sprintf("unexpected child under node %+v", node)) } return } validate(node.child[0]) validate(node.child[1]) } func optimize(node *Node) { if node.t != BranchNode { return } optimize(node.child[0]) optimize(node.child[1]) // Collapse each unprintable subtree into a single unprintable node. if node.child[0].t == UnprintableNode && node.child[1].t == UnprintableNode { node.t = UnprintableNode node.child[0] = nil node.child[1] = nil } } func computeStats(stats *Stats, node *Node) { if node == nil { stats.numInvalidNodes++ return } switch node.t { case BranchNode: stats.numBranchNodes++ computeStats(stats, node.child[0]) computeStats(stats, node.child[1]) case OutputNode: stats.numOutputNodes++ case UnprintableNode: stats.numUnprintableNodes++ case EOSNode: stats.numInvalidNodes++ } } func assignTableIndices(node *Node, idx int) int { if node.t != BranchNode { return idx } node.tableIndex = idx idx++ // We use a preorder traversal to ensure the root node has index 0. idx = assignTableIndices(node.child[0], idx) idx = assignTableIndices(node.child[1], idx) return idx } const decoderTablePrefix = ` // Huffman decoder table. Decoding starts at index=0. For each bit, we inspect // kHuffmanDecoderTable[index][bit] and take an action based on that value: // // * If the value matches 0b0..._...., set index=value // * If the value matches 0b1xxx_xxxx, output byte 32 + xx_xxxx, set index=0 // * If the value matches 0b1111_1110, fail: unprintable character // * If the value matches 0b1111_1111, fail: decoder entered an invalid state // static constexpr std::array kHuffmanDecoderTable = {{ ` const decoderTableSuffix = ` }}; ` func printDecoderTable(stats *Stats) { fmt.Printf(decoderTablePrefix, stats.numBranchNodes) printDecoderTableEntries(&rootNode) fmt.Print(decoderTableSuffix) } func printDecoderTableEntries(node *Node) { if node.t != BranchNode { return } // Print nodes in preorder, which is the same order the indices were created. fmt.Printf(" /*%v=*/ {%v, %v},\n", node.tableIndex, toDecoderTableEntry(node.child[0]), toDecoderTableEntry(node.child[1])) printDecoderTableEntries(node.child[0]) printDecoderTableEntries(node.child[1]) } func toDecoderTableEntry(node *Node) uint8 { switch node.t { case BranchNode: if node.tableIndex > 127 { panic(fmt.Sprintf("BranchNode index %d > 127", node.tableIndex)) } return uint8(node.tableIndex) case OutputNode: return uint8(node.output-32) | 0b1000_0000 case UnprintableNode: return 0b1111_1110 case EOSNode: // RFC 7541 §5.2: "A Huffman-encoded string literal containing the EOS // symbol MUST be treated as a decoding error." return 0b1111_1111 } panic(fmt.Sprintf("unexpected node type %v", node.t)) } const responseHeaderFieldsPrefix = ` // HPACK-encoded header fields which form grpc Response-Headers. // These are the same for every response. static constexpr std::array kResponseHeaderFields = { ` const responseHeaderFieldsSuffix = ` }; ` const maxStatusCode = 16 const responseTrailerFieldsPrefix = ` // HPACK-encoded header fields which form grpc Trailers. All response Trailers // are identical except for the status code. // // This is indexed by pw::Status::Code, which happens to be identical to grpc's // status code. struct ResponseTrailerPayload { uint32_t size; std::array bytes; }; static constexpr std::array kResponseTrailerFields = {{ ` const responseTrailerFieldsSuffix = ` }}; ` func printStaticResponses() { respHeaderFields := buildResponseHeaderFields() fmt.Printf(responseHeaderFieldsPrefix, len(respHeaderFields)) fmt.Printf(" %s", fmtBytes(respHeaderFields)) fmt.Printf("%s", responseHeaderFieldsSuffix) maxLen := len(buildResponseTrailerFields(maxStatusCode)) fmt.Printf(responseTrailerFieldsPrefix, maxLen, maxStatusCode+1) for code := 0; code <= maxStatusCode; code++ { payload := buildResponseTrailerFields(code) fmt.Printf(" {.size=%d, .bytes={%s}},\n", len(payload), fmtBytes(payload)) } fmt.Printf("%s", responseTrailerFieldsSuffix) } func fmtBytes(bytes []byte) string { var out string for i, b := range bytes { if i < len(bytes)-1 { out += fmt.Sprintf("0x%x, ", b) } else { out += fmt.Sprintf("0x%x", b) } } return out } // Response-Headers → ":status 200" "content-type application/grpc" func buildResponseHeaderFields() []byte { var out []byte out = append(out, 0b1000_1000) // RFC 7541 §6.1: index 8 is ":status" "200" out = append(out, 0b0101_1111) // RFC 7541 §6.2.1: index 31 is "content-type" out = append(out, literalEncodeWithLength("application/grpc")...) return out } // Trailers → "grpc-status " func buildResponseTrailerFields(statusCode int) []byte { var out []byte out = append(out, 0b0100_0000) // RFC 7541 §6.2.1: literal name and value out = append(out, literalEncodeWithLength("grpc-status")...) if statusCode >= 10 { out = append(out, 0b0000_0010) // RFC 7541 §6.2.1: length of value w/out huffman out = append(out, byte(statusCode/10)+'0') // RFC 7541 §6.2.1: value out = append(out, byte(statusCode%10)+'0') } else { out = append(out, 0b0000_0001) // RFC 7541 §6.2.1: length of value w/out huffman out = append(out, byte(statusCode)+'0') // RFC 7541 §6.2.1: value } return out } func literalEncodeWithLength(str string) []byte { // RFC 7541 §6.2.1: we are generating this structure with H=0. Since the // strings are short, there is minimal benefit to using a Huffman encoding, // at most 2-3 bytes per string. // +---+---+-----------------------+ // | H | Value Length (7+) | // +---+---------------------------+ // | Value String (Length octets) | // +-------------------------------+ // // The length must be encoded across multiple bytes if 128 or larger. See RFC // 7541 §5.2. We don't write any strings that large. if len(str) >= 128 { panic(fmt.Sprintf("cannot encode string of length %v: %#v", len(str), str)) } return append([]byte{byte(len(str))}, str...) } // Special symbol for Huffman EOS. // See RFC 7541 Appendix B. const EOS = 256 // Mapping from symbol to Huffman code. // Taken verbatim from RFC 7541 Appendix B. var huffmanTable = map[int]string{ 0: "1111111111000", 1: "11111111111111111011000", 2: "1111111111111111111111100010", 3: "1111111111111111111111100011", 4: "1111111111111111111111100100", 5: "1111111111111111111111100101", 6: "1111111111111111111111100110", 7: "1111111111111111111111100111", 8: "1111111111111111111111101000", 9: "111111111111111111101010", 10: "111111111111111111111111111100", 11: "1111111111111111111111101001", 12: "1111111111111111111111101010", 13: "111111111111111111111111111101", 14: "1111111111111111111111101011", 15: "1111111111111111111111101100", 16: "1111111111111111111111101101", 17: "1111111111111111111111101110", 18: "1111111111111111111111101111", 19: "1111111111111111111111110000", 20: "1111111111111111111111110001", 21: "1111111111111111111111110010", 22: "111111111111111111111111111110", 23: "1111111111111111111111110011", 24: "1111111111111111111111110100", 25: "1111111111111111111111110101", 26: "1111111111111111111111110110", 27: "1111111111111111111111110111", 28: "1111111111111111111111111000", 29: "1111111111111111111111111001", 30: "1111111111111111111111111010", 31: "1111111111111111111111111011", 32: "010100", 33: "1111111000", 34: "1111111001", 35: "111111111010", 36: "1111111111001", 37: "010101", 38: "11111000", 39: "11111111010", 40: "1111111010", 41: "1111111011", 42: "11111001", 43: "11111111011", 44: "11111010", 45: "010110", 46: "010111", 47: "011000", 48: "00000", 49: "00001", 50: "00010", 51: "011001", 52: "011010", 53: "011011", 54: "011100", 55: "011101", 56: "011110", 57: "011111", 58: "1011100", 59: "11111011", 60: "111111111111100", 61: "100000", 62: "111111111011", 63: "1111111100", 64: "1111111111010", 65: "100001", 66: "1011101", 67: "1011110", 68: "1011111", 69: "1100000", 70: "1100001", 71: "1100010", 72: "1100011", 73: "1100100", 74: "1100101", 75: "1100110", 76: "1100111", 77: "1101000", 78: "1101001", 79: "1101010", 80: "1101011", 81: "1101100", 82: "1101101", 83: "1101110", 84: "1101111", 85: "1110000", 86: "1110001", 87: "1110010", 88: "11111100", 89: "1110011", 90: "11111101", 91: "1111111111011", 92: "1111111111111110000", 93: "1111111111100", 94: "11111111111100", 95: "100010", 96: "111111111111101", 97: "00011", 98: "100011", 99: "00100", 100: "100100", 101: "00101", 102: "100101", 103: "100110", 104: "100111", 105: "00110", 106: "1110100", 107: "1110101", 108: "101000", 109: "101001", 110: "101010", 111: "00111", 112: "101011", 113: "1110110", 114: "101100", 115: "01000", 116: "01001", 117: "101101", 118: "1110111", 119: "1111000", 120: "1111001", 121: "1111010", 122: "1111011", 123: "111111111111110", 124: "11111111100", 125: "11111111111101", 126: "1111111111101", 127: "1111111111111111111111111100", 128: "11111111111111100110", 129: "1111111111111111010010", 130: "11111111111111100111", 131: "11111111111111101000", 132: "1111111111111111010011", 133: "1111111111111111010100", 134: "1111111111111111010101", 135: "11111111111111111011001", 136: "1111111111111111010110", 137: "11111111111111111011010", 138: "11111111111111111011011", 139: "11111111111111111011100", 140: "11111111111111111011101", 141: "11111111111111111011110", 142: "111111111111111111101011", 143: "11111111111111111011111", 144: "111111111111111111101100", 145: "111111111111111111101101", 146: "1111111111111111010111", 147: "11111111111111111100000", 148: "111111111111111111101110", 149: "11111111111111111100001", 150: "11111111111111111100010", 151: "11111111111111111100011", 152: "11111111111111111100100", 153: "111111111111111011100", 154: "1111111111111111011000", 155: "11111111111111111100101", 156: "1111111111111111011001", 157: "11111111111111111100110", 158: "11111111111111111100111", 159: "111111111111111111101111", 160: "1111111111111111011010", 161: "111111111111111011101", 162: "11111111111111101001", 163: "1111111111111111011011", 164: "1111111111111111011100", 165: "11111111111111111101000", 166: "11111111111111111101001", 167: "111111111111111011110", 168: "11111111111111111101010", 169: "1111111111111111011101", 170: "1111111111111111011110", 171: "111111111111111111110000", 172: "111111111111111011111", 173: "1111111111111111011111", 174: "11111111111111111101011", 175: "11111111111111111101100", 176: "111111111111111100000", 177: "111111111111111100001", 178: "1111111111111111100000", 179: "111111111111111100010", 180: "11111111111111111101101", 181: "1111111111111111100001", 182: "11111111111111111101110", 183: "11111111111111111101111", 184: "11111111111111101010", 185: "1111111111111111100010", 186: "1111111111111111100011", 187: "1111111111111111100100", 188: "11111111111111111110000", 189: "1111111111111111100101", 190: "1111111111111111100110", 191: "11111111111111111110001", 192: "11111111111111111111100000", 193: "11111111111111111111100001", 194: "11111111111111101011", 195: "1111111111111110001", 196: "1111111111111111100111", 197: "11111111111111111110010", 198: "1111111111111111101000", 199: "1111111111111111111101100", 200: "11111111111111111111100010", 201: "11111111111111111111100011", 202: "11111111111111111111100100", 203: "111111111111111111111011110", 204: "111111111111111111111011111", 205: "11111111111111111111100101", 206: "111111111111111111110001", 207: "1111111111111111111101101", 208: "1111111111111110010", 209: "111111111111111100011", 210: "11111111111111111111100110", 211: "111111111111111111111100000", 212: "111111111111111111111100001", 213: "11111111111111111111100111", 214: "111111111111111111111100010", 215: "111111111111111111110010", 216: "111111111111111100100", 217: "111111111111111100101", 218: "11111111111111111111101000", 219: "11111111111111111111101001", 220: "1111111111111111111111111101", 221: "111111111111111111111100011", 222: "111111111111111111111100100", 223: "111111111111111111111100101", 224: "11111111111111101100", 225: "111111111111111111110011", 226: "11111111111111101101", 227: "111111111111111100110", 228: "1111111111111111101001", 229: "111111111111111100111", 230: "111111111111111101000", 231: "11111111111111111110011", 232: "1111111111111111101010", 233: "1111111111111111101011", 234: "1111111111111111111101110", 235: "1111111111111111111101111", 236: "111111111111111111110100", 237: "111111111111111111110101", 238: "11111111111111111111101010", 239: "11111111111111111110100", 240: "11111111111111111111101011", 241: "111111111111111111111100110", 242: "11111111111111111111101100", 243: "11111111111111111111101101", 244: "111111111111111111111100111", 245: "111111111111111111111101000", 246: "111111111111111111111101001", 247: "111111111111111111111101010", 248: "111111111111111111111101011", 249: "1111111111111111111111111110", 250: "111111111111111111111101100", 251: "111111111111111111111101101", 252: "111111111111111111111101110", 253: "111111111111111111111101111", 254: "111111111111111111111110000", 255: "11111111111111111111101110", 256: "111111111111111111111111111111", }