001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import org.apache.commons.codec.CodecPolicy;
021
022/**
023 * Provides Base16 encoding and decoding.
024 *
025 * <p>
026 * This class is thread-safe.
027 * </p>
028 * <p>
029 * This implementation strictly follows RFC 4648, and as such unlike
030 * the {@link Base32} and {@link Base64} implementations,
031 * it does not ignore invalid alphabet characters or whitespace,
032 * neither does it offer chunking or padding characters.
033 * </p>
034 * <p>
035 * The only additional feature above those specified in RFC 4648
036 * is support for working with a lower-case alphabet in addition
037 * to the default upper-case alphabet.
038 * </p>
039 *
040 * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a>
041 *
042 * @since 1.15
043 */
044public class Base16 extends BaseNCodec {
045
046    /**
047     * BASE16 characters are 4 bits in length.
048     * They are formed by taking an 8-bit group,
049     * which is converted into two BASE16 characters.
050     */
051    private static final int BITS_PER_ENCODED_BYTE = 4;
052    private static final int BYTES_PER_ENCODED_BLOCK = 2;
053    private static final int BYTES_PER_UNENCODED_BLOCK = 1;
054
055    /**
056     * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified
057     * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16
058     * alphabet but fall within the bounds of the array are translated to -1.
059     */
060    private static final byte[] UPPER_CASE_DECODE_TABLE = {
061            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
062            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
063            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
064            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
065             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
066            -1, 10, 11, 12, 13, 14, 15                                      // 40-46 A-F
067    };
068
069    /**
070     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
071     * equivalents as specified in Table 5 of RFC 4648.
072     */
073    private static final byte[] UPPER_CASE_ENCODE_TABLE = {
074            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
075            'A', 'B', 'C', 'D', 'E', 'F'
076    };
077
078    /**
079     * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet"
080     * into their 4-bit positive integer equivalents. Characters that are not in the Base16
081     * alphabet but fall within the bounds of the array are translated to -1.
082     */
083    private static final byte[] LOWER_CASE_DECODE_TABLE = {
084            //  0   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
085            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f
086            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f
087            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f
088             0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9
089            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f
090            -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f
091            -1, 10, 11, 12, 13, 14, 15                                      // 60-66 a-f
092    };
093
094    /**
095     * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet"
096     * lower-case equivalents.
097     */
098    private static final byte[] LOWER_CASE_ENCODE_TABLE = {
099            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
100            'a', 'b', 'c', 'd', 'e', 'f'
101    };
102
103    /** Mask used to extract 4 bits, used when decoding character. */
104    private static final int MASK_4BITS = 0x0f;
105
106    /**
107     * Decode table to use.
108     */
109    private final byte[] decodeTable;
110
111    /**
112     * Encode table to use.
113     */
114    private final byte[] encodeTable;
115
116    /**
117     * Creates a Base16 codec used for decoding and encoding.
118     */
119    public Base16() {
120        this(false);
121    }
122
123    /**
124     * Creates a Base16 codec used for decoding and encoding.
125     *
126     * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
127     */
128    public Base16(final boolean lowerCase) {
129        this(lowerCase, DECODING_POLICY_DEFAULT);
130    }
131
132    /**
133     * Creates a Base16 codec used for decoding and encoding.
134     *
135     * @param lowerCase if {@code true} then use a lower-case Base16 alphabet.
136     * @param decodingPolicy Decoding policy.
137     */
138    public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) {
139        super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0,
140                PAD_DEFAULT, decodingPolicy);
141        if (lowerCase) {
142            this.encodeTable = LOWER_CASE_ENCODE_TABLE;
143            this.decodeTable = LOWER_CASE_DECODE_TABLE;
144        } else {
145            this.encodeTable = UPPER_CASE_ENCODE_TABLE;
146            this.decodeTable = UPPER_CASE_DECODE_TABLE;
147        }
148    }
149
150    @Override
151    void decode(final byte[] data, int offset, final int length, final Context context) {
152        if (context.eof || length < 0) {
153            context.eof = true;
154            if (context.ibitWorkArea != 0) {
155                validateTrailingCharacter();
156            }
157            return;
158        }
159
160        final int dataLen = Math.min(data.length - offset, length);
161        final int availableChars = (context.ibitWorkArea != 0 ? 1 : 0) + dataLen;
162
163        // small optimisation to short-cut the rest of this method when it is fed byte-by-byte
164        if (availableChars == 1 && availableChars == dataLen) {
165            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
166            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
167            return;
168        }
169
170        // we must have an even number of chars to decode
171        final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1;
172        final int end = offset + dataLen;
173
174        final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context);
175
176        int result;
177        if (dataLen < availableChars) {
178            // we have 1/2 byte from previous invocation to decode
179            result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE;
180            result |= decodeOctet(data[offset++]);
181
182            buffer[context.pos++] = (byte)result;
183
184            // reset to empty-value for next invocation!
185            context.ibitWorkArea = 0;
186        }
187
188        final int loopEnd = end - 1;
189        while (offset < loopEnd) {
190            result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE;
191            result |= decodeOctet(data[offset++]);
192            buffer[context.pos++] = (byte)result;
193        }
194
195        // we have one char of a hex-pair left over
196        if (offset < end) {
197            // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0
198            context.ibitWorkArea = decodeOctet(data[offset]) + 1;
199        }
200    }
201
202    private int decodeOctet(final byte octet) {
203        int decoded = -1;
204        if ((octet & 0xff) < decodeTable.length) {
205            decoded = decodeTable[octet];
206        }
207
208        if (decoded == -1) {
209            throw new IllegalArgumentException("Invalid octet in encoded value: " + (int)octet);
210        }
211
212        return decoded;
213    }
214
215    @Override
216    void encode(final byte[] data, final int offset, final int length, final Context context) {
217        if (context.eof) {
218            return;
219        }
220
221        if (length < 0) {
222            context.eof = true;
223            return;
224        }
225
226        final int size = length * BYTES_PER_ENCODED_BLOCK;
227        if (size < 0) {
228            throw new IllegalArgumentException("Input length exceeds maximum size for encoded data: " + length);
229        }
230
231        final byte[] buffer = ensureBufferSize(size, context);
232
233        final int end = offset + length;
234        for (int i = offset; i < end; i++) {
235            final int value = data[i];
236            final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS;
237            final int low = value & MASK_4BITS;
238            buffer[context.pos++] = encodeTable[high];
239            buffer[context.pos++] = encodeTable[low];
240        }
241    }
242
243    /**
244     * Returns whether or not the {@code octet} is in the Base16 alphabet.
245     *
246     * @param octet The value to test.
247     *
248     * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise.
249     */
250    @Override
251    public boolean isInAlphabet(final byte octet) {
252        return (octet & 0xff) < decodeTable.length && decodeTable[octet] != -1;
253    }
254
255    /**
256     * Validates whether decoding allows an entire final trailing character that cannot be
257     * used for a complete byte.
258     *
259     * @throws IllegalArgumentException if strict decoding is enabled
260     */
261    private void validateTrailingCharacter() {
262        if (isStrictDecoding()) {
263            throw new IllegalArgumentException("Strict decoding: Last encoded character is a valid base 16 alphabet" +
264                    "character but not a possible encoding. " +
265                    "Decoding requires at least two characters to create one byte.");
266        }
267    }
268}