001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import org.apache.commons.codec.BinaryDecoder;
021import org.apache.commons.codec.BinaryEncoder;
022import org.apache.commons.codec.DecoderException;
023import org.apache.commons.codec.EncoderException;
024
025/**
026 * Converts between byte arrays and strings of "0"s and "1"s.
027 *
028 * <p>This class is immutable and thread-safe.</p>
029 *
030 * TODO: may want to add more bit vector functions like and/or/xor/nand
031 * TODO: also might be good to generate boolean[] from byte[] et cetera.
032 *
033 * @since 1.3
034 */
035public class BinaryCodec implements BinaryDecoder, BinaryEncoder {
036    /*
037     * tried to avoid using ArrayUtils to minimize dependencies while using these empty arrays - dep is just not worth
038     * it.
039     */
040    /** Empty char array. */
041    private static final char[] EMPTY_CHAR_ARRAY = {};
042
043    /** Empty byte array. */
044    private static final byte[] EMPTY_BYTE_ARRAY = {};
045
046    /** Mask for bit 0 of a byte. */
047    private static final int BIT_0 = 1;
048
049    /** Mask for bit 1 of a byte. */
050    private static final int BIT_1 = 0x02;
051
052    /** Mask for bit 2 of a byte. */
053    private static final int BIT_2 = 0x04;
054
055    /** Mask for bit 3 of a byte. */
056    private static final int BIT_3 = 0x08;
057
058    /** Mask for bit 4 of a byte. */
059    private static final int BIT_4 = 0x10;
060
061    /** Mask for bit 5 of a byte. */
062    private static final int BIT_5 = 0x20;
063
064    /** Mask for bit 6 of a byte. */
065    private static final int BIT_6 = 0x40;
066
067    /** Mask for bit 7 of a byte. */
068    private static final int BIT_7 = 0x80;
069
070    private static final int[] BITS = {BIT_0, BIT_1, BIT_2, BIT_3, BIT_4, BIT_5, BIT_6, BIT_7};
071
072    /**
073     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
074     *
075     * @param ascii
076     *                  each byte represents an ASCII '0' or '1'
077     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
078     */
079    public static byte[] fromAscii(final byte[] ascii) {
080        if (isEmpty(ascii)) {
081            return EMPTY_BYTE_ARRAY;
082        }
083        final int asciiLength = ascii.length;
084        // get length/8 times bytes with 3 bit shifts to the right of the length
085        final byte[] raw = new byte[asciiLength >> 3];
086        /*
087         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
088         * loop.
089         */
090        for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
091            for (int bits = 0; bits < BITS.length; ++bits) {
092                if (ascii[jj - bits] == '1') {
093                    raw[ii] |= BITS[bits];
094                }
095            }
096        }
097        return raw;
098    }
099
100    // ------------------------------------------------------------------------
101    //
102    // static codec operations
103    //
104    // ------------------------------------------------------------------------
105    /**
106     * Decodes a char array where each char represents an ASCII '0' or '1'.
107     *
108     * @param ascii
109     *                  each char represents an ASCII '0' or '1'
110     * @return the raw encoded binary where each bit corresponds to a char in the char array argument
111     */
112    public static byte[] fromAscii(final char[] ascii) {
113        if (ascii == null || ascii.length == 0) {
114            return EMPTY_BYTE_ARRAY;
115        }
116        final int asciiLength = ascii.length;
117        // get length/8 times bytes with 3 bit shifts to the right of the length
118        final byte[] raw = new byte[asciiLength >> 3];
119        /*
120         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
121         * loop.
122         */
123        for (int ii = 0, jj = asciiLength - 1; ii < raw.length; ii++, jj -= 8) {
124            for (int bits = 0; bits < BITS.length; ++bits) {
125                if (ascii[jj - bits] == '1') {
126                    raw[ii] |= BITS[bits];
127                }
128            }
129        }
130        return raw;
131    }
132
133    /**
134     * Returns {@code true} if the given array is {@code null} or empty (size 0.)
135     *
136     * @param array
137     *            the source array
138     * @return {@code true} if the given array is {@code null} or empty (size 0.)
139     */
140    static boolean isEmpty(final byte[] array) {
141        return array == null || array.length == 0;
142    }
143
144    /**
145     * Converts an array of raw binary data into an array of ASCII 0 and 1 character bytes - each byte is a truncated
146     * char.
147     *
148     * @param raw
149     *                  the raw binary data to convert
150     * @return an array of 0 and 1 character bytes for each bit of the argument
151     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
152     */
153    public static byte[] toAsciiBytes(final byte[] raw) {
154        if (isEmpty(raw)) {
155            return EMPTY_BYTE_ARRAY;
156        }
157        final int rawLength = raw.length;
158        // get 8 times the bytes with 3 bit shifts to the left of the length
159        final byte[] l_ascii = new byte[rawLength << 3];
160        /*
161         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
162         * loop.
163         */
164        for (int ii = 0, jj = l_ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
165            for (int bits = 0; bits < BITS.length; ++bits) {
166                if ((raw[ii] & BITS[bits]) == 0) {
167                    l_ascii[jj - bits] = '0';
168                } else {
169                    l_ascii[jj - bits] = '1';
170                }
171            }
172        }
173        return l_ascii;
174    }
175
176    /**
177     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
178     *
179     * @param raw
180     *                  the raw binary data to convert
181     * @return an array of 0 and 1 characters for each bit of the argument
182     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
183     */
184    public static char[] toAsciiChars(final byte[] raw) {
185        if (isEmpty(raw)) {
186            return EMPTY_CHAR_ARRAY;
187        }
188        final int rawLength = raw.length;
189        // get 8 times the bytes with 3 bit shifts to the left of the length
190        final char[] l_ascii = new char[rawLength << 3];
191        /*
192         * We decr index jj by 8 as we go along to not recompute indices using multiplication every time inside the
193         * loop.
194         */
195        for (int ii = 0, jj = l_ascii.length - 1; ii < rawLength; ii++, jj -= 8) {
196            for (int bits = 0; bits < BITS.length; ++bits) {
197                if ((raw[ii] & BITS[bits]) == 0) {
198                    l_ascii[jj - bits] = '0';
199                } else {
200                    l_ascii[jj - bits] = '1';
201                }
202            }
203        }
204        return l_ascii;
205    }
206
207    /**
208     * Converts an array of raw binary data into a String of ASCII 0 and 1 characters.
209     *
210     * @param raw
211     *                  the raw binary data to convert
212     * @return a String of 0 and 1 characters representing the binary data
213     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
214     */
215    public static String toAsciiString(final byte[] raw) {
216        return new String(toAsciiChars(raw));
217    }
218
219    /**
220     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
221     *
222     * @param ascii
223     *                  each byte represents an ASCII '0' or '1'
224     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
225     * @see org.apache.commons.codec.Decoder#decode(Object)
226     */
227    @Override
228    public byte[] decode(final byte[] ascii) {
229        return fromAscii(ascii);
230    }
231
232    /**
233     * Decodes a byte array where each byte represents an ASCII '0' or '1'.
234     *
235     * @param ascii
236     *                  each byte represents an ASCII '0' or '1'
237     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
238     * @throws DecoderException
239     *                  if argument is not a byte[], char[] or String
240     * @see org.apache.commons.codec.Decoder#decode(Object)
241     */
242    @Override
243    public Object decode(final Object ascii) throws DecoderException {
244        if (ascii == null) {
245            return EMPTY_BYTE_ARRAY;
246        }
247        if (ascii instanceof byte[]) {
248            return fromAscii((byte[]) ascii);
249        }
250        if (ascii instanceof char[]) {
251            return fromAscii((char[]) ascii);
252        }
253        if (ascii instanceof String) {
254            return fromAscii(((String) ascii).toCharArray());
255        }
256        throw new DecoderException("argument not a byte array");
257    }
258
259    /**
260     * Converts an array of raw binary data into an array of ASCII 0 and 1 characters.
261     *
262     * @param raw
263     *                  the raw binary data to convert
264     * @return 0 and 1 ASCII character bytes one for each bit of the argument
265     * @see org.apache.commons.codec.BinaryEncoder#encode(byte[])
266     */
267    @Override
268    public byte[] encode(final byte[] raw) {
269        return toAsciiBytes(raw);
270    }
271
272    /**
273     * Converts an array of raw binary data into an array of ASCII 0 and 1 chars.
274     *
275     * @param raw
276     *                  the raw binary data to convert
277     * @return 0 and 1 ASCII character chars one for each bit of the argument
278     * @throws EncoderException
279     *                  if the argument is not a byte[]
280     * @see org.apache.commons.codec.Encoder#encode(Object)
281     */
282    @Override
283    public Object encode(final Object raw) throws EncoderException {
284        if (!(raw instanceof byte[])) {
285            throw new EncoderException("argument not a byte array");
286        }
287        return toAsciiChars((byte[]) raw);
288    }
289
290    /**
291     * Decodes a String where each char of the String represents an ASCII '0' or '1'.
292     *
293     * @param ascii
294     *                  String of '0' and '1' characters
295     * @return the raw encoded binary where each bit corresponds to a byte in the byte array argument
296     * @see org.apache.commons.codec.Decoder#decode(Object)
297     */
298    public byte[] toByteArray(final String ascii) {
299        if (ascii == null) {
300            return EMPTY_BYTE_ARRAY;
301        }
302        return fromAscii(ascii.toCharArray());
303    }
304}