/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.nio.ByteBuffer;
import java.util.BitSet;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;

/**
 * Implements the Percent-Encoding scheme, as described in HTTP 1.1 specification. For extensibility, an array of
 * special US-ASCII characters can be specified in order to perform proper URI encoding for the different parts
 * of the URI.
 * <p>
 * The state of an instance is only written while it is being constructed. The internal {@link BitSet} is not
 * thread-safe in general, but after construction this class only reads from it (via {@link BitSet#get(int)}) and
 * never modifies it.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc3986#section-2.1">Percent-Encoding</a>
 * @since 1.12
 */
public class PercentCodec implements BinaryEncoder, BinaryDecoder {

    /**
     * The escape character used by the Percent-Encoding in order to introduce an encoded character.
     */
    private static final byte ESCAPE_CHAR = '%';

    /**
     * The bit set used to store the characters that should always be encoded.
     */
    private final BitSet alwaysEncodeChars = new BitSet();

    /**
     * The flag defining if the space character should be encoded as '+'.
     */
    private final boolean plusForSpace;

    /**
     * The minimum code of the bytes inserted in the bit set, used to prevent look-ups.
     */
    private int alwaysEncodeCharsMin = Integer.MAX_VALUE;

    /**
     * The maximum code of the bytes inserted in the bit set, used to prevent look-ups.
     */
    private int alwaysEncodeCharsMax = Integer.MIN_VALUE;

    /**
     * Constructs a Percent codec that will encode all the non US-ASCII characters using the Percent-Encoding
     * while it will not encode all the US-ASCII characters, except for character '%' that is used as escape
     * character for Percent-Encoding. The space character is not replaced by '+'.
     */
    public PercentCodec() {
        this.plusForSpace = false;
        insertAlwaysEncodeChar(ESCAPE_CHAR);
    }

    /**
     * Constructs a Percent codec by specifying the characters that belong to US-ASCII that should
     * always be encoded. The rest US-ASCII characters will not be encoded, except for character '%' that
     * is used as escape character for Percent-Encoding.
     *
     * @param alwaysEncodeChars the unsafe characters that should always be encoded; may be {@code null}, in
     *                          which case only '%' is always encoded
     * @param plusForSpace      the flag defining if the space character should be encoded as '+'
     * @throws IllegalArgumentException if {@code alwaysEncodeChars} contains a negative byte, i.e. a value
     *                                  outside the US-ASCII range
     */
    public PercentCodec(final byte[] alwaysEncodeChars, final boolean plusForSpace) {
        this.plusForSpace = plusForSpace;
        insertAlwaysEncodeChars(alwaysEncodeChars);
    }

    /**
     * Adds the bytes of the given array, followed by the escape character '%', into the BitSet for faster
     * lookup.
     *
     * @param alwaysEncodeCharsArray the bytes to register as always-encoded; a {@code null} array registers
     *                               only the escape character
     */
    private void insertAlwaysEncodeChars(final byte[] alwaysEncodeCharsArray) {
        if (alwaysEncodeCharsArray != null) {
            for (final byte b : alwaysEncodeCharsArray) {
                insertAlwaysEncodeChar(b);
            }
        }
        insertAlwaysEncodeChar(ESCAPE_CHAR);
    }

    /**
     * Inserts a single character into a BitSet and maintains the min and max of the characters of the
     * {@code BitSet alwaysEncodeChars} in order to avoid look-ups when a byte is out of this range.
     *
     * @param b the byte that is candidate for min and max limit
     * @throws IllegalArgumentException if {@code b} is negative
     */
    private void insertAlwaysEncodeChar(final byte b) {
        // BitSet.set(int) rejects negative indices with an IndexOutOfBoundsException; fail early with a
        // message that names the actual problem instead.
        if (b < 0) {
            throw new IllegalArgumentException("byte must be >= 0");
        }
        this.alwaysEncodeChars.set(b);
        if (b < alwaysEncodeCharsMin) {
            alwaysEncodeCharsMin = b;
        }
        if (b > alwaysEncodeCharsMax) {
            alwaysEncodeCharsMax = b;
        }
    }

    /**
     * Percent-Encoding based on RFC 3986. The non US-ASCII characters are encoded, as well as the
     * US-ASCII characters that are configured to be always encoded.
     *
     * @param bytes the bytes to encode, may be {@code null}
     * @return {@code null} for a {@code null} input; the input array itself when no byte needs to be changed;
     *         otherwise a new array holding the encoded bytes
     * @throws EncoderException declared by the {@code BinaryEncoder} contract
     */
    @Override
    public byte[] encode(final byte[] bytes) throws EncoderException {
        if (bytes == null) {
            return null;
        }

        final int expectedEncodingBytes = expectedEncodingBytes(bytes);
        // Percent-escapes always lengthen the output, so equal lengths mean nothing has to be escaped.
        final boolean willEncode = expectedEncodingBytes != bytes.length;
        if (willEncode || (plusForSpace && containsSpace(bytes))) {
            return doEncode(bytes, expectedEncodingBytes, willEncode);
        }
        return bytes;
    }

    /**
     * Writes the encoded form of {@code bytes} into a freshly allocated array.
     *
     * @param bytes          the bytes to encode
     * @param expectedLength the exact length of the encoded output, as computed by
     *                       {@link #expectedEncodingBytes(byte[])}
     * @param willEncode     whether at least one byte has to be percent-escaped
     * @return the encoded bytes
     */
    private byte[] doEncode(final byte[] bytes, final int expectedLength, final boolean willEncode) {
        final ByteBuffer buffer = ByteBuffer.allocate(expectedLength);
        for (final byte b : bytes) {
            if (willEncode && canEncode(b)) {
                // The byte is passed on unchanged: adding 256 to a negative byte and narrowing the result
                // back to byte yields the same value, so no sign adjustment is performed here.
                // NOTE(review): this relies on Utils.hexDigit only using the low four bits - confirm.
                final char hex1 = Utils.hexDigit(b >> 4);
                final char hex2 = Utils.hexDigit(b);
                buffer.put(ESCAPE_CHAR);
                buffer.put((byte) hex1);
                buffer.put((byte) hex2);
            } else if (plusForSpace && b == ' ') {
                buffer.put((byte) '+');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Computes the length of the encoded output: three bytes for every byte that is percent-escaped and one
     * byte for every other byte.
     *
     * @param bytes the bytes that are going to be encoded
     * @return the number of bytes of the encoded output
     */
    private int expectedEncodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (final byte b : bytes) {
            byteCount += canEncode(b) ? 3 : 1;
        }
        return byteCount;
    }

    /**
     * Tells whether the array contains at least one space character.
     *
     * @param bytes the bytes to scan
     * @return {@code true} if a byte equal to {@code ' '} is present
     */
    private boolean containsSpace(final byte[] bytes) {
        for (final byte b : bytes) {
            if (b == ' ') {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the byte has to be percent-escaped: either it is not a US-ASCII character, or it is one
     * of the characters registered to be always encoded.
     *
     * @param c the byte to test
     * @return {@code true} if the byte must be percent-escaped
     */
    private boolean canEncode(final byte c) {
        return !isAsciiChar(c) || (inAlwaysEncodeCharsRange(c) && alwaysEncodeChars.get(c));
    }

    /**
     * Tells whether the byte lies between the smallest and the largest registered always-encoded character,
     * so that the BitSet look-up can be skipped for bytes outside this range.
     *
     * @param c the byte to test
     * @return {@code true} if the byte is within the registered range
     */
    private boolean inAlwaysEncodeCharsRange(final byte c) {
        return c >= alwaysEncodeCharsMin && c <= alwaysEncodeCharsMax;
    }

    /**
     * Tells whether the byte is a US-ASCII character, i.e. is not negative.
     *
     * @param c the byte to test
     * @return {@code true} if the byte is greater than or equal to zero
     */
    private boolean isAsciiChar(final byte c) {
        return c >= 0;
    }

    /**
     * Decode bytes encoded with Percent-Encoding based on RFC 3986. The reverse process is performed in order to
     * decode the encoded characters to Unicode.
     *
     * @param bytes the bytes to decode, may be {@code null}
     * @return {@code null} for a {@code null} input; otherwise a new array holding the decoded bytes
     * @throws DecoderException if an escape character is not followed by two more bytes, or if
     *                          {@code Utils.digit16} rejects one of those two bytes
     */
    @Override
    public byte[] decode(final byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }

        final ByteBuffer buffer = ByteBuffer.allocate(expectedDecodingBytes(bytes));
        for (int i = 0; i < bytes.length; i++) {
            final byte b = bytes[i];
            if (b == ESCAPE_CHAR) {
                try {
                    // The two bytes after '%' are the high and the low hexadecimal digit.
                    final int u = Utils.digit16(bytes[++i]);
                    final int l = Utils.digit16(bytes[++i]);
                    buffer.put((byte) ((u << 4) + l));
                } catch (final ArrayIndexOutOfBoundsException e) {
                    // The input ended in the middle of an escape sequence.
                    throw new DecoderException("Invalid percent decoding: ", e);
                }
            } else if (plusForSpace && b == '+') {
                buffer.put((byte) ' ');
            } else {
                buffer.put(b);
            }
        }
        return buffer.array();
    }

    /**
     * Computes the length of the decoded output: every escape sequence (the escape character and the two
     * bytes after it) counts as one byte, as does every other byte.
     *
     * @param bytes the bytes that are going to be decoded
     * @return the number of bytes of the decoded output
     */
    private int expectedDecodingBytes(final byte[] bytes) {
        int byteCount = 0;
        for (int i = 0; i < bytes.length; ) {
            final byte b = bytes[i];
            i += b == ESCAPE_CHAR ? 3 : 1;
            byteCount++;
        }
        return byteCount;
    }

    /**
     * Encodes an object using the Percent-Encoding. Only byte[] objects are accepted.
     *
     * @param obj the object to encode
     * @return the encoding result byte[] as Object, or {@code null} if {@code obj} is {@code null}
     * @throws EncoderException if the object is not a byte array
     */
    @Override
    public Object encode(final Object obj) throws EncoderException {
        if (obj == null) {
            return null;
        }
        if (obj instanceof byte[]) {
            return encode((byte[]) obj);
        }
        throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent encoded");
    }

    /**
     * Decodes a byte[] Object, whose bytes are encoded with Percent-Encoding.
     *
     * @param obj the object to decode
     * @return the decoding result byte[] as Object, or {@code null} if {@code obj} is {@code null}
     * @throws DecoderException if the object is not a byte array
     */
    @Override
    public Object decode(final Object obj) throws DecoderException {
        if (obj == null) {
            return null;
        }
        if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        }
        throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be Percent decoded");
    }
}