You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

Simple8BitZipEncoding.java 7.6 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one
  3. * or more contributor license agreements. See the NOTICE file
  4. * distributed with this work for additional information
  5. * regarding copyright ownership. The ASF licenses this file
  6. * to you under the Apache License, Version 2.0 (the
  7. * "License"); you may not use this file except in compliance
  8. * with the License. You may obtain a copy of the License at
  9. *
  10. * http://www.apache.org/licenses/LICENSE-2.0
  11. *
  12. * Unless required by applicable law or agreed to in writing,
  13. * software distributed under the License is distributed on an
  14. * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15. * KIND, either express or implied. See the License for the
  16. * specific language governing permissions and limitations
  17. * under the License.
  18. */
  19. package org.apache.tools.zip;
  20. import java.io.IOException;
  21. import java.nio.ByteBuffer;
  22. import java.util.ArrayList;
  23. import java.util.Collections;
  24. import java.util.List;
  25. /**
  26. * This ZipEncoding implementation implements a simple 8bit character
  27. * set, which meets the following restrictions:
  28. *
  29. * <ul>
  30. * <li>Characters 0x0000 to 0x007f are encoded as the corresponding
  31. * byte values 0x00 to 0x7f.</li>
  32. * <li>All byte codes from 0x80 to 0xff are mapped to a unique unicode
  33. * character in the range 0x0080 to 0x7fff. (No support for
  34. * UTF-16 surrogates.)</li>
  35. * </ul>
  36. *
  37. * <p>These restrictions most notably apply to the most prominent
  38. * omissions of java-1.4's {@link java.nio.charset.Charset Charset}
  39. * implementation, Cp437 and Cp850.</p>
  40. *
  41. * <p>The methods of this class are reentrant.</p>
  42. */
  43. class Simple8BitZipEncoding implements ZipEncoding {
  44. /**
  45. * A character entity, which is put to the reverse mapping table
  46. * of a simple encoding.
  47. */
  48. private static final class Simple8BitChar implements Comparable<Simple8BitChar> {
  49. public final char unicode;
  50. public final byte code;
  51. Simple8BitChar(byte code, char unicode) {
  52. this.code = code;
  53. this.unicode = unicode;
  54. }
  55. public int compareTo(Simple8BitChar a) {
  56. return this.unicode - a.unicode;
  57. }
  58. @Override
  59. public String toString() {
  60. return "0x" + Integer.toHexString(0xffff & unicode)
  61. + "->0x" + Integer.toHexString(0xff & code);
  62. }
  63. @Override
  64. public boolean equals(Object o) {
  65. if (o instanceof Simple8BitChar) {
  66. Simple8BitChar other = (Simple8BitChar) o;
  67. return unicode == other.unicode && code == other.code;
  68. }
  69. return false;
  70. }
  71. @Override
  72. public int hashCode() {
  73. return unicode;
  74. }
  75. }
  76. /**
  77. * The characters for byte values of 128 to 255 stored as an array of
  78. * 128 chars.
  79. */
  80. private final char[] highChars;
  81. /**
  82. * A list of {@link Simple8BitChar} objects sorted by the unicode
  83. * field. This list is used to binary search reverse mapping of
  84. * unicode characters with a character code greater than 127.
  85. */
  86. private final List<Simple8BitChar> reverseMapping;
  87. /**
  88. * @param highChars The characters for byte values of 128 to 255
  89. * stored as an array of 128 chars.
  90. */
  91. public Simple8BitZipEncoding(char[] highChars) {
  92. this.highChars = highChars.clone();
  93. List<Simple8BitChar> temp =
  94. new ArrayList<Simple8BitChar>(this.highChars.length);
  95. byte code = 127;
  96. for (int i = 0; i < this.highChars.length; ++i) {
  97. temp.add(new Simple8BitChar(++code, this.highChars[i]));
  98. }
  99. Collections.sort(temp);
  100. this.reverseMapping = Collections.unmodifiableList(temp);
  101. }
  102. /**
  103. * Return the character code for a given encoded byte.
  104. *
  105. * @param b The byte to decode.
  106. * @return The associated character value.
  107. */
  108. public char decodeByte(byte b) {
  109. // code 0-127
  110. if (b >= 0) {
  111. return (char) b;
  112. }
  113. // byte is signed, so 128 == -128 and 255 == -1
  114. return this.highChars[128 + b];
  115. }
  116. /**
  117. * @param c The character to encode.
  118. * @return Whether the given unicode character is covered by this encoding.
  119. */
  120. public boolean canEncodeChar(char c) {
  121. if (c >= 0 && c < 128) {
  122. return true;
  123. }
  124. Simple8BitChar r = this.encodeHighChar(c);
  125. return r != null;
  126. }
  127. /**
  128. * Pushes the encoded form of the given character to the given byte buffer.
  129. *
  130. * @param bb The byte buffer to write to.
  131. * @param c The character to encode.
  132. * @return Whether the given unicode character is covered by this encoding.
  133. * If {@code false} is returned, nothing is pushed to the
  134. * byte buffer.
  135. */
  136. public boolean pushEncodedChar(ByteBuffer bb, char c) {
  137. if (c >= 0 && c < 128) {
  138. bb.put((byte) c);
  139. return true;
  140. }
  141. Simple8BitChar r = this.encodeHighChar(c);
  142. if (r == null) {
  143. return false;
  144. }
  145. bb.put(r.code);
  146. return true;
  147. }
  148. /**
  149. * @param c A unicode character in the range from 0x0080 to 0x7f00
  150. * @return A Simple8BitChar, if this character is covered by this encoding.
  151. * A {@code null} value is returned, if this character is not
  152. * covered by this encoding.
  153. */
  154. private Simple8BitChar encodeHighChar(char c) {
  155. // for performance an simplicity, yet another reincarnation of
  156. // binary search...
  157. int i0 = 0;
  158. int i1 = this.reverseMapping.size();
  159. while (i1 > i0) {
  160. int i = i0 + (i1 - i0) / 2;
  161. Simple8BitChar m = this.reverseMapping.get(i);
  162. if (m.unicode == c) {
  163. return m;
  164. }
  165. if (m.unicode < c) {
  166. i0 = i + 1;
  167. } else {
  168. i1 = i;
  169. }
  170. }
  171. if (i0 >= this.reverseMapping.size()) {
  172. return null;
  173. }
  174. Simple8BitChar r = this.reverseMapping.get(i0);
  175. if (r.unicode != c) {
  176. return null;
  177. }
  178. return r;
  179. }
  180. /**
  181. * @see
  182. * org.apache.tools.zip.ZipEncoding#canEncode(java.lang.String)
  183. */
  184. public boolean canEncode(String name) {
  185. for (int i=0;i<name.length();++i) {
  186. char c = name.charAt(i);
  187. if (!this.canEncodeChar(c)) {
  188. return false;
  189. }
  190. }
  191. return true;
  192. }
  193. /**
  194. * @see
  195. * org.apache.tools.zip.ZipEncoding#encode(java.lang.String)
  196. */
  197. public ByteBuffer encode(String name) {
  198. ByteBuffer out = ByteBuffer.allocate(name.length()
  199. + 6 + (name.length() + 1) / 2);
  200. for (int i=0;i<name.length();++i) {
  201. char c = name.charAt(i);
  202. if (out.remaining() < 6) {
  203. out = ZipEncodingHelper.growBuffer(out,out.position() + 6);
  204. }
  205. if (!this.pushEncodedChar(out,c)) {
  206. ZipEncodingHelper.appendSurrogate(out,c);
  207. }
  208. }
  209. out.limit(out.position());
  210. out.rewind();
  211. return out;
  212. }
  213. /**
  214. * @see
  215. * org.apache.tools.zip.ZipEncoding#decode(byte[])
  216. */
  217. public String decode(byte[] data) throws IOException {
  218. char [] ret = new char[data.length];
  219. for (int i=0;i<data.length;++i) {
  220. ret[i] = this.decodeByte(data[i]);
  221. }
  222. return new String(ret);
  223. }
  224. }