| 1 | /* This file is part of Aard Dictionary for Android <http://aarddict.org>. |
| 2 | * |
| 3 | * This program is free software: you can redistribute it and/or modify |
| 4 | * it under the terms of the GNU General Public License version 3 |
| 5 | * as published by the Free Software Foundation. |
| 6 | * |
| 7 | * This program is distributed in the hope that it will be useful, |
| 8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 10 | * GNU General Public License <http://www.gnu.org/licenses/gpl-3.0.txt> |
| 11 | * for more details. |
| 12 | * |
| 13 | * Copyright (C) 2010 Igor Tkach |
| 14 | */ |
| 15 | |
| 16 | package aarddict; |
| 17 | |
| 18 | import static java.lang.String.format; |
| 19 | |
| 20 | import java.io.ByteArrayInputStream; |
| 21 | import java.io.ByteArrayOutputStream; |
| 22 | import java.io.File; |
| 23 | import java.io.FileInputStream; |
| 24 | import java.io.IOException; |
| 25 | import java.io.UnsupportedEncodingException; |
| 26 | import java.math.BigInteger; |
| 27 | import java.nio.charset.Charset; |
| 28 | import java.security.MessageDigest; |
| 29 | import java.security.NoSuchAlgorithmException; |
| 30 | import java.util.AbstractList; |
| 31 | import java.util.ArrayList; |
| 32 | import java.util.Comparator; |
| 33 | import java.util.Iterator; |
| 34 | import java.util.List; |
| 35 | import java.util.Map; |
| 36 | import java.util.UUID; |
| 37 | import java.util.WeakHashMap; |
| 38 | import java.util.zip.DataFormatException; |
| 39 | import java.util.zip.Inflater; |
| 40 | |
| 41 | import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; |
| 42 | import org.codehaus.jackson.map.ObjectMapper; |
| 43 | |
| 44 | import android.util.Log; |
| 45 | |
| 46 | public final class Volume extends AbstractList<Entry> { |
| 47 | |
| 48 | private final static String TAG = Volume.class.getName(); |
| 49 | |
| 50 | final static Charset UTF8 = Charset.forName("utf8"); |
| 51 | |
| 52 | public Metadata metadata; |
| 53 | public Header header; |
| 54 | RandomAccessFile file; |
| 55 | String sha1sum; |
| 56 | |
| 57 | private File origFile; |
| 58 | |
| 59 | private String articleURLTemplate; |
| 60 | |
| 61 | static ObjectMapper mapper = new ObjectMapper(); |
| 62 | static { |
| 63 | mapper.getDeserializationConfig().set(org.codehaus.jackson.map.DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES, false); |
| 64 | } |
| 65 | |
| 66 | public Volume(File file, File cacheDir, Map<UUID, Metadata> knownMeta) throws IOException { |
| 67 | this.origFile = file; |
| 68 | init(new RandomAccessFile(file, "r"), cacheDir, knownMeta); |
| 69 | } |
| 70 | |
| 71 | private void init(RandomAccessFile file, File cacheDir, Map<UUID, Metadata> knownMeta) throws IOException { |
| 72 | this.file = file; |
| 73 | this.header = new Header(file); |
| 74 | this.sha1sum = header.sha1sum; |
| 75 | if (knownMeta.containsKey(header.uuid)) { |
| 76 | this.metadata = knownMeta.get(header.uuid); |
| 77 | } else { |
| 78 | String uuidStr = header.uuid.toString(); |
| 79 | File metadataCacheFile = new File(cacheDir, uuidStr); |
| 80 | if (metadataCacheFile.exists()) { |
| 81 | try { |
| 82 | long t0 = System.currentTimeMillis(); |
| 83 | this.metadata = mapper.readValue(metadataCacheFile, Metadata.class); |
| 84 | knownMeta.put(header.uuid, this.metadata); |
| 85 | Log.d(TAG, format("Loaded meta for %s from cache in %s", metadataCacheFile.getName(), (System.currentTimeMillis() - t0))); |
| 86 | } |
| 87 | catch(Exception e) { |
| 88 | Log.e(TAG, format("Failed to restore meta from cache file %s ", metadataCacheFile.getName()), e); |
| 89 | } |
| 90 | } |
| 91 | if (this.metadata == null) { |
| 92 | long t0 = System.currentTimeMillis(); |
| 93 | byte[] rawMeta = new byte[(int) header.metaLength]; |
| 94 | file.read(rawMeta); |
| 95 | String metadataStr = decompress(rawMeta); |
| 96 | this.metadata = mapper.readValue(metadataStr, Metadata.class); |
| 97 | Log.d(TAG, format("Read meta for in %s", header.uuid, (System.currentTimeMillis() - t0))); |
| 98 | knownMeta.put(header.uuid, this.metadata); |
| 99 | try { |
| 100 | mapper.writeValue(metadataCacheFile, this.metadata); |
| 101 | Log.d(TAG, format("Wrote metadata to cache file %s", metadataCacheFile.getName())); |
| 102 | } |
| 103 | catch (IOException e) { |
| 104 | Log.e(TAG, format("Failed to write metadata to cache file %s", metadataCacheFile.getName()), e); |
| 105 | } |
| 106 | } |
| 107 | } |
| 108 | initArticleURLTemplate(); |
| 109 | } |
| 110 | |
| 111 | public String getId() { |
| 112 | return sha1sum; |
| 113 | } |
| 114 | |
| 115 | public UUID getDictionaryId() { |
| 116 | return header.uuid; |
| 117 | } |
| 118 | |
| 119 | @Override |
| 120 | public int hashCode() { |
| 121 | return sha1sum.hashCode(); |
| 122 | } |
| 123 | |
| 124 | @Override |
| 125 | public boolean equals(Object obj) { |
| 126 | if (this == obj) |
| 127 | return true; |
| 128 | if (!super.equals(obj)) |
| 129 | return false; |
| 130 | if (getClass() != obj.getClass()) |
| 131 | return false; |
| 132 | Volume other = (Volume) obj; |
| 133 | if (sha1sum == null) { |
| 134 | if (other.sha1sum != null) |
| 135 | return false; |
| 136 | } |
| 137 | else if (!sha1sum.equals(other.sha1sum)) |
| 138 | return false; |
| 139 | return true; |
| 140 | } |
| 141 | |
| 142 | public String toString() { |
| 143 | return String.format("%s %s/%s(%s)", this.metadata.title, this.header.volume, |
| 144 | this.header.of, this.sha1sum); |
| 145 | }; |
| 146 | |
| 147 | IndexItem readIndexItem(long i) throws IOException { |
| 148 | Header h = this.header; |
| 149 | long pos = h.index1Offset + i * h.index1ItemSize; |
| 150 | RandomAccessFile f = this.file; |
| 151 | f.seek(pos); |
| 152 | IndexItem indexItem = new IndexItem(); |
| 153 | indexItem.keyPointer = f.readSpec(h.keyPointerSpec); |
| 154 | indexItem.articlePointer = f.readSpec(h.articlePointerSpec); |
| 155 | return indexItem; |
| 156 | } |
| 157 | |
| 158 | String readKey(long pointer) throws IOException { |
| 159 | Header h = this.header; |
| 160 | long pos = h.index2Offset + pointer; |
| 161 | RandomAccessFile f = this.file; |
| 162 | f.seek(pos); |
| 163 | int keyLength = (int)f.readSpec(h.keyLengthSpec); |
| 164 | return f.readUTF8(keyLength); |
| 165 | } |
| 166 | |
| 167 | Map <Long, Article> articleCache = new WeakHashMap<Long, Article>(20); |
| 168 | |
| 169 | Article readArticle(long pointer) throws IOException { |
| 170 | Article a = articleCache.get(pointer); |
| 171 | if (a != null) |
| 172 | return a; |
| 173 | Header h = this.header; |
| 174 | long pos = h.articleOffset + pointer; |
| 175 | RandomAccessFile f = this.file; |
| 176 | f.seek(pos); |
| 177 | long articleLength = f.readSpec(h.articleLengthSpec); |
| 178 | |
| 179 | byte[] articleBytes = new byte[(int) articleLength]; |
| 180 | f.read(articleBytes); |
| 181 | String serializedArticle = decompress(articleBytes); |
| 182 | a = Article.fromJsonStr(serializedArticle); |
| 183 | a.dictionaryUUID = h.uuid; |
| 184 | a.volumeId = h.sha1sum; |
| 185 | a.pointer = pointer; |
| 186 | articleCache.put(pointer, a); |
| 187 | return a; |
| 188 | } |
| 189 | |
| 190 | static Iterator<Entry> EMPTY_ITERATOR = new ArrayList<Entry>().iterator(); |
| 191 | |
| 192 | Iterator<Entry> lookup(final LookupWord lookupWord, final Comparator<Entry> comparator) { |
| 193 | if (lookupWord.isEmpty()) { |
| 194 | return EMPTY_ITERATOR; |
| 195 | } |
| 196 | |
| 197 | final String section = lookupWord.section; |
| 198 | final Entry lookupEntry = new Entry(this.getId(), lookupWord.word); |
| 199 | final int initialIndex = binarySearch(this, lookupEntry, comparator); |
| 200 | Iterator<Entry> iterator = new Iterator<Entry>() { |
| 201 | |
| 202 | int index = initialIndex; |
| 203 | Entry nextEntry; |
| 204 | |
| 205 | { |
| 206 | prepareNext(); |
| 207 | } |
| 208 | |
| 209 | private void prepareNext() { |
| 210 | Entry matchedEntry = get(index); |
| 211 | nextEntry = (0 == comparator.compare(matchedEntry, lookupEntry)) ? matchedEntry : null; |
| 212 | index++; |
| 213 | } |
| 214 | |
| 215 | public boolean hasNext() { |
| 216 | return nextEntry != null && index < header.indexCount - 1; |
| 217 | } |
| 218 | |
| 219 | public Entry next() { |
| 220 | Entry current = nextEntry; |
| 221 | current.section = section; |
| 222 | prepareNext(); |
| 223 | return current; |
| 224 | } |
| 225 | |
| 226 | public void remove() { |
| 227 | throw new UnsupportedOperationException(); |
| 228 | } |
| 229 | }; |
| 230 | |
| 231 | return iterator; |
| 232 | } |
| 233 | |
| 234 | public String getArticleURL(String title) { |
| 235 | String template = getArticleURLTemplate(); |
| 236 | if (template != null) { |
| 237 | return template.replace("$1", title); |
| 238 | } |
| 239 | return null; |
| 240 | } |
| 241 | |
| 242 | public String getArticleURLTemplate() { |
| 243 | return articleURLTemplate; |
| 244 | } |
| 245 | |
| 246 | private void initArticleURLTemplate() { |
| 247 | String[] serverAndArticlePath = getServerAndArticlePath(); |
| 248 | String server = serverAndArticlePath[0]; |
| 249 | String articlePath = serverAndArticlePath[1]; |
| 250 | if (server != null && articlePath != null) { |
| 251 | articleURLTemplate = server + articlePath; |
| 252 | } |
| 253 | else { |
| 254 | Log.d(TAG, "Not enough metadata to generate article url template"); |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | @SuppressWarnings("unchecked") |
| 259 | private String[] getServerAndArticlePath() { |
| 260 | String[] result = new String[]{null, null}; |
| 261 | if (metadata.siteinfo != null){ |
| 262 | Map <String, Object> general = (Map <String, Object>)this.metadata.siteinfo.get("general"); |
| 263 | if (general != null) { |
| 264 | Object server = general.get("server"); |
| 265 | Object articlePath = general.get("articlepath"); |
| 266 | if (server != null) |
| 267 | result[0] = server.toString(); |
| 268 | if (articlePath != null) |
| 269 | result[1] = articlePath.toString(); |
| 270 | } |
| 271 | } |
| 272 | return result; |
| 273 | } |
| 274 | |
| 275 | Map <Integer, Entry> entryCache = new WeakHashMap<Integer, Entry>(100); |
| 276 | |
| 277 | @Override |
| 278 | public Entry get(int index) { |
| 279 | Entry entry = entryCache.get(index); |
| 280 | if (entry != null) { |
| 281 | return entry; |
| 282 | } |
| 283 | try { |
| 284 | IndexItem indexItem = readIndexItem(index); |
| 285 | String title = readKey(indexItem.keyPointer); |
| 286 | entry = new Entry(this.getId(), title, indexItem.articlePointer); |
| 287 | entryCache.put(index, entry); |
| 288 | return entry; |
| 289 | } |
| 290 | catch (IOException e) { |
| 291 | throw new RuntimeException(e); |
| 292 | } |
| 293 | } |
| 294 | |
| 295 | @Override |
| 296 | public int size() { |
| 297 | return (int) header.indexCount; |
| 298 | } |
| 299 | |
| 300 | public void close() throws IOException { |
| 301 | file.close(); |
| 302 | }; |
| 303 | |
| 304 | static String utf8(byte[] signature) { |
| 305 | try { |
| 306 | return new String(signature, "UTF-8"); |
| 307 | } |
| 308 | catch (UnsupportedEncodingException e) { |
| 309 | e.printStackTrace(); |
| 310 | return ""; |
| 311 | } |
| 312 | } |
| 313 | |
| 314 | static String decompress(byte[] bytes) { |
| 315 | String type = null; |
| 316 | long t0 = System.currentTimeMillis(); |
| 317 | try { |
| 318 | String result = decompressZlib(bytes); |
| 319 | type = "zlib"; |
| 320 | return result; |
| 321 | } |
| 322 | catch (Exception e1) { |
| 323 | try { |
| 324 | String result = decompressBz2(bytes); |
| 325 | type = "bz2"; |
| 326 | return result; |
| 327 | } |
| 328 | catch (IOException e2) { |
| 329 | String result = utf8(bytes); |
| 330 | type = "uncompressed"; |
| 331 | return result; |
| 332 | } |
| 333 | } |
| 334 | finally { |
| 335 | Log.d(TAG, "Decompressed " + type + " in " + (System.currentTimeMillis() - t0)); |
| 336 | } |
| 337 | } |
| 338 | |
| 339 | static String decompressZlib(byte[] bytes) throws IOException, DataFormatException { |
| 340 | Inflater decompressor = new Inflater(); |
| 341 | decompressor.setInput(bytes); |
| 342 | ByteArrayOutputStream out = new ByteArrayOutputStream(); |
| 343 | try { |
| 344 | byte[] buf = new byte[1024]; |
| 345 | while (!decompressor.finished()) { |
| 346 | int count = decompressor.inflate(buf); |
| 347 | out.write(buf, 0, count); |
| 348 | } |
| 349 | } |
| 350 | finally { |
| 351 | out.close(); |
| 352 | } |
| 353 | return utf8(out.toByteArray()); |
| 354 | } |
| 355 | |
| 356 | static String decompressBz2(byte[] bytes) throws IOException { |
| 357 | BZip2CompressorInputStream in = new BZip2CompressorInputStream(new ByteArrayInputStream(bytes)); |
| 358 | |
| 359 | int n = 0; |
| 360 | ByteArrayOutputStream out = new ByteArrayOutputStream(bytes.length*5); |
| 361 | byte[] buf = new byte[1024]; |
| 362 | try { |
| 363 | while (-1 != (n = in.read(buf))) { |
| 364 | out.write(buf, 0, n); |
| 365 | } |
| 366 | } |
| 367 | finally { |
| 368 | in.close(); |
| 369 | out.close(); |
| 370 | } |
| 371 | return utf8(out.toByteArray()); |
| 372 | } |
| 373 | |
| 374 | static UUID uuid(byte[] data) { |
| 375 | long msb = 0; |
| 376 | long lsb = 0; |
| 377 | assert data.length == 16; |
| 378 | for (int i = 0; i < 8; i++) |
| 379 | msb = (msb << 8) | (data[i] & 0xff); |
| 380 | for (int i = 8; i < 16; i++) |
| 381 | lsb = (lsb << 8) | (data[i] & 0xff); |
| 382 | return new UUID(msb, lsb); |
| 383 | } |
| 384 | |
| 385 | static <T> int binarySearch(List<? extends T> l, T key, Comparator<? super T> c) { |
| 386 | int lo = 0; |
| 387 | int hi = l.size(); |
| 388 | while (lo < hi) { |
| 389 | int mid = (lo + hi) / 2; |
| 390 | T midVal = l.get(mid); |
| 391 | int cmp = c.compare(midVal, key); |
| 392 | if (cmp < 0) { |
| 393 | lo = mid + 1; |
| 394 | } |
| 395 | else { |
| 396 | hi = mid; |
| 397 | } |
| 398 | } |
| 399 | return lo; |
| 400 | } |
| 401 | |
| 402 | public CharSequence getDisplayTitle() { |
| 403 | return getDisplayTitle(true); |
| 404 | } |
| 405 | |
| 406 | public CharSequence getDisplayTitle(boolean withVolumeNumber) { |
| 407 | StringBuilder s = new StringBuilder(this.metadata.title); |
| 408 | if (this.metadata.lang != null) { |
| 409 | s.append(String.format(" (%s)", this.metadata.lang)); |
| 410 | } |
| 411 | else { |
| 412 | if (this.metadata.sitelang != null) { |
| 413 | s.append(String.format(" (%s)", this.metadata.sitelang)); |
| 414 | } |
| 415 | else { |
| 416 | if (this.metadata.index_language != null && this.metadata.article_language != null) { |
| 417 | s.append(String.format(" (%s-%s)", this.metadata.index_language, this.metadata.article_language)); |
| 418 | } |
| 419 | } |
| 420 | } |
| 421 | if (this.header.of > 1 && withVolumeNumber) |
| 422 | s.append(String.format(" Vol. %s", this.header.volume)); |
| 423 | return s.toString(); |
| 424 | } |
| 425 | |
| 426 | public void verify(VerifyProgressListener listener) throws IOException, NoSuchAlgorithmException { |
| 427 | FileInputStream fis = new FileInputStream(origFile); |
| 428 | fis.skip(44); |
| 429 | byte[] buff = new byte[1 << 16]; |
| 430 | MessageDigest m = MessageDigest.getInstance("SHA-1"); |
| 431 | int readCount; |
| 432 | long totalReadCount = 0; |
| 433 | double totalBytes = origFile.length() - 44; |
| 434 | boolean proceed = true; |
| 435 | while ((readCount=fis.read(buff)) != -1) { |
| 436 | m.update(buff, 0, readCount); |
| 437 | totalReadCount += readCount; |
| 438 | proceed = listener.updateProgress(this, totalReadCount/totalBytes); |
| 439 | } |
| 440 | fis.close(); |
| 441 | if (proceed) { |
| 442 | BigInteger b = new BigInteger(1, m.digest()); |
| 443 | String calculated = b.toString(16); |
| 444 | Log.d(TAG, "calculated: " + calculated + " actual: " + sha1sum); |
| 445 | listener.verified(this, calculated.equals(this.sha1sum)); |
| 446 | } |
| 447 | } |
| 448 | } |