1 | /* This file is part of Aard Dictionary for Android <http://aarddict.org>. |
2 | * |
3 | * This program is free software: you can redistribute it and/or modify |
4 | * it under the terms of the GNU General Public License version 3 |
5 | * as published by the Free Software Foundation. |
6 | * |
7 | * This program is distributed in the hope that it will be useful, |
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | * GNU General Public License <http://www.gnu.org/licenses/gpl-3.0.txt> |
11 | * for more details. |
12 | * |
13 | * Copyright (C) 2010 Igor Tkach |
14 | */ |
15 | |
16 | package aarddict; |
17 | |
18 | import static java.lang.String.format; |
19 | |
20 | import java.io.ByteArrayInputStream; |
21 | import java.io.ByteArrayOutputStream; |
22 | import java.io.File; |
23 | import java.io.FileInputStream; |
24 | import java.io.IOException; |
25 | import java.io.UnsupportedEncodingException; |
26 | import java.math.BigInteger; |
27 | import java.nio.charset.Charset; |
28 | import java.security.MessageDigest; |
29 | import java.security.NoSuchAlgorithmException; |
30 | import java.util.AbstractList; |
31 | import java.util.ArrayList; |
32 | import java.util.Comparator; |
33 | import java.util.Iterator; |
34 | import java.util.List; |
35 | import java.util.Map; |
36 | import java.util.UUID; |
37 | import java.util.WeakHashMap; |
38 | import java.util.zip.DataFormatException; |
39 | import java.util.zip.Inflater; |
40 | |
41 | import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; |
42 | import org.codehaus.jackson.map.ObjectMapper; |
43 | |
44 | import android.util.Log; |
45 | |
46 | public final class Volume extends AbstractList<Entry> { |
47 | |
/** Tag attached to every log message written by this class. */
private static final String TAG = Volume.class.getName();

/** Charset looked up once under the name "utf8". */
static final Charset UTF8 = Charset.forName("utf8");

/** Metadata of the dictionary; taken from the known-metadata map, the cache directory or the volume file. */
public Metadata metadata;

/** Header parsed from the beginning of the volume file. */
public Header header;

/** Open handle used for all index, key and article reads. */
RandomAccessFile file;

/** Checksum string copied from the header; doubles as the volume id. */
String sha1sum;

/** The file this volume was opened from; re-read by {@code verify}. */
private File origFile;

/** Server + article path template, or {@code null} when the metadata does not provide both. */
private String articleURLTemplate;

/** Shared JSON mapper; configured below to tolerate unknown properties. */
static ObjectMapper mapper = new ObjectMapper();
static {
    mapper.getDeserializationConfig().set(
            org.codehaus.jackson.map.DeserializationConfig.Feature.FAIL_ON_UNKNOWN_PROPERTIES,
            false);
}
65 | |
66 | public Volume(File file, File cacheDir, Map<UUID, Metadata> knownMeta) throws IOException { |
67 | this.origFile = file; |
68 | init(new RandomAccessFile(file, "r"), cacheDir, knownMeta); |
69 | } |
70 | |
71 | private void init(RandomAccessFile file, File cacheDir, Map<UUID, Metadata> knownMeta) throws IOException { |
72 | this.file = file; |
73 | this.header = new Header(file); |
74 | this.sha1sum = header.sha1sum; |
75 | if (knownMeta.containsKey(header.uuid)) { |
76 | this.metadata = knownMeta.get(header.uuid); |
77 | } else { |
78 | String uuidStr = header.uuid.toString(); |
79 | File metadataCacheFile = new File(cacheDir, uuidStr); |
80 | if (metadataCacheFile.exists()) { |
81 | try { |
82 | long t0 = System.currentTimeMillis(); |
83 | this.metadata = mapper.readValue(metadataCacheFile, Metadata.class); |
84 | knownMeta.put(header.uuid, this.metadata); |
85 | Log.d(TAG, format("Loaded meta for %s from cache in %s", metadataCacheFile.getName(), (System.currentTimeMillis() - t0))); |
86 | } |
87 | catch(Exception e) { |
88 | Log.e(TAG, format("Failed to restore meta from cache file %s ", metadataCacheFile.getName()), e); |
89 | } |
90 | } |
91 | if (this.metadata == null) { |
92 | long t0 = System.currentTimeMillis(); |
93 | byte[] rawMeta = new byte[(int) header.metaLength]; |
94 | file.read(rawMeta); |
95 | String metadataStr = decompress(rawMeta); |
96 | this.metadata = mapper.readValue(metadataStr, Metadata.class); |
97 | Log.d(TAG, format("Read meta for in %s", header.uuid, (System.currentTimeMillis() - t0))); |
98 | knownMeta.put(header.uuid, this.metadata); |
99 | try { |
100 | mapper.writeValue(metadataCacheFile, this.metadata); |
101 | Log.d(TAG, format("Wrote metadata to cache file %s", metadataCacheFile.getName())); |
102 | } |
103 | catch (IOException e) { |
104 | Log.e(TAG, format("Failed to write metadata to cache file %s", metadataCacheFile.getName()), e); |
105 | } |
106 | } |
107 | } |
108 | initArticleURLTemplate(); |
109 | } |
110 | |
111 | public String getId() { |
112 | return sha1sum; |
113 | } |
114 | |
115 | public UUID getDictionaryId() { |
116 | return header.uuid; |
117 | } |
118 | |
119 | @Override |
120 | public int hashCode() { |
121 | return sha1sum.hashCode(); |
122 | } |
123 | |
124 | @Override |
125 | public boolean equals(Object obj) { |
126 | if (this == obj) |
127 | return true; |
128 | if (!super.equals(obj)) |
129 | return false; |
130 | if (getClass() != obj.getClass()) |
131 | return false; |
132 | Volume other = (Volume) obj; |
133 | if (sha1sum == null) { |
134 | if (other.sha1sum != null) |
135 | return false; |
136 | } |
137 | else if (!sha1sum.equals(other.sha1sum)) |
138 | return false; |
139 | return true; |
140 | } |
141 | |
142 | public String toString() { |
143 | return String.format("%s %s/%s(%s)", this.metadata.title, this.header.volume, |
144 | this.header.of, this.sha1sum); |
145 | }; |
146 | |
147 | IndexItem readIndexItem(long i) throws IOException { |
148 | Header h = this.header; |
149 | long pos = h.index1Offset + i * h.index1ItemSize; |
150 | RandomAccessFile f = this.file; |
151 | f.seek(pos); |
152 | IndexItem indexItem = new IndexItem(); |
153 | indexItem.keyPointer = f.readSpec(h.keyPointerSpec); |
154 | indexItem.articlePointer = f.readSpec(h.articlePointerSpec); |
155 | return indexItem; |
156 | } |
157 | |
158 | String readKey(long pointer) throws IOException { |
159 | Header h = this.header; |
160 | long pos = h.index2Offset + pointer; |
161 | RandomAccessFile f = this.file; |
162 | f.seek(pos); |
163 | int keyLength = (int)f.readSpec(h.keyLengthSpec); |
164 | return f.readUTF8(keyLength); |
165 | } |
166 | |
167 | Map <Long, Article> articleCache = new WeakHashMap<Long, Article>(20); |
168 | |
169 | Article readArticle(long pointer) throws IOException { |
170 | Article a = articleCache.get(pointer); |
171 | if (a != null) |
172 | return a; |
173 | Header h = this.header; |
174 | long pos = h.articleOffset + pointer; |
175 | RandomAccessFile f = this.file; |
176 | f.seek(pos); |
177 | long articleLength = f.readSpec(h.articleLengthSpec); |
178 | |
179 | byte[] articleBytes = new byte[(int) articleLength]; |
180 | f.read(articleBytes); |
181 | String serializedArticle = decompress(articleBytes); |
182 | a = Article.fromJsonStr(serializedArticle); |
183 | a.dictionaryUUID = h.uuid; |
184 | a.volumeId = h.sha1sum; |
185 | a.pointer = pointer; |
186 | articleCache.put(pointer, a); |
187 | return a; |
188 | } |
189 | |
190 | static Iterator<Entry> EMPTY_ITERATOR = new ArrayList<Entry>().iterator(); |
191 | |
192 | Iterator<Entry> lookup(final LookupWord lookupWord, final Comparator<Entry> comparator) { |
193 | if (lookupWord.isEmpty()) { |
194 | return EMPTY_ITERATOR; |
195 | } |
196 | |
197 | final String section = lookupWord.section; |
198 | final Entry lookupEntry = new Entry(this.getId(), lookupWord.word); |
199 | final int initialIndex = binarySearch(this, lookupEntry, comparator); |
200 | Iterator<Entry> iterator = new Iterator<Entry>() { |
201 | |
202 | int index = initialIndex; |
203 | Entry nextEntry; |
204 | |
205 | { |
206 | prepareNext(); |
207 | } |
208 | |
209 | private void prepareNext() { |
210 | Entry matchedEntry = get(index); |
211 | nextEntry = (0 == comparator.compare(matchedEntry, lookupEntry)) ? matchedEntry : null; |
212 | index++; |
213 | } |
214 | |
215 | public boolean hasNext() { |
216 | return nextEntry != null && index < header.indexCount - 1; |
217 | } |
218 | |
219 | public Entry next() { |
220 | Entry current = nextEntry; |
221 | current.section = section; |
222 | prepareNext(); |
223 | return current; |
224 | } |
225 | |
226 | public void remove() { |
227 | throw new UnsupportedOperationException(); |
228 | } |
229 | }; |
230 | |
231 | return iterator; |
232 | } |
233 | |
234 | public String getArticleURL(String title) { |
235 | String template = getArticleURLTemplate(); |
236 | if (template != null) { |
237 | return template.replace("$1", title); |
238 | } |
239 | return null; |
240 | } |
241 | |
242 | public String getArticleURLTemplate() { |
243 | return articleURLTemplate; |
244 | } |
245 | |
246 | private void initArticleURLTemplate() { |
247 | String[] serverAndArticlePath = getServerAndArticlePath(); |
248 | String server = serverAndArticlePath[0]; |
249 | String articlePath = serverAndArticlePath[1]; |
250 | if (server != null && articlePath != null) { |
251 | articleURLTemplate = server + articlePath; |
252 | } |
253 | else { |
254 | Log.d(TAG, "Not enough metadata to generate article url template"); |
255 | } |
256 | } |
257 | |
258 | @SuppressWarnings("unchecked") |
259 | private String[] getServerAndArticlePath() { |
260 | String[] result = new String[]{null, null}; |
261 | if (metadata.siteinfo != null){ |
262 | Map <String, Object> general = (Map <String, Object>)this.metadata.siteinfo.get("general"); |
263 | if (general != null) { |
264 | Object server = general.get("server"); |
265 | Object articlePath = general.get("articlepath"); |
266 | if (server != null) |
267 | result[0] = server.toString(); |
268 | if (articlePath != null) |
269 | result[1] = articlePath.toString(); |
270 | } |
271 | } |
272 | return result; |
273 | } |
274 | |
275 | Map <Integer, Entry> entryCache = new WeakHashMap<Integer, Entry>(100); |
276 | |
277 | @Override |
278 | public Entry get(int index) { |
279 | Entry entry = entryCache.get(index); |
280 | if (entry != null) { |
281 | return entry; |
282 | } |
283 | try { |
284 | IndexItem indexItem = readIndexItem(index); |
285 | String title = readKey(indexItem.keyPointer); |
286 | entry = new Entry(this.getId(), title, indexItem.articlePointer); |
287 | entryCache.put(index, entry); |
288 | return entry; |
289 | } |
290 | catch (IOException e) { |
291 | throw new RuntimeException(e); |
292 | } |
293 | } |
294 | |
295 | @Override |
296 | public int size() { |
297 | return (int) header.indexCount; |
298 | } |
299 | |
300 | public void close() throws IOException { |
301 | file.close(); |
302 | }; |
303 | |
304 | static String utf8(byte[] signature) { |
305 | try { |
306 | return new String(signature, "UTF-8"); |
307 | } |
308 | catch (UnsupportedEncodingException e) { |
309 | e.printStackTrace(); |
310 | return ""; |
311 | } |
312 | } |
313 | |
314 | static String decompress(byte[] bytes) { |
315 | String type = null; |
316 | long t0 = System.currentTimeMillis(); |
317 | try { |
318 | String result = decompressZlib(bytes); |
319 | type = "zlib"; |
320 | return result; |
321 | } |
322 | catch (Exception e1) { |
323 | try { |
324 | String result = decompressBz2(bytes); |
325 | type = "bz2"; |
326 | return result; |
327 | } |
328 | catch (IOException e2) { |
329 | String result = utf8(bytes); |
330 | type = "uncompressed"; |
331 | return result; |
332 | } |
333 | } |
334 | finally { |
335 | Log.d(TAG, "Decompressed " + type + " in " + (System.currentTimeMillis() - t0)); |
336 | } |
337 | } |
338 | |
339 | static String decompressZlib(byte[] bytes) throws IOException, DataFormatException { |
340 | Inflater decompressor = new Inflater(); |
341 | decompressor.setInput(bytes); |
342 | ByteArrayOutputStream out = new ByteArrayOutputStream(); |
343 | try { |
344 | byte[] buf = new byte[1024]; |
345 | while (!decompressor.finished()) { |
346 | int count = decompressor.inflate(buf); |
347 | out.write(buf, 0, count); |
348 | } |
349 | } |
350 | finally { |
351 | out.close(); |
352 | } |
353 | return utf8(out.toByteArray()); |
354 | } |
355 | |
356 | static String decompressBz2(byte[] bytes) throws IOException { |
357 | BZip2CompressorInputStream in = new BZip2CompressorInputStream(new ByteArrayInputStream(bytes)); |
358 | |
359 | int n = 0; |
360 | ByteArrayOutputStream out = new ByteArrayOutputStream(bytes.length*5); |
361 | byte[] buf = new byte[1024]; |
362 | try { |
363 | while (-1 != (n = in.read(buf))) { |
364 | out.write(buf, 0, n); |
365 | } |
366 | } |
367 | finally { |
368 | in.close(); |
369 | out.close(); |
370 | } |
371 | return utf8(out.toByteArray()); |
372 | } |
373 | |
374 | static UUID uuid(byte[] data) { |
375 | long msb = 0; |
376 | long lsb = 0; |
377 | assert data.length == 16; |
378 | for (int i = 0; i < 8; i++) |
379 | msb = (msb << 8) | (data[i] & 0xff); |
380 | for (int i = 8; i < 16; i++) |
381 | lsb = (lsb << 8) | (data[i] & 0xff); |
382 | return new UUID(msb, lsb); |
383 | } |
384 | |
385 | static <T> int binarySearch(List<? extends T> l, T key, Comparator<? super T> c) { |
386 | int lo = 0; |
387 | int hi = l.size(); |
388 | while (lo < hi) { |
389 | int mid = (lo + hi) / 2; |
390 | T midVal = l.get(mid); |
391 | int cmp = c.compare(midVal, key); |
392 | if (cmp < 0) { |
393 | lo = mid + 1; |
394 | } |
395 | else { |
396 | hi = mid; |
397 | } |
398 | } |
399 | return lo; |
400 | } |
401 | |
402 | public CharSequence getDisplayTitle() { |
403 | return getDisplayTitle(true); |
404 | } |
405 | |
406 | public CharSequence getDisplayTitle(boolean withVolumeNumber) { |
407 | StringBuilder s = new StringBuilder(this.metadata.title); |
408 | if (this.metadata.lang != null) { |
409 | s.append(String.format(" (%s)", this.metadata.lang)); |
410 | } |
411 | else { |
412 | if (this.metadata.sitelang != null) { |
413 | s.append(String.format(" (%s)", this.metadata.sitelang)); |
414 | } |
415 | else { |
416 | if (this.metadata.index_language != null && this.metadata.article_language != null) { |
417 | s.append(String.format(" (%s-%s)", this.metadata.index_language, this.metadata.article_language)); |
418 | } |
419 | } |
420 | } |
421 | if (this.header.of > 1 && withVolumeNumber) |
422 | s.append(String.format(" Vol. %s", this.header.volume)); |
423 | return s.toString(); |
424 | } |
425 | |
426 | public void verify(VerifyProgressListener listener) throws IOException, NoSuchAlgorithmException { |
427 | FileInputStream fis = new FileInputStream(origFile); |
428 | fis.skip(44); |
429 | byte[] buff = new byte[1 << 16]; |
430 | MessageDigest m = MessageDigest.getInstance("SHA-1"); |
431 | int readCount; |
432 | long totalReadCount = 0; |
433 | double totalBytes = origFile.length() - 44; |
434 | boolean proceed = true; |
435 | while ((readCount=fis.read(buff)) != -1) { |
436 | m.update(buff, 0, readCount); |
437 | totalReadCount += readCount; |
438 | proceed = listener.updateProgress(this, totalReadCount/totalBytes); |
439 | } |
440 | fis.close(); |
441 | if (proceed) { |
442 | BigInteger b = new BigInteger(1, m.digest()); |
443 | String calculated = b.toString(16); |
444 | Log.d(TAG, "calculated: " + calculated + " actual: " + sha1sum); |
445 | listener.verified(this, calculated.equals(this.sha1sum)); |
446 | } |
447 | } |
448 | } |