1 : //* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* ***** BEGIN LICENSE BLOCK *****
3 : * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License. You may obtain a copy of the License at
8 : * http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * The Original Code is Url Classifier code
16 : *
17 : * The Initial Developer of the Original Code is
18 : * the Mozilla Foundation.
19 : * Portions created by the Initial Developer are Copyright (C) 2011
20 : * the Initial Developer. All Rights Reserved.
21 : *
22 : * Contributor(s):
23 : * Dave Camp <dcamp@mozilla.com>
24 : * Gian-Carlo Pascutto <gpascutto@mozilla.com>
25 : *
26 : * Alternatively, the contents of this file may be used under the terms of
27 : * either the GNU General Public License Version 2 or later (the "GPL"), or
28 : * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 : * in which case the provisions of the GPL or the LGPL are applicable instead
30 : * of those above. If you wish to allow use of your version of this file only
31 : * under the terms of either the GPL or the LGPL, and not to allow others to
32 : * use your version of this file under the terms of the MPL, indicate your
33 : * decision by deleting the provisions above and replace them with the notice
34 : * and other provisions required by the GPL or the LGPL. If you do not delete
35 : * the provisions above, a recipient may use your version of this file under
36 : * the terms of any one of the MPL, the GPL or the LGPL.
37 : *
38 : * ***** END LICENSE BLOCK ***** */
39 :
40 : #include "LookupCache.h"
41 : #include "HashStore.h"
42 : #include "nsISeekableStream.h"
43 : #include "mozilla/Telemetry.h"
44 : #include "prlog.h"
45 : #include "prprf.h"
46 :
47 : // We act as the main entry point for all the real lookups,
48 : // so note that those are not done to the actual HashStore.
49 : // The latter solely exists to store the data needed to handle
50 : // the updates from the protocol.
51 :
52 : // This module has its own store, which stores the Completions,
53 : // mostly caching lookups that have happened over the net.
54 : // The prefixes are cached/checked by looking them up in the
55 : // PrefixSet.
56 :
57 : // Data format for the ".cache" files:
58 : // uint32 magic Identify the file type
59 : // uint32 version Version identifier for file format
60 : // uint32 numCompletions Amount of completions stored
61 : // 0...numCompletions 256-bit Completions
62 :
63 : // Name of the lookupcomplete cache
64 : #define CACHE_SUFFIX ".cache"
65 :
66 : // Name of the persistent PrefixSet storage
67 : #define PREFIXSET_SUFFIX ".pset"
68 :
69 : // NSPR_LOG_MODULES=UrlClassifierDbService:5
70 : extern PRLogModuleInfo *gUrlClassifierDbServiceLog;
71 : #if defined(PR_LOGGING)
72 : #define LOG(args) PR_LOG(gUrlClassifierDbServiceLog, PR_LOG_DEBUG, args)
73 : #define LOG_ENABLED() PR_LOG_TEST(gUrlClassifierDbServiceLog, 4)
74 : #else
75 : #define LOG(args)
76 : #define LOG_ENABLED() (false)
77 : #endif
78 :
79 : namespace mozilla {
80 : namespace safebrowsing {
81 :
82 : const uint32 LOOKUPCACHE_MAGIC = 0x1231af3e;
83 : const uint32 CURRENT_VERSION = 1;
84 :
85 45 : LookupCache::LookupCache(const nsACString& aTableName, nsIFile* aStoreDir)
86 : : mPrimed(false)
87 : , mTableName(aTableName)
88 45 : , mStoreDirectory(aStoreDir)
89 : {
90 45 : }
91 :
92 : nsresult
93 45 : LookupCache::Init()
94 : {
95 45 : mPrefixSet = new nsUrlClassifierPrefixSet();
96 45 : nsresult rv = mPrefixSet->Init(mTableName);
97 45 : NS_ENSURE_SUCCESS(rv, rv);
98 :
99 45 : return NS_OK;
100 : }
101 :
102 45 : LookupCache::~LookupCache()
103 : {
104 45 : }
105 :
106 : nsresult
107 45 : LookupCache::Open()
108 : {
109 90 : nsCOMPtr<nsIFile> storeFile;
110 :
111 45 : nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
112 45 : NS_ENSURE_SUCCESS(rv, rv);
113 :
114 45 : rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(CACHE_SUFFIX));
115 45 : NS_ENSURE_SUCCESS(rv, rv);
116 :
117 45 : rv = NS_NewLocalFileInputStream(getter_AddRefs(mInputStream), storeFile,
118 45 : PR_RDONLY);
119 :
120 45 : if (NS_FAILED(rv) && rv != NS_ERROR_FILE_NOT_FOUND) {
121 0 : Reset();
122 0 : return rv;
123 : }
124 :
125 45 : if (rv == NS_ERROR_FILE_NOT_FOUND) {
126 45 : Clear();
127 45 : UpdateHeader();
128 45 : return NS_OK;
129 : }
130 :
131 0 : rv = ReadHeader();
132 0 : NS_ENSURE_SUCCESS(rv, rv);
133 :
134 0 : LOG(("ReadCompletions"));
135 0 : rv = ReadCompletions();
136 0 : NS_ENSURE_SUCCESS(rv, rv);
137 :
138 0 : LOG(("Loading PrefixSet"));
139 0 : rv = LoadPrefixSet();
140 0 : NS_ENSURE_SUCCESS(rv, rv);
141 :
142 0 : return NS_OK;
143 : }
144 :
145 : nsresult
146 0 : LookupCache::Reset()
147 : {
148 0 : LOG(("LookupCache resetting"));
149 :
150 0 : nsCOMPtr<nsIFile> storeFile;
151 0 : nsCOMPtr<nsIFile> prefixsetFile;
152 0 : nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
153 0 : NS_ENSURE_SUCCESS(rv, rv);
154 0 : rv = mStoreDirectory->Clone(getter_AddRefs(prefixsetFile));
155 0 : NS_ENSURE_SUCCESS(rv, rv);
156 :
157 0 : rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(CACHE_SUFFIX));
158 0 : NS_ENSURE_SUCCESS(rv, rv);
159 0 : rv = prefixsetFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
160 0 : NS_ENSURE_SUCCESS(rv, rv);
161 :
162 0 : rv = storeFile->Remove(false);
163 0 : NS_ENSURE_SUCCESS(rv, rv);
164 0 : rv = prefixsetFile->Remove(false);
165 0 : NS_ENSURE_SUCCESS(rv, rv);
166 :
167 0 : Clear();
168 :
169 0 : return NS_OK;
170 : }
171 :
172 :
173 : nsresult
174 97 : LookupCache::Build(AddPrefixArray& aAddPrefixes,
175 : AddCompleteArray& aAddCompletes)
176 : {
177 : Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS,
178 97 : static_cast<PRUint32>(aAddCompletes.Length()));
179 :
180 97 : mCompletions.Clear();
181 97 : mCompletions.SetCapacity(aAddCompletes.Length());
182 273 : for (uint32 i = 0; i < aAddCompletes.Length(); i++) {
183 176 : mCompletions.AppendElement(aAddCompletes[i].CompleteHash());
184 : }
185 97 : aAddCompletes.Clear();
186 97 : mCompletions.Sort();
187 :
188 : Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES,
189 97 : static_cast<PRUint32>(aAddPrefixes.Length()));
190 :
191 97 : nsresult rv = ConstructPrefixSet(aAddPrefixes);
192 97 : NS_ENSURE_SUCCESS(rv, rv);
193 97 : mPrimed = true;
194 :
195 97 : return NS_OK;
196 : }
197 :
#if defined(DEBUG) && defined(PR_LOGGING)
// Debug helper: logs every stored completion, one line each. Does nothing
// unless logging is enabled for the module.
void
LookupCache::Dump()
{
  if (LOG_ENABLED()) {
    const PRUint32 count = mCompletions.Length();
    for (PRUint32 idx = 0; idx < count; ++idx) {
      nsCAutoString text;
      mCompletions[idx].ToString(text);
      LOG(("Completion: %s", text.get()));
    }
  }
}
#endif
212 :
213 : nsresult
214 345 : LookupCache::Has(const Completion& aCompletion,
215 : const Completion& aHostkey,
216 : const PRUint32 aHashKey,
217 : bool* aHas, bool* aComplete,
218 : Prefix* aOrigPrefix)
219 : {
220 345 : *aHas = *aComplete = false;
221 :
222 : // check completion store first
223 345 : if (mCompletions.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex) {
224 71 : LOG(("Complete in %s", mTableName.get()));
225 71 : *aComplete = true;
226 71 : *aHas = true;
227 71 : return NS_OK;
228 : }
229 :
230 274 : PRUint32 prefix = aCompletion.ToUint32();
231 274 : PRUint32 hostkey = aHostkey.ToUint32();
232 : PRUint32 codedkey;
233 274 : nsresult rv = KeyedHash(prefix, hostkey, aHashKey, &codedkey);
234 274 : NS_ENSURE_SUCCESS(rv, rv);
235 :
236 : Prefix codedPrefix;
237 274 : codedPrefix.FromUint32(codedkey);
238 274 : *aOrigPrefix = codedPrefix;
239 :
240 274 : bool ready = true;
241 : bool found;
242 274 : rv = mPrefixSet->Probe(codedkey, &ready, &found);
243 274 : NS_ENSURE_SUCCESS(rv, rv);
244 :
245 274 : LOG(("Probe in %s: %X, ready: %d found %d", mTableName.get(), prefix, ready, found));
246 :
247 274 : if (found) {
248 45 : *aHas = true;
249 : }
250 :
251 274 : return NS_OK;
252 : }
253 :
254 : nsresult
255 97 : LookupCache::WriteFile()
256 : {
257 194 : nsCOMPtr<nsIFile> storeFile;
258 97 : nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
259 97 : NS_ENSURE_SUCCESS(rv, rv);
260 97 : rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(CACHE_SUFFIX));
261 97 : NS_ENSURE_SUCCESS(rv, rv);
262 :
263 : // Need to close the inputstream here *before* rewriting its file.
264 : // Windows will fail if we don't.
265 97 : if (mInputStream) {
266 52 : rv = mInputStream->Close();
267 52 : NS_ENSURE_SUCCESS(rv, rv);
268 : }
269 :
270 194 : nsCOMPtr<nsIOutputStream> out;
271 97 : rv = NS_NewSafeLocalFileOutputStream(getter_AddRefs(out), storeFile,
272 97 : PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
273 97 : NS_ENSURE_SUCCESS(rv, rv);
274 :
275 97 : UpdateHeader();
276 97 : LOG(("Writing %d completions", mHeader.numCompletions));
277 :
278 : PRUint32 written;
279 97 : rv = out->Write(reinterpret_cast<char*>(&mHeader), sizeof(mHeader), &written);
280 97 : NS_ENSURE_SUCCESS(rv, rv);
281 :
282 97 : rv = WriteTArray(out, mCompletions);
283 97 : NS_ENSURE_SUCCESS(rv, rv);
284 :
285 194 : nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out);
286 97 : rv = safeOut->Finish();
287 97 : NS_ENSURE_SUCCESS(rv, rv);
288 :
289 97 : rv = EnsureSizeConsistent();
290 97 : NS_ENSURE_SUCCESS(rv, rv);
291 :
292 : // Reopen the file now that we've rewritten it.
293 97 : rv = NS_NewLocalFileInputStream(getter_AddRefs(mInputStream), storeFile,
294 97 : PR_RDONLY);
295 97 : NS_ENSURE_SUCCESS(rv, rv);
296 :
297 194 : nsCOMPtr<nsIFile> psFile;
298 97 : rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
299 97 : NS_ENSURE_SUCCESS(rv, rv);
300 :
301 97 : rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
302 97 : NS_ENSURE_SUCCESS(rv, rv);
303 :
304 97 : rv = mPrefixSet->StoreToFile(psFile);
305 97 : NS_WARN_IF_FALSE(NS_SUCCEEDED(rv), "failed to store the prefixset");
306 :
307 97 : return NS_OK;
308 : }
309 :
310 : void
311 45 : LookupCache::Clear()
312 : {
313 45 : mCompletions.Clear();
314 45 : mPrefixSet->SetPrefixes(nsnull, 0);
315 45 : mPrimed = false;
316 45 : }
317 :
318 : void
319 142 : LookupCache::UpdateHeader()
320 : {
321 142 : mHeader.magic = LOOKUPCACHE_MAGIC;
322 142 : mHeader.version = CURRENT_VERSION;
323 142 : mHeader.numCompletions = mCompletions.Length();
324 142 : }
325 :
326 : nsresult
327 97 : LookupCache::EnsureSizeConsistent()
328 : {
329 194 : nsCOMPtr<nsIFile> storeFile;
330 97 : nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
331 97 : NS_ENSURE_SUCCESS(rv, rv);
332 97 : rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(CACHE_SUFFIX));
333 97 : NS_ENSURE_SUCCESS(rv, rv);
334 :
335 : PRInt64 fileSize;
336 97 : rv = storeFile->GetFileSize(&fileSize);
337 97 : NS_ENSURE_SUCCESS(rv, rv);
338 :
339 97 : if (fileSize < 0) {
340 0 : return NS_ERROR_FAILURE;
341 : }
342 :
343 : PRInt64 expectedSize = sizeof(mHeader)
344 97 : + mHeader.numCompletions*sizeof(Completion);
345 97 : if (expectedSize != fileSize) {
346 0 : NS_WARNING("File length does not match. Probably corrupted.");
347 0 : Reset();
348 0 : return NS_ERROR_FILE_CORRUPTED;
349 : }
350 :
351 97 : return NS_OK;
352 : }
353 :
354 : nsresult
355 0 : LookupCache::ReadHeader()
356 : {
357 0 : if (!mInputStream) {
358 0 : Clear();
359 0 : UpdateHeader();
360 0 : return NS_OK;
361 : }
362 :
363 0 : nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
364 0 : nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
365 0 : NS_ENSURE_SUCCESS(rv, rv);
366 :
367 0 : void *buffer = &mHeader;
368 : rv = NS_ReadInputStreamToBuffer(mInputStream,
369 : &buffer,
370 0 : sizeof(Header));
371 0 : NS_ENSURE_SUCCESS(rv, rv);
372 :
373 0 : if (mHeader.magic != LOOKUPCACHE_MAGIC || mHeader.version != CURRENT_VERSION) {
374 0 : NS_WARNING("Unexpected header data in the store.");
375 0 : Reset();
376 0 : return NS_ERROR_FILE_CORRUPTED;
377 : }
378 0 : LOG(("%d completions present", mHeader.numCompletions));
379 :
380 0 : rv = EnsureSizeConsistent();
381 0 : NS_ENSURE_SUCCESS(rv, rv);
382 :
383 0 : return NS_OK;
384 : }
385 :
386 : nsresult
387 0 : LookupCache::ReadCompletions()
388 : {
389 0 : if (!mHeader.numCompletions) {
390 0 : mCompletions.Clear();
391 0 : return NS_OK;
392 : }
393 :
394 0 : nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
395 0 : nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, sizeof(Header));
396 0 : NS_ENSURE_SUCCESS(rv, rv);
397 :
398 0 : rv = ReadTArray(mInputStream, &mCompletions, mHeader.numCompletions);
399 0 : NS_ENSURE_SUCCESS(rv, rv);
400 :
401 0 : LOG(("Read %d completions", mCompletions.Length()));
402 :
403 0 : return NS_OK;
404 : }
405 :
406 : /* static */ bool
407 499 : LookupCache::IsCanonicalizedIP(const nsACString& aHost)
408 : {
409 : // The canonicalization process will have left IP addresses in dotted
410 : // decimal with no surprises.
411 : PRUint32 i1, i2, i3, i4;
412 : char c;
413 499 : if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
414 499 : &i1, &i2, &i3, &i4, &c) == 4) {
415 4 : return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
416 : }
417 :
418 495 : return false;
419 : }
420 :
421 : /* static */ nsresult
422 359 : LookupCache::GetKey(const nsACString& aSpec,
423 : Completion* aHash,
424 : nsCOMPtr<nsICryptoHash>& aCryptoHash)
425 : {
426 359 : nsACString::const_iterator begin, end, iter;
427 359 : aSpec.BeginReading(begin);
428 359 : aSpec.EndReading(end);
429 :
430 359 : iter = begin;
431 359 : if (!FindCharInReadable('/', iter, end)) {
432 0 : return NS_OK;
433 : }
434 :
435 718 : const nsCSubstring& host = Substring(begin, iter);
436 :
437 359 : if (IsCanonicalizedIP(host)) {
438 4 : nsCAutoString key;
439 2 : key.Assign(host);
440 2 : key.Append("/");
441 2 : return aHash->FromPlaintext(key, aCryptoHash);
442 : }
443 :
444 714 : nsTArray<nsCString> hostComponents;
445 357 : ParseString(PromiseFlatCString(host), '.', hostComponents);
446 :
447 357 : if (hostComponents.Length() < 2)
448 0 : return NS_ERROR_FAILURE;
449 :
450 357 : PRInt32 last = PRInt32(hostComponents.Length()) - 1;
451 714 : nsCAutoString lookupHost;
452 :
453 357 : if (hostComponents.Length() > 2) {
454 41 : lookupHost.Append(hostComponents[last - 2]);
455 41 : lookupHost.Append(".");
456 : }
457 :
458 357 : lookupHost.Append(hostComponents[last - 1]);
459 357 : lookupHost.Append(".");
460 357 : lookupHost.Append(hostComponents[last]);
461 357 : lookupHost.Append("/");
462 :
463 357 : return aHash->FromPlaintext(lookupHost, aCryptoHash);
464 : }
465 :
466 : /* static */ nsresult
467 140 : LookupCache::GetLookupFragments(const nsACString& aSpec,
468 : nsTArray<nsCString>* aFragments)
469 :
470 : {
471 140 : aFragments->Clear();
472 :
473 140 : nsACString::const_iterator begin, end, iter;
474 140 : aSpec.BeginReading(begin);
475 140 : aSpec.EndReading(end);
476 :
477 140 : iter = begin;
478 140 : if (!FindCharInReadable('/', iter, end)) {
479 0 : return NS_OK;
480 : }
481 :
482 280 : const nsCSubstring& host = Substring(begin, iter++);
483 280 : nsCAutoString path;
484 140 : path.Assign(Substring(iter, end));
485 :
486 : /**
487 : * From the protocol doc:
488 : * For the hostname, the client will try at most 5 different strings. They
489 : * are:
490 : * a) The exact hostname of the url
491 : * b) The 4 hostnames formed by starting with the last 5 components and
492 : * successivly removing the leading component. The top-level component
493 : * can be skipped. This is not done if the hostname is a numerical IP.
494 : */
495 280 : nsTArray<nsCString> hosts;
496 140 : hosts.AppendElement(host);
497 :
498 140 : if (!IsCanonicalizedIP(host)) {
499 138 : host.BeginReading(begin);
500 138 : host.EndReading(end);
501 138 : int numHostComponents = 0;
502 426 : while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
503 : numHostComponents < MAX_HOST_COMPONENTS) {
504 : // don't bother checking toplevel domains
505 150 : if (++numHostComponents >= 2) {
506 12 : host.EndReading(iter);
507 12 : hosts.AppendElement(Substring(end, iter));
508 : }
509 150 : end = begin;
510 150 : host.BeginReading(begin);
511 : }
512 : }
513 :
514 : /**
515 : * From the protocol doc:
516 : * For the path, the client will also try at most 6 different strings.
517 : * They are:
518 : * a) the exact path of the url, including query parameters
519 : * b) the exact path of the url, without query parameters
520 : * c) the 4 paths formed by starting at the root (/) and
521 : * successively appending path components, including a trailing
522 : * slash. This behavior should only extend up to the next-to-last
523 : * path component, that is, a trailing slash should never be
524 : * appended that was not present in the original url.
525 : */
526 280 : nsTArray<nsCString> paths;
527 280 : nsCAutoString pathToAdd;
528 :
529 140 : path.BeginReading(begin);
530 140 : path.EndReading(end);
531 140 : iter = begin;
532 140 : if (FindCharInReadable('?', iter, end)) {
533 3 : pathToAdd = Substring(begin, iter);
534 3 : paths.AppendElement(pathToAdd);
535 3 : end = iter;
536 : }
537 :
538 140 : int numPathComponents = 1;
539 140 : iter = begin;
540 288 : while (FindCharInReadable('/', iter, end) &&
541 : numPathComponents < MAX_PATH_COMPONENTS) {
542 8 : iter++;
543 8 : pathToAdd.Assign(Substring(begin, iter));
544 8 : paths.AppendElement(pathToAdd);
545 8 : numPathComponents++;
546 : }
547 :
548 : // If we haven't already done so, add the full path
549 140 : if (!pathToAdd.Equals(path)) {
550 133 : paths.AppendElement(path);
551 : }
552 : // Check an empty path (for whole-domain blacklist entries)
553 140 : paths.AppendElement(EmptyCString());
554 :
555 292 : for (PRUint32 hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
556 465 : for (PRUint32 pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
557 626 : nsCString key;
558 313 : key.Assign(hosts[hostIndex]);
559 313 : key.Append('/');
560 313 : key.Append(paths[pathIndex]);
561 313 : LOG(("Chking %s", key.get()));
562 :
563 313 : aFragments->AppendElement(key);
564 : }
565 : }
566 :
567 140 : return NS_OK;
568 : }
569 :
570 : /* static */ nsresult
571 0 : LookupCache::GetHostKeys(const nsACString& aSpec,
572 : nsTArray<nsCString>* aHostKeys)
573 : {
574 0 : nsACString::const_iterator begin, end, iter;
575 0 : aSpec.BeginReading(begin);
576 0 : aSpec.EndReading(end);
577 :
578 0 : iter = begin;
579 0 : if (!FindCharInReadable('/', iter, end)) {
580 0 : return NS_OK;
581 : }
582 :
583 0 : const nsCSubstring& host = Substring(begin, iter);
584 :
585 0 : if (IsCanonicalizedIP(host)) {
586 0 : nsCString *key = aHostKeys->AppendElement();
587 0 : if (!key)
588 0 : return NS_ERROR_OUT_OF_MEMORY;
589 :
590 0 : key->Assign(host);
591 0 : key->Append("/");
592 0 : return NS_OK;
593 : }
594 :
595 0 : nsTArray<nsCString> hostComponents;
596 0 : ParseString(PromiseFlatCString(host), '.', hostComponents);
597 :
598 0 : if (hostComponents.Length() < 2) {
599 : // no host or toplevel host, this won't match anything in the db
600 0 : return NS_OK;
601 : }
602 :
603 : // First check with two domain components
604 0 : PRInt32 last = PRInt32(hostComponents.Length()) - 1;
605 0 : nsCString *lookupHost = aHostKeys->AppendElement();
606 0 : if (!lookupHost)
607 0 : return NS_ERROR_OUT_OF_MEMORY;
608 :
609 0 : lookupHost->Assign(hostComponents[last - 1]);
610 0 : lookupHost->Append(".");
611 0 : lookupHost->Append(hostComponents[last]);
612 0 : lookupHost->Append("/");
613 :
614 : // Now check with three domain components
615 0 : if (hostComponents.Length() > 2) {
616 0 : nsCString *lookupHost2 = aHostKeys->AppendElement();
617 0 : if (!lookupHost2)
618 0 : return NS_ERROR_OUT_OF_MEMORY;
619 0 : lookupHost2->Assign(hostComponents[last - 2]);
620 0 : lookupHost2->Append(".");
621 0 : lookupHost2->Append(*lookupHost);
622 : }
623 :
624 0 : return NS_OK;
625 : }
626 :
627 : /* We have both a prefix and a domain. Drop the domain, but
628 : hash the domain, the prefix and a random value together,
629 : ensuring any collisions happens at a different points for
630 : different users.
631 : */
632 320 : /* static */ nsresult LookupCache::KeyedHash(PRUint32 aPref, PRUint32 aDomain,
633 : PRUint32 aKey, PRUint32* aOut)
634 : {
635 : /* This is a reimplementation of MurmurHash3 32-bit
636 : based on the public domain C++ sources.
637 : http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
638 : for nblocks = 2
639 : */
640 320 : PRUint32 c1 = 0xCC9E2D51;
641 320 : PRUint32 c2 = 0x1B873593;
642 320 : PRUint32 c3 = 0xE6546B64;
643 320 : PRUint32 c4 = 0x85EBCA6B;
644 320 : PRUint32 c5 = 0xC2B2AE35;
645 320 : PRUint32 h1 = aPref; // seed
646 : PRUint32 k1;
647 : PRUint32 karr[2];
648 :
649 320 : karr[0] = aDomain;
650 320 : karr[1] = aKey;
651 :
652 960 : for (PRUint32 i = 0; i < 2; i++) {
653 640 : k1 = karr[i];
654 640 : k1 *= c1;
655 640 : k1 = (k1 << 15) | (k1 >> (32-15));
656 640 : k1 *= c2;
657 :
658 640 : h1 ^= k1;
659 640 : h1 = (h1 << 13) | (h1 >> (32-13));
660 640 : h1 *= 5;
661 640 : h1 += c3;
662 : }
663 :
664 320 : h1 ^= 2; // len
665 : // fmix
666 320 : h1 ^= h1 >> 16;
667 320 : h1 *= c4;
668 320 : h1 ^= h1 >> 13;
669 320 : h1 *= c5;
670 320 : h1 ^= h1 >> 16;
671 :
672 320 : *aOut = h1;
673 :
674 320 : return NS_OK;
675 : }
676 :
677 0 : bool LookupCache::IsPrimed()
678 : {
679 0 : return mPrimed;
680 : }
681 :
682 : nsresult
683 97 : LookupCache::ConstructPrefixSet(AddPrefixArray& aAddPrefixes)
684 : {
685 194 : Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;
686 :
687 194 : FallibleTArray<PRUint32> array;
688 : nsresult rv;
689 97 : if (!array.SetCapacity(aAddPrefixes.Length())) {
690 0 : rv = NS_ERROR_OUT_OF_MEMORY;
691 0 : goto error_bailout;
692 : }
693 :
694 204 : for (uint32 i = 0; i < aAddPrefixes.Length(); i++) {
695 107 : array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32());
696 : }
697 97 : aAddPrefixes.Clear();
698 :
699 97 : if (array.IsEmpty()) {
700 : // DB is empty, but put a sentinel to show that we looked
701 59 : array.AppendElement(0);
702 : }
703 : // PrefixSet requires sorted order
704 97 : array.Sort();
705 :
706 : // construct new one, replace old entries
707 97 : rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
708 97 : if (NS_FAILED(rv)) {
709 0 : goto error_bailout;
710 : }
711 :
712 : #ifdef DEBUG
713 : PRUint32 size;
714 97 : size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
715 97 : LOG(("SB tree done, size = %d bytes\n", size));
716 : #endif
717 :
718 97 : mPrimed = true;
719 :
720 97 : return NS_OK;
721 :
722 : error_bailout:
723 : // load an empty prefixset so the browser can work
724 0 : nsAutoTArray<PRUint32, 1> sentinel;
725 0 : sentinel.Clear();
726 0 : sentinel.AppendElement(0);
727 0 : mPrefixSet->SetPrefixes(sentinel.Elements(), sentinel.Length());
728 0 : if (rv == NS_ERROR_OUT_OF_MEMORY) {
729 0 : Telemetry::Accumulate(Telemetry::URLCLASSIFIER_PS_OOM, 1);
730 : }
731 0 : return rv;
732 : }
733 :
734 : nsresult
735 0 : LookupCache::LoadPrefixSet()
736 : {
737 0 : nsCOMPtr<nsIFile> psFile;
738 0 : nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
739 0 : NS_ENSURE_SUCCESS(rv, rv);
740 :
741 0 : rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
742 0 : NS_ENSURE_SUCCESS(rv, rv);
743 :
744 : bool exists;
745 0 : rv = psFile->Exists(&exists);
746 0 : NS_ENSURE_SUCCESS(rv, rv);
747 :
748 0 : if (exists) {
749 0 : LOG(("stored PrefixSet exists, loading from disk"));
750 0 : rv = mPrefixSet->LoadFromFile(psFile);
751 : }
752 0 : if (!exists || NS_FAILED(rv)) {
753 0 : LOG(("no (usable) stored PrefixSet found"));
754 : } else {
755 0 : mPrimed = true;
756 : }
757 :
758 : #ifdef DEBUG
759 0 : if (mPrimed) {
760 0 : PRUint32 size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
761 0 : LOG(("SB tree done, size = %d bytes\n", size));
762 : }
763 : #endif
764 :
765 0 : return NS_OK;
766 : }
767 :
768 : nsresult
769 97 : LookupCache::GetPrefixes(nsTArray<PRUint32>* aAddPrefixes)
770 : {
771 97 : if (!mPrimed) {
772 : // This can happen if its a new table, so no error.
773 45 : LOG(("GetPrefixes from empty LookupCache"));
774 45 : return NS_OK;
775 : }
776 : PRUint32 cnt;
777 : PRUint32 *arr;
778 52 : nsresult rv = mPrefixSet->GetPrefixes(&cnt, &arr);
779 52 : NS_ENSURE_SUCCESS(rv, rv);
780 52 : if (!aAddPrefixes->AppendElements(arr, cnt))
781 0 : return NS_ERROR_FAILURE;
782 52 : nsMemory::Free(arr);
783 52 : return NS_OK;
784 : }
785 :
786 :
787 : }
788 : }
|