Start importing places. This is just about profiling for now.

This commit is contained in:
Nick Alexander 2016-08-08 23:51:23 -07:00 committed by Richard Newman
parent 71446a3af5
commit 3775c7c773
5 changed files with 161 additions and 2 deletions

View file

@ -0,0 +1,74 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
(ns datomish.places.import
#?(:cljs
(:require-macros
[datomish.pair-chan :refer [go-pair <?]]
[cljs.core.async.macros :refer [go]]))
(:require
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]]
[datomish.sqlite :as s]
[datomish.api :as d]
#?@(:clj [[datomish.pair-chan :refer [go-pair <?]]
[clojure.core.async :as a :refer [chan go <! >!]]])
#?@(:cljs [[datomish.pair-chan]
[cljs.core.async :as a :refer [chan <! >!]]])))
;; Schema attributes used to store imported Places history. Each attribute map
;; gets its own freshly created :db.part/user id literal and is installed into
;; :db.part/db. Every attribute carries exactly one extra constraint (either
;; :db/unique or :db/cardinality), which the local constructor takes as a
;; key/value pair so the four entries stay on one line each.
(def places-schema-fragment
  (let [attribute (fn [ident constraint-key constraint-val value-type]
                    {:db/id                 (d/id-literal :db.part/user)
                     :db/ident              ident
                     constraint-key         constraint-val
                     :db/valueType          value-type
                     :db.install/_attribute :db.part/db})]
    [(attribute :page/url     :db/unique      :db.unique/identity  :db.type/string) ;; TODO: uri
     (attribute :page/guid    :db/unique      :db.unique/identity  :db.type/string) ;; TODO: uuid or guid?
     (attribute :page/title   :db/cardinality :db.cardinality/one  :db.type/string)
     (attribute :page/visitAt :db/cardinality :db.cardinality/many :db.type/long)   ;; TODO: instant
     ]))
(defn- place->entity
  "Turn one `[place-id rows]` entry (as produced by `(group-by :id ...)` in
  `import-places`) into a single entity map suitable for transacting.

  `rows` are the result rows for one place, one row per visit. They are read
  with bare column keys (`:url`, `:guid`, `:title`, `:visit_date`).

  The URL and GUID are taken from the first row. `:page/visitAt` collects the
  `:visit_date` of every row. `:page/title` is only included when some row has
  a non-nil title, so that a nil value is never asserted."
  [[id rows]]
  (let [place (first rows)
        ;; Rows are keyed by column name, so the title lives under :title.
        ;; (Looking for :page/title here never matches, which silently dropped
        ;; every title.)
        title (some :title rows)]
    (cond-> {:db/id        (d/id-literal :db.part/user)
             :page/url     (:url place)
             :page/guid    (:guid place)
             :page/visitAt (map :visit_date rows)}
      title (assoc :page/title title))))
(defn import-places
  "Import history from the Places database reachable via `places-connection`
  into the store behind `conn`.

  Steps, all inside one `go-pair` block:
  1. transact `places-schema-fragment`;
  2. read place/visit rows from `moz_places` and `moz_historyvisits`;
  3. group the rows by place id and convert each group with `place->entity`;
  4. transact the resulting entities.

  The value of the block is the result of that final `d/<transact!`."
  [conn places-connection]
  (go-pair
    ;; Ensure schema fragment is in place, even though it may cost a (mostly empty) transaction.
    (<? (d/<transact! conn places-schema-fragment))
    (let [;; NOTE(review): the LEFT JOIN has no ON clause and the join condition
          ;; sits in WHERE, so places without visits appear to be filtered out
          ;; (i.e. this behaves like an inner join) -- confirm that is intended.
          sql-fragments ["SELECT DISTINCT p.id, p.url, p.title, p.visit_count, p.last_visit_date, p.guid,"
                         "hv.visit_date"
                         "FROM moz_places AS p LEFT JOIN moz_historyvisits AS hv"
                         "WHERE p.hidden = 0 AND p.id = hv.place_id"
                         "ORDER BY p.id, hv.visit_date"
                         "LIMIT 20000"] ;; TODO: remove limit.
          sql           (apply str (interpose " " sql-fragments))
          ;; all-rows receives the statement wrapped in a vector (no bindings follow).
          visit-rows    (<? (s/all-rows places-connection [sql]))
          entities      (map place->entity (group-by :id visit-rows))]
      (<? (d/<transact! conn entities)))))

View file

@ -70,3 +70,5 @@
(f (first xs) (first ys)) (f (first xs) (first ys))
(recur f (rest xs) (rest ys))))) (recur f (rest xs) (rest ys)))))
(defn mapvals
  "Return a collection of the same kind as `m` (built on `(empty m)`) in which
  the second element of every entry has been replaced by `(f second-element)`;
  the first element (the key, for a map) is kept unchanged."
  [f m]
  (reduce (fn [acc entry]
            (conj acc [(first entry) (f (second entry))]))
          (empty m)
          m))

View file

@ -15,12 +15,14 @@
[datomish.sqlite :as s] [datomish.sqlite :as s]
[datomish.sqlite-schema] [datomish.sqlite-schema]
[datomish.datom] [datomish.datom]
#?@(:clj [[datomish.pair-chan :refer [go-pair <?]] #?@(:clj [[datomish.jdbc-sqlite]
[datomish.pair-chan :refer [go-pair <?]]
[tempfile.core :refer [tempfile with-tempfile]] [tempfile.core :refer [tempfile with-tempfile]]
[datomish.test-macros :refer [deftest-async]] [datomish.test-macros :refer [deftest-async]]
[clojure.test :as t :refer [is are deftest testing]] [clojure.test :as t :refer [is are deftest testing]]
[clojure.core.async :refer [go <! >!]]]) [clojure.core.async :refer [go <! >!]]])
#?@(:cljs [[datomish.pair-chan] #?@(:cljs [[datomish.promise-sqlite]
[datomish.pair-chan]
[datomish.test-macros :refer-macros [deftest-async]] [datomish.test-macros :refer-macros [deftest-async]]
[datomish.node-tempfile :refer [tempfile]] [datomish.node-tempfile :refer [tempfile]]
[cljs.test :as t :refer-macros [is are deftest testing async]] [cljs.test :as t :refer-macros [is are deftest testing async]]

View file

@ -0,0 +1,79 @@
;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
(ns datomish.places.import-test
#?(:cljs
(:require-macros
[datomish.pair-chan :refer [go-pair <?]]
[datomish.node-tempfile-macros :refer [with-tempfile]]
[cljs.core.async.macros :as a :refer [go]]))
(:require
[taoensso.tufte :as tufte
#?(:cljs :refer-macros :clj :refer) [defnp p profiled profile]]
[datomish.api :as d]
[datomish.places.import :as pi]
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise cond-let]]
[datomish.sqlite :as s]
#?@(:clj [[datomish.jdbc-sqlite]
[datomish.pair-chan :refer [go-pair <?]]
[tempfile.core :refer [tempfile with-tempfile]]
[datomish.test-macros :refer [deftest-async]]
[clojure.test :as t :refer [is are deftest testing]]
[clojure.core.async :refer [go <! >!]]])
#?@(:cljs [[datomish.promise-sqlite]
[datomish.pair-chan]
[datomish.test-macros :refer-macros [deftest-async]]
[datomish.node-tempfile :refer [tempfile]]
[cljs.test :as t :refer-macros [is are deftest testing async]]
[cljs.core.async :as a :refer [<! >!]]])))
;; ClojureScript only: bind the name Throwable to js/Error.
;; NOTE(review): presumably this lets code that names Throwable (e.g. in a
;; catch clause emitted by a shared macro) resolve under ClojureScript -- confirm.
#?(:cljs
(def Throwable js/Error))
;; Register tufte's basic println handler (with an empty options map) when this
;; namespace loads. NOTE(review): presumably this is what makes the `profile`
;; calls in the tests below print their results -- confirm against tufte's docs.
(tufte/add-basic-println-handler! {})
;; Profiles a single import of Places history into a fresh store created on a
;; temp file. Requires an existing Places database at /tmp/places.sqlite.
;; The store connection is closed in `finally`; the Places connection is not
;; closed here.
(deftest-async test-import
(with-tempfile [t (tempfile)]
(let [places (<? (s/<sqlite-connection "/tmp/places.sqlite"))
conn (<? (d/<connect t))]
(try
(let [report (profile {:dynamic? true} (<? (pi/import-places conn places)))]
;; NOTE(review): `count` always returns a number, never nil, so this
;; assertion cannot pass. Presumably it is a deliberate way to make the
;; test runner print the number of transacted datoms while this is only
;; used for profiling -- confirm, and replace with a real expectation.
(is (= nil (count (:tx-data report)))))
(finally
(<? (d/<close conn)))))))
;; Imports the same Places database twice into a fresh store and profiles only
;; the second import. Requires an existing Places database at
;; /tmp/places.sqlite. The store connection is closed in `finally`; the Places
;; connection is not closed here.
(deftest-async test-import-repeat
;; Repeated import is worst possible for the big joins to find datoms that already exist, because
;; *every* datom added in the first import will match in the second.
(with-tempfile [t (tempfile)]
(let [places (<? (s/<sqlite-connection "/tmp/places.sqlite"))
conn (<? (d/<connect t))]
(try
;; report0 is never read; the first import runs only to populate the store.
(let [report0 (<? (pi/import-places conn places))
report (profile {:dynamic? true} (<? (pi/import-places conn places)))]
;; NOTE(review): `count` always returns a number, never nil, so this
;; assertion cannot pass. Presumably it is a deliberate way to make the
;; test runner print the datom count of the re-import -- confirm.
(is (= nil (count (:tx-data report)))))
(finally
(<? (d/<close conn)))))))
;; Discarded (#_) REPL helper: takes from `pair-chan` with the blocking `<!!`
;; and passes the result to `consume-pair`. It uses clojure.core.async/<!!, so
;; it is only usable from a Clojure (JVM) REPL. Evaluate the inner form by hand
;; before using the scratch forms that call `<??`.
#_
(defn <?? [pair-chan]
(datomish.pair-chan/consume-pair (clojure.core.async/<!! pair-chan)))
;; Discarded (#_) REPL scratch for profiling an import by hand. The forms are
;; never compiled; evaluate them one at a time from a REPL in this namespace,
;; after defining the `<??` helper from the discarded form just above.
;; Names are written so they resolve here: the schema fragment and the import
;; function both live in datomish.places.import (aliased as `pi`), and the
;; import function is `import-places`.
#_ [
(def places (<?? (s/<sqlite-connection "/tmp/places.sqlite")))
(def conn (<?? (d/<connect "/tmp/testkb.sqlite")))
(def tx0 (:tx (<?? (d/<transact! conn pi/places-schema-fragment))))
(tufte/add-basic-println-handler! {})
(def report (profile {:dynamic? true} (<?? (pi/import-places conn places))))
;; Empty:
;; "Elapsed time: 5451.610551 msecs"
;; Reimport:
;; "Elapsed time: 25600.358881 msecs"
]

View file

@ -2,6 +2,7 @@
(:require (:require
[doo.runner :refer-macros [doo-tests doo-all-tests]] [doo.runner :refer-macros [doo-tests doo-all-tests]]
[cljs.test :as t :refer-macros [is are deftest testing]] [cljs.test :as t :refer-macros [is are deftest testing]]
datomish.places.import-test
datomish.promise-sqlite-test datomish.promise-sqlite-test
datomish.db-test datomish.db-test
datomish.query-test datomish.query-test
@ -12,6 +13,7 @@
datomish.test-macros-test)) datomish.test-macros-test))
(doo-tests (doo-tests
'datomish.places.import-test
'datomish.promise-sqlite-test 'datomish.promise-sqlite-test
'datomish.db-test 'datomish.db-test
'datomish.query-test 'datomish.query-test