Start importing places. This is just about profiling for now.
This commit is contained in:
parent
71446a3af5
commit
3775c7c773
5 changed files with 161 additions and 2 deletions
74
src/datomish/places/import.cljc
Normal file
74
src/datomish/places/import.cljc
Normal file
|
@ -0,0 +1,74 @@
|
|||
;; This Source Code Form is subject to the terms of the Mozilla Public
|
||||
;; License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
(ns datomish.places.import
|
||||
#?(:cljs
|
||||
(:require-macros
|
||||
[datomish.pair-chan :refer [go-pair <?]]
|
||||
[cljs.core.async.macros :refer [go]]))
|
||||
(:require
|
||||
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise raise-str cond-let]]
|
||||
[datomish.sqlite :as s]
|
||||
[datomish.api :as d]
|
||||
#?@(:clj [[datomish.pair-chan :refer [go-pair <?]]
|
||||
[clojure.core.async :as a :refer [chan go <! >!]]])
|
||||
#?@(:cljs [[datomish.pair-chan]
|
||||
[cljs.core.async :as a :refer [chan <! >!]]])))
|
||||
|
||||
;; Schema attributes used for imported Places data: a page's URL, GUID, title and visit times.
;; Every attribute gets a fresh :db.part/user id literal and is installed into :db.part/db.
(def places-schema-fragment
  (let [attribute (fn [ident value-type extra]
                    ;; `extra` carries the per-attribute keys (:db/unique or :db/cardinality).
                    (merge {:db/id                 (d/id-literal :db.part/user)
                            :db/ident              ident
                            :db/valueType          value-type
                            :db.install/_attribute :db.part/db}
                           extra))]
    [(attribute :page/url     :db.type/string {:db/unique :db.unique/identity})        ;; TODO: uri
     (attribute :page/guid    :db.type/string {:db/unique :db.unique/identity})        ;; TODO: uuid or guid?
     (attribute :page/title   :db.type/string {:db/cardinality :db.cardinality/one})
     (attribute :page/visitAt :db.type/long   {:db/cardinality :db.cardinality/many})  ;; TODO: instant
     ]))
|
||||
|
||||
(defn- place->entity
  "Convert one `[place-id rows]` entry, as produced by `(group-by :id rows)`, into
  an entity map ready to transact.

  `rows` are the SQL result rows for a single place, one per visit. The URL and
  GUID are read from the first row, every row contributes its `:visit_date` to
  `:page/visitAt`, and `:page/title` is set only when some row has a title."
  [[_id rows]]
  (let [place (first rows)
        ;; Rows are keyed by SQL column name (:title), not by schema attribute
        ;; (:page/title); filtering on :page/title never matched, so the title
        ;; was silently dropped from every imported page.
        title (some :title rows)]
    (cond-> {:db/id (d/id-literal :db.part/user)
             :page/url (:url place)
             :page/guid (:guid place)
             :page/visitAt (map :visit_date rows)}
      title (assoc :page/title title))))
|
||||
|
||||
(defn import-places
  "Import pages and visits from the Places database reachable through
  `places-connection` into the store behind `conn`.

  Transacts `places-schema-fragment` first, then queries the visible
  (`hidden = 0`) places joined to their visits, groups the rows by place id,
  turns each group into one entity and transacts them all at once. The body runs
  inside `go-pair`; its value is the result of that final transaction."
  [conn places-connection]
  (go-pair
    ;; Ensure schema fragment is in place, even though it may cost a (mostly empty) transaction.
    (<? (d/<transact! conn places-schema-fragment))

    (let [sql-lines ["SELECT DISTINCT p.id, p.url, p.title, p.visit_count, p.last_visit_date, p.guid,"
                     "hv.visit_date"
                     "FROM moz_places AS p LEFT JOIN moz_historyvisits AS hv"
                     "WHERE p.hidden = 0 AND p.id = hv.place_id"
                     "ORDER BY p.id, hv.visit_date"
                     "LIMIT 20000"] ;; TODO: remove limit.
          sql       (apply str (interpose " " sql-lines))
          ;; The query is passed as a one-element vector: just the SQL string.
          rows      (<? (s/all-rows places-connection [sql]))
          by-place  (group-by :id rows)
          entities  (map place->entity by-place)]
      (<? (d/<transact! conn entities)))))
|
|
@ -70,3 +70,5 @@
|
|||
(f (first xs) (first ys))
|
||||
(recur f (rest xs) (rest ys)))))
|
||||
|
||||
(defn mapvals
  "Return a collection built on `(empty m)` in which each entry's value has been
  replaced by `(f value)`; keys are left untouched."
  [f m]
  (into (empty m)
        (for [[k v] m]
          [k (f v)])))
|
||||
|
|
|
@ -15,12 +15,14 @@
|
|||
[datomish.sqlite :as s]
|
||||
[datomish.sqlite-schema]
|
||||
[datomish.datom]
|
||||
#?@(:clj [[datomish.pair-chan :refer [go-pair <?]]
|
||||
#?@(:clj [[datomish.jdbc-sqlite]
|
||||
[datomish.pair-chan :refer [go-pair <?]]
|
||||
[tempfile.core :refer [tempfile with-tempfile]]
|
||||
[datomish.test-macros :refer [deftest-async]]
|
||||
[clojure.test :as t :refer [is are deftest testing]]
|
||||
[clojure.core.async :refer [go <! >!]]])
|
||||
#?@(:cljs [[datomish.pair-chan]
|
||||
#?@(:cljs [[datomish.promise-sqlite]
|
||||
[datomish.pair-chan]
|
||||
[datomish.test-macros :refer-macros [deftest-async]]
|
||||
[datomish.node-tempfile :refer [tempfile]]
|
||||
[cljs.test :as t :refer-macros [is are deftest testing async]]
|
||||
|
|
79
test/datomish/places/import_test.cljc
Normal file
79
test/datomish/places/import_test.cljc
Normal file
|
@ -0,0 +1,79 @@
|
|||
;; This Source Code Form is subject to the terms of the Mozilla Public
|
||||
;; License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
(ns datomish.places.import-test
|
||||
#?(:cljs
|
||||
(:require-macros
|
||||
[datomish.pair-chan :refer [go-pair <?]]
|
||||
[datomish.node-tempfile-macros :refer [with-tempfile]]
|
||||
[cljs.core.async.macros :as a :refer [go]]))
|
||||
(:require
|
||||
[taoensso.tufte :as tufte
|
||||
#?(:cljs :refer-macros :clj :refer) [defnp p profiled profile]]
|
||||
[datomish.api :as d]
|
||||
[datomish.places.import :as pi]
|
||||
[datomish.util :as util #?(:cljs :refer-macros :clj :refer) [raise cond-let]]
|
||||
[datomish.sqlite :as s]
|
||||
#?@(:clj [[datomish.jdbc-sqlite]
|
||||
[datomish.pair-chan :refer [go-pair <?]]
|
||||
[tempfile.core :refer [tempfile with-tempfile]]
|
||||
[datomish.test-macros :refer [deftest-async]]
|
||||
[clojure.test :as t :refer [is are deftest testing]]
|
||||
[clojure.core.async :refer [go <! >!]]])
|
||||
#?@(:cljs [[datomish.promise-sqlite]
|
||||
[datomish.pair-chan]
|
||||
[datomish.test-macros :refer-macros [deftest-async]]
|
||||
[datomish.node-tempfile :refer [tempfile]]
|
||||
[cljs.test :as t :refer-macros [is are deftest testing async]]
|
||||
[cljs.core.async :as a :refer [<! >!]]])))
|
||||
|
||||
;; On ClojureScript only, define Throwable as js/Error.
;; NOTE(review): presumably so code shared with the JVM can name Throwable on both platforms -- confirm.
#?(:cljs
|
||||
(def Throwable js/Error))
|
||||
|
||||
;; Register tufte's basic println handler, with an empty options map, when this namespace loads.
;; NOTE(review): presumably this is what prints the results of the `profile` calls in the tests -- confirm.
(tufte/add-basic-println-handler! {})
|
||||
|
||||
(deftest-async test-import
  ;; Profile one import of the Places database at /tmp/places.sqlite into a store
  ;; backed by a temporary file.
  (with-tempfile [store-file (tempfile)]
    (let [places-db (<? (s/<sqlite-connection "/tmp/places.sqlite"))
          store     (<? (d/<connect store-file))]
      (try
        (let [report (profile {:dynamic? true}
                       (<? (pi/import-places store places-db)))]
          ;; NOTE(review): `count` always returns a number, so comparing it with nil
          ;; can never succeed. This looks like a deliberate way to get the datom
          ;; count printed in the failure report while profiling -- confirm.
          (is (= nil (count (:tx-data report)))))
        (finally
          (<? (d/<close store)))))))
|
||||
|
||||
(deftest-async test-import-repeat
  ;; Repeated import is worst possible for the big joins to find datoms that already exist, because
  ;; *every* datom added in the first import will match in the second.
  (with-tempfile [store-file (tempfile)]
    (let [places-db (<? (s/<sqlite-connection "/tmp/places.sqlite"))
          store     (<? (d/<connect store-file))]
      (try
        (let [;; First pass only populates the store; its report is not inspected.
              _      (<? (pi/import-places store places-db))
              ;; Second pass is the one being profiled.
              report (profile {:dynamic? true}
                       (<? (pi/import-places store places-db)))]
          ;; NOTE(review): `count` always returns a number, so comparing it with nil
          ;; can never succeed. This looks like a deliberate way to get the datom
          ;; count printed in the failure report while profiling -- confirm.
          (is (= nil (count (:tx-data report)))))
        (finally
          (<? (d/<close store)))))))
|
||||
|
||||
;; REPL scratch for running the import by hand on the JVM. Both forms are
;; discarded by the reader (`#_`), so nothing here is evaluated when the
;; namespace loads.

;; Blocking take from a pair channel, for use at the REPL only.
#_
(defn <?? [pair-chan]
  (datomish.pair-chan/consume-pair (clojure.core.async/<!! pair-chan)))

#_ [
(def places (<?? (s/<sqlite-connection "/tmp/places.sqlite")))
(def conn (<?? (d/<connect "/tmp/testkb.sqlite")))
;; The schema fragment lives in datomish.places.import (aliased `pi`), not in this namespace.
(def tx0 (:tx (<?? (d/<transact! conn pi/places-schema-fragment))))

(tufte/add-basic-println-handler! {})
;; The import entry point is `import-places`; there is no `pi/import`.
(def report (profile {:dynamic? true} (<?? (pi/import-places conn places))))

;; Empty:
;; "Elapsed time: 5451.610551 msecs"
;; Reimport:
;; "Elapsed time: 25600.358881 msecs"

]
|
|
@ -2,6 +2,7 @@
|
|||
(:require
|
||||
[doo.runner :refer-macros [doo-tests doo-all-tests]]
|
||||
[cljs.test :as t :refer-macros [is are deftest testing]]
|
||||
datomish.places.import-test
|
||||
datomish.promise-sqlite-test
|
||||
datomish.db-test
|
||||
datomish.query-test
|
||||
|
@ -12,6 +13,7 @@
|
|||
datomish.test-macros-test))
|
||||
|
||||
(doo-tests
|
||||
'datomish.places.import-test
|
||||
'datomish.promise-sqlite-test
|
||||
'datomish.db-test
|
||||
'datomish.query-test
|
||||
|
|
Loading…
Reference in a new issue