Downloading Zotero Catalogue

I use Zotero to manage my references, and automatically insert them into text documents using a custom-built system (which I’ll share later).

However, I found that accessing my Zotero catalogue on an ad hoc basis was incredibly slow, and I didn’t want to mess around hacking the local SQLite record created by the Chrome plugin. So I wrote this Python script. It stores the catalogue metadata locally as a JSON-formatted text file (for fast access later), and when run it only updates records which have been added or modified. Obviously the first run downloads the entire catalogue, which takes ages, but subsequent updates are much quicker.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/python

#from libZotero import zotero
import time, datetime
import json
import sys
sys.path.append( "/home/USERNAME/zotero/libZotero-master/lib/py/" ) # Add locally stored Zotero library - I think I got that from here... https://github.com/fcheslack/libZotero
from libZotero2 import zotero

def todatetuple(datestring):
    """Parse a ``YYYY-MM-DDTHH:MM:SSZ`` timestamp string into a ``time.struct_time``.

    The result is only used for ordering comparisons between timestamps.
    ``time.strptime`` raises ``ValueError`` if the string does not match
    the expected layout.
    """
    timestamp_format = u"%Y-%m-%dT%H:%M:%SZ"
    parsed = time.strptime(datestring, timestamp_format)
    return parsed
   
def do10(i, lastupdated): # Download records in chunks of 10 - requirement of Zotero API
    """Fetch one page of up to 10 top-level items and copy the fresh ones into ``olditems``.

    Items are requested sorted by ``dateModified`` in descending order,
    starting at offset ``i - 1``. Each item whose ``dateUpdated`` is not
    older than ``lastupdated`` is stored in the module-level ``olditems``
    dict under its ``itemKey``.

    Parameters:
        i: 1-based position of the first item to request.
        lastupdated: ``time.struct_time`` cut-off; the first item older
            than this ends the whole update.

    Returns:
        The 1-based position for the next call, or ``0`` when there is
        nothing more to do (an older item was reached, or the server
        returned an empty page).
    """
    global olditems
    zotitemlist = zlib.fetchItemsTop({'limit': 10, 'order': 'dateModified', 'sort': 'desc', 'start': i-1})
    fetched_any = False
    for item in zotitemlist:
        fetched_any = True
        # NOTE(review): the request sorts on 'dateModified' but the cut-off
        # is compared against 'dateUpdated' - confirm these track each other.
        if todatetuple(item.get('dateUpdated')) < lastupdated:
            print("Updates completed.")
            return 0
        olditems[item.get('itemKey')] = item.pristine
        print("%s updated! Total of %s done so far..." % (item.get('itemKey'), i))
        i += 1
    if not fetched_any:
        # An empty page means the offset is past the end of the library.
        # Returning the unchanged i here would make the caller's
        # `while i > 0` loop request the same empty page forever.
        print("Updates completed.")
        return 0
    return i
   
# Load the locally cached catalogue (a dict keyed by item key, plus the
# 'PICKLEDATE' bookkeeping entry). The context manager guarantees the file
# handle is closed straight after parsing instead of being left open.
with open( "/home/USERNAME/zotero/zotero.json", "rb" ) as cachefile:
    olditems = json.load( cachefile )

zlib = zotero.Library('user', 'ID', '<null>', 'ACCESS-CODE') # See https://www.zotero.org/settings/keys
collectionKey = None
i=1 # 1-based position of the next item to request from the server
# 'PICKLEDATE' is the timestamp saved by the previous run; it is removed here
# so that only real catalogue records remain in olditems.
# NOTE: if the cache has no 'PICKLEDATE' entry, pop() yields None and
# todatetuple() raises, so the cache file must contain one.
lastupdated = todatetuple(olditems.pop('PICKLEDATE', None))
print("Performing updates since %s" % time.strftime("%Y-%m-%d %H:%M:%S", lastupdated))

# Fetch every item key currently on the server, then drop cached records
# whose key is no longer among them.
zotkeylist = zlib.fetchItemKeys()
killlist=list()

for cachekey in olditems:
    recordkey = olditems[cachekey]['itemKey']
    if recordkey not in zotkeylist:
        print("LOST ONE: %s (%s) - Marked to be deleted." % (recordkey, recordkey))
        # Remember the dict key itself, so the deletion below cannot miss
        # even if a record's 'itemKey' field differs from the key it is stored under.
        killlist.append(cachekey)

# Deletion is deferred to a second pass because a dict must not change
# size while it is being iterated.
for cachekey in killlist:
    del olditems[cachekey]

j = len(killlist)
print("Deleted %s items." % j)
   
# Page through the server's items, newest first; do10 returns 0 once it
# reaches an item older than lastupdated.
while i > 0:
    i = do10(i,lastupdated)

# The saved timestamp is deliberately set one hour in the past.
# NOTE(review): this is local time written with a 'Z' suffix; presumably the
# one-hour offset is a safety margin or a timezone adjustment - confirm.
savedate = datetime.datetime.now() - datetime.timedelta(hours = 1)
print("Saving update for %s" % savedate.strftime("%Y-%m-%d %H:%M:%S"))
olditems['PICKLEDATE'] = savedate.strftime("%Y-%m-%dT%H:%M:%SZ")

# Serialise before opening the file, so a serialisation error cannot leave a
# truncated cache behind. The file is opened in text mode because json
# produces str, which a binary-mode handle rejects on Python 3.
serialised = json.dumps( olditems )
with open( "/home/USERNAME/zotero/zotero.json", "w" ) as cachefile:
    cachefile.write( serialised )