python to grab hypothes.is annotations and put them in a tiddlywiki
Okay, so… I like being able to pull hypothes.is annotations into my tiddlywiki. I also shove things over from my tiddlywiki into my site, which runs on Jekyll. The below is just absolute trash code for getting those annotations into a format which is easy to make into a Jekyll post [1]. I wrote it in about two and a half seconds and it’s just worked well enough that I’ve never bothered going back to do it properly. I do not recommend its use (the way it caches is Bad for almost every use case, and Suboptimal for all), but if you feel intimidated by the hypothes.is API, maybe seeing it can show you that something hacky really can work!
After you put files into the folder where your tiddlers are stored, restart the node process so it picks them up. Creating the metadata files manually like this is just a way of getting around having to run the import commands properly, which I think is too slow/annoying.
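To orient you before the code: a search result row from the API is shaped roughly like the below. This one is trimmed down to just the fields the script actually reads, and every value is invented:

{
    "id": "some-annotation-id",
    "uri": "https://example.com/some-article",
    "created": "2021-05-04T12:34:56.789012+00:00",
    "updated": "2021-05-04T12:34:56.789012+00:00",
    "text": "whatever note I attached to the highlight",
    "tags": ["reading"],
    "document": {"title": ["Some Article I Read"]},
    "links": {"incontext": "https://hyp.is/..."},
    "target": [{
        "source": "https://example.com/some-article",
        "selector": [
            {"type": "TextPositionSelector", "start": 1234, "end": 1260},
            {"type": "TextQuoteSelector",
             "prefix": "the text just before it ",
             "exact": "the highlighted passage",
             "suffix": " and the text just after it"}
        ]
    }]
}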
#!/usr/bin/python3
import re
import os
import json
import requests
from json.decoder import JSONDecodeError
# This is a directory where copies of the annotations live in order to keep
# track of which we've already processed (even if we later delete the
# copy in the wiki)
# Super duplicative but I'm paranoid about data loss.
filedir = <whatever>
# This is the end location of the thing, the tiddlers folder if you're doing that
targetdir = <whatever>
# https://h.readthedocs.io/en/latest/api/
TOKEN = <hypothes.is API token>
# https://www.peterbe.com/plog/fastest-python-function-to-slugify-a-string
non_url_safe = ['"', '#', '$', '%', '&', '+', '(', ')',
                ',', '/', ':', ';', '=', '?',
                '@', '[', '\\', ']', '^', '`',
                '{', '|', '}', '~', "'"]
non_url_safe_regex = re.compile(
    r'[{}]'.format(''.join(re.escape(x) for x in non_url_safe)))
def kebabify(text):
    text = non_url_safe_regex.sub('', text).strip()
    # let's do 8 words of the title
    text = u'-'.join(re.split(r'\s+', text)[0:8])
    return text
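# a quick sketch of what that produces:
#   kebabify("Some Article: I Read") -> "Some-Article-I-Read"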
def get_filename_from_annotations(annotations):
    post = annotations[-1]
    date = post['created'][0:10]
    keb = kebabify(post["document"]['title'][0]).lower()
    # I want this to match the kebab, not the original title
    unkeb = "".join(c if c != "-" else " " for c in keb)
    unkeb = " ".join(unkeb.split(" ")[:6])
    # e.g. "2021-05-04 annotation of some article i read.md"
    return f"{date} annotation of {unkeb}.md"
template = """---
title: "annotation of {title}"
layout: post
category: responses
tags: annotation
replyto: "{link}"
date: "{isodate}"
hypothesislink: "{hypothesislink}"
---
{content}
"""
# TiddlyWiki wants 17-digit YYYYMMDDHHMMSSmmm timestamps and we only
# know the date, so the time part is zero-padded. The timezone stuff
# is always wrong but it's close enough.
metatemplate = """created: {datenumberstring}000000000
modified: {datenumberstring}000000000
tags: [[annotation]] {tags}
title: {title}
type: text/x-markdown"""
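# Rendered, a .meta file comes out something like this (date and tags
# invented for illustration):
#   created: 20210504000000000
#   modified: 20210504000000000
#   tags: [[annotation]] [[reading]]
#   title: 2021-05-04 annotation of some article i read
#   type: text/x-markdown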
def save_post(path, file_content):
    with open(path, 'w') as f:
        f.write(file_content)
def get_file_string(annotations):
    post = annotations[-1]
    title = post["document"]['title'][0]
    link = post['target'][0]['source']
    content = generate_content(annotations)
    isodate = post['created']
    hypothesislink = post["links"]["incontext"]
    return template.format(title=title.lower(), link=link, content=content,
                           isodate=isodate, hypothesislink=hypothesislink)
def get_meta_string(annotations):
    post = annotations[-1]
    # the tiddler title is the filename minus the ".md"
    title = get_filename_from_annotations(annotations)[:-3]
    tags = " ".join({f"[[{t}]]" for a in annotations for t in a["tags"]})
    datenumberstring = "".join(c for c in post["created"][:10] if c != "-")
    return metatemplate.format(datenumberstring=datenumberstring, tags=tags, title=title)
def process_annotations(annotations):
    filename = get_filename_from_annotations(annotations)
    # paranoid backup copy first, then the real one in the wiki
    prospective_path = os.path.join(filedir, filename)
    file_content = get_file_string(annotations)
    save_post(prospective_path, file_content)
    target_path = os.path.join(targetdir, filename)
    save_post(target_path, file_content)
    # if we're just adding more annotations, don't change the meta file
    if not os.path.isfile(target_path + ".meta"):
        save_post(target_path + ".meta", get_meta_string(annotations))
SEARCH_ENDPOINT = "https://api.hypothes.is/api/search"
existing_annotations_file_path = <somewhere, tempfile is fine>
def get_annotations():
    # the cache maps uri -> list of annotation rows we've already seen
    old_annotations = {}
    if os.path.isfile(existing_annotations_file_path):
        with open(existing_annotations_file_path, "r") as fp:
            try:
                old_annotations = json.load(fp)
            except JSONDecodeError:
                pass
    with requests.Session() as s:
        s.headers.update({"Accept": "application/vnd.hypothesis.v1+json"})
        s.headers.update({"Authorization": f"Bearer {TOKEN}"})
        params = {}
        params["user"] = "acct:< YOUR ACCOUNT USERNAME HERE >@hypothes.is"
        # go through the session so the auth headers actually get sent
        response = s.post(SEARCH_ENDPOINT, data=params).json()
        new_ones = [r for r in response['rows']
                    if r['uri'] not in old_annotations
                    or not any(r['id'] == old_r['id'] for old_r in old_annotations[r['uri']])]
        if len(new_ones) == 0:
            return {}
        # for every uri with something new, refetch all of its annotations
        uri_dict = {}
        new_uris = {a['uri'] for a in new_ones}
        for uri in new_uris:
            uri_dict[uri] = list()
            params["uri"] = uri
            response_rows = s.post(SEARCH_ENDPOINT, data=params).json()["rows"]
            while len(response_rows) != 0:
                uri_dict[uri] += response_rows
                # page with search_after until the results run dry
                params["search_after"] = response_rows[-1]['updated']
                response_rows = s.post(SEARCH_ENDPOINT, data=params).json()["rows"]
            # requests drops None-valued form fields, so this resets paging
            params["search_after"] = None
        old_annotations.update(uri_dict)
        with open(existing_annotations_file_path, "w") as fp:
            json.dump(old_annotations, fp)
        return uri_dict
def get_offset(annotation):
    # character offset of the highlight, used to sort annotations
    # into document order
    selectors = annotation["target"][0]["selector"]
    for s in selectors:
        if s["type"] == "TextPositionSelector":
            return s["start"]
    return 0
def get_text_dict(annotation):
    selectors = annotation["target"][0]["selector"]
    for s in selectors:
        if s["type"] == "TextQuoteSelector":
            return s
    raise Exception("oh dear no text")
def collapse(v):
    """collapse multiple linebreaks down
    this emoji is not the *best* tombstone but w/e"""
    three_swapped = re.sub("\n{3,}", "😩", v)
    two_breaks = re.sub("\n{2}", "\n> ", three_swapped)
    return re.sub("😩", "\n\n> ", two_breaks)
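# A sketch of what collapse does to quoted text:
#   collapse("first\n\nsecond")   -> "first\n> second"
#   collapse("first\n\n\nsecond") -> "first\n\n> second"
# i.e. one blank line continues the blockquote on the next line, and
# two or more blank lines become a quoted paragraph break.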
def generate_content(annotations):
    text = ""
    for a in sorted(annotations, key=get_offset):
        textdict = get_text_dict(a)
        textdict = {k: collapse(v) for k, v in textdict.items()}
        # don't look at the next couple lines, I'm not proud
        # (they just pad with a space when the quote boundary
        # isn't already whitespace)
        before = " " if not (textdict["prefix"][-1].isspace() or textdict["exact"][0].isspace()) else ""
        after = " " if not (textdict["exact"][-1].isspace() or textdict["suffix"][0].isspace()) else ""
        text += f"""\n\n\n> ...{textdict["prefix"]}{before}<em>{textdict["exact"]}</em>{after}{textdict["suffix"]}...\n\n{a.get("text", "")}"""
    return text
if __name__ == "__main__":
    annotations = get_annotations()
    if len(annotations) == 0:
        # nonzero exit so a wrapper can tell nothing new came in
        exit(1)
    for annos in annotations.values():
        process_annotations(annos)
    exit(0)
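For reference, a generated post file ends up shaped like this (every value here is invented for illustration):

---
title: "annotation of some article i read"
layout: post
category: responses
tags: annotation
replyto: "https://example.com/some-article"
date: "2021-05-04T12:34:56.789012+00:00"
hypothesislink: "https://hyp.is/..."
---


> ...the text just before the highlight <em>the highlighted passage</em> and the text just after it...

whatever note I attached to the highlight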
[1] If you like the idea of pulling your own annotations but don’t use the same stack, you could absolutely toss the metadata file (for Tiddlywiki) and any/all of the frontmatter (the YAML at the top, for Jekyll).