Commit 4a7bcdc1 authored by Benjamin Rokseth's avatar Benjamin Rokseth
Browse files

Initial commit

parents
bolt.db
decorator
*.nt
Decorator
==
Simple webservice to decorate JSON documents from Koha (or other) that need extended info from
triplestore, e.g. bibliographic work ids, contributors, image info, etc.
Indexes ntriples downloaded from triplestore into record, publication and bnode indexes,
and exposes this in a /decorate API
Usage
==
./decorate [-load <ntriples.nt>] [-http port]
arg -load will start with indexing the document before webservice starts.
POST a JSON document to /decorate and receive a decorated JSON as response, based on `biblionumber` in requested data.
For document examples see
`example.nt` : a document with ntriples to -load
`examplerequest.json`: an example document to test against the API
Fetching Triples
==
For real-world indexing you need an ntriples document that matches the Koha records with the triples
```?pub deich:recordId ?recordId```
an example construct query to fetch this is in the document `construct_decorator_graph.sparql`
Example of usage against fuseki:
```
curl -XPOST -H 'Accept:text/plain' -H 'Timeout:1200' <host>:3030/ds --data-urlencode query@construct_decorator_graph.sparql
```
\ No newline at end of file
# Construct query producing the decorator graph: for each publication,
# emit its record id, work URI, contributor blank node, media type,
# optional image, language and publication year.
# Fix: `deich:role:` had a stray trailing colon (invalid SPARQL), and the
# deich: prefix used throughout was never declared.
PREFIX deich: <http://data.deichman.no/ontology#>

CONSTRUCT {
  ?pub deich:recordId ?recordId ;
       deich:workURI ?work ;
       deich:contributor [
         deich:role ?role ; deich:agent ?agent ; deich:agentName ?agentName ] ;
       deich:hasMediaType ?hasMediaType ;
       deich:hasImage ?hasImage ;
       deich:language ?language ;
       deich:publicationYear ?publicationYear .
} WHERE {
  ?pub deich:publicationOf ?work ;
       deich:recordId ?recordId ;
       deich:hasMediaType ?hasMediaType ;
       deich:publicationYear ?publicationYear .
  ?work a deich:Work ;
        deich:language ?language .
  OPTIONAL {
    ?work deich:contributor [
      a deich:MainEntry, deich:Contribution ;
      deich:agent ?agent ;
      deich:role ?role
    ] . ?agent deich:name ?agentName .
  }
  OPTIONAL { ?pub deich:hasImage ?hasImage . }
}
package main
import (
"encoding/binary"
"encoding/json"
"flag"
"io"
"log"
"net/http"
"os"
"runtime"
bolt "github.com/coreos/bbolt"
"github.com/knakk/kbp/rdf"
"time"
)
// Command-line flags.
var httpAddr = flag.String("http", ":8900", "HTTP serve address")
var load = flag.String("load", "", "Ntriples file to load")

// Bolt bucket names: pub holds publications keyed by URI, bnode holds
// contributor blank nodes keyed by bnode ID, rec duplicates publications
// keyed by record id. meta is created but not otherwise used in this file.
var (
	bktMeta  = []byte("meta")
	bktBnode = []byte("bnode")
	bktPub   = []byte("pub")
	bktRec   = []byte("rec")
)
// Main bundles the application's bolt database handle.
type Main struct {
	db *bolt.DB
}
// Pub is the JSON-serializable view of a publication assembled from
// triples. It is stored in the pub bucket keyed by publication URI and
// duplicated in the rec bucket keyed by RecordId.
type Pub struct {
	Id                                  string
	RecordId, PublicationYear           string
	WorkURI, MediaType, Image, Language string
	// Contributors is keyed by blank node ID; values start as empty
	// placeholders and are resolved from the bnode bucket after loading.
	Contributors map[string]Bnode
}
// Bnode is a contributor blank node (agent URI, display name, role URI),
// stored in the bnode bucket keyed by Id.
type Bnode struct {
	Id        string
	Agent     string
	AgentName string
	Role      string
}
// setupBuckets ensures the database contains every required bucket,
// creating any that are missing.
func (m Main) setupBuckets() error {
	return m.db.Update(func(tx *bolt.Tx) error {
		required := [][]byte{bktMeta, bktPub, bktBnode, bktRec}
		for _, name := range required {
			if _, err := tx.CreateBucketIfNotExists(name); err != nil {
				return err
			}
		}
		return nil
	})
}
/*
 * loadPublications reads the ntriples file named by the -load flag triple by
 * triple into the pub and bnode buckets, keyed by triple subject. It then
 * resolves contributor blank nodes into each Pub and duplicates the
 * publication in the rec bucket keyed by RecordId.
 *
 * Fixes: non-EOF decode errors are now returned (previously they were
 * silently looped on, which could busy-loop on malformed input); the open
 * error is included in the fatal log; the file is closed; Bnode/Pub Ids are
 * logged with %s (they are strings — %d printed garbage); and each
 * contributor gets a freshly zeroed Bnode so a missing bnode no longer
 * inherits the previous contributor's data.
 */
func (m Main) loadPublications() error {
	start := time.Now()
	f, err := os.Open(*load)
	if err != nil {
		log.Fatalf("Could not read file %s: %s", *load, err)
	}
	defer f.Close()
	err = m.db.Update(func(tx *bolt.Tx) error {
		dec := rdf.NewDecoder(f)
		for {
			tr, err := dec.Decode()
			if err == io.EOF {
				break
			}
			if err != nil {
				return err
			}
			switch subj := tr.Subject.(type) {
			case rdf.BlankNode:
				id := subj.ID()
				var b *Bnode
				if bn := tx.Bucket(bktBnode).Get([]byte(id)); bn != nil {
					if err := json.Unmarshal(bn, &b); err != nil {
						log.Printf("Error decoding bnode: %s", err)
						return err
					}
				} else {
					b = new(Bnode)
				}
				b.updateBnode(tr)
				d, err := json.Marshal(b)
				if err != nil {
					return err
				}
				if err := tx.Bucket(bktBnode).Put([]byte(id), d); err != nil {
					log.Printf("Error updating Bnode %s: %s", b.Id, err)
					return err
				}
			case rdf.NamedNode:
				id := subj.Name()
				var p *Pub
				if pub := tx.Bucket(bktPub).Get([]byte(id)); pub != nil {
					if err := json.Unmarshal(pub, &p); err != nil {
						log.Printf("Error decoding pub: %s", err)
						return err
					}
				} else {
					p = new(Pub)
					p.Contributors = make(map[string]Bnode)
				}
				p.updatePub(tr)
				d, err := json.Marshal(p)
				if err != nil {
					return err
				}
				if err := tx.Bucket(bktPub).Put([]byte(id), d); err != nil {
					log.Printf("Error updating Pub %s: %s", p.Id, err)
					return err
				}
			}
		}
		// Iterate Pubs, resolve contributor bnodes and duplicate each
		// publication in the rec bucket keyed by RecordId.
		cur := tx.Bucket(bktPub).Cursor()
		for k, v := cur.First(); k != nil; k, v = cur.Next() {
			p, err := decodePub(v)
			if err != nil {
				return err
			}
			for n := range p.Contributors {
				var b Bnode
				if bn := tx.Bucket(bktBnode).Get([]byte(n)); bn != nil {
					if err := json.Unmarshal(bn, &b); err != nil {
						log.Printf("Error decoding bnode: %s", err)
						return err
					}
				}
				p.Contributors[n] = b
			}
			data, err := json.Marshal(p)
			if err != nil {
				return err
			}
			if err := tx.Bucket(bktPub).Put(k, data); err != nil {
				log.Printf("Error updating Pub: %s: %s", k, err)
				return err
			}
			// duplicate index in record bucket
			if err := tx.Bucket(bktRec).Put([]byte(p.RecordId), data); err != nil {
				log.Printf("Error updating Rec: %s: %s", k, err)
				return err
			}
		}
		return nil
	})
	log.Printf("Time parsing publications: %s", time.Since(start))
	return err
}
// updateBnode records the triple's subject as the bnode ID and copies the
// object onto the field matching the predicate (agent, role or agentName).
// Triples with other predicates are ignored.
func (b *Bnode) updateBnode(tr rdf.Triple) {
	b.Id = tr.Subject.(rdf.BlankNode).ID()
	const ns = "http://data.deichman.no/ontology#"
	switch tr.Predicate.Name() {
	case ns + "agent":
		b.Agent = tr.Object.(rdf.NamedNode).Name()
	case ns + "role":
		b.Role = tr.Object.(rdf.NamedNode).Name()
	case ns + "agentName":
		b.AgentName = tr.Object.(rdf.Literal).ValueAsString()
	}
}
// updatePub records the triple's subject as the publication ID and copies
// the object onto the field matching the predicate. Contributor blank nodes
// are registered as empty placeholders, to be resolved later from the bnode
// bucket. Triples with other predicates are ignored.
func (p *Pub) updatePub(tr rdf.Triple) {
	p.Id = tr.Subject.(rdf.NamedNode).Name()
	const ns = "http://data.deichman.no/ontology#"
	switch tr.Predicate.Name() {
	case ns + "recordId":
		p.RecordId = tr.Object.(rdf.Literal).ValueAsString()
	case ns + "publicationYear":
		p.PublicationYear = tr.Object.(rdf.Literal).ValueAsString()
	case ns + "workURI":
		p.WorkURI = tr.Object.(rdf.NamedNode).Name()
	case ns + "hasMediaType":
		p.MediaType = tr.Object.(rdf.NamedNode).Name()
	case ns + "hasImage":
		p.Image = tr.Object.(rdf.Literal).ValueAsString()
	case ns + "language":
		p.Language = tr.Object.(rdf.NamedNode).Name()
	case ns + "contributor":
		bnodeID := tr.Object.(rdf.BlankNode).ID()
		// Register each bnode only once, in case the same file is loaded
		// again. TODO: reset contributor instead?
		if _, seen := p.Contributors[bnodeID]; !seen && bnodeID != "" {
			p.Contributors[bnodeID] = Bnode{}
		}
	}
}
// unused
/*func not_in(arr []string, str string) bool {
for _, a := range arr {
if a == str {
return false
}
}
return true
}*/
// main opens the bolt database, ensures the buckets exist, optionally
// indexes an ntriples file (-load), and serves the HTTP API (-http).
func main() {
	runtime.GOMAXPROCS(runtime.NumCPU())
	flag.Parse()
	m := new(Main)

	// DB setup
	db, err := bolt.Open("bolt.db", 0640, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	m.db = db

	// Setup required buckets
	if err := m.setupBuckets(); err != nil {
		log.Fatal(err)
	}

	if *load != "" {
		log.Println("Loading publications...")
		// Fix: the load error was previously dropped; abort on failure
		// rather than serving from a partially loaded index.
		if err := m.loadPublications(); err != nil {
			log.Fatal(err)
		}
	}

	log.Printf("Starting HTTP server listening at %v", *httpAddr)
	log.Fatal(http.ListenAndServe(*httpAddr, newServer(m.db)))
}
/* UTILITY FUNCTIONS */
// encodePub serializes a publication to its JSON byte representation.
func encodePub(pub *Pub) ([]byte, error) {
	return json.Marshal(pub)
}
// decodePub deserializes a publication from its JSON byte representation.
func decodePub(raw []byte) (Pub, error) {
	var pub Pub
	err := json.Unmarshal(raw, &pub)
	return pub, err
}
// i64tob encodes i as a zig-zag varint into a fresh 8-byte slice
// (unused tail bytes remain zero).
func i64tob(i int64) []byte {
	buf := make([]byte, 8)
	_ = binary.PutVarint(buf, i)
	return buf
}
// itob converts i to its big-endian 8-byte representation.
func itob(i int) []byte {
	var buf [8]byte
	binary.BigEndian.PutUint64(buf[:], uint64(i))
	return buf[:]
}
// btoi64 decodes a zig-zag varint from b, returning the value and the
// number of bytes consumed (inverse of i64tob).
func btoi64(b []byte) (int64, int) {
	value, consumed := binary.Varint(b)
	return value, consumed
}
// u32tob converts a uint32 into its big-endian 4-byte representation.
func u32tob(v uint32) []byte {
	var buf [4]byte
	binary.BigEndian.PutUint32(buf[:], v)
	return buf[:]
}
// btou32 converts a big-endian 4-byte slice into a uint32
// (inverse of u32tob).
func btou32(b []byte) uint32 {
	_ = b[3] // single bounds check, as in binary.BigEndian.Uint32
	return uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
}
package main
import (
"errors"
"fmt"
"net/http"
"net/url"
"strings"
"encoding/json"
bolt "github.com/coreos/bbolt"
"io/ioutil"
)
// errNotFound signals a missing key in a bucket lookup.
var errNotFound = errors.New("not found")

// server serves the decorator HTTP API on top of a bolt database.
type server struct {
	db *bolt.DB
}
// newServer returns a server backed by the given bolt database.
func newServer(database *bolt.DB) server {
	s := server{db: database}
	return s
}
// ServeHTTP routes requests by the first path segment:
//
//	/bnode?id=…  – fetch a contributor blank node
//	/pub?id=…    – fetch a publication by (URL-escaped) URI
//	/rec?id=…    – fetch a publication by record id
//	/decorate    – decorate a POSTed LoansAndReservations document
func (s server) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	path := strings.Split(r.URL.Path, "/")
	paramId := r.URL.Query().Get("id")
	// Fix: strings.Split always returns at least one element, so the old
	// `len(path) < 1` guard could never fire while path[1] was indexed
	// below; require a second element before routing on it.
	if len(path) < 2 {
		http.Error(w, "not found", http.StatusNotFound)
		return
	}
	switch path[1] {
	case "bnode":
		id, err := url.QueryUnescape(paramId)
		if err != nil {
			http.Error(w, "bnode unescape error", http.StatusBadRequest)
			return
		}
		if err := s.getBnode(id, w, r); err != nil {
			http.Error(w, "not found", http.StatusNotFound)
			return
		}
	case "pub":
		id, err := url.QueryUnescape(paramId)
		if err != nil {
			http.Error(w, "publication unescape error", http.StatusBadRequest)
			return
		}
		if err := s.getPub(id, w, r); err != nil {
			http.Error(w, "not found", http.StatusNotFound)
			return
		}
	case "rec":
		// Record ids are plain numbers; no unescaping needed.
		if err := s.getRec(paramId, w, r); err != nil {
			http.Error(w, "not found", http.StatusNotFound)
			return
		}
	case "decorate":
		if err := s.decorate(w, r); err != nil {
			http.Error(w, "not found", http.StatusNotFound)
			return
		}
	default:
		http.Error(w, "not found", http.StatusNotFound)
	}
}
// getBnode writes the blank node stored under id as JSON, or returns an
// error (errNotFound when the bnode bucket has no entry for id).
func (s server) getBnode(id string, w http.ResponseWriter, r *http.Request) error {
	var payload []byte
	err := s.db.View(func(tx *bolt.Tx) error {
		raw := tx.Bucket(bktBnode).Get([]byte(id))
		if raw == nil {
			return errNotFound
		}
		// Copy out: bolt-owned bytes are only valid inside the transaction.
		payload = append([]byte(nil), raw...)
		return nil
	})
	if err != nil {
		return err
	}
	w.Header().Add("Content-Type", "application/json")
	w.Write(payload)
	return nil
}
// getPub writes the publication stored under the URI id as JSON, or
// returns an error (errNotFound when the pub bucket has no entry for id).
func (s server) getPub(id string, w http.ResponseWriter, r *http.Request) error {
	var payload []byte
	view := func(tx *bolt.Tx) error {
		raw := tx.Bucket(bktPub).Get([]byte(id))
		if raw == nil {
			return errNotFound
		}
		// Copy out: bolt-owned bytes are only valid inside the transaction.
		payload = make([]byte, len(raw))
		copy(payload, raw)
		return nil
	}
	if err := s.db.View(view); err != nil {
		return err
	}
	w.Header().Add("Content-Type", "application/json")
	w.Write(payload)
	return nil
}
// getRec writes the publication stored under the record id as JSON, or
// returns an error (errNotFound when the rec bucket has no entry for id).
func (s server) getRec(id string, w http.ResponseWriter, r *http.Request) error {
	var payload []byte
	err := s.db.View(func(tx *bolt.Tx) error {
		raw := tx.Bucket(bktRec).Get([]byte(id))
		if raw == nil {
			return errNotFound
		}
		// Copy out: bolt-owned bytes are only valid inside the transaction.
		payload = append([]byte(nil), raw...)
		return nil
	})
	if err != nil {
		return err
	}
	w.Header().Add("Content-Type", "application/json")
	w.Write(payload)
	return nil
}
// LoansAndReservations mirrors the JSON document POSTed to /decorate:
// a patron's pickups, holds, checkouts and loan history (from Koha).
type LoansAndReservations struct {
	Pickups   []Pickup
	Holds     []Hold
	Checkouts []Checkout
	History   []History
}
// Pickup is a reservation awaiting pickup. The embedded *Pub is filled in
// by decorate from the rec bucket, matched on Biblionumber. The string
// fields presumably mirror Koha's reservation JSON — verify against caller.
type Pickup struct {
	ReserveId                                          string `json:"reserve_id"`
	Branchcode, Pickupnumber, Itemnumber, Biblionumber string
	Waitingdate, Author, Title, WorkURI                string
	*Pub
}
// Hold is a pending reservation. The embedded *Pub is filled in by
// decorate from the rec bucket, matched on Biblionumber.
type Hold struct {
	ReserveId    string `json:"reserve_id"`
	SuspendUntil string `json:"suspend_until"`
	Branchcode, Suspend, Biblionumber, Waitingdate, Author,
	Title, WorkURI, Reservedate, Expirationdate string
	*Pub
}
// Checkout is a current loan. The embedded *Pub is filled in by decorate
// from the rec bucket, matched on Biblionumber.
type Checkout struct {
	DateDue                                     string `json:"date_due"`
	IssueId                                     string `json:"issue_id"`
	Author, Biblionumber, Branchcode, Itemnumber string
	Title, WorkURI                              string
	*Pub
}
// History is a returned (past) loan. The embedded *Pub is filled in by
// decorate from the rec bucket, matched on Biblionumber.
type History struct {
	IssueId                                     string `json:"issue_id"`
	Author, Biblionumber, Branchcode, Itemnumber string
	Returndate, Title, WorkURI                  string
	*Pub
}
func (s server) decorate(w http.ResponseWriter, r *http.Request) error {
b, err := ioutil.ReadAll(r.Body)
defer r.Body.Close()
if err != nil {
http.Error(w, err.Error(), 500)
return err
}
var lr LoansAndReservations
err = json.Unmarshal(b, &lr)
if err != nil {
http.Error(w, err.Error(), 500)
return err
}
if err := s.db.View(func(tx *bolt.Tx) error {
// Holds
var p Pub
for n, _ := range lr.Holds {
bk := tx.Bucket(bktRec).Get([]byte(lr.Holds[n].Biblionumber))
if bk != nil {
err := json.Unmarshal(bk, &p)
if err != nil {
fmt.Printf("Error decoding hold: %s", err)
return err
}
lr.Holds[n].Pub = &p
}
}
// Checkouts
for n, _ := range lr.Checkouts {
bk := tx.Bucket(bktRec).Get([]byte(lr.Checkouts[n].Biblionumber))
if bk != nil {
err := json.Unmarshal(bk, &p)
if err != nil {
fmt.Printf("Error decoding checkout: %s", err)
return err
}
lr.Checkouts[n].Pub = &p
}
}
// Pickups
for n, _ := range lr.Pickups {
bk := tx.Bucket(bktRec).Get([]byte(lr.Pickups[n].Biblionumber))
if bk != nil {
err := json.Unmarshal(bk, &p)
if err != nil {
fmt.Printf("Error decoding pickups: %s", err)
return err
}
lr.Pickups[n].Pub = &p
}
}
// History
for n, _ := range lr.History {
bk := tx.Bucket(bktRec).Get([]byte(lr.History[n].Biblionumber))
if bk != nil {
err := json.Unmarshal(bk, &p)
if err != nil {
fmt.Printf("Error decoding history: %s", err)
return err
}
lr.History[n].Pub = &p
}
}
return nil
}); err != nil {
return err
}
out, err := json.Marshal(lr)
if err != nil {
http.Error(w, err.Error(), 500)
return err
}
w.Header().Set("content-type", "application/json")
w.Write([]byte(out))
return nil
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment