Commit 83108140 authored by Magnus Westergaard

DEICH-5691: WIP metadata.

parent de196dad
__pycache__
import os
import requests
from katalog import Katalog
from koha import Koha
from data import LIBRARIES
@@ -25,6 +26,7 @@ def require_env(env, missing_msg=None):
def main():
    koha_url = require_env('INTERNAL_URL_KOHA')
    sparql_url = require_env('SPARQL_ENDPOINT')
    auth_server = require_env('INTERNAL_URL_AUTH_SERVER')
    client_secret = require_env('KOHA_MACHINE_CLIENT_SECRET')
    target = require_env('TARGET')
@@ -34,16 +36,30 @@ def main():
    jwt_token = get_token(auth_server, client_secret)
    koha = Koha(koha_url, jwt_token, target)
print("Setting up ILL libraries in Koha...", end=" ")
for library, payload in LIBRARIES.items():
koha.create_library(library, payload)
# print("Setting up ILL libraries in Koha...", end=" ")
# for library, payload in LIBRARIES.items():
# koha.create_library(library, payload)
koha.setup_ill(LIBRARIES.get(target).get('userid'))
print("OK Done!")
# koha.setup_ill(LIBRARIES.get(target).get('userid'))
# print("OK Done!")
print("Creating sibyl DB ...", end=" ")
koha.setup_sibyl()
print("OK Done!")
print('Importing katalog data...', end=' ')
katalog = Katalog(sparql_url)
# katalog.import_baseline() FIXME call this when baseline is available
katalog.import_files('/jobs/create-test-data/data/fjernlaan/persons.ttl', '/jobs/create-test-data/data/fjernlaan/publications.ttl')
print('OK Done!')
print('Creating biblios with items and syncing their metadata...', end=' ')
pub_uris = katalog.get_publication_uris() # FIXME also fetch number of items to make
biblionumbers = koha.create_biblios(len(pub_uris)) # FIXME return tuples (uri, biblionumber) which can be used to generate SPARQL for inserting tittelnummer
print(biblionumbers)
# katalog.link_pubs_and_biblios(pubs, biblionumbers) # FIXME insert tnr triples in virtuoso
# euler.sync_pubs(pubs) # FIXME implement metadata sync by calling euler/api/katalog/publications/[publicationId]/sync
# FIXME insert items according to spec
# FIXME trigger biblio stats availability
# FIXME trigger reindexing (sibyl, koha)
print('OK Done!')
if __name__ == '__main__':
main()
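
The metadata sync against euler is still only a FIXME above; here is a minimal sketch of what that step could look like, assuming the POST euler/api/katalog/publications/[publicationId]/sync endpoint named in the comment. The sync_pubs helper, the euler_url parameter, and the way the publication id is derived from the URI are all assumptions, not confirmed API.

import requests

def sync_pubs(euler_url, pub_uris):
    # hypothetical helper: trigger a metadata sync per publication via the
    # endpoint path quoted in the FIXME comment in main() above
    for uri in pub_uris:
        pub_id = uri.strip('<>').rsplit('/', 1)[-1]  # e.g. '<.../publication/p001>' -> 'p001'
        res = requests.post(f'{euler_url}/api/katalog/publications/{pub_id}/sync')
        res.raise_for_status()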
@base <http://data.deichman.no/> .
@prefix deich: <http://data.deichman.no/ontology#> .
# persons with names containing
# - ÆØÅ
# - special characters
# - more than 30 characters
<person/p001>
    a deich:Person ;
    deich:name "Værøy, Åge Oppdiktet" .

<person/p002>
    a deich:Person ;
    deich:name "Fenêtre, Frédér!que d'Oppdiktet" .

<person/p003>
    a deich:Person ;
    deich:name "Finckelschnell von Oppdiktet, Rafael" .
@base <http://data.deichman.no/> .
@prefix deich: <http://data.deichman.no/ontology#> .
@prefix duo: <http://data.deichman.no/utility#> .
@prefix mediaType: <http://data.deichman.no/mediaType#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix lexvo: <http://lexvo.org/ontology#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix audience: <http://data.deichman.no/audience#> .
@prefix fictionNonfiction: <http://data.deichman.no/fictionNonfiction#> .
@prefix language: <http://lexvo.org/id/iso639-3/> .
@prefix literaryForm: <http://data.deichman.no/literaryForm#> .
@prefix nationality: <http://data.deichman.no/nationality#> .
@prefix relationType: <http://data.deichman.no/relationType#> .
@prefix role: <http://data.deichman.no/role#> .
@prefix workType: <http://data.deichman.no/workType#> .
# publications with titles containing
# - ÆØÅ
# - special characters
# - more than 30 characters
<publication/p001>
    a deich:Publication ;
    deich:created "2021-01-01T01:02:03.456Z"^^xsd:dateTime ;
    deich:hasMediaType mediaType:Book ;
    deich:mainTitle "Å være en oppdiktet hærfører" ;
    deich:publicationOf <work/w001> .

<publication/p002>
    a deich:Publication ;
    deich:created "2021-01-01T01:02:03.456Z"^^xsd:dateTime ;
    deich:hasMediaType mediaType:Book ;
    deich:mainTitle "Würst oppdiktet en française!" ;
    deich:publicationOf <work/w002> .

<publication/p003>
    a deich:Publication ;
    deich:created "2021-01-01T01:02:03.456Z"^^xsd:dateTime ;
    deich:hasMediaType mediaType:Book ;
    deich:mainTitle "Kattekrigerne og den oppdiktede tittelen" ;
    deich:publicationOf <work/w003> .

# works (nothing special datawise, but are required)

<work/w001>
    a deich:Work ;
    deich:hasWorkType workType:Literature ;
    deich:mainTitle "Being a oppdiktet commander" ;
    deich:contributor [
        a deich:Contribution, deich:MainEntry ;
        deich:agent <person/p001> ;
        deich:role role:author
    ] .

<work/w002>
    a deich:Work ;
    deich:hasWorkType workType:Literature ;
    deich:mainTitle "Pølse oppdiktet på fransk" ;
    deich:contributor [
        a deich:Contribution, deich:MainEntry ;
        deich:agent <person/p002> ;
        deich:role role:author
    ] .

<work/w003>
    a deich:Work ;
    deich:hasWorkType workType:Literature ;
    deich:mainTitle "Warriors oppdiktet" ;
    deich:contributor [
        a deich:Contribution, deich:MainEntry ;
        deich:agent <person/p003> ;
        deich:role role:author
    ] .
import csv
import requests
from virtuoso import Virtuoso
class Katalog:
    GRAPH = 'https://katalog.deichman.no'

    def __init__(self, sparql_url):
        self.virtuoso = Virtuoso(sparql_url)
        self.sparql_url = sparql_url

    def import_baseline(self):
        self.virtuoso.import_data(self.GRAPH, '/jobs/create-test-data/data/fjernlaan/baseline_katalog.nq')

    def import_files(self, *filenames):
        self.virtuoso.import_data(self.GRAPH, *filenames)

    def get_publication_uris(self):
        q = """
        SELECT ?s
        FROM <https://katalog.deichman.no>
        WHERE {
            ?s a <http://data.deichman.no/ontology#Publication>
        }
        ORDER BY ?s
        """
        pub_uris = []
        # request CSV from the SPARQL endpoint so the result can be stream-parsed line by line
        with requests.post(self.sparql_url, headers={'Accept': 'text/csv'}, data={'query': q}) as res:
            lines = (line.decode('utf-8') for line in res.iter_lines())
            next(lines)  # skip the CSV header row
            for row in csv.reader(lines):
                pub_uris.append(f'<{row[0]}>')
        return pub_uris
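
The link_pubs_and_biblios call is still commented out in main.py; here is a minimal sketch of a method that could be added to Katalog, assuming create_biblios is changed to return (uri, biblionumber) pairs as the FIXME suggests. The deich:tittelnummer predicate is a guessed name for the "tnr" triples, and posting the update through the query parameter assumes the Virtuoso endpoint accepts SPARQL updates that way.

    def link_pubs_and_biblios(self, pairs):
        # hypothetical: pairs is an iterable of (pub_uri, biblionumber) tuples;
        # the tittelnummer predicate URI is an assumption
        triples = '\n'.join(
            f'{uri} <http://data.deichman.no/ontology#tittelnummer> "{bnr}" .'
            for uri, bnr in pairs
        )
        q = f'INSERT DATA {{ GRAPH <{self.GRAPH}> {{ {triples} }} }}'
        with requests.post(self.sparql_url, data={'query': q}) as res:
            res.raise_for_status()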
@@ -45,3 +45,11 @@ class Koha:
        GRANT ALL PRIVILEGES ON sibyl.* TO "sibyl";
        """
        self.run_sql(q)

    def create_biblios(self, number):
        # create `number` empty biblio records by POSTing a minimal MARCXML record for each
        biblionumbers = []
        for _ in range(number):
            with requests.post(f'{self.api_url}/biblio', data='<record></record>', headers={'Content-Type': 'text/xml'}) as res:
                biblionumbers.append(res.json().get('biblionumber'))
        return biblionumbers
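
For the FIXME in main.py that asks create_biblios to return tuples, a sketch of a tuple-returning variant; create_biblios_for is a hypothetical name reusing the same /biblio endpoint as above.

    def create_biblios_for(self, pub_uris):
        # hypothetical variant: pair each publication URI with its new biblionumber,
        # ready for a link_pubs_and_biblios-style SPARQL insert
        pairs = []
        for uri in pub_uris:
            with requests.post(f'{self.api_url}/biblio', data='<record></record>', headers={'Content-Type': 'text/xml'}) as res:
                pairs.append((uri, res.json().get('biblionumber')))
        return pairs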
import tarfile
from pathlib import Path

import docker
class Virtuoso:
    def __init__(self, sparql_url):
        self.sparql_url = sparql_url
        self.docker = docker.DockerClient(base_url='unix://var/run/docker.sock')

    def import_data(self, graph, *files):
        virtuoso = self.docker.containers.get('deichman_virtuoso_1')
        # the docker-py API doesn't support plain copying of files into a container, so make a tar first
        tarname = 'temp.tar'
        with tarfile.open(tarname, 'w') as tar:
            for f in files:
                tar.add(f, arcname=Path(f).name)
        # copy the tar into /data/ in the container using put_archive
        with open(tarname, 'rb') as t:
            tardata = t.read()
        put_success = virtuoso.put_archive('/data/', tardata)
        if not put_success:
            raise RuntimeError('Copying file into virtuoso container failed!')
        # ref http://vos.openlinksw.com/owiki/wiki/VOS/VirtBulkRDFLoader
        isql_ld_dirs = ''.join(f"ld_dir('/data', '{Path(f).name}', '{graph}');" for f in files)
        isql_bulk_load_command = f'{isql_ld_dirs}\nrdf_loader_run();\ncheckpoint;'
        res = virtuoso.exec_run(f'isql-v -P secret exec="{isql_bulk_load_command}"')
        if res.exit_code != 0:
            raise RuntimeError(f'Loading data into virtuoso failed: {res.output.decode("utf-8")}')
        print(res.output.decode('utf-8'))
        # res = virtuoso.exec_run(f'rm data/{filename}')
        # if res.exit_code != 0:
        #     raise RuntimeError(f'Removing file from container failed: {res.output}')
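
The temp.tar written to the local filesystem could be avoided: docker-py's put_archive accepts raw bytes, so the archive can be built in memory with tarfile and io.BytesIO. A sketch of that alternative:

import io
import tarfile
from pathlib import Path

def make_tar_bytes(*files):
    # build the tar archive in memory instead of writing temp.tar to disk
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode='w') as tar:
        for f in files:
            tar.add(f, arcname=Path(f).name)
    return buf.getvalue()

# usage inside import_data: virtuoso.put_archive('/data/', make_tar_bytes(*files))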