""" author: http://purl.org/net/chimezie/foaf#chime The FtRdf class implements the rdflib backend API to provide a wrapper around a 4Suite RDF Model so it can be used interchangeably with any of the other rdflib Backend implementations. Interestingly, since 4Suite RDF itself abstracts the actual underlying data store, this essentially provides rdflib with support for the backend drivers implemented by 4Suite RDF but not by rdflib, such as: MySQL Posgres Oracle The API stack would be as follows: rdflib.Graph Backend 4Suite RDF -------------------- MySQL|Postgres|Oracle In addition, any component that works with rdflib's Graph interface will work with a 4Suite RDF Model (using this wrapper as the backend). Two good examples are Sparta, and Sparql-p """ from rdflib.backends import Backend from rdflib.Literal import Literal from rdflib.URIRef import URIRef from rdflib.BNode import BNode from Ft.Rdf.Statement import Statement from Ft.Rdf import Model,OBJECT_TYPE_UNKNOWN,OBJECT_TYPE_RESOURCE,OBJECT_TYPE_LITERAL,BNODE_BASE from Ft.Rdf.Drivers.Util import FtRDFConfigurationManager #Helper functions def objTerm2ObjType(term): if isinstance(term, URIRef) or isinstance(term, BNode): return OBJECT_TYPE_RESOURCE elif isinstance(term, Literal): #NOTE: datatype is lost.. language = term.language datatype = term.datatype if language: return language else: return OBJECT_TYPE_LITERAL else: return OBJECT_TYPE_UNKNOWN #Convert a term on Ft.Rdf.Statement instance (a URI ) to an rdflib term. 
def ftRdf2rdflibTerm(term):
    """
    Convert a flat term taken from an Ft.Rdf.Statement into an rdflib term.

    Terms starting with BNODE_BASE become BNode instances, everything else
    becomes a URIRef.  The term is decoded from UTF-8 in both cases.
    """
    if term.startswith(BNODE_BASE):
        return BNode(term.decode("UTF-8"))
    return URIRef(term.decode("UTF-8"))


#Convert an rdflib term to a flat URI
def term2Uri(term,model):
    """
    Convert an rdflib term into the flat string form handed to 4RDF.

    term  -- a URIRef, BNode or Literal
    model -- the 4RDF model; used to mint a new blank node identifier

    URIRef and Literal terms are returned UTF-8 encoded.  A BNode whose
    identifier already starts with BNODE_BASE (i.e. one produced by
    ftRdf2rdflibTerm from this kind of model) is passed through unchanged so
    that it still refers to the same node; any other BNode gets a freshly
    generated identifier from the model.

    Raises Exception for any other kind of term.
    """
    if isinstance(term, URIRef):
        return term.encode("UTF-8")
    if isinstance(term, BNode):
        if term.startswith(BNODE_BASE):
            #Round-trip: keep the identity of blank nodes read from 4RDF,
            #otherwise they could never be matched or re-used
            return term.encode("UTF-8")
        return model.generateBnode()
    if isinstance(term, Literal):
        #NOTE: language and datatype are not part of the flat value
        return term.encode("UTF-8")
    #Wrap in a tuple so that a tuple argument is formatted, not unpacked
    raise Exception("Unknown term Type for: %s" % (term,))


def _patternTerm(term,model):
    """Flatten one term of a match pattern; None is kept as the wildcard."""
    if term is None:
        return None
    return term2Uri(term,model)


def _objectTerm(stmt):
    """Build the rdflib object term for a 4RDF statement from its objectType."""
    objType = stmt.objectType
    if objType == OBJECT_TYPE_RESOURCE:
        if stmt.object.startswith(BNODE_BASE):
            return BNode(stmt.object)
        return URIRef(stmt.object)
    if objType in (OBJECT_TYPE_LITERAL,OBJECT_TYPE_UNKNOWN):
        return Literal(stmt.object)
    #Any other object type: objTerm2ObjType stores a literal's language as
    #the object type, so read it back as a language-tagged literal
    return Literal(stmt.object, lang=objType)


#Build a Ft.Rdf.Statement instance from rdflib terms
def ftStatementFromParts(subject,predicate,object,context,model):
    """
    Build an Ft.Rdf.Statement from rdflib terms.

    The three terms are flattened with term2Uri, the context is used as the
    statement scope and the object type is derived from the rdflib object
    term with objTerm2ObjType.
    """
    return Statement(term2Uri(subject,model),
                     term2Uri(predicate,model),
                     term2Uri(object,model),
                     scope=context,
                     objectType=objTerm2ObjType(object))


class FtRdf(Backend):
    """
    Wraps the rdflib Backend API around a 4RDF model.

    The first argument is a configuration string used by
    Ft.Rdf.Drivers.Util.FtRDFConfigurationManager to resolve a driver
    instance.  The second argument (if provided) is a live model to use
    instead; the configuration is then not consulted.
    """

    def __init__(self,configuration,liveModel=None):
        super(FtRdf, self).__init__()
        if liveModel is not None:
            self.model = liveModel
        else:
            factory = FtRDFConfigurationManager(configuration)
            drv = factory.createDriver()
            drv.begin()
            self.model = Model.Model(drv)
        self.context_aware = True
        #prefix -> namespace bindings (kept here, not in the 4RDF model)
        self._nsBindings = {}

    def __len__(self, context=None):
        """Return the size reported by the model for the given context."""
        return self.model.size(context)

    def add(self, triple, context=None):
        """
        Add a triple to the store of triples.

        triple is a (subject, predicate, object) sequence of rdflib terms.
        """
        subject, predicate, obj = triple
        self.model.add(ftStatementFromParts(subject,predicate,obj,context,self.model))

    def remove(self, triple, context):
        """
        Remove the statement built from the (subject, predicate, object)
        triple and the context from the model.
        """
        subject, predicate, obj = triple
        statement = ftStatementFromParts(subject,predicate,obj,context,self.model)
        self.model.remove(statement)

    def triples(self, triple, context=None):
        """
        A generator over all the triples matching.

        We essentially need to match this API with Ft.Rdf.Model.complete
        (they are mostly identical - None's for wildcards).  Only None is
        treated as a wildcard; any other term is flattened with term2Uri.
        Yields (subject, predicate, object) tuples of rdflib terms.
        """
        subject, predicate, obj = triple
        model = self.model
        matches = model.complete(_patternTerm(subject,model),
                                 _patternTerm(predicate,model),
                                 _patternTerm(obj,model),
                                 scope=context)
        for stmt in matches:
            s = ftRdf2rdflibTerm(stmt.subject)
            p = URIRef(stmt.predicate)
            o = _objectTerm(stmt)
            yield s,p,o

    def contexts(self, triple=None): # TODO: have Graph support triple?
        """
        Essentially, iterate over triples (using Ft.Rdf.Model.complete) using
        the given triple parts or None,None,None.  Yield unique
        contexts/scopes in the underlying 4RDF Model.
        """
        if triple:
            s, p, o = triple
        else:
            s = p = o = None
        model = self.model
        seen = {}
        for stmt in model.complete(_patternTerm(s,model),
                                   _patternTerm(p,model),
                                   _patternTerm(o,model)):
            scope = stmt.scope
            if scope not in seen:
                seen[scope] = None
                yield scope

    def remove_context(self, identifier):
        """Remove every statement whose scope is the given identifier."""
        self.model.removePattern(None,None,None,scope=identifier)

    def bind(self, prefix, namespace):
        """Record a prefix -> namespace binding."""
        self._nsBindings[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to prefix, or None if unbound."""
        return self._nsBindings.get(prefix)

    def prefix(self, namespace):
        """Return a prefix bound to namespace, or None if there is none."""
        reverse = dict([(value,key) for key,value in self._nsBindings.items()])
        return reverse.get(namespace)

    def namespaces(self):
        """Return the bound namespaces (values only)."""
        #NOTE(review): other rdflib backends may yield (prefix, namespace)
        #pairs here - confirm against the Backend API before relying on this
        return self._nsBindings.values()


def test(g,channel):
    """
    Pretty-print the rss:title values of every rss:item in graph g that has
    a dc:subject containing 'rdf'.  The channel argument is currently unused.
    """
    #Imported here so the function also works when this module is imported
    #rather than run as a script
    from pprint import pprint
    from rdflib.RDF import type as rdf_type
    from sparta import ThingFactory
    Thing = ThingFactory(g)
    rssItem = URIRef('http://purl.org/rss/1.0/item')
    for item,p,o in g.triples((None,rdf_type,rssItem)):
        item = Thing(item)
        rdfSubjects = [subj for subj in item.dc_subject.copy() if subj.find('rdf') != -1]
        if rdfSubjects:
            for title in item.rss_title.copy():
                pprint(title)


if __name__ == '__main__':
    import sys
    from Ft.Rdf.Drivers import Memory
    from rdflib import Graph
    from rdflib.Graph import Seq
    from rdflib.RDF import li,type as rdf_type
    from Ft.Rdf import Model ,RDF_MS_BASE
    from Ft.Rdf.Serializers import Dom
    from Ft.Xml import Domlette
    from rdflibUtils import GraphPattern
    from rdflibUtils import myTripleStore
    from pprint import pprint
    import urllib2,time
    from Ft.Rdf import Util

    #Setup a memory backend
    backEnd = FtRdf('driver=memory,model=test,dbHandle=test')
    model = backEnd.model

    #Parse my del.icio.us rss feed
    feedUrl = 'http://del.icio.us/rss/chimezie'
    szr = Dom.Serializer()
    feed = urllib2.urlopen(feedUrl)
    try:
        domStr = feed.read()
    finally:
        #Do not leak the HTTP connection
        feed.close()
    dom = Domlette.NonvalidatingReader.parseString(domStr,feedUrl)
    szr.deserialize(model,dom,scope=feedUrl)

    #Setup rdflib.Graph with FtRDF Model as Backend, using FtRdf driver
    g = Graph(backEnd)
    g.bind('rss',"http://purl.org/rss/1.0/")
    g.bind('dc',"http://purl.org/dc/elements/1.1/")
    g.bind('rdf',RDF_MS_BASE)

    test(g,URIRef('http://del.icio.us/chimezie'))
    #NOTE(review): the script stops here (with exit status 1); everything
    #below is currently unreachable - confirm whether that is intended
    sys.exit(1)

    print("""\
SELECT ?title
WHERE {
    ?item rdf:type rss:item;
    dc:subject ?subj;
    rss:title ?title.
    FILTER (REGEX(?subj,".*rdf")).
}""")

    #Setup sparql-p query processor engine
    #NOTE(review): this is a plain string, not a 1-tuple - confirm what
    #the query API expects
    select = ("?title")

    #Setup term
    copia = URIRef('http://del.icio.us/chimezie')
    rssTitle = URIRef('http://purl.org/rss/1.0/title')
    versaWiki = URIRef('http://en.wikipedia.org/wiki/Versa')
    dc_subject = URIRef("http://purl.org/dc/elements/1.1/subject")

    #Filter on objects of statements (dc:subject values) - keep only those
    #containing the string 'rdf'
    def rdfSubFilter(subj,pred,obj):
        return bool(obj.find('rdf')+1)

    #Execute query
    where = GraphPattern([("?item",rdf_type,URIRef('http://purl.org/rss/1.0/item')),
                          ("?item",dc_subject,"?subj",rdfSubFilter),
                          ("?item",rssTitle,"?title")])
    #Re-use the already populated model: it is the liveModel argument, not
    #the configuration string
    tStore = myTripleStore(FtRdf(None,model))
    start = time.time()
    result = tStore.query(select,where)
    print("sparql-p: %f"%(time.time()-start))
    pprint(result)

    vQuery = "(type(rss:item)|-dc:subject->contains('rdf'))-rss:title->*"
    print(vQuery)
    start = time.time()
    rt = Util.VersaQuery(vQuery,
                         model,
                         {'rss':'http://purl.org/rss/1.0/',
                          'dc':'http://purl.org/dc/elements/1.1/',})
    print("Versa: %f"%(time.time()-start))
    pprint(rt)