# -*- coding: utf-8 -*-
#
# PRONOM UTILS
#
# PYTHON FUNCTIONS TO QUERY THE PRONOM VERSION
# AND DOWNLOAD THE SIGNATURE FILE
# USING THE PRONOM SOAP SERVICE
#
# Open Planets Foundation (http://www.openplanetsfoundation.org)
# See License.txt for license information.
# Download from: http://github.com/openplanets/fido/downloads
# Author: Maurice de Rooij (OPF/NANETH), 2012
#
# PRONOM UTILS is a library used by FIDO
# FIDO uses the UK National Archives (TNA) PRONOM File Format and Container descriptions.
# PRONOM is available from http://www.nationalarchives.gov.uk/pronom/
#
import sys
from xml.dom import minidom
# Library metadata; the 'version' entry is embedded in the User-Agent header
# sent to the PRONOM web service.
__pronomutils__ = dict(version="1.0.1")
def checkWellFormedness(filename,error=False):
    """
    Check whether a file contains well-formed XML.

    usage: checkWellFormedness(filename)
    arguments:
        "filename": path of the file to check
        "error": whether or not to print to stderr upon error
    returns: True if the file parses as well-formed XML; False otherwise,
        including when the file cannot be opened or read
    """
    import xml.parsers.expat
    parser = xml.parsers.expat.ParserCreate()
    try:
        # Binary mode: expat decodes the bytes itself according to the XML
        # declaration. The "with" block closes the handle even when parsing fails.
        with open(filename, "rb") as handle:
            parser.ParseFile(handle)
    except Exception as e:
        # Deliberately broad: any failure (I/O or parse) means "not well formed".
        if error is not False:
            sys.stderr.write("checkWellFormedness: %s: %s;\n" % (filename, e))
        return False
    return True
def getPronomSignature(type):
    """
    Query the PRONOM SOAP service for the latest signature file.

    usage: getPronomSignature(version|file)
    arguments:
        "version": returns latest signature file version number as int
        "file": returns latest signature XML file as string
    upon error: writes to stderr and returns False

    The parameter name "type" shadows the builtin; it is kept so that existing
    callers passing it by keyword keep working.
    """
    # NOTE(review): the SOAP envelope, the XML declaration and the opening tags
    # of both regular expressions were empty/truncated in the reviewed copy of
    # this file. They are reconstructed here from the SOAPAction names and the
    # closing tags that were still present -- confirm against the PRONOM
    # service description before release.
    soapEnvelope = (
        '<?xml version="1.0" encoding="utf-8"?>'
        '<soap:Envelope xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" '
        'xmlns:xsd="http://www.w3.org/2001/XMLSchema" '
        'xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">'
        '<soap:Body><%s xmlns="http://pronom.nationalarchives.gov.uk" /></soap:Body>'
        '</soap:Envelope>'
    )
    soapOperations = {
        "version": "getSignatureFileVersionV1",
        "file": "getSignatureFileV1",
    }
    xmlDeclaration = '<?xml version="1.0" encoding="UTF-8"?>'
    try:
        # Validate the request kind before touching the network.
        if type not in soapOperations:
            sys.stderr.write("getPronomSignature(): unknown type: "+str(type))
            return False

        import httplib
        import re
        import xml.parsers.expat

        operation = soapOperations[type]
        soapStr = soapEnvelope % operation
        soapAction = '"http://pronom.nationalarchives.gov.uk:%sIn"' % operation

        webservice = httplib.HTTP("apps.nationalarchives.gov.uk")
        try:
            webservice.putrequest("POST", "/pronom/service.asmx")
            # NOTE(review): the Host header differs from the host connected to;
            # left unchanged because it may be deliberate -- confirm.
            webservice.putheader("Host", "www.nationalarchives.gov.uk")
            webservice.putheader("User-Agent", "PRONOM UTILS v{0} (OPF)".format(__pronomutils__['version']))
            webservice.putheader("Content-type", "text/xml; charset=\"UTF-8\"")
            webservice.putheader("Content-length", "%d" % len(soapStr))
            webservice.putheader("SOAPAction", soapAction)
            try:
                # endheaders() is where the connection is actually opened.
                webservice.endheaders()
            except Exception as e:
                sys.stderr.write("getPronomSignature(): failed to contact PRONOM;\n%s\n" % (e))
                # Return False as documented instead of exiting the whole
                # process (previously sys.exit() with a success status).
                return False
            webservice.send(soapStr)
            statuscode, statusmessage, header = webservice.getreply()
            if statuscode != 200:
                sys.stderr.write("getPronomSignature(): webservice error: '"+str(statuscode)+" "+str(statusmessage)+"'\n")
                return False
            # Read the whole body before the connection is closed below.
            response = webservice.getfile().read()
        finally:
            webservice.close()

        if type == "version":
            found = re.search(r"<Version>([0-9]{1,4})</Version>", response)
            if found is None:
                sys.stderr.write("getPronomSignature(): could not parse VERSION from SOAP response: "+type)
                return False
            return int(found.group(1))

        # type == "file": cut the signature document out of the SOAP response.
        # DOTALL lets the match span line breaks inside the response.
        found = re.search(r"<SignatureFile.*</SignatureFile>", response, re.DOTALL)
        if found is None:
            sys.stderr.write("getPronomSignature(): could not parse XML from SOAP response: "+type)
            return False
        signature = xmlDeclaration+"\n"+found.group(0)
        # Check well-formedness in memory rather than through a fixed-name
        # temporary file in the current working directory.
        try:
            xml.parsers.expat.ParserCreate().Parse(signature, True)
        except xml.parsers.expat.ExpatError:
            sys.stderr.write("getPronomSignature(): signaturefile not well formed")
            return False
        return signature
    except Exception as e:
        sys.stderr.write("getPronomSignature(): unknown error: "+str(e))
        return False