XQuery/Unzipping an Office Open XML docx file

From Wikibooks, open books for an open world
Jump to navigation Jump to search


You want to uncompress a docx file and store its parts in the database.


We will use the compression:unzip() function used in the prior example and pass it a local version of the function that handles the uncompression.

File Names[edit]

Some file names in docx files such as '[Content_Types].xml' are not valid URIs. So these must be renamed to files with valid URIs.

Here is a typical list of the path names in a docx file:

<item path="[Content_Types].xml" type="resource">Types</item>
<item path="_rels/.rels" type="resource">Relationships</item>
<item path="word/_rels/document.xml.rels" type="resource">Relationships</item>
<item path="word/document.xml" type="resource">w:document</item>
<item path="word/theme/theme1.xml" type="resource">a:theme</item>
<item path="word/settings.xml" type="resource">w:settings</item>
<item path="word/fontTable.xml" type="resource">w:fonts</item>
<item path="word/webSettings.xml" type="resource">w:webSettings</item>
<item path="docProps/app.xml" type="resource">Properties</item>
<item path="docProps/core.xml" type="resource">cp:coreProperties</item>
<item path="word/styles.xml" type="resource">w:styles</item>

Note that three top-level subfolders are created (_rels, word and docProps), and that word contains further subfolders (word/_rels and word/theme). The XML files are stored in these folders.

unzip-docx function[edit]

The following function is used to unzip a docx file. A reference to this function must be passed as a parameter to the unzip function to tell it what to do with each entry in the docx file.

Note that you must pass in parameters to this function from the calling function.

unzip-docx function:

(:~
 : Entry-data callback for compression:unzip(): handles one entry of a docx archive.
 :
 : In "list" mode (a param element with name="list" and value="true") the entry is
 : only described. Otherwise the entry is stored in the database under
 : {base-collection}{zip-name}_{zip-extension}_parts/{folder inside the zip}.
 :
 : @param $path       path of the entry inside the zip, e.g. 'word/document.xml'
 : @param $data-type  entry type as reported by compression:unzip()
 : @param $data       entry content (may be empty)
 : @param $param      <param name=".." value=".."/> elements supplied by the caller;
 :                    'base-collection' and 'zip-filename' are read, 'list' is optional
 : @return an <item/> element in list mode, otherwise a <result/> element naming
 :         the entry and the location it was stored at
 :)
declare function local:unzip-docx($path as xs:string, $data-type as xs:string, $data as item()?, $param as item()*) {
    if ($param[@name eq 'list']/@value eq 'true') then
        <item path="{$path}" data-type="{$data-type}"/>
    else
        let $base-collection := $param[@name="base-collection"]/@value/string()
        let $zip-filename := $param[@name="zip-filename"]/@value/string()
        (: one collection per archive, e.g. 'hello-world.docx' -> 'hello-world_docx_parts/';
           the trailing slash keeps it separate from the inner folder appended below :)
        let $zip-collection :=
            concat(
                functx:substring-before-last($zip-filename, '.'),
                '_',
                functx:substring-after-last($zip-filename, '.'),
                '_parts/'
            )
        (: folder part of the entry path; empty for entries at the root of the zip :)
        let $inner-collection := functx:substring-before-last($path, '/')
        let $filename := if (contains($path, '/')) then functx:substring-after-last($path, '/') else $path
        (: we need to encode the filename to account for filenames with illegal characters like [Content_Types].xml :)
        let $filename := xmldb:encode($filename)
        let $target-collection := concat($base-collection, $zip-collection, $inner-collection)
        let $mkdir :=
            if (xmldb:collection-available($target-collection)) then ()
            else xmldb:create-collection($base-collection, concat($zip-collection, $inner-collection))
        let $store :=
            (: ensure mimetype is set properly for .docx rels files :)
            if (ends-with($filename, '.rels')) then
                xmldb:store($target-collection, $filename, $data, 'application/xml')
            else
                xmldb:store($target-collection, $filename, $data)
        return
            <result object="{$path}" destination="{concat($target-collection, '/', $filename)}"/>
};

unzip function[edit]

(:~
 : Lists or unpacks a zip (docx) file that is stored in the database.
 :
 : @param $base-collection  collection holding the zip file; it is concatenated directly
 :                          with the file name, so it must end with '/'
 : @param $zip-filename     name of the zip file inside $base-collection
 : @param $action           'list' to describe the entries, 'unzip' to store them
 : @return <error>Invalid action</error> for any other action, otherwise a <results/>
 :         element wrapping whatever local:unzip-docx returned for each entry
 :)
declare function local:unzip($base-collection as xs:string, $zip-filename as xs:string, $action as xs:string) {
    if (not($action = ('list', 'unzip'))) then
        <error>Invalid action</error>
    else
        let $file := util:binary-doc(concat($base-collection, $zip-filename))
        (: local:unzip-entry-filter is not defined in this section; it must be declared
           elsewhere in the query with arity 3 :)
        let $entry-filter := util:function(QName("local", "local:unzip-entry-filter"), 3)
        let $entry-filter-params := ()
        let $entry-data := util:function(QName("local", "local:unzip-docx"), 4)
        (: the parentheses are required so that all params are bound as one sequence :)
        let $entry-data-params := (
            if ($action eq 'list') then <param name="list" value="true"/> else (),
            <param name="base-collection" value="{$base-collection}"/>,
            <param name="zip-filename" value="{$zip-filename}"/>
        )
        (: NOTE(review): hard-coded admin account with an empty password; acceptable for a
           local demo only, replace with real credentials before using this anywhere else :)
        let $login := xmldb:login('/db', 'admin', '')

        (: hand every entry that passes the filter to local:unzip-docx :)
        let $unzip := compression:unzip($file, $entry-filter, $entry-filter-params, $entry-data, $entry-data-params)
        return
            <results action="{$action}">{$unzip}</results>
};

Sample Driver[edit]

(: Sample driver: unpacks /db/test/hello-world.docx.
   The collection path must end with '/' because local:unzip concatenates it
   directly with the file name. :)
let $collection := '/db/test/'
let $zip-filename := 'hello-world.docx'
let $action := 'unzip' (: valid actions: 'list', 'unzip' :)
return
    local:unzip($collection, $zip-filename, $action)