XQuery/Wikibook list of code links

From Wikibooks, open books for an open world
Jump to navigation Jump to search

Motivation[edit | edit source]

This Wikibook contains links to code samples executed on a University server. We need to keep track of all the links so that we can ensure that they remain live, run them all through a test bed, and support changes to the directory structure or filenames.

Approach[edit | edit source]

The script is similar to the index script at the beginning of the book: it first gets the list of pages in the book. It then fetches each page and extracts the anchor tags whose href contains ".xq". This test is a bit loose and includes some false positives. In the output, each page title links to its Wikibook page, and the actual URL of each code link is listed.

xquery version "1.0";
(:
   Lists the code-sample links found in the pages of a Wikibook.

   Request parameter:
     book - name of the Wikibook category to scan (defaults to "XQuery").

   Output: an XHTML page with one entry per Wikibook page that has at least
   one anchor whose href contains ".xq"; each entry links to the Wikibook
   page and lists the href of every matching anchor beneath it.

   The href test is deliberately loose, so some non-code links may appear.
:)
import module namespace httpclient = "http://exist-db.org/xquery/httpclient";

declare option exist:serialize "method=xhtml media-type=text/html";

let $book:= request:get-parameter("book","XQuery")
let $base := "http://en.wikibooks.org"
(: Fetch the category page for the book; its body holds the list of pages. :)
let $indexPage :=httpclient:get(xs:anyURI(concat($base,"/wiki/Category:",$book,"?x")),false(),())/httpclient:body/*

let $pages := $indexPage//div[@class="mw-category"]//li
return
<html>
    <head>
       <title>Index of {$book} code samples</title>
    </head>
    <body>
        <h1>Index of {$book} code samples</h1>
        <ul>
        {
        (: Only pages whose sub-page name (the text after '/') starts with a
           single character are considered; top-level entries are skipped. :)
        for $letter in distinct-values($pages/upper-case(substring(substring-after(.,'/'),1,1)))[string-length(.) = 1]
        for $page in $pages[starts-with(upper-case(substring-after(.,'/')),$letter)]
        let $title := string($page)
        let $url := concat($base,$page/a/@href)
        (: Fetch the page itself and keep the anchors that look like script links. :)
        let $refs := httpclient:get(xs:anyURI($url),false(),())/httpclient:body//a[contains(@href,".xq")]
        order by $title
        return
           if (exists($refs))
           then
                <li><a href="{$url}">{$title}</a>
                    <ul>
                    {for $ref in $refs
                     return
                         <li> {string( $ref /@href)} </li>
                    }
                    </ul>
                </li>
            else  ()
        }
        </ul>
    </body>
</html>