XQuery/Tag Cloud
From Wikibooks, the open-content textbooks collection
< XQuery
Contents |
[edit] Counting Words
[edit] Total of words in a text object
From Jon Robie's blog
(: Counts the words in all text nodes of the externally supplied $doc.
   The text nodes are joined with single spaces, then split on runs of
   whitespace, the punctuation characters , . ! : ; and the literal
   character sequence "nbsp;". :)
declare variable $doc external;

let $txt := string-join($doc//text(), " ")
(: tokenize() yields a zero-length token when the text starts or ends with a
   separator; those are filtered out so they are not counted as words. :)
let $words := tokenize($txt, '(\s|[,.!:;]|[n][b][s][p][;])+')[. != '']
return count($words)
Note that the string-join() function takes an input sequence and returns a single string in which the items are separated by the second argument of string-join() (here, a single space).
If you want to see what this counts as a word in your document, use the following variation.
(: Variation of the word counter: returns a <words> element whose content is
   the joined text of $doc and whose count attribute is the number of words. :)
declare variable $doc external;
let $txt := string-join( $doc//text() , " ")
(: Drop the zero-length tokens that tokenize() produces when the text starts
   or ends with a separator, so they are not counted as words. :)
let $word-total := count(tokenize($txt,'(\s|[,.!:;]|[n][b][s][p][;])+')[. != ''])
return <words count="{$word-total}">
{ $txt }
</words>
Another variation is the word-count() function found at xqueryfunctions.com:
(: Returns the number of words in $arg.
   $arg is split on runs of non-word characters (\W+); zero-length tokens are
   ignored. An empty sequence or empty string yields 0. :)
declare function local:word-count( $arg as xs:string? ) as xs:integer {
  let $tokens := tokenize($arg, '\W+')
  let $non-empty :=
    for $token in $tokens
    where $token != ''
    return $token
  return count($non-empty)
} ;
This version splits the text on the \W+ regular expression (one or more non-word characters) and discards any empty tokens, so only the word tokens are counted.
[edit] Counting Keywords
Kurt Cagle suggested the following XQuery for counting keywords:
declare namespace xqwb="http://xquery.wikibooks.org";
declare option exist:serialize "method=xml media-type=text/xml indent=yes";
(: Builds a frequency table for the <term> children of $wordlist.
   Returns a <terms> element holding one <term count="N"> per distinct term
   value, where N is the number of <term> children equal to that value.
   Terms appear in whatever order distinct-values() returns them. :)
declare function xqwb:word-count($wordlist as element() ) as element() {
  let $all-terms := $wordlist/term
  return
    <terms>
    {
      for $value in distinct-values($all-terms)
      return <term count="{count($all-terms[. = $value])}">{$value}</term>
    }
    </terms>
};
(: Main query: builds the sample <keywords> list, one <term> per colour name
   in the order given, and returns its frequency table. :)
let $colours := (
  'red', 'green', 'red', 'blue', 'violet', 'red', 'blue',
  'blue', 'red', 'orange', 'green', 'yellow', 'indigo', 'red'
)
let $keywords :=
  <keywords>{
    for $colour in $colours
    return <term>{$colour}</term>
  }</keywords>
return xqwb:word-count($keywords)
[Execute]
[edit] This Returns the Following
<terms>
<term count="5">red</term>
<term count="2">green</term>
<term count="3">blue</term>
<term count="1">violet</term>
<term count="1">orange</term>
<term count="1">yellow</term>
<term count="1">indigo</term>
</terms>
[edit] Creating a Tag Cloud
From this you can create a tag cloud or word density map, such as the "Popular Tags" page on the Flickr web site: Flickr Popular Tags
declare namespace xqwb="http://xquery.wikibooks.org";
declare option exist:serialize "method=xhtml media-type=text/html indent=yes";
(: Counts how often each distinct term occurs among the <term> children of
   $wordlist. The result is a <terms> element containing one <term> per
   distinct value, with the number of occurrences in its count attribute. :)
declare function xqwb:word-count($wordlist as element() ) as element() {
  <terms>
  {
    let $entries := $wordlist/term
    for $label in distinct-values($entries)
    let $occurrences := $entries[. = $label]
    return <term count="{count($occurrences)}">{$label}</term>
  }
  </terms>
};
(: Main query: renders the sample keyword list as an HTML tag cloud.
   Each distinct term becomes a <span> whose font size is the term's share of
   all keywords, expressed as a percentage and multiplied by $scale. :)
let $keywords :=
<keywords>
<term>red</term>
<term>green</term>
<term>red</term>
<term>blue</term>
<term>violet</term>
<term>red</term>
<term>blue</term>
<term>blue</term>
<term>red</term>
<term>orange</term>
<term>green</term>
<term>yellow</term>
<term>indigo</term>
<term>red</term>
</keywords>
let $result := xqwb:word-count($keywords)
let $total := count($keywords/term)
let $scale := 20
return
<div>
{
for $term in $result/term
(: e.g. 5 of 14 keywords gives round(5 div 14 * 100 * 20) = 714 (percent) :)
let $fontSize := round( $term/@count div $total * 100 * $scale)
order by $term
(: The separating space is produced with concat() because a literal space
   between an enclosed expression and the end tag is boundary whitespace,
   which is stripped under the default boundary-space policy and would make
   the tags run together. :)
return <span style="font-size:{$fontSize}%">{concat(string($term), ' ')}</span>
}
</div>