xquery version "3.0";
(:
    Copyright © ART-DECOR Expert Group and ART-DECOR Open Tools
    see https://art-decor.org/mediawiki/index.php?title=Copyright
    
    This program is free software; you can redistribute it and/or modify it under the terms of the
    GNU Lesser General Public License as published by the Free Software Foundation; either version
    2.1 of the License, or (at your option) any later version.
    
    This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
    without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
    See the GNU Lesser General Public License for more details.
    
    The full text of the license is available at http://www.gnu.org/copyleft/lesser.html
:)
module namespace md                 = "http://art-decor.org/ns/art/markdown";

declare namespace output    = "http://www.w3.org/2010/xslt-xquery-serialization";

(: Carriage return character (U+000D). :)
declare variable $md:CR   := '&#xD;';
(: Line feed character (U+000A); the line separator md:html2markdown hands to the converter. :)
declare variable $md:LF   := '&#xA;';
(: Carriage return immediately followed by line feed. :)
declare variable $md:CRLF := '&#xD;&#xA;';

(:
    Converts HTML content to Markdown text.

    $html    the HTML items (elements, text nodes, ...) to convert
    returns  the Markdown string with leading and trailing whitespace removed

    After conversion, emphasis markers that ended up separated by a single
    whitespace character ('* **' or '** *') are merged into '***'. This is done
    line by line and never touches a leading '* ' list bullet: applied to the
    whole document it would turn the list item '* **Name**' into '***Name**'
    and glue '**bold**' to a '* item' bullet on the following line.
:)
declare
    %output:media-type("text/plain")
    %output:method("text")
function md:html2markdown($html as item()*) {
    let $markdown := md:typeswitcher($html, $md:LF)
    let $trimmed  := replace($markdown, '^\s+|\s+$', '')
    return string-join(
        for $line in tokenize($trimmed, $md:LF)
        (: split off an (optionally indented) '* ' list bullet so it is not mistaken for emphasis :)
        let $rest   := replace($line, '^\s*\*\s', '')
        let $bullet := substring($line, 1, string-length($line) - string-length($rest))
        return concat($bullet, replace($rest, '\*\s\*\*|\*\*\s\*', '***'))
    , $md:LF)
};

(:
    Returns the opening Markdown emphasis marker implied by the @style of the
    parent element of $node: '~~' for text-decoration line-through, '*' for
    font-style italic and '**' for font-weight bold, concatenated in that order.
    Returns '' when none of them applies.
:)
declare %private function md:stylemarker($node as node()) as xs:string {
    let $style := string($node/parent::*/@style)
    return concat(
        if (matches($style, 'text-decoration:\s*line-through')) then '~~' else '',
        if (matches($style, 'font-style:\s*italic')) then '*' else '',
        if (matches($style, 'font-weight:\s*bold')) then '**' else ''
    )
};

(:
    Returns the closing counterpart of an opening marker, which is the marker
    with its characters reversed, e.g. '~~***' becomes '***~~'.
:)
declare %private function md:closingmarker($marker as xs:string) as xs:string {
    codepoints-to-string(reverse(string-to-codepoints($marker)))
};

(:
    Converts a sequence of HTML items to Markdown by recursive descent and
    returns the results concatenated into one string ('' for an empty sequence).

    $node     the items to convert
    $newline  the line separator written around block-level constructs

    Elements are dispatched on their local name. Elements without a dedicated
    branch are written as their string value, wrapped in the emphasis markers
    derived from the parent's @style. Comments are dropped. Text nodes get the
    same emphasis markers; their line feeds are replaced by spaces, except
    inside a pre element where line breaks are significant and are kept so that
    the fenced code block stays multi-line. Any other node is processed through
    its child nodes.
:)
declare %private function md:typeswitcher($node as item()*, $newline as xs:string) {
string-join(
    for $child in $node
    return
        typeswitch ($child)
        case element() return
            switch (local-name($child))
            case 'div' return concat($newline, $newline, md:typeswitcher($child/node(), $newline), $newline, $newline)
            (: a paragraph inside a list item must not break the item apart :)
            case 'p' return
                if ($child/parent::li)
                then md:typeswitcher($child/node(), $newline)
                else concat($newline, md:typeswitcher($child/node(), $newline), $newline)
            case 'span' return md:typeswitcher($child/node(), $newline)
            case 'ul'
            case 'ol' return concat($newline, md:typeswitcher($child/(li | caption), $newline), $newline)
            (: four spaces of indentation per enclosing list item, then the bullet :)
            case 'li' return concat(
                string-join(for $i in (1 to count($child/ancestor::li)) return '    ', ''),
                if ($child/parent::ol) then '1. ' else '* ',
                md:typeswitcher($child/node(), $newline),
                $newline
            )
            case 'em'
            case 'i' return concat('*', md:typeswitcher($child/node(), $newline), '*')
            case 'b'
            case 'strong' return concat('**', md:typeswitcher($child/node(), $newline), '**')
            (: double quotes in the title are replaced by apostrophes because the title itself is double-quoted :)
            case 'a' return concat(
                '[', md:typeswitcher($child/node(), $newline), '](', $child/@href,
                if ($child[@title]) then concat(' "', replace($child/@title, '"', '&#39;'), '"') else (),
                ')'
            )
            case 'br' return $newline
            case 'h1' return concat($newline, '# ', md:typeswitcher($child/node(), $newline), $newline)
            case 'h2' return concat($newline, '## ', md:typeswitcher($child/node(), $newline), $newline)
            case 'h3' return concat($newline, '### ', md:typeswitcher($child/node(), $newline), $newline)
            case 'h4' return concat($newline, '#### ', md:typeswitcher($child/node(), $newline), $newline)
            case 'h5' return concat($newline, '##### ', md:typeswitcher($child/node(), $newline), $newline)
            (: h7 to h9 are written with the same six-hash marker as h6 :)
            case 'h6'
            case 'h7'
            case 'h8'
            case 'h9' return concat($newline, '###### ', md:typeswitcher($child/node(), $newline), $newline)
            case 'pre' return concat($newline, '```', $newline, md:typeswitcher($child/node(), $newline), $newline, '```', $newline)
            case 'blockquote' return concat($newline, '> ', md:typeswitcher($child/node(), $newline), $newline)
            case 'hr' return concat($newline, '----------', $newline)
            case 'table' return concat($newline, $newline, md:typeswitcher($child/(caption, thead, tbody, tfoot, tr), $newline), $newline, $newline)
            case 'caption' return concat($newline, '**', md:typeswitcher($child/node(), $newline), '**', $newline)
            case 'thead'
            case 'tbody'
            case 'tfoot' return md:typeswitcher($child/tr, $newline)
            case 'tr' return (
                let $cells := $child/(th | td)
                return concat(
                    '| ', string-join(for $c in $cells return md:typeswitcher($c, $newline), ' | '), ' |', $newline,
                    (: the last sibling row that contains th cells is followed by the header separator line :)
                    if ($child[th][empty(following-sibling::tr[th])])
                    then concat('|', string-join(for $i in (1 to count($cells)) return '---|', ''), $newline)
                    else ()
                )
            )
            case 'th'
            case 'td' return md:typeswitcher($child/node(), $newline)
            default return (
                let $marker := md:stylemarker($child)
                return concat($marker, $child, md:closingmarker($marker))
            )
        case comment() return ()
        case text() return (
            let $marker := md:stylemarker($child)
            (: test by local name, like the switch above, so this agrees with the 'pre' branch :)
            let $text   :=
                if ($child/ancestor::*[local-name() eq 'pre'])
                then string($child)
                else replace($child, $md:LF, ' ')
            return concat($marker, $text, md:closingmarker($marker))
        )
        default return md:typeswitcher($child/node(), $newline)
, '')
};