# copyright 2007 ETH Zurich, DMATL
# author lorenz textor

from Products.Silva.Folder import Folder as SilvaFolder
from Products.Silva.Publication import Publication
from Products.SilvaDocument.Document import Document, DocumentVersion
from OFS.Folder import Folder
from Products.PythonScripts.standard import html_quote
import re

# Matches a <link ... url="..."> ... </link> element. Group 1 is the whole
# element, group 2 is the value of its url attribute. re.DOTALL lets '.' cross
# line breaks so links whose markup or text spans several lines are found too.
exp = r'(<link.*?url="(.*?)".*?>.*?</link>)'
regExp = re.compile(exp, re.DOTALL)
# Matches absolute http:// urls that point at mat.ethz.ch. The dots are
# escaped so that they only match literal dots, not arbitrary characters.
expAbs = r'http://.*?mat\.ethz\.ch'
regExpAbs = re.compile(expAbs)

def checkLinks(obj,strict):
    """Report the Silva documents below obj that contain wrong links.

    Walks recursively through all folders below obj, collects every Document
    found and scans the XML of its versions for <link> elements. A link is
    reported when its url contains '/silva/ETH'. If strict is true, urls that
    start with 'https://' and urls matched by regExpAbs (absolute links) are
    reported as well. A link is listed only once per document version, even
    if it breaks more than one of these rules.

    obj    -- container providing objectItems(); starting point of the walk
    strict -- if true, also report https:// and absolute links

    Returns the report as a string holding a complete HTML page.
    """
    def walk(objects):
        # depth-first: collect documents, step into every kind of folder
        for name, val in objects:
            if isinstance(val, Document):
                docs.append(val)
            elif isinstance(val, (SilvaFolder, Publication, Folder)):
                walk(val.objectItems())

    def isWrongLink(url):
        if '/silva/ETH' in url:
            return True
        if not strict:
            return False
        return bool(url.startswith('https://') or regExpAbs.search(url))

    def checkDocument(doc):
        path = doc.absolute_url_path()
        # the same version may be handed in more than once (e.g. as viewable
        # and as previewable); scan and report it only the first time
        if path in checkedDocuments:
            return
        checkedDocuments.add(path)
        wrongLinks = []
        # findall yields (whole link element, url attribute) tuples
        for element, url in regExp.findall(doc.content_xml()):
            if isWrongLink(url):
                wrongLinks.append(html_quote(element))
        if wrongLinks:
            output.append('<p>DocumentVersion %s</p>' % path)
            output.append('<ul><li>%s</li></ul>' % '</li><li>'.join(wrongLinks))

    output = ['<html><head></head><body><p>SilvaDocuments with wrong links</p><hr />']
    docs = []
    checkedDocuments = set()
    walk(obj.objectItems())
    # check for wrong links
    for doc in docs:
        candidates = (doc.get_viewable(), doc.get_previewable(), doc.get_editable())
        versions = [version for version in candidates if version]
        if not versions:
            # fall back to the last closed version; a document may have none
            lastClosed = doc.get_last_closed()
            if lastClosed is not None:
                versions.append(lastClosed)
        for version in versions:
            checkDocument(version)
    output.append("<p><b>%d SilvaDocuments scanned.</b></p>" % len(docs))
    output.append('</body></html>')
    return ''.join(output)
