Program Listing for File document.hpp

Return to documentation for file (mxml/document.hpp)

/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2024 Maarten L. Hekkelman
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#pragma once

#include "mxml/node.hpp"
#include "mxml/parser.hpp"
#include "mxml/version.hpp"
#include "mxml/text.hpp"

#include <functional>
#include <string>

namespace mxml
{

/// The three strings that describe a document type (DOCTYPE) declaration.
struct doc_type
{
    // presumably the name of the root element named in the DOCTYPE — confirm against the parser
    std::string m_root;
    std::string m_pubid; // pubid is empty for SYSTEM DOCTYPE
    // presumably the system identifier / location of the DTD — confirm against the parser
    std::string m_dtd;
};

/// An XML document node: an element_container that reports itself as the
/// root of its tree and carries the parse and output settings declared below.
///
/// Only the small inline members are defined in this header; every other
/// member is declared here and defined elsewhere.
class document final : public element_container
{
  public:
    /// Identify this node as node_type::document.
    node_type type() const override { return node_type::document; }

    /// Default constructor (defined elsewhere).
    document();

    /// Copy constructor (defined elsewhere).
    document(const document &doc);

    /// Move constructor: default-construct, then swap state with @a other.
    document(document &&other) noexcept
        : document()
    {
        swap(*this, other);
    }

    /// Assignment. The argument is taken by value and swapped in, so this
    /// single operator serves both copy and move assignment.
    document &operator=(document doc) noexcept
    {
        swap(*this, doc);
        return *this;
    }

    /// Construct from the text in @a s (defined elsewhere; presumably parses
    /// @a s as XML — confirm against the definition).
    document(std::string_view s);

    /// Construct from the stream @a is (defined elsewhere; presumably parses
    /// the stream content as XML — confirm against the definition).
    document(std::istream &is);

    /// As above, additionally taking @a base_dir (defined elsewhere;
    /// presumably the directory used to locate DTDs — confirm).
    document(std::istream &is, std::string base_dir);

    ~document() = default;

    /// Exchange the state of @a a and @a b (defined elsewhere).
    friend void swap(document &a, document &b) noexcept;

    /// @name Parse options
    /// Plain accessors for the stored flags; their effect is implemented elsewhere.
    /// @{
    bool is_validating() const { return m_validating; }
    void set_validating(bool validate) { m_validating = validate; }

    bool is_validating_ns() const { return m_validating_ns; }
    void set_validating_ns(bool validate) { m_validating_ns = validate; }

    bool preserves_cdata() const { return m_preserve_cdata; }
    void set_preserve_cdata(bool p) { m_preserve_cdata = p; }
    /// @}

    /// @name Output format options
    /// Accessors for individual fields of the stored format_info (m_fmt).
    /// @{
    bool collapses_empty_tags() const { return m_fmt.collapse_tags; }
    void set_collapse_empty_tags(bool c) { m_fmt.collapse_tags = c; }

    bool write_html() const { return m_fmt.html; }
    void set_write_html(bool f) { m_fmt.html = f; }

    bool suppresses_comments() const { return m_fmt.suppress_comments; }
    void set_suppress_comments(bool s) { m_fmt.suppress_comments = s; }

    bool escapes_white_space() const { return m_fmt.escape_white_space; }
    void set_escape_white_space(bool e) { m_fmt.escape_white_space = e; }

    bool escapes_double_quote() const { return m_fmt.escape_double_quote; }
    void set_escape_double_quote(bool e) { m_fmt.escape_double_quote = e; }
    /// @}

    /// @name Prolog options
    /// @{
    bool wraps_prolog() const { return m_wrap_prolog; }
    void set_wrap_prolog(bool w) { m_wrap_prolog = w; }

    /// Return a copy of the stored doctype.
    doc_type get_doctype() const { return m_doctype; }

    /// Store a doctype built from the three strings and switch on the
    /// write-doctype flag. The arguments are moved straight into the member,
    /// so no string is copied a second time.
    void set_doctype(std::string root, std::string pubid, std::string dtd)
    {
        m_doctype = doc_type{ std::move(root), std::move(pubid), std::move(dtd) };
        m_write_doctype = true;
    }

    /// Store a copy of @a doctype and switch on the write-doctype flag.
    void set_doctype(const doc_type &doctype)
    {
        m_doctype = doctype;
        m_write_doctype = true;
    }

    bool writes_xml_decl() const { return m_write_xml_decl; }
    void set_write_xml_decl(bool w) { m_write_xml_decl = w; }

    bool writes_doctype() const { return m_write_doctype; }
    void set_write_doctype(bool f) { m_write_doctype = f; }
    /// @}

    /// Defined elsewhere; judging by the name, reports whether this is an
    /// HTML5 document — confirm the exact criteria against the definition.
    bool is_html5() const;

    /// Stream insertion for a document (defined elsewhere).
    friend std::ostream &operator<<(std::ostream &os, const document &doc);

    /// Stream extraction into a document (defined elsewhere).
    friend std::istream &operator>>(std::istream &is, document &doc);

    /// Equality comparison (defined elsewhere).
    bool operator==(const document &doc) const;

    /// Defined elsewhere; presumably sets the directory stored in m_dtd_dir — confirm.
    void set_base_dir(std::string path);

    /// Install the callable used as external entity loader.
    ///
    /// @param cb  Any callable that can be stored in the
    ///            std::function<std::istream *(std::string_view base,
    ///            std::string_view pubid, std::string_view sysid)> member.
    ///            It is perfectly forwarded, so an rvalue callable is moved
    ///            into the member instead of being copied.
    ///
    /// NOTE(review): ownership of the returned std::istream pointer is not
    /// visible in this header — confirm against the code that invokes the loader.
    template <typename Callback>
    void set_entity_loader(Callback &&cb)
    {
        m_external_entity_ref_loader = std::forward<Callback>(cb);
    }

    /// @name Encoding and version
    /// Defined elsewhere.
    /// @{
    encoding_type get_encoding() const;
    void set_encoding(encoding_type enc);

    version_type get_version() const;
    void set_version(version_type v);
    /// @}

    /// A document is its own root container.
    element_container *root() override { return this; }
    const element_container *root() const override { return this; }

    /// Return a pointer to the first element held by this container, or
    /// nullptr when the container is empty.
    element *child()
    {
        return empty() ? nullptr : &front();
    }

    /// Const overload; delegates to the non-const child(), which does not modify the object.
    const element *child() const { return const_cast<document *>(this)->child(); }

    /// Forward @a args to emplace_back and return its result. Only
    /// participates when an element can be constructed from @a args.
    template <typename... Args>
        requires std::is_constructible_v<element, Args...>
    auto emplace(Args &&...args)
    {
        return emplace_back(std::forward<Args>(args)...);
    }

    /// String representation of this node (override, defined elsewhere).
    std::string str() const override;

  protected:
    /// Insertion hook (override, defined elsewhere).
    node *insert_impl(const node *p, node *n) override;

    // Handlers named after parsing events; all are defined elsewhere.
    // NOTE(review): presumably registered as callbacks on mxml::parser by parse() — confirm.
    void XmlDeclHandler(encoding_type encoding, bool standalone, version_type version);
    void StartElementHandler(std::string name, std::string uri, const parser::attr_list_type &atts);
    void EndElementHandler(std::string name, std::string uri);
    void CharacterDataHandler(std::string data);
    void ProcessingInstructionHandler(std::string target, std::string data);
    void CommentHandler(std::string comment);
    void StartCdataSectionHandler();
    void EndCdataSectionHandler();
    void StartNamespaceDeclHandler(std::string prefix, std::string uri);
    void EndNamespaceDeclHandler(std::string_view prefix);
    void DoctypeDeclHandler(std::string root, std::string publicId, std::string uri);
    void NotationDeclHandler(std::string name, std::string sysid, std::string pubid);

    /// Defined elsewhere; shares its signature with the loader installed
    /// through set_entity_loader().
    std::istream *external_entity_ref(std::string_view base, std::string_view pubid, std::string_view sysid);

    /// Defined elsewhere; presumably reads @a data and builds the tree — confirm.
    void parse(std::istream &data);

    /// Callable installed through set_entity_loader(); empty until one is set.
    std::function<std::istream *(std::string_view base, std::string_view pubid, std::string_view sysid)>
        m_external_entity_ref_loader;

    /// Write this node to @a os using @a fmt (override, defined elsewhere).
    void write(std::ostream &os, format_info fmt) const override;

    std::string m_dtd_dir;

    // some content information
    // NOTE(review): members without an in-class initializer are presumably
    // set by the constructors defined elsewhere — confirm.
    doc_type m_doctype;
    bool m_validating;
    bool m_validating_ns = false;
    bool m_preserve_cdata;
    bool m_has_xml_decl;
    encoding_type m_encoding;
    version_type m_version;
    bool m_standalone;
    bool m_wrap_prolog = true;
    bool m_write_doctype = false;
    bool m_write_xml_decl = false;

    format_info m_fmt;

    /// The three strings of a notation declaration (see NotationDeclHandler).
    struct notation
    {
        std::string m_name;
        std::string m_sysid;
        std::string m_pubid;
    };

    element_container *m_cur = nullptr; // construction
    cdata *m_cdata = nullptr;           // only defined in a CDATA section
    std::vector<std::pair<std::string, std::string>> m_namespaces;
    std::vector<notation> m_notations;
    size_t m_root_size_at_first_notation = 0; // for processing instructions that occur before a notation
};

/// User-defined literal support for documents.
namespace literals
{
    /// Literal operator for the `_xml` suffix on string literals; returns a
    /// document. Only declared here — presumably it parses the @a length
    /// characters at @a text as XML; confirm against the definition.
    document operator""_xml(const char *text, size_t length);
} // namespace literals

} // namespace mxml