/*****************************************************************************
   file         : $Id: stream.cpp,v 1.11 2006/09/19 14:04:36 nils Exp $
   description  :
   ------------------------------------------------------------------------

   copyright    : (C) 2006 by Nils Springob, Aachen, GERMANY
   email        : nils.springob@nicai-systems.de
   project      : nicai-systems library

 *****************************************************************************/

#include "xml/stream.h"
#include <iostream>

namespace nicai {
namespace xml {


/**
 * Returns the next node of the stream and removes it from the stream.
 *
 * A node that was already fetched into the look-ahead slot is handed out
 * first; otherwise a new node is requested from getNextInternal().
 * The look-ahead slot is empty afterwards.
 */
node * ixmlbasestream::getNext()
{
  // Nothing buffered: pull a fresh node straight from the parser.
  if (!nextNode)
    return getNextInternal();

  // Hand out the buffered node and clear the look-ahead slot.
  node * const buffered = nextNode;
  nextNode = 0;
  return buffered;
}

/**
 * Returns the upcoming node without consuming it.
 *
 * The node stays in the look-ahead slot, so a later getNext() or
 * dropNode() still sees it. The result is 0 when prefetchNode() could not
 * obtain a node.
 */
node * ixmlbasestream::peekNext()
{
  // Fill the look-ahead slot (if it is empty) before exposing it.
  prefetchNode();
  node * const upcoming = nextNode;
  return upcoming;
}


/**
 * Makes sure the look-ahead slot holds the next relevant node.
 *
 * If the slot is empty a node is fetched with getNextInternal(). While
 * doIgnoreComments is set, comment nodes are discarded and replaced by the
 * following node. The slot is left at 0 when getNextInternal() returns 0.
 */
void ixmlbasestream::prefetchNode()
{
  if (!nextNode)
    nextNode = getNextInternal();

  // Skip over comment nodes for as long as comment filtering is enabled.
  while (nextNode && doIgnoreComments && nextNode->isComment())
  {
    dropNode();
    nextNode = getNextInternal();
  }
}

void ixmlbasestream::dropNode()
{
  delete nextNode;
  nextNode = 0;
}

/**
 * Consumes the upcoming node if it compares equal to the given node.
 *
 * @param node  the node that is expected next
 * @return true if the upcoming node matched and was consumed; false if it
 *         differs (it then stays in the look-ahead slot) or if the stream
 *         has no further node.
 */
bool ixmlbasestream::expectNode(const node & node)
{
  prefetchNode();

  // prefetchNode() leaves the slot at 0 once getNextInternal() has nothing
  // more to deliver; comparing through a null pointer would be undefined.
  if (nextNode==0)
    return false;

  if (*nextNode==node)
  {
    dropNode();
    return true;
  }
  return false;
}

bool ixmlbasestream::expectTag(const std::string & name)
{
  return expectNode(tag(name));
}

/**
 * Consumes a start tag with the given name and then collects the attribute
 * nodes that follow it.
 *
 * @param name        expected tag name
 * @param attributes  receives the attributes via readAttributes()
 * @return false if the upcoming node is not the expected tag (attributes is
 *         left untouched in that case), true otherwise
 */
bool ixmlbasestream::expectTagReadAttributes(const std::string & name, attributemap & attributes)
{
  const bool matched = expectNode(tag(name));
  if (matched)
    readAttributes(attributes);
  return matched;
}

/**
 * Reads all directly following attribute nodes into a map.
 *
 * @param attributes  map that receives name -> value; an existing entry
 *                    with the same name is overwritten
 * @param clear       when true the map is emptied first
 * @return always true; reading stops at the first node that is not an
 *         attribute (it stays in the look-ahead slot) or when the stream
 *         has no further node.
 */
bool ixmlbasestream::readAttributes(attributemap & attributes, bool clear)
{
  if (clear)
    attributes.clear();
  while(true)
  {
    prefetchNode();
    // The slot is 0 when the stream is exhausted: no more attributes to
    // read, and the pointer must not be dereferenced.
    if (nextNode==0 || nextNode->getNodeType()!=ATTRIBUTE_NODE)
      return true;
    // The node type was checked above, so the downcast is safe.
    const attribute & attr = static_cast<const attribute &>(*nextNode);
    attributes[attr.getName()] = attr.getVal();
    dropNode();
  }
}

bool ixmlbasestream::readTag(std::string & name)
{
  name.clear();
  prefetchNode();
  if (!nextNode->isTag())
    return false;
  tag & tagNode (*(tag*)nextNode);
  name = tagNode.getName();
  dropNode();
  return true;
}


bool ixmlbasestream::readText(std::string & str)
{
  str.clear();
  prefetchNode();
  if (nextNode->getNodeType()!=TEXT_NODE)
    return false;
  text & textNode (*(text*)nextNode);
  str = textNode.getText();
  dropNode();
  return true;
}

// Skips one block of content and discards every node that belongs to it:
//  - a text node,
//  - an element <*>...</*> including everything nested inside it,
//  - a comment <!--...-->,
//  - a processing instruction <?...?>.
//
// A nesting counter is raised for every start tag and lowered for every end
// tag; skipping ends as soon as the counter is back at zero after a node
// has been dropped. Returns false when the stream runs out of nodes first.
//
// NOTE(review): if the first node seen is an end tag the counter presumably
// goes negative and nodes keep being consumed until it returns to zero or
// the stream ends - confirm that callers never invoke this in front of an
// end tag.
bool ixmlbasestream::ignoreContent() {
  int depth = 0;
  for (;;) {
    prefetchNode();
    if (!nextNode)
      return false;

    if (nextNode->isTag())
      ++depth;
    if (nextNode->isEndTag())
      --depth;

    dropNode();

    // Balanced again: the block has been skipped completely.
    if (depth == 0)
      return true;
  }
}


/*****************************************************************************/

/**
 * Writes a string to the output and replaces XML special characters by
 * entities: '<' -> "&lt;", '>' -> "&gt;", '&' -> "&amp;".
 *
 * @param str        raw string to write
 * @param attribute  when true, '"' is additionally written as "&quot;"
 */
void oxmlstream::outString(const std::string & str, bool attribute)
{
  // Set of characters that must not appear verbatim in the output.
  const char * const specials = attribute ? "<>&\"" : "<>&";

  std::string::size_type begin = 0;
  for (std::string::size_type hit = str.find_first_of(specials, begin);
       hit != std::string::npos;
       hit = str.find_first_of(specials, begin))
  {
    // Copy the unproblematic stretch in front of the special character...
    out << str.substr(begin, hit-begin);

    // ...then emit the matching entity.
    const char special = str[hit];
    if (special=='<')
      out << "&lt;";
    else if (special=='>')
      out << "&gt;";
    else if (special=='&')
      out << "&amp;";
    else if (special=='\"')
      out << "&quot;";

    begin = hit+1;
  }

  // Remainder after the last special character (possibly empty).
  out << str.substr(begin);
}

/**
 * Builds the indentation for the current nesting level: two spaces per
 * entry on the open-tag stack, adjusted by offset levels.
 *
 * @param offset  number of levels to add (negative to subtract)
 * @return the indentation string; always empty when doCollapseWhitespace
 *         is set
 */
std::string oxmlstream::indent(int offset)
{
  if (doCollapseWhitespace)
    return std::string();

  const std::string::size_type width = (nodes.size()+offset)*2;
  return std::string(width, ' ');
}

/**
 * Returns the characters that are still needed to finish the previously
 * written item and resets the state to 0.
 *
 *  - state 1 (set after a start tag or the document header): the closing
 *    ">" is still missing;
 *  - state 2 and 3 (set after comment, instruction, CDATA or text): only a
 *    line break is pending.
 *
 * Line breaks are omitted when doCollapseWhitespace is set. For any other
 * state an empty string is returned and the state is left unchanged.
 */
std::string oxmlstream::finish_state()
{
  switch (state)
  {
    case 1:
      state = 0;
      return doCollapseWhitespace ? ">" : ">\n";

    case 2:
    case 3:
      state = 0;
      return doCollapseWhitespace ? "" : "\n";

    default:
      return "";
  }
}


/**
 * Writes a node of any kind by dispatching on its node type to the
 * matching operator<< overload. Poly nodes write themselves through
 * tostream(); null nodes and unknown types produce no output.
 *
 * @param node  node to write
 * @return this stream
 */
oxmlstream & oxmlstream::outputNode(const node & node) {
  switch(node.getNodeType())
  {
    case NULL_NODE:
      break;
    case DOCUMENT_NODE:
      *this << (document &)node;
      break;
    case TAG_NODE:
      *this << (tag &)node;
      break;
    case COMMENT_NODE:
      *this << (comment &)node;
      break;
    case INSTRUCTION_NODE:
      *this << (instruction &)node;
      break;
    case CDATA_NODE:
      *this << (cdata &)node;
      break;
    case ATTRIBUTE_NODE:
      *this << (attribute &)node;
      break;
    case ENDTAG_NODE:
      *this << (endtag &)node;
      break;
    case ENDDOCUMENT_NODE:
      *this << (enddocument &)node;
      break;
    case TEXT_NODE:
      *this << (text &)node;
      break;
    case POLY_NODE:
      ((polynode &)node).tostream(*this);
      break;
  }
  return *this;
}

/**
 * Writes the XML declaration. The final '>' is deliberately left out:
 * state 1 makes finish_state() append it before the next item is written.
 */
oxmlstream & oxmlstream::operator<< (const document &) {
  out << "<?xml version=\"1.0\"?";
  state = 1;
  return *this;
}

/**
 * Opens a new element: finishes the previous item, writes the indentation
 * and "<name", and pushes the name onto the open-tag stack. The tag is left
 * open (state 1) so that attributes can follow.
 */
oxmlstream & oxmlstream::operator<< (const tag & node) {
  out << finish_state();
  out << indent();
  out << "<" << node.getName();

  nodes.push_back(node.getName());
  state = 1;
  return *this;
}

/**
 * Writes an attribute as ` name="value"`. The value is passed through
 * outString() so that special characters are written as entities.
 *
 * NOTE(review): outString() is called with its default for the second
 * argument, which is declared outside this file - confirm that the default
 * enables the '"' escaping needed inside attribute values.
 */
oxmlstream & oxmlstream::operator<< (const attribute & node) {
  out << " ";
  out << node.getName();
  out << "=\"";
  outString(node.getVal());
  out << "\"";
  return *this;
}

/**
 * Closes the innermost open element and removes it from the open-tag stack.
 *
 * If the start tag is still open (state 1) the element is written in the
 * short form "/>". Otherwise "</name>" is written; unless
 * doCollapseWhitespace is set, it is placed on its own indented line,
 * except after inline text (state 3), where it follows the text directly.
 * The state is 0 afterwards.
 */
oxmlstream & oxmlstream::operator<< (const endtag &) {
  // Line break after the closing markup, suppressed in collapsed output.
  const char * const eol = doCollapseWhitespace ? "" : "\n";

  if (state==1) {
    out << "/>" << eol;
  } else {
    if (!doCollapseWhitespace) {
      if (state==2)
        out << "\n" << indent(-1);   // finish the content line first
      else if (state!=3)
        out << indent(-1);           // already at the start of a line
    }
    out << "</" << nodes.back() << ">" << eol;
  }

  state=0;
  nodes.pop_back();
  return *this;
}

/**
 * Ends the document: writes whatever finish_state() still has pending and
 * flushes the underlying output.
 */
oxmlstream & oxmlstream::operator<< (const enddocument &) {
  out << finish_state() << std::flush;
  return *this;
}

/**
 * Writes a comment as "<!--text-->" on an indented line after finishing
 * the previous item. The text is written verbatim. The state is 2
 * afterwards.
 */
oxmlstream & oxmlstream::operator<< (const comment & node) {
  out << finish_state();
  out << indent();
  out << "<!--" << node.getText() << "-->";
  state = 2;
  return *this;
}

/**
 * Writes a processing instruction as "<?target data?>" after finishing the
 * previous item. The separating blank and the data are omitted when the
 * data is empty. The state is 2 afterwards.
 */
oxmlstream & oxmlstream::operator<< (const instruction & node) {
  out << finish_state();
  out << indent();
  out << "<?" << node.getTarget();

  const std::string & payload = node.getData();
  if (!payload.empty())
    out << " " << payload;

  out << "?>";
  state = 2;
  return *this;
}

/**
 * Writes a CDATA section after finishing the previous item. The state is 2
 * afterwards.
 *
 * The data is written verbatim, with one exception: the sequence "]]>"
 * would terminate the section early and produce malformed XML. Each
 * occurrence is therefore split across two adjacent sections
 * ("...]]" + "]]><![CDATA[" + ">..."), which a reader sees as the original
 * character data.
 */
oxmlstream & oxmlstream::operator<< (const cdata & node) {
  const std::string & data = node.getData();

  out << finish_state() << indent() << "<![CDATA[";

  std::string::size_type from = 0;
  for (;;)
  {
    const std::string::size_type hit = data.find("]]>", from);
    if (hit == std::string::npos)
      break;
    // Emit everything up to and including "]]", close the section and
    // reopen a new one that starts with the remaining ">".
    out << data.substr(from, hit + 2 - from) << "]]><![CDATA[";
    from = hit + 2;
  }

  out << data.substr(from) << "]]>";
  state=2;
  return *this;
}

/**
 * Writes a text node with special characters replaced by entities.
 *
 * A start tag that is still open (state 1) is closed with ">" first. In
 * the default layout the text is placed on its own indented line; when
 * doCollapseWhitespace or doInlineText is set it follows the preceding
 * output directly. The state afterwards is 3 for inline text and 2
 * otherwise.
 */
oxmlstream & oxmlstream::operator<< (const text & node) {
  // "compact" layouts never insert line breaks or indentation before text.
  const bool compact = doCollapseWhitespace || doInlineText;

  if (state==1) {
    out << ">";
    if (!compact)
      out << "\n" << indent();
  } else if (state==0 && !compact) {
    out << indent();
  }

  outString(node.getText(), false);
  state = doInlineText ? 3 : 2;

  return *this;
}

/**
 * Flushes the underlying output.
 *
 * @return this stream, so that calls can be chained
 */
oxmlstream & oxmlstream::flush()
{
  out << std::flush;
  return *this;
}

/*****************************************************************************/

/**
 * Reads character data up to (but not including) a terminating character
 * and decodes the predefined entities &amp; &lt; &gt; &quot; &apos;.
 * Unknown entities are copied through unchanged ("&name;").
 *
 * When doCollapseWhitespace is set, leading and trailing blanks, tabs and
 * newlines are removed and every inner run of them is replaced by a single
 * blank. Decoded entities count as ordinary content for this purpose:
 * whitespace in front of an entity is kept, and an entity at the start
 * ends the "leading whitespace" phase.
 *
 * @param until  terminating character; it is put back into the input
 * @return the decoded text; reading also stops at the end of the input
 */
std::string ixmlstream::readEscaped(char until)
{
  bool wsStart=true;      // still inside leading whitespace
  bool wsPending=false;   // an inner whitespace run waits to be emitted
  std::string result;
  char c;
  while (in.get(c))
  {
    if (c==until)
    {
      in.unget();
      break;
    }

    const bool isSpace = (c==' ') || (c=='\n') || (c=='\t');
    if (doCollapseWhitespace && isSpace)
    {
      // Remember the run; it is only emitted if more content follows.
      if (!wsStart)
        wsPending=true;
      continue;
    }

    // Real content follows (plain character or entity): emit the pending
    // separator first. Doing this for entities as well keeps "a &amp; b"
    // from losing the blank in front of the ampersand.
    if (wsPending)
    {
      result+=' ';
      wsPending=false;
    }
    wsStart=false;

    if (c!='&')
    {
      result+=c;
      continue;
    }

    // Entity reference: collect the name up to the terminating ';'.
    std::string entity;
    while (in.get(c) && c!=';')
      entity+=c;

    if (entity=="amp") result += '&';
    else if (entity=="lt") result += '<';
    else if (entity=="gt") result += '>';
    else if (entity=="quot") result += '\"';
    else if (entity=="apos") result += '\'';
    else result += '&'+entity+';';
  }
  return result;
}

/**
 * Reads raw characters up to the first occurrence of a delimiter string.
 * The delimiter is consumed but not included in the result.
 *
 * The match is checked against the tail of the collected text after every
 * character, so delimiters that overlap with themselves (e.g. "]]>" in
 * "]]]>") are found at the earliest possible position for any delimiter.
 *
 * @param until  delimiter string; an empty delimiter never matches
 * @return the characters in front of the delimiter. If the input ends
 *         before the delimiter is found, everything read so far is
 *         returned, including a partially matched delimiter prefix.
 */
std::string ixmlstream::readString(const std::string & until)
{
  std::string result;
  const std::string::size_type len = until.size();
  char c;
  while (in.get(c))
  {
    result += c;

    // Cheap pre-check on the last character before comparing the tail.
    if (len != 0 && c == until[len-1] && result.size() >= len
        && result.compare(result.size()-len, len, until) == 0)
    {
      result.erase(result.size()-len);
      break;
    }
  }
  return result;
}


/**
 * Reads a name consisting of alphanumeric characters, '_', '-', '.' and
 * ':'. The first character that does not belong to the name is put back
 * into the input.
 *
 * @return the name; empty if the next character cannot be part of a name
 *         or the input is exhausted
 */
std::string ixmlstream::readName()
{
  std::string name;
  char c;
  while (in.get(c))
  {
    // std::isalnum requires a value representable as unsigned char; a plain
    // char may be negative for non-ASCII bytes, which would be undefined.
    const bool isNameChar = std::isalnum(static_cast<unsigned char>(c))
                         || (c=='_') || (c=='-') || (c=='.') || (c==':');
    if (!isNameChar)
    {
      in.unget();
      return name;
    }
    name+=c;
  }
  return name;
}

/**
 * Reads one attribute of the form  name = "value"  or  name = 'value'.
 * Whitespace around the '=' is skipped and entities inside the value are
 * decoded by readEscaped().
 *
 * @param name  receives the attribute name (always assigned, may be empty)
 * @param val   receives the decoded value; only assigned once an opening
 *              quote was found
 * @return true if a complete attribute including its closing quote was
 *         read; false if the '=' or the opening quote is missing or the
 *         value is not terminated. The characters inspected on the way
 *         remain consumed in every case.
 */
bool ixmlstream::readAttribute(std::string & name, std::string & val)
{
  name = readName();
  in >> std::ws;
  if (in.get()!='=')
    return false;

  in >> std::ws;
  // Kept as int so that an end-of-input result is not narrowed to a char.
  const int quote = in.get();
  if (quote!='"' && quote!='\'')
    return false;

  val=readEscaped(static_cast<char>(quote));

  // readEscaped() stops in front of the closing quote; if the input ended
  // first, the value is unterminated and the attribute is malformed.
  return in.get()==quote;
}

/**
 * Reports the current read position of the input as returned by tellg(),
 * converted to int.
 */
int ixmlstream::getPos()
{
  const std::streamoff offset = in.tellg();
  return static_cast<int>(offset);
}


/**
 * Puts the parser into the error state and reports the problem by throwing.
 *
 * @param msg  description of the problem; it is appended to
 *             "XML parse error: "
 * @throws failure  always
 * @return never returns normally; the node pointer return type lets callers
 *         write `return activateError(...)`
 */
node * ixmlstream::activateError(const std::string & msg)
{
  state = STATE_ERROR;
  const std::string report = "XML parse error: " + msg;
  throw failure(report);
  return 0;   // not reached
}



/**
 * Parses the next node from the input. The behaviour depends on the parser
 * state:
 *
 *  - STATE_PREDOC:  expects the XML declaration "<?xml ... ?>" and returns
 *                   a document node; only version "1.0" is accepted.
 *  - STATE_NORMAL:  skips whitespace and returns an end tag, CDATA section,
 *                   comment, processing instruction, start tag or text
 *                   node. At the end of the input an enddocument node is
 *                   returned, provided that all tags have been closed.
 *  - STATE_INTAG:   inside a start tag; returns the attributes one by one,
 *                   an end tag for the short form "/>", or continues with
 *                   the element content after ">".
 *  - STATE_POSTDOC: the document is finished; returns 0.
 *
 * @return a newly allocated node, or 0 once the document has ended
 * @throws failure  (through activateError) on malformed input
 */
node * ixmlstream::getNextInternal()
{
  switch (state)
  {
    case STATE_PREDOC: // DOCUMENT
    {
      char head[5];
      // Only inspect the buffer if all five characters were really read;
      // after a short read its content would be indeterminate.
      if (in.read(head, 5) && std::string(head,5)=="<?xml")
      {
        std::string encoding, standalone;
        while ((in >> std::ws) && (in.peek()!='?'))
        {
          std::string name, val;
          readAttribute(name, val);
          if (name=="version" && val!="1.0")
            return activateError("wrong version");
          if (name=="encoding") encoding=val;
          if (name=="standalone") standalone=val;
        }
        in.get(); // consume '?'
        if (in.get()=='>')
        {
          state=STATE_NORMAL;
          return new document(encoding, standalone);
        }
      }
      return activateError("not an xml document");
    }

    case STATE_NORMAL:
      in>>std::ws;
      if (in.eof())
      {
        if (!nodes.empty())
          return activateError("unexpected end of file");

        state=STATE_POSTDOC;
        return new enddocument();
      }
      if (in.peek()=='<')
      {
        in.get();
        if (in.peek()=='/') // XML ENDTAG
        {
          in.get();
          std::string name=readName();
          in >> std::ws;
          if (in.get()=='>')
          {
            if (nodes.empty())
              return activateError("to many end tags");
            if (nodes.back()!=name)
              return activateError("end tag does not match start tag");
            nodes.pop_back();
            state = STATE_NORMAL;
            return new endtag();
          }
          return activateError("malformed end tag");
        }
        else if (in.peek()=='!')
        {
          in.get();
          if (in.peek()=='[') // XML CDATA
          {
            in.get();
            char head[6];
            // As above: never compare a partially filled buffer.
            if (in.read(head, 6) && std::string(head,6)=="CDATA[")
            {
              std::string data = readString("]]>");
              return new cdata(data);
            }
            return activateError("malformed CDATA");
          }
          else if (in.peek()=='-') // XML COMMENT
          {
            in.get();
            std::string text;
            if (in.get()=='-')
            {
              char c;
              // true while the previous character was a '-' that has not
              // been added to the text yet
              bool firstMinus=false;
              while (in.get(c))
              {
                if (c=='-')
                {
                  if (firstMinus)
                  {
                    // "--" must be followed by '>' to end the comment
                    if (in.get()=='>')
                    {
                      state=STATE_NORMAL;
                      return new comment(text);
                    }
                    else break;
                  }
                  firstMinus=true;
                }
                else
                {
                  // A single '-' is ordinary comment text and must not be
                  // lost once it turns out not to start the terminator.
                  if (firstMinus)
                    text+='-';
                  text+=c;
                  firstMinus=false;
                }
              }
            }
            return activateError("malformed comment");
          }
          return activateError("we do not support DTD at the moment"); //FIXME
        }
        else if (in.peek()=='?') // XML INSTRUCTION
        {
          in.get();
          std::string target = readName();
          in >> std::ws;
          std::string data = readString("?>");
          return new instruction(target, data);
        }
        else // XML TAG
        {
          std::string name = readName();
          state=STATE_INTAG;
          // Keep the int returned by peek(): std::isspace is only defined
          // for end-of-file and unsigned char values, not for a plain char
          // that may be negative.
          const int c=in.peek();
          if ((c!='/')&&(c!='>')&&(!std::isspace(c)))
            return activateError("malformed tag name");
          nodes.push_back(name);
          return new tag(name);
        }
      }
      else // XML TEXT
      {
        std::string str=readEscaped('<');
        return new text(str);
      }

    case STATE_INTAG:
      in >> std::ws;
      if (in.peek()=='>')
      {
        // End of the start tag: continue with the element content.
        in.get();
        state = STATE_NORMAL;
        return getNextInternal();
      }
      else if (in.peek()=='/') // XML ENDTAG
      {
        in.get();
        in >> std::ws;
        if (in.get()=='>')
        {
          if (nodes.empty())
            return activateError("to many end tags");
          nodes.pop_back();
          state = STATE_NORMAL;
          return new endtag();
        }
        return activateError("malformed single tag");
      }
      else // XML ATTRIBUTE
      {
        std::string name, val;
        if (readAttribute(name, val))
          return new attribute(name, val);
        return activateError("malformed attribute");
      }

    case STATE_POSTDOC:
      return 0;

  }
  // Reached for any state not handled above (e.g. after an earlier error).
  return activateError("internal error");
}

} // namespace
} // namespace
