/**
 * Copyright  Sergey Melnik (Stanford University, Database Group) 
 *
 * Distribution policies are governed by the W3C software license.
 * http://www.w3.org/Consortium/Legal/copyright-software   
 * 
 * All Rights Reserved.
 * 
 * @author      Sergey Melnik <melnik@db.stanford.edu>
 *
 * modified by William Grosso <grosso@acm.org>.
 */

package org.w3c.rdf.implementation.syntax.sirpac;

import org.w3c.rdf.model.*;
import org.w3c.rdf.syntax.*;
import org.w3c.rdf.vocabulary.rdf_syntax_19990222.RDF;
import org.w3c.rdf.vocabulary.rdf_schema_19990303.RDFS;
import org.w3c.rdf.util.RDFUtil;
import org.w3c.rdf.tools.sorter.*;

import java.util.*;
import java.io.*;

/**
 * Serializes an RDF model as RDF/XML.
 *
 * <p>Statements are sorted (see {@link #compare}) and grouped by subject;
 * every group is written as one description element. Short literals are
 * written as XML attributes, all other literals and all resource-valued
 * properties as child elements. Namespace URIs are declared as internal
 * DTD entities and bound to generated prefixes.
 *
 * <p>An instance stores the model and the prefix table of the current
 * serialization in its fields, so a single instance must not be used by
 * several threads at the same time.
 *
 * @author Sergey Melnik <melnik@db.stanford.edu>
 */

public class SiRS implements Comparer, RDFSerializer {

  /** Model handed to the most recent {@link #serialize} call; read by {@link #resourceID}. */
  Model model;

  /** Maps a namespace URI (String) to the prefix / entity name (String) chosen by {@link #serialize}. */
  Hashtable namespaces = new Hashtable();

  /**
   * Sort order used by {@link #serialize}.
   *
   * <ol>
   * <li>by subject URI;</li>
   * <li>literal objects before resource objects; two resource objects are
   *     ordered by their labels;</li>
   * <li>among literals, abbreviatable ones (see {@link #canAbbrev(String)})
   *     first, then by predicate label, then by literal value.</li>
   * </ol>
   *
   * @param handle not used
   * @param o1 first statement (must be a {@code Statement})
   * @param o2 second statement (must be a {@code Statement})
   * @return negative, zero or positive as usual; 0 if the model signals an error
   */
  public int compare(Object handle, Object o1, Object o2) {

    try {
      Statement t1 = (Statement)o1;
      Statement t2 = (Statement)o2;

      int res = t1.subject().getURI().compareTo( t2.subject().getURI() );

      if(res != 0)
        return res;

      // sort abbreviatable objects before long objects
      RDFNode n1 = t1.object();
      RDFNode n2 = t2.object();

      // which is a resource?
      boolean r1 = n1 instanceof Resource;
      boolean r2 = n2 instanceof Resource;

      // push resources down
      if(r1 && r2)
        return n1.getLabel().compareTo( n2.getLabel() );
      if(r2)
        return -1;
      else if(r1)
        return 1;

      // both are literals
      // which can be abbreviated?
      r1 = canAbbrev( n1.getLabel() );
      r2 = canAbbrev( n2.getLabel() );

      if(r1 && !r2)
        return -1;
      else if(r2 && !r1)
        return 1;

      // in case of literals, first sort them out
      // according to their names (important for postponed elements)
      res = t1.predicate().getLabel().compareTo( t2.predicate().getLabel() );
      if(res != 0)
        return res;
      else // compare their values
        return n1.getLabel().compareTo( n2.getLabel() );

    } catch (ModelException exc) {
      // Deliberate best effort: the Comparer interface cannot propagate the
      // exception, so statements that cannot be inspected are left unsorted.
    }

    return 0; // do not sort
  }

  /** Result of {@link #abbrevQuote}: write the literal as plain element content. */
  final char ABB_LONG = (char)0;
  /** Result of {@link #abbrevQuote}: write the literal inside a CDATA section. */
  final char ABB_CDATA = (char)1;

  /** Internal marker of {@link #abbrevQuote}: no quote character has been ruled out yet. */
  final char ANYQUOTE = (char)0;
  /** Longest literal (in characters) that is still written as an attribute. */
  final int MAX_ABBLENGTH = 60;

  /** Number of single-letter prefixes ('a' .. 'r') handed out before "n1", "n2", ...; stops short of the reserved "s". */
  private static final int SINGLE_LETTER_SHORTCUTS = 's' - 'a';

  /**
   * Decides how a literal value has to be written.
   *
   * @param s literal value
   * @return the quote character ({@code '"'} or {@code '\''}) to use when the
   * value can be written as an attribute; {@link #ABB_CDATA} when it needs a
   * CDATA section (contains {@code <}, {@code &}, {@code ]]>}, both kinds of
   * quotes, or consists of white space only and is long or contains
   * breaks); {@link #ABB_LONG} when it is too long or contains line breaks /
   * tabs and should become plain element content.
   */
  char abbrevQuote( String s ) {

    // "]]>" is not allowed in plain element content; the CDATA path
    // splits it safely (see escapeCDATA).
    if(s.indexOf("]]>") >= 0)
      return ABB_CDATA;

    char quote = ANYQUOTE; // any
    boolean hasBreaks = false;
    boolean whiteSpaceOnly = true;

    for(int i=0; i < s.length(); i++) {
      char c = s.charAt(i);
      if(c == '<' /*|| c == '>'*/ || c == '&')
        return ABB_CDATA;
      else if(c == '\n' || c == '\r' || c == '\t')
        // XML attribute-value normalization turns these into a single
        // space, so such a value cannot be kept in an attribute
        hasBreaks = true;

      if(c == '"' || c == '\'') {
        if(quote == ANYQUOTE)
          // the value contains one kind of quote: delimit with the other
          quote = (c == '"') ? '\'' : '"';
        else if (c == quote)
          // both kinds of quotes occur
          return ABB_CDATA;
      }

      if(!Character.isWhitespace(c))
        whiteSpaceOnly = false;
    }

    if(whiteSpaceOnly && hasBreaks)
      return ABB_CDATA;

    if(hasBreaks || s.length() > MAX_ABBLENGTH) // optically nice value
      return whiteSpaceOnly ? ABB_CDATA : ABB_LONG;

    return quote == ANYQUOTE ? '"' : quote;

  }

  /**
   * @return true if the node is a literal whose value can be written as an attribute
   */
  boolean canAbbrev(RDFNode n) throws ModelException {
    return (n instanceof Literal) && canAbbrev(n.getLabel());
  }

  /**
   * @return true if {@link #abbrevQuote} yields a quote character for the string
   */
  boolean canAbbrev(String s) {

    // just to speed up for long strings
    if(s.length() > MAX_ABBLENGTH)
      return false;

    char c = abbrevQuote(s);
    return c == '"' || c == '\'';
  }

  /**
   * Collects the namespaces that need a prefix: those of all predicates, of
   * the objects of rdf:type statements, and of subject and object of
   * rdfs:subClassOf / rdfs:subPropertyOf statements.
   *
   * @param en enumeration of {@code Statement}s
   * @return table whose keys are the collected namespace URIs
   */
  Hashtable getNamespaces(Enumeration en) throws ModelException {

    Hashtable h = new Hashtable();

    while(en.hasMoreElements()) {
      Statement t = (Statement)en.nextElement();

      // collect types
      if(RDF.type.equals(t.predicate())) // RDFMS.type
        getNamespace(h, t.object().getLabel());
      else if(RDFS.subClassOf.equals(t.predicate()) ||
              RDFS.subPropertyOf.equals(t.predicate())) {
        getNamespace(h, t.subject().getLabel());
        getNamespace(h, t.object().getLabel());
      }
      getNamespace(h, t.predicate().getLabel());
    }
    return h;
  }

  // @@ TODO: According to the XML spec, characters >= 2 can be Combining chars
  // Extender chars ...
  /**
   * Approximate check whether a string can be used as an XML element name.
   *
   * @param str candidate name
   * @return false for null or the empty string, or if the first character
   * is not a letter, '_' or ':', or if any later character is not a letter,
   * digit, '.', '-', '_' or ':'
   */
  static boolean validXMLTag(String str) {
    // an empty name would produce a tag like "ns:" which is not well-formed
    if(str == null || str.length() == 0)
      return false;

    for(int i = 0; i < str.length(); i++) {
      char c = str.charAt(i);
      if (i == 0) {
        // Must be Letter | '_' | ':'
        if(!(Character.isLetter(c) || c == '_' || c == ':'))
          return false;
      } else {
        if(!(Character.isDigit(c) || Character.isLetter(c) || c=='.' || c=='-' || c=='_' || c==':'))
          return false;
      }
    }
    return true;
  }

  /**
   * Registers the namespace part of the URI (as determined by
   * {@code RDFUtil.guessNamespace}) as a key of the table.
   */
  void getNamespace(Hashtable h, String uri) {
    h.put(RDFUtil.guessNamespace(uri), uri);
  }

  /** Indentation of continuation lines for attributes and declarations. */
  final String INDENT_ABB = "\t ";
  /** Indentation of property elements inside a description. */
  final String INDENT_LONG = "\t";

  /**
   * Writes the model as RDF/XML.
   *
   * <p>The XML declaration always states ISO-8859-1; the actual encoding
   * is determined by the writer supplied by the caller.
   *
   * @param m model to write; nothing is written if null
   * @param w destination; it is flushed but not closed
   * @throws IOException if the underlying writer reported an error
   * @throws ModelException if the model cannot be read
   */
  public void serialize(Model m, Writer w) throws IOException, ModelException {

    if(m == null)
      return;

    model = m;

    PrintWriter out = new PrintWriter(w);
    out.println("<?xml version='1.0' encoding='ISO-8859-1'?>");

    if(m.isEmpty()) {
      // write empty model
      out.println("<rdf:RDF xmlns:rdf=\"" + RDF._Namespace + "\"/>");
      flushAndCheck(out);
      return;
    }

    Vector tv = RDFUtil.getStatementVector(m);
    // sort order: GROUP BY resource, abbreviatable
    QuickSort.sort(tv, this, new Integer(1));

    out.println("<!DOCTYPE rdf:RDF [");

    // collect namespaces

    Hashtable ns = getNamespaces(tv.elements());
    int generated = 0; // number of generated (non-reserved) prefixes so far
    String indent = INDENT_ABB;

    namespaces.clear();

    // just to be sure
    ns.put(RDF._Namespace, "rdf");

    for(Enumeration en = ns.keys(); en.hasMoreElements();) {

      // assign a shortcut to a namespace

      String s = (String)en.nextElement();

      String shortcut;
      if(RDF._Namespace.equals(s))
        shortcut = "rdf";
      else if(RDFS._Namespace.equals(s))
        shortcut = "s";
      else
        shortcut = generatedShortcut(generated++);

      namespaces.put(s, shortcut);
      out.print(indent + "<!ENTITY " + shortcut + " '" + s + "'>");

      indent = "\n" + INDENT_ABB;
    }
    out.println("\n]>");


    // print out the declaration itself
    out.print("<rdf:RDF ");

    indent = "";
    for(Enumeration en = namespaces.elements(); en.hasMoreElements();) {

      String shortcut = (String)en.nextElement();
      out.print(indent + "xmlns:" + shortcut + "=\"&" + shortcut + ";\"");
      indent = "\n" + INDENT_ABB;
    }
    out.println(">");

    // contains triples having the same subject
    Vector group = new Vector();

    // loop over all triples; the vector is sorted by subject, so a change
    // of subject closes the current group
    for(int i=0; i < tv.size(); i++) {

      Statement tNext = (Statement)tv.elementAt(i);

      if(!group.isEmpty() &&
         !((Statement)group.lastElement()).subject().equals(tNext.subject())) {
        writeDescription(out, group, indent);
        group.setSize(0);
      }
      group.addElement(tNext);
    }

    if(!group.isEmpty())
      writeDescription(out, group, indent);

    out.println("</rdf:RDF>");
    flushAndCheck(out);
  }

  /**
   * Returns the index-th generated prefix: 'a' .. 'r' first (stopping
   * before the reserved "s"), then "n1", "n2", ...
   */
  private static String generatedShortcut(int index) {
    if(index < SINGLE_LETTER_SHORTCUTS)
      return String.valueOf((char)('a' + index));
    return "n" + (index - SINGLE_LETTER_SHORTCUTS + 1);
  }

  /**
   * Flushes the writer and reports an error that PrintWriter recorded
   * silently while writing.
   */
  private void flushAndCheck(PrintWriter out) throws IOException {
    // checkError() flushes the stream before testing the error flag
    if(out.checkError())
      throw new IOException("error while writing RDF/XML output");
  }

  /**
   * Writes one description element for a group of statements that share
   * the same subject. The group is reordered and may lose its type statement.
   *
   * @param out destination
   * @param group non-empty vector of {@code Statement}s with equal subjects,
   *        in the order produced by {@link #compare}
   * @param attIndent text printed in front of every abbreviated attribute
   */
  private void writeDescription(PrintWriter out, Vector group, String attIndent)
    throws ModelException {

    // fetch the subject first: the group may become empty below
    Resource subj = ((Statement)group.lastElement()).subject();

    // Problem 1: find the first "type" usable as a tag name and remove it
    // from the group; it becomes the element name of the description
    String qualName = null; // qualified name of this resource (if any)
    for(int j=0; j < group.size(); j++) {
      Statement tEl = (Statement)group.elementAt(j);
      Resource predicate = tEl.predicate();
      if( predicate.equals(RDF.type) ) {
        String typeURI = tEl.object().getLabel();
        String tagName = RDFUtil.guessName(typeURI);
        if(validXMLTag(tagName)) {
          qualName = namespaces.get( RDFUtil.guessNamespace(typeURI) ) + ":" + tagName;
          group.removeElementAt(j);
          break;
        }
      }
    }

    // Problem 2: an attribute name may occur only once per element.
    // Abbreviatable statements come first and equal predicates are adjacent,
    // so every repeated predicate is postponed to the end of the group,
    // where it is written as an element.
    int abbNum = 0; // number of statements written as attributes
    int endOfGroup = group.size(); // postponed statements live at >= endOfGroup

    while(abbNum < endOfGroup) {

      Statement tEl = (Statement)group.elementAt(abbNum);

      if(!canAbbrev(tEl.object()))
        break;

      if(abbNum > 0 && tEl.predicate().equals( ((Statement)group.elementAt(abbNum-1)).predicate() )) {
        // move; the element that slides into this position is examined next
        group.removeElementAt(abbNum);
        group.addElement(tEl);
        endOfGroup--;
      } else
        abbNum++;
    }

    // true if nothing remains to be written as a child element ( /> )
    boolean shortDescription = abbNum == group.size();

    // start description
    if(qualName != null)
      out.print("<" + qualName);
    else
      out.print("<rdf:Description");
    out.print(IDorAbout(subj.getLabel()));

    // process abbreviated literals
    for(int j=0; j < abbNum; j++) {

      Statement tEl = (Statement)group.elementAt(j);
      Resource pred = tEl.predicate();
      RDFNode obj = tEl.object();

      String p_ns = (String) namespaces.get( RDFUtil.guessNamespace(pred.getLabel()) );
      String p_name = RDFUtil.guessName(pred.getLabel());
      // the value contains neither '<', '&' nor the returned quote
      char quote = abbrevQuote( obj.getLabel() );

      out.print(attIndent + p_ns + ":" + p_name + "=" + quote + obj.getLabel() + quote);
    }

    if(shortDescription) {
      // we are done
      out.println("/>");
      return;
    }

    out.println('>');
    // process resources and long literals (can also be postponed)

    for(int j=abbNum; j < group.size(); j++) {

      Statement tEl = (Statement)group.elementAt(j);
      Resource pred = tEl.predicate();
      RDFNode obj = tEl.object();

      String p_ns = (String) namespaces.get( RDFUtil.guessNamespace(pred.getLabel()) );
      String p_name = RDFUtil.guessName(pred.getLabel());

      if(obj instanceof Resource) {
        // resource
        out.println(INDENT_LONG + "<" + p_ns + ":" + p_name +
                    " rdf:resource=\"" +
                    shortcutPrefix( obj.getLabel() ) +
                    "\"" + "/>");
      } else {

        boolean cdata = abbrevQuote( obj.getLabel() ) == ABB_CDATA;

        out.print(INDENT_LONG + "<" + p_ns + ":" + p_name);
        if(cdata)
          out.print(" xml:space='preserve'");
        out.print(">");

        if(cdata) {
          out.print("<![CDATA[");
          // write out CDATA
          escapeCDATA(out, obj.getLabel());
          out.print("]]>");
        } else
          // contains no '<', '&' or "]]>" (see abbrevQuote)
          out.print(obj.getLabel());

        out.println("</" + p_ns + ":" + p_name + ">");
      }
    }

    // end description
    if(qualName != null)
      out.println("</" + qualName + ">");
    else
      out.println("</rdf:Description>");
  }

  /**
   * Writes the content of a CDATA section. The surrounding
   * {@code <![CDATA[} and {@code ]]>} are written by the caller.
   * Every "]]>" inside the string is split over two adjacent CDATA
   * sections, because it cannot occur inside one.
   *
   * FIXME: how to we encode binary data? It is a function
   * of some more abstract layer?
   */
  void escapeCDATA(PrintWriter out, String str) {

    int start = 0, i = 0;
    do {
      i = str.indexOf("]]>", start);
      if(i >= 0) {
        // emit up to and including "]]", close the section and reopen it;
        // the '>' becomes the first character of the next section
        out.print(str.substring(start, i+2));
        out.print("]]><![CDATA[");
        start = i + 2;
      } else
        out.print(str.substring(start));
    } while (i >= 0 && start < str.length());
  }

  /**
   * Appends the string to the buffer, replacing the characters
   * {@code <}, {@code >}, {@code &} and {@code "} by entity references so
   * that the result can be placed in a double-quoted attribute value.
   */
  void escapeAttValue(StringBuffer buf, String str) {

    for(int i=0; i < str.length(); i++) {
      char c = str.charAt(i);
      if(c == '<')
        buf.append("&lt;");
      else if(c == '>')
        buf.append("&gt;");
      else if(c == '&')
        buf.append("&amp;");
      else if(c == '"')
        buf.append("&quot;");
      else
        buf.append(c);
    }
  }

  /**
   * @return the string escaped as by
   * {@link #escapeAttValue(StringBuffer, String)}; the same instance if it
   * contains nothing to escape
   */
  String escapeAttValue(String s) {

    for(int i=0; i < s.length(); i++) {
      char c = s.charAt(i);
      if(c == '<' || c == '>' || c == '&' || c == '"') {
        StringBuffer buf = new StringBuffer();
        escapeAttValue(buf, s);
        return buf.toString();
      }
    }
    return s;
  }

  /**
   * @return the {@code rdf:about} attribute (with a leading blank) that
   * identifies the resource with the given URI
   */
  String IDorAbout(String s) throws ModelException {
      return " rdf:about=\"" + shortcutPrefix(s) + "\"";
  }

  /**
   * Rewrites a URI for use in an attribute value: if it starts with one of
   * the registered namespaces, that part is replaced by the entity reference
   * of the namespace. The longest matching namespace is used so that the
   * result does not depend on the iteration order of the table. The rest
   * is escaped with {@link #escapeAttValue(String)}.
   */
  String shortcutPrefix(String s) {
    String best = null;
    for (Enumeration en = namespaces.keys(); en.hasMoreElements();) {
      String prefix = (String)en.nextElement();
      if(s.startsWith(prefix) && (best == null || prefix.length() > best.length()))
        best = prefix;
    }
    if(best == null)
      return escapeAttValue(s);
    return "&" + namespaces.get(best) + ";" + escapeAttValue(s.substring(best.length()));
  }

  /**
   * @return "#" followed by the local name if the URI starts with the source
   * URI of the current model, or if the model has no source URI and the URI
   * has an empty namespace part; otherwise the URI itself
   */
  String resourceID(String s) throws ModelException {

    if((model.getSourceURI() != null && s.startsWith(model.getSourceURI())) ||
       (model.getSourceURI() == null && RDFUtil.guessNamespace(s).length() == 0) ) {
      return "#" + RDFUtil.guessName(s);
    } else
      return s;
  }
}
