[jdom-interest] getTextNormalize bug?

Tim Daly tdaly at ans.net
Mon Nov 26 20:07:18 PST 2001


I wrote a simple program to read an XML file and write it to stdout.
The text of each Element is printed with the call:

  Element element; 
  ...
  System.out.println(element.getTextNormalize());

During my testing I ran it on the build.xml file in jdom, thus:

  java XMLCopy build.xml >foo.xml

Then I renamed foo.xml to build.xml and did:

  ./build.sh

The Ant program died with the message:

 Character conversion error: "Unconvertible UTF-8 character beginning 
 with 0xa9" (line number may be too low).

The original line in the original build.xml file contains:

  Copyright ©

(aside: we knew the problem was the copyright :-))
which got converted by getTextNormalize() into

  Copyright (someStrangeCharacter)

Is this a bug in getTextNormalize?
My source code follows.

Tim Daly
daly at idsi.net

=====================================================================

package samples;

import org.jdom.*;
import org.jdom.input.SAXBuilder;
import org.jdom.input.DOMBuilder;
import org.jdom.output.*;
import java.util.*;

/**
 * Sample program that parses XML documents with JDOM and echoes each
 * document's element tree to standard output as indented markup.
 *
 * <p>For every element the program prints an opening tag with its
 * attributes, the element's normalized text (when non-empty), the child
 * elements recursively, and finally a closing tag.
 *
 * <p>Output is always encoded as UTF-8. No XML declaration is written, so a
 * parser reading the output assumes UTF-8; writing in the platform default
 * encoding instead turns characters such as the copyright sign into byte
 * sequences the parser rejects.
 */
public class Count
{
  /** Names of the elements whose closing tag has not been printed yet. */
  static Stack stack = new Stack();

  /** Current indentation width, in spaces. */
  static int indent = 0;

  /**
   * Prints the indentation for the current line and adjusts the indent level.
   *
   * <p>A negative {@code count} shrinks the indent <em>before</em> printing
   * (used for closing tags); a positive {@code count} grows it <em>after</em>
   * printing (used for opening tags); zero prints without changing the level.
   *
   * @param count amount by which to change the indentation width
   */
  public static void doIndent(int count)
  {
    if (count < 0) {
      indent = indent + count;
    }
    for (int i = 0; i < indent; i++) {
      System.out.print(" ");
    }
    if (count > 0) {
      indent = indent + count;
    }
  }

  /**
   * Entry point: builds a JDOM document from every argument and prints it.
   *
   * <p>Each document is parsed with SAX, converted to a DOM element and back
   * to a JDOM element, and then printed with {@link #count(Element)}.
   *
   * @param args one or more URLs or file names of XML documents
   */
  public static void main(String[] args)
  {
    if (args.length == 0) {
      System.out.println("Usage: java Count URL1 URL2...");
      return;
    }

    // Force UTF-8 on standard output; the platform default encoding is not
    // guaranteed to be the one an XML parser will assume for this output.
    try {
      System.setOut(new java.io.PrintStream(System.out, true, "UTF-8"));
    } catch (java.io.UnsupportedEncodingException e) {
      throw new IllegalStateException("UTF-8 encoding is not available", e);
    }

    SAXBuilder saxBuilder = new SAXBuilder();
    DOMBuilder domBuilder = new DOMBuilder();
    DOMOutputter domOutputter = new DOMOutputter();

    // The usage line advertises several URLs, so handle every argument.
    for (int i = 0; i < args.length; i++) {
      try {
        Document jdomDocument = saxBuilder.build(args[i]);
        org.w3c.dom.Element domElement =
            domOutputter.output(jdomDocument.getRootElement());
        org.jdom.Element jdomElement = domBuilder.build(domElement);
        count(jdomElement);
      } catch (JDOMException e) {
        System.out.println(args[i] + " is not a well formed XML document.");
        System.out.println(e.getMessage());
      }
    }
  }

  /**
   * Prints each attribute as {@code  name = "value"} on the current line.
   * Attribute values are escaped so the output stays well-formed.
   *
   * @param attributes list of {@code org.jdom.Attribute} objects
   */
  public static void printAttributes(List attributes)
  {
    Iterator iterator = attributes.iterator();
    while (iterator.hasNext()) {
      Attribute attribute = (Attribute) iterator.next();
      System.out.print(" " + attribute.getName() + " = \""
                       + escape(attribute.getValue()) + "\"");
    }
  }

  /**
   * Recursively prints an element: opening tag with attributes, normalized
   * text (if any), child elements, and the closing tag.
   *
   * @param element the element to print
   */
  public static void count(Element element)
  {
    doIndent(1);
    System.out.print("<" + element.getName());
    stack.push(element.getName());
    printAttributes(element.getAttributes());
    System.out.println(">");

    String text = element.getTextNormalize();
    if (!text.equals("")) {
      doIndent(0);
      System.out.println(" " + escape(text));
    }

    // Only child elements are descended into; other content is skipped.
    Iterator iterator = element.getContent().iterator();
    while (iterator.hasNext()) {
      Object child = iterator.next();
      if (child instanceof Element) {
        count((Element) child);
      }
    }

    doIndent(-1);
    System.out.println("</" + (String) stack.pop() + ">");
  }

  /**
   * Replaces the characters that are markup-significant in XML text and
   * double-quoted attribute values with their predefined entity references.
   */
  private static String escape(String value)
  {
    StringBuilder escaped = new StringBuilder(value.length());
    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      switch (c) {
        case '&':
          escaped.append("&amp;");
          break;
        case '<':
          escaped.append("&lt;");
          break;
        case '>':
          escaped.append("&gt;");
          break;
        case '"':
          escaped.append("&quot;");
          break;
        default:
          escaped.append(c);
          break;
      }
    }
    return escaped.toString();
  }
}



More information about the jdom-interest mailing list