如何从Java打印XML?

我有一个 Java 字符串,其中包含没有换行和缩进的 XML。我想把它转换成一个格式良好(带换行和缩进)的 XML 字符串。我该如何做到这一点?

String unformattedXml = "<tag><nested>hello</nested></tag>"; String formattedXml = new [UnknownClass]().format(unformattedXml); 

注意:我的输入是一个字符串,我的输出也是一个字符串。

 // Serializes an existing DOM document (the variable `doc`, defined by the caller) to an indented string.
 Transformer transformer = TransformerFactory.newInstance().newTransformer();
 transformer.setOutputProperty(OutputKeys.INDENT, "yes");
 // Implementation-specific (Xalan-style) property that sets the number of spaces per indent level.
 transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
 // initialize StreamResult with File object to save to file
 StreamResult result = new StreamResult(new StringWriter());
 DOMSource source = new DOMSource(doc);
 transformer.transform(source, result);
 String xmlString = result.getWriter().toString();
 System.out.println(xmlString);

注意:结果可能因 Java 版本而异。请搜索针对您所用平台的解决方法。

这是我自己的问题的答案。 我将各种结果的答案结合在一起,编写出可以打印XML的类。

无法保证它如何响应无效的XML或大型文档。

 package ecb.sdw.pretty; import org.apache.xml.serialize.OutputFormat; import org.apache.xml.serialize.XMLSerializer; import org.w3c.dom.Document; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import java.io.IOException; import java.io.StringReader; import java.io.StringWriter; import java.io.Writer; /** * Pretty-prints xml, supplied as a string. * <p/> * eg. * <code> * String formattedXml = new XmlFormatter().format("<tag><nested>hello</nested></tag>"); * </code> */ public class XmlFormatter { public XmlFormatter() { } public String format(String unformattedXml) { try { final Document document = parseXmlFile(unformattedXml); OutputFormat format = new OutputFormat(document); format.setLineWidth(65); format.setIndenting(true); format.setIndent(2); Writer out = new StringWriter(); XMLSerializer serializer = new XMLSerializer(out, format); serializer.serialize(document); return out.toString(); } catch (IOException e) { throw new RuntimeException(e); } } private Document parseXmlFile(String in) { try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); InputSource is = new InputSource(new StringReader(in)); return db.parse(is); } catch (ParserConfigurationException e) { throw new RuntimeException(e); } catch (SAXException e) { throw new RuntimeException(e); } catch (IOException e) { throw new RuntimeException(e); } } public static void main(String[] args) { String unformattedXml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><QueryMessage\n" + " xmlns=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/message\"\n" + " xmlns:query=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/query\">\n" + " <Query>\n" + " <query:CategorySchemeWhere>\n" + " \t\t\t\t\t <query:AgencyID>ECB\n\n\n\n</query:AgencyID>\n" + " </query:CategorySchemeWhere>\n" + " 
</Query>\n\n\n\n\n" + "</QueryMessage>"; System.out.println(new XmlFormatter().format(unformattedXml)); } } 

基于这个答案的一个更简单的解决方案:

 public static String prettyFormat(String input, int indent) { try { Source xmlInput = new StreamSource(new StringReader(input)); StringWriter stringWriter = new StringWriter(); StreamResult xmlOutput = new StreamResult(stringWriter); TransformerFactory transformerFactory = TransformerFactory.newInstance(); transformerFactory.setAttribute("indent-number", indent); Transformer transformer = transformerFactory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.transform(xmlInput, xmlOutput); return xmlOutput.getWriter().toString(); } catch (Exception e) { throw new RuntimeException(e); // simple exception handling, please review it } } public static String prettyFormat(String input) { return prettyFormat(input, 2); } 

测试用例:

 prettyFormat("<root><child>aaa</child><child/></root>"); 

返回:

 <?xml version="1.0" encoding="UTF-8"?> <root> <child>aaa</child> <child/> </root> 

现在是2012年了,Java可以做的比以前用XML更多,我想添加一个替代我接受的答案。 这在Java 6之外没有依赖关系。

 import org.w3c.dom.Node;
 import org.w3c.dom.bootstrap.DOMImplementationRegistry;
 import org.w3c.dom.ls.DOMImplementationLS;
 import org.w3c.dom.ls.LSSerializer;
 import org.xml.sax.InputSource;

 import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.StringReader;

 /**
  * Pretty-prints xml, supplied as a string.
  *
  * <p>eg.
  * {@code String formattedXml = new XmlFormatter().format("<tag><nested>hello</nested></tag>");}
  */
 public class XmlFormatter {

     /**
      * Parses the XML text and serializes its document element with the DOM Load-and-Save
      * serializer in pretty-print mode.
      *
      * @param xml the XML text to format
      * @return the pretty-printed XML; an XML declaration is emitted only when the input
      *         itself starts with {@code <?xml}
      * @throws RuntimeException wrapping any failure while parsing or serializing
      */
     public String format(String xml) {
         try {
             final InputSource src = new InputSource(new StringReader(xml));
             // Only the document element is serialized, not the whole Document node.
             final Node document = DocumentBuilderFactory.newInstance()
                     .newDocumentBuilder()
                     .parse(src)
                     .getDocumentElement();
             final Boolean keepDeclaration = Boolean.valueOf(xml.startsWith("<?xml"));

             // May need this:
             // System.setProperty(DOMImplementationRegistry.PROPERTY,
             //         "com.sun.org.apache.xerces.internal.dom.DOMImplementationSourceImpl");

             final DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance();
             final DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS");
             if (impl == null) {
                 // Fail with a clear message instead of a bare NullPointerException below.
                 throw new IllegalStateException("No DOM implementation with the \"LS\" feature is available");
             }
             final LSSerializer writer = impl.createLSSerializer();

             // Set this to true if the output needs to be beautified.
             writer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE);
             // Set this to true if the declaration is needed to be outputted.
             writer.getDomConfig().setParameter("xml-declaration", keepDeclaration);

             return writer.writeToString(document);
         } catch (Exception e) {
             throw new RuntimeException(e);
         }
     }

     /** Demo: formats a sample document containing irregular whitespace and prints the result. */
     public static void main(String[] args) {
         String unformattedXml =
                 "<?xml version=\"1.0\" encoding=\"UTF-8\"?><QueryMessage\n"
                 + " xmlns=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/message\"\n"
                 + " xmlns:query=\"http://www.SDMX.org/resources/SDMXML/schemas/v2_0/query\">\n"
                 + " <Query>\n"
                 + " <query:CategorySchemeWhere>\n"
                 + " \t\t\t\t\t <query:AgencyID>ECB\n\n\n\n</query:AgencyID>\n"
                 + " </query:CategorySchemeWhere>\n"
                 + " </Query>\n\n\n\n\n"
                 + "</QueryMessage>";

         System.out.println(new XmlFormatter().format(unformattedXml));
     }
 }

只需要注意,评分最高的答案需要使用xerces。

如果你不想添加这个外部依赖,那么你可以直接使用标准的 JDK 库(它实际上是基于内部的 xerces 构建的)。

注意:jdk版本1.5有一个bug,请参阅http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6296446,但现在已经解决了。;

(注意如果发生错误,这将返回原始文本)

 package com.test;

 import java.io.ByteArrayInputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.StringReader;
 import java.io.StringWriter;

 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Source;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.sax.SAXSource;
 import javax.xml.transform.sax.SAXTransformerFactory;
 import javax.xml.transform.stream.StreamResult;

 import org.xml.sax.InputSource;

 /** Formats XML text with an identity transformer fed from a SAX source. */
 public class XmlTest {

     /** Demo: formats a small document and prints the result. */
     public static void main(String[] args) {
         XmlTest t = new XmlTest();
         System.out.println(t.formatXml("<a><b><c/><d>text D</d><e value='0'/></b></a>"));
     }

     /**
      * Returns an indented copy of the given XML text.
      *
      * <p>This is deliberately best-effort: if anything goes wrong the original text is
      * returned unchanged.
      *
      * @param xml the XML text to format
      * @return the indented XML, or {@code xml} itself when formatting fails
      */
     public String formatXml(String xml) {
         try {
             Transformer serializer = SAXTransformerFactory.newInstance().newTransformer();
             serializer.setOutputProperty(OutputKeys.INDENT, "yes");
             // Optional: serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
             serializer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
             // Alternative key: serializer.setOutputProperty("{http://xml.customer.org/xslt}indent-amount", "2");

             // Read and write characters directly. Going through byte arrays with the platform
             // default charset corrupts non-ASCII text when that charset is not the one the
             // parser/serializer assumes.
             Source xmlSource = new SAXSource(new InputSource(new StringReader(xml)));
             StringWriter out = new StringWriter();
             serializer.transform(xmlSource, new StreamResult(out));
             return out.toString();
         } catch (Exception e) {
             // TODO log error
             return xml;
         }
     }
 }

我以前用org.dom4j.io.OutputFormat.createPrettyPrint()方法打印过

 public String prettyPrint(final String xml){ if (StringUtils.isBlank(xml)) { throw new RuntimeException("xml was null or blank in prettyPrint()"); } final StringWriter sw; try { final OutputFormat format = OutputFormat.createPrettyPrint(); final org.dom4j.Document document = DocumentHelper.parseText(xml); sw = new StringWriter(); final XMLWriter writer = new XMLWriter(sw, format); writer.write(document); } catch (Exception e) { throw new RuntimeException("Error pretty printing xml:\n" + xml, e); } return sw.toString(); } 

这里有一个使用dom4j的方法 :

import:

 import org.dom4j.Document; import org.dom4j.DocumentHelper; import org.dom4j.io.OutputFormat; import org.dom4j.io.XMLWriter; 

码:

 String xml = "<your xml='here'/>"; Document doc = DocumentHelper.parseText(xml); StringWriter sw = new StringWriter(); OutputFormat format = OutputFormat.createPrettyPrint(); XMLWriter xw = new XMLWriter(sw, format); xw.write(doc); String result = sw.toString(); 

既然你是从一个 String 开始,就需要先把它转换成一个 DOM 对象(例如 Node),然后才能使用 Transformer。但是,如果你确定你的 XML 字符串是有效的,并且不想承担先把字符串解析为 DOM、再对 DOM 运行转换以得到字符串所带来的内存开销,那么可以采用老式的逐字符解析:在每个 </...> 之后插入换行符和空格,并维护一个缩进计数器(用于确定空格数量),每遇到一个 <...> 就递增,每遇到一个 </...> 就递减。

免责声明——下面的函数是我通过剪切/粘贴/文本编辑拼出来的,所以它们可能无法按原样编译。

 public static final Element createDOM(String strXML) throws ParserConfigurationException, SAXException, IOException { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setValidating(true); DocumentBuilder db = dbf.newDocumentBuilder(); InputSource sourceXML = new InputSource(new StringReader(strXML)) Document xmlDoc = db.parse(sourceXML); Element e = xmlDoc.getDocumentElement(); e.normalize(); return e; } public static final void prettyPrint(Node xml, OutputStream out) throws TransformerConfigurationException, TransformerFactoryConfigurationError, TransformerException { Transformer tf = TransformerFactory.newInstance().newTransformer(); tf.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); tf.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); tf.setOutputProperty(OutputKeys.INDENT, "yes"); tf.transform(new DOMSource(xml), new StreamResult(out)); } 

如果使用第三方XML库是可以的,那么比起当前得票数最高的 答案所显示的要简单得多,就可以避开这个问题 。

有人说,input和输出都应该是string,所以这里有一个实用的方法,就是用XOM库实现的:

 import nu.xom.*; import java.io.*; [...] public static String format(String xml) throws ParsingException, IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); Serializer serializer = new Serializer(out); serializer.setIndent(4); // or whatever you like serializer.write(new Builder().build(xml, "")); return out.toString("UTF-8"); } 

我testing了它的工作原理,结果并不取决于你的JRE版本或类似的东西。 要了解如何根据自己的喜好自定义输出格式,请查看Serializer API。

这实际上比我预想的要长——需要一些额外的代码行,因为 Serializer 需要一个 OutputStream 来写入。但请注意,真正用于处理 XML 的代码在这里只有很少几行。

(这个答案是我对XOM评估的一部分,在我的关于最好的Java XML库replacedom4j的问题中 提出了一个选项,对于dom4j来说,使用XMLWriterOutputFormat可以很容易地实现这一点。 ……正如mlo55的答案中所表明的那样 )

嗯…遇到这样的事情,这是一个已知的bug …只需添加此OutputProperty ..

 transformer.setOutputProperty(OutputPropertiesFactory.S_KEY_INDENT_AMOUNT, "8"); 

希望这可以帮助 …

Kevin Hakanson说:“但是,如果你知道你的XMLstring是有效的,而且你不想产生把stringparsing成DOM的内存开销,那么在DOM上运行一个变换来得到一个string – 你可以只要在字符parsing的基础上做一些老式的字符,在每个字符之后插入一个换行符和空格,保留并缩进每个<…>增加的计数器(以确定空格的数量),然后递减。

同意。 这种方法要快得多,依赖性也less得多。

示例解决scheme

 /**
  * XML utils, including formatting.
  *
  * <p>The formatter is a character scanner, not a parser: it assumes well-formed input and
  * only looks at {@code <}, {@code >} and the characters next to them.
  */
 public class XmlUtils {

     private static XmlFormatter formatter = new XmlFormatter(2, 80);

     /**
      * Formats the XML text starting at indent 0.
      *
      * @param s the XML text to format
      * @return the formatted text
      */
     public static String formatXml(String s) {
         return formatter.format(s, 0);
     }

     /**
      * Formats the XML text starting at the given indent.
      *
      * @param s             the XML text to format
      * @param initialIndent number of spaces to put before top-level tags
      * @return the formatted text
      */
     public static String formatXml(String s, int initialIndent) {
         return formatter.format(s, initialIndent);
     }

     private static class XmlFormatter {
         private final int indentNumChars;
         private final int lineLength;

         public XmlFormatter(int indentNumChars, int lineLength) {
             this.indentNumChars = indentNumChars;
             this.lineLength = lineLength;
         }

         /**
          * Scans the text once, writing each tag on its own indented line. Short text content
          * (at most half of {@code lineLength}) stays on the same line as its tags; longer
          * text is wrapped onto lines of its own.
          */
         public String format(String s, int initialIndent) {
             int indent = initialIndent;
             // True while an opening tag and short text have been written on one line, so
             // that the matching closing tag is appended without indentation.
             boolean singleLine = false;
             StringBuilder sb = new StringBuilder();
             for (int i = 0; i < s.length(); i++) {
                 char currentChar = s.charAt(i);
                 if (currentChar == '<') {
                     // A trailing '<' has no next character; treat it as a plain opening tag.
                     char nextChar = (i + 1 < s.length()) ? s.charAt(i + 1) : '\0';
                     if (nextChar == '/') {
                         indent -= indentNumChars;
                     }
                     // Don't indent before closing element if we're creating opening and
                     // closing elements on a single line.
                     if (!singleLine) {
                         sb.append(buildWhitespace(indent));
                     }
                     if (nextChar != '?' && nextChar != '!' && nextChar != '/') {
                         indent += indentNumChars;
                     }
                     singleLine = false; // Reset flag.
                 }
                 sb.append(currentChar);
                 if (currentChar == '>') {
                     if (i > 0 && s.charAt(i - 1) == '/') {
                         // Self-closing tag: undo the indent added when it was opened.
                         indent -= indentNumChars;
                         sb.append("\n");
                     } else {
                         int nextStartElementPos = s.indexOf('<', i);
                         if (nextStartElementPos > i + 1) {
                             String textBetweenElements = s.substring(i + 1, nextStartElementPos);
                             if (textBetweenElements.replaceAll("\n", "").length() == 0) {
                                 // If the space between elements is solely newlines, let them
                                 // through to preserve additional newlines in source document.
                                 sb.append(textBetweenElements + "\n");
                             } else if (textBetweenElements.length() <= lineLength * 0.5) {
                                 // Put tags and text on a single line if the text is short.
                                 sb.append(textBetweenElements);
                                 singleLine = true;
                             } else {
                                 // For larger amounts of text, wrap lines to a maximum line length.
                                 sb.append("\n" + lineWrap(textBetweenElements, lineLength, indent, null) + "\n");
                             }
                             // Skip the text just handled; the loop increment lands on the next '<'.
                             i = nextStartElementPos - 1;
                         } else {
                             sb.append("\n");
                         }
                     }
                 }
             }
             return sb.toString();
         }
     }

     /** Returns a string of {@code numChars} spaces (empty when {@code numChars} is zero or negative). */
     private static String buildWhitespace(int numChars) {
         StringBuilder sb = new StringBuilder();
         for (int i = 0; i < numChars; i++) {
             sb.append(" ");
         }
         return sb.toString();
     }

     /**
      * Wraps the supplied text to the specified line length.
      *
      * <p>Lines are broken after the last space or tab that fits. A run with no whitespace
      * is hard-broken at {@code lineLength}.
      *
      * @param s          the text to wrap; {@code null} yields {@code null}
      * @param lineLength the maximum length of each line in the returned string (not including
      *                   indent if specified)
      * @param indent     optional number of whitespace characters to prepend to each line before
      *                   the text
      * @param linePrefix optional string to append to the indent (before the text)
      * @return the supplied text wrapped so that no line exceeds the specified line length +
      *         indent, optionally with indent and prefix applied to each line
      */
     private static String lineWrap(String s, int lineLength, Integer indent, String linePrefix) {
         if (s == null) {
             return null;
         }
         int width = Math.max(1, lineLength); // a non-positive width would never advance
         String margin = buildWhitespace(indent == null ? 0 : indent.intValue());
         StringBuilder sb = new StringBuilder();
         int lineStartPos = 0;
         boolean firstLine = true;
         while (lineStartPos < s.length()) {
             if (!firstLine) {
                 sb.append("\n");
             } else {
                 firstLine = false;
             }
             int lineEndPos;
             if (lineStartPos + width >= s.length()) {
                 // The remainder fits on one line.
                 lineEndPos = s.length() - 1;
             } else {
                 int hardBreakPos = lineStartPos + width - 1;
                 lineEndPos = hardBreakPos;
                 while (lineEndPos > lineStartPos
                         && s.charAt(lineEndPos) != ' ' && s.charAt(lineEndPos) != '\t') {
                     lineEndPos--;
                 }
                 if (lineEndPos == lineStartPos) {
                     // No usable whitespace: hard-break rather than emit one character per line.
                     lineEndPos = hardBreakPos;
                 }
             }
             sb.append(margin);
             if (linePrefix != null) {
                 sb.append(linePrefix);
             }
             sb.append(s.substring(lineStartPos, lineEndPos + 1));
             lineStartPos = lineEndPos + 1;
         }
         return sb.toString();
     }

     // other utils removed for brevity
 }

使用Scala:

 import xml._ val xml = XML.loadString("<tag><nested>hello</nested></tag>") val formatted = new PrettyPrinter(150, 2).format(xml) println(formatted) 

如果你依赖scala-library.jar,你也可以用Java来做到这一点。 它看起来像这样:

 import scala.xml.*; public class FormatXML { public static void main(String[] args) { String unformattedXml = "<tag><nested>hello</nested></tag>"; PrettyPrinter pp = new PrettyPrinter(150, 3); String formatted = pp.format(XML.loadString(unformattedXml), TopScope$.MODULE$); System.out.println(formatted); } } 

PrettyPrinter对象由两个整数构成,第一个是最大行长,第二个是缩进步骤。

关于“你必须先build立一个DOM树”的评论:不,你不需要,也不应该这样做。

相反,创build一个StreamSource(新的StreamSource(新的StringReader(str)),并将其提供给所提到的标识转换器,这将使用SAXparsing器,结果会快得多。否则,排名第一的答案是好的。

只是为了将来的参考,这里有一个解决scheme,为我工作(感谢@乔治·霍金斯在一个答案中发表的评论):

 DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance(); DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS"); LSSerializer writer = impl.createLSSerializer(); writer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE); LSOutput output = impl.createLSOutput(); ByteArrayOutputStream out = new ByteArrayOutputStream(); output.setByteStream(out); writer.write(document, output); String xmlStr = new String(out.toByteArray()); 

从milosmns稍微改进的版本…

 /**
  * Indents XML text by splitting it at every tag boundary and tracking nesting depth.
  * No XML parser is involved, so the input is assumed to be well-formed.
  *
  * @param xml the XML text; {@code null} or blank yields an empty string
  * @return the text with one tag or text run per line, trimmed at both ends
  */
 public static String getPrettyXml(String xml) {
     if (xml == null || xml.trim().length() == 0) {
         return "";
     }
     int stack = 0;
     StringBuilder pretty = new StringBuilder();
     // Literal replacement is enough here; no regular expression is needed.
     String[] rows = xml.trim().replace(">", ">\n").replace("<", "\n<").split("\n");
     for (String rawRow : rows) {
         String row = rawRow.trim();
         if (row.length() == 0) {
             continue;
         }
         if (row.startsWith("<?")) {
             // XML declaration / processing instruction: written without indentation.
             pretty.append(row + "\n");
         } else if (row.startsWith("</")) {
             // Closing tag: step out one level, never below zero on unbalanced input.
             stack = Math.max(0, stack - 1);
             pretty.append(repeatString(stack) + row + "\n");
         } else if (row.startsWith("<!")) {
             // Comments, DOCTYPE and CDATA sections do not open an element, so the depth
             // stays as it is.
             pretty.append(repeatString(stack) + row + "\n");
         } else if (row.startsWith("<") && !row.endsWith("/>")) {
             // Opening tag: write at the current depth, then step in.
             pretty.append(repeatString(stack) + row + "\n");
             stack++;
         } else {
             // Text content or a self-closing tag.
             pretty.append(repeatString(stack) + row + "\n");
         }
     }
     return pretty.toString().trim();
 }

 /** Returns the indent string for the given depth: one indent unit per level. */
 private static String repeatString(int stack) {
     StringBuilder indent = new StringBuilder();
     for (int i = 0; i < stack; i++) {
         indent.append(" ");
     }
     return indent.toString();
 }

如果你确定手头的 XML 是有效的,这个方法很简单,而且不需要构建 XML DOM 树。其中可能还有一些错误;如果你发现了问题,欢迎留言指出。

 public String prettyPrint(String xml) { if (xml == null || xml.trim().length() == 0) return ""; int stack = 0; StringBuilder pretty = new StringBuilder(); String[] rows = xml.trim().replaceAll(">", ">\n").replaceAll("<", "\n<").split("\n"); for (int i = 0; i < rows.length; i++) { if (rows[i] == null || rows[i].trim().length() == 0) continue; String row = rows[i].trim(); if (row.startsWith("<?")) { // xml version tag pretty.append(row + "\n"); } else if (row.startsWith("</")) { // closing tag String indent = repeatString(" ", --stack); pretty.append(indent + row + "\n"); } else if (row.startsWith("<")) { // starting tag String indent = repeatString(" ", stack++); pretty.append(indent + row + "\n"); } else { // tag data String indent = repeatString(" ", stack); pretty.append(indent + row + "\n"); } } return pretty.toString().trim(); } 

以上所有的解决scheme都不适合我,然后我发现这个http://myshittycode.com/2014/02/10/java-properly-indenting-xml-string/

线索是用XPath删除空格

  String xml = "<root>" + "\n " + "\n<name>Coco Puff</name>" + "\n <total>10</total> </root>"; try { Document document = DocumentBuilderFactory.newInstance() .newDocumentBuilder() .parse(new InputSource(new ByteArrayInputStream(xml.getBytes("utf-8")))); XPath xPath = XPathFactory.newInstance().newXPath(); NodeList nodeList = (NodeList) xPath.evaluate("//text()[normalize-space()='']", document, XPathConstants.NODESET); for (int i = 0; i < nodeList.getLength(); ++i) { Node node = nodeList.item(i); node.getParentNode().removeChild(node); } Transformer transformer = TransformerFactory.newInstance().newTransformer(); transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); StringWriter stringWriter = new StringWriter(); StreamResult streamResult = new StreamResult(stringWriter); transformer.transform(new DOMSource(document), streamResult); System.out.println(stringWriter.toString()); } catch (Exception e) { e.printStackTrace(); } 

只是另一个解决scheme,为我们工作

 import java.io.StringWriter; import org.dom4j.DocumentHelper; import org.dom4j.io.OutputFormat; import org.dom4j.io.XMLWriter; ** * Pretty Print XML String * * @param inputXmlString * @return */ public static String prettyPrintXml(String xml) { final StringWriter sw; try { final OutputFormat format = OutputFormat.createPrettyPrint(); final org.dom4j.Document document = DocumentHelper.parseText(xml); sw = new StringWriter(); final XMLWriter writer = new XMLWriter(sw, format); writer.write(document); } catch (Exception e) { throw new RuntimeException("Error pretty printing xml:\n" + xml, e); } return sw.toString(); } 

作为max , codekraps , David Easley和milosmns的答案的替代方法, 请查看我的轻量级高性能漂亮打印机库: xml-formatter

 // construct lightweight, threadsafe, instance PrettyPrinter prettyPrinter = PrettyPrinterBuilder.newPrettyPrinter().build(); StringBuilder buffer = new StringBuilder(); String xml = ..; // also works with char[] or Reader if(prettyPrinter.process(xml, buffer)) { // valid XML, print buffer } else { // invalid XML, print xml } 

有时,就像直接从文件运行嘲讽的SOAP服务时,最好有一个漂亮的打印机,它也处理已经漂亮的XML:

 PrettyPrinter prettyPrinter = PrettyPrinterBuilder.newPrettyPrinter().ignoreWhitespace().build(); 

正如一些人所说的那样,漂亮的打印只是一种以更易于理解的forms呈现XML的方式 – 空格绝对不属于您的XML数据。

该库旨在用于logging的漂亮打印,还包括过滤(子树移除/匿名化)和CDATA和Text节点中XML的漂亮打印function。

下面的代码完美工作

 import javax.xml.transform.OutputKeys; import javax.xml.transform.Source; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; String formattedXml1 = prettyFormat("<root><child>aaa</child><child/></root>"); public static String prettyFormat(String input) { return prettyFormat(input, "2"); } public static String prettyFormat(String input, String indent) { Source xmlInput = new StreamSource(new StringReader(input)); StringWriter stringWriter = new StringWriter(); try { TransformerFactory transformerFactory = TransformerFactory.newInstance(); Transformer transformer = transformerFactory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", indent); transformer.transform(xmlInput, new StreamResult(stringWriter)); String pretty = stringWriter.toString(); pretty = pretty.replace("\r\n", "\n"); return pretty; } catch (Exception e) { throw new RuntimeException(e); } } 

I had the same problem and I'm having great success with JTidy ( http://jtidy.sourceforge.net/index.html )

例:

 // Parse the markup with JTidy and pretty-print the resulting DOM into a string.
 Tidy t = new Tidy();
 t.setIndentContent(true);
 // parseDOM reads from an InputStream, so the markup is passed as bytes; the second
 // argument (the output stream) is null here.
 Document d = t.parseDOM(new ByteArrayInputStream("HTML goes here".getBytes()), null);
 OutputStream out = new ByteArrayOutputStream();
 t.pprint(d, out);
 String html = out.toString();

Using jdom2 : http://www.jdom.org/

 import java.io.StringReader; import org.jdom2.input.SAXBuilder; import org.jdom2.output.Format; import org.jdom2.output.XMLOutputter; String prettyXml = new XMLOutputter(Format.getPrettyFormat()). outputString(new SAXBuilder().build(new StringReader(uglyXml))); 

there is a very nice command line xml utility called xmlstarlet( http://xmlstar.sourceforge.net/ ) that can do a lot of things which a lot of people use.

You could execute this program programmatically using Runtime.exec and then read in the formatted output file. It has more options and better error reporting than a few lines of Java code can provide.

download xmlstarlet : http://sourceforge.net/project/showfiles.php?group_id=66612&package_id=64589

I have found that in Java 1.6.0_32 the normal method to pretty print an XML string (using a Transformer with a null or identity xslt) does not behave as I would like if tags are merely separated by whitespace, as opposed to having no separating text. I tried using <xsl:strip-space elements="*"/> in my template to no avail. The simplest solution I found was to strip the space the way I wanted using a SAXSource and XML filter. Since my solution was for logging I also extended this to work with incomplete XML fragments. Note the normal method seems to work fine if you use a DOMSource but I did not want to use this because of the incompleteness and memory overhead.

 public static class WhitespaceIgnoreFilter extends XMLFilterImpl { @Override public void ignorableWhitespace(char[] arg0, int arg1, int arg2) throws SAXException { //Ignore it then... } @Override public void characters( char[] ch, int start, int length) throws SAXException { if (!new String(ch, start, length).trim().equals("")) super.characters(ch, start, length); } } public static String prettyXML(String logMsg, boolean allowBadlyFormedFragments) throws SAXException, IOException, TransformerException { TransformerFactory transFactory = TransformerFactory.newInstance(); transFactory.setAttribute("indent-number", new Integer(2)); Transformer transformer = transFactory.newTransformer(); transformer.setOutputProperty(OutputKeys.INDENT, "yes"); transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); StringWriter out = new StringWriter(); XMLReader masterParser = SAXHelper.getSAXParser(true); XMLFilter parser = new WhitespaceIgnoreFilter(); parser.setParent(masterParser); if(allowBadlyFormedFragments) { transformer.setErrorListener(new ErrorListener() { @Override public void warning(TransformerException exception) throws TransformerException { } @Override public void fatalError(TransformerException exception) throws TransformerException { } @Override public void error(TransformerException exception) throws TransformerException { } }); } try { transformer.transform(new SAXSource(parser, new InputSource(new StringReader(logMsg))), new StreamResult(out)); } catch (TransformerException e) { if(e.getCause() != null && e.getCause() instanceof SAXParseException) { if(!allowBadlyFormedFragments || !"XML document structures must start and end within the same entity.".equals(e.getCause().getMessage())) { throw e; } } else { throw e; } } out.flush(); return out.toString(); } 

The solutions I have found here for Java 1.6+ do not reformat the code if it is already formatted. The one that worked for me (and re-formatted already formatted code) was the following.

 import org.apache.xml.security.c14n.CanonicalizationException; import org.apache.xml.security.c14n.Canonicalizer; import org.apache.xml.security.c14n.InvalidCanonicalizerException; import org.w3c.dom.Element; import org.w3c.dom.bootstrap.DOMImplementationRegistry; import org.w3c.dom.ls.DOMImplementationLS; import org.w3c.dom.ls.LSSerializer; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import java.io.IOException; import java.io.StringReader; public class XmlUtils { public static String toCanonicalXml(String xml) throws InvalidCanonicalizerException, ParserConfigurationException, SAXException, CanonicalizationException, IOException { Canonicalizer canon = Canonicalizer.getInstance(Canonicalizer.ALGO_ID_C14N_OMIT_COMMENTS); byte canonXmlBytes[] = canon.canonicalize(xml.getBytes()); return new String(canonXmlBytes); } public static String prettyFormat(String input) throws TransformerException, ParserConfigurationException, IOException, SAXException, InstantiationException, IllegalAccessException, ClassNotFoundException { InputSource src = new InputSource(new StringReader(input)); Element document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(src).getDocumentElement(); Boolean keepDeclaration = input.startsWith("<?xml"); DOMImplementationRegistry registry = DOMImplementationRegistry.newInstance(); DOMImplementationLS impl = (DOMImplementationLS) registry.getDOMImplementation("LS"); LSSerializer writer = impl.createLSSerializer(); writer.getDomConfig().setParameter("format-pretty-print", Boolean.TRUE); writer.getDomConfig().setParameter("xml-declaration", keepDeclaration); return writer.writeToString(document); } } 

It is a good tool to use in your unit tests for full-string xml comparison.

 private void assertXMLEqual(String expected, String actual) throws ParserConfigurationException, IOException, SAXException, CanonicalizationException, InvalidCanonicalizerException, TransformerException, IllegalAccessException, ClassNotFoundException, InstantiationException { String canonicalExpected = prettyFormat(toCanonicalXml(expected)); String canonicalActual = prettyFormat(toCanonicalXml(actual)); assertEquals(canonicalExpected, canonicalActual); } 

For those searching for a quick and dirty solution – which doesn't need the XML to be 100% valid. eg in case of REST / SOAP logging (you never know what the others send ;-))

I found a code snippet online and extended it; I think this approach is still missing here as a valid possibility:

 public static String prettyPrintXMLAsString(String xmlString) { /* Remove new lines */ final String LINE_BREAK = "\n"; xmlString = xmlString.replaceAll(LINE_BREAK, ""); StringBuffer prettyPrintXml = new StringBuffer(); /* Group the xml tags */ Pattern pattern = Pattern.compile("(<[^/][^>]+>)?([^<]*)(</[^>]+>)?(<[^/][^>]+/>)?"); Matcher matcher = pattern.matcher(xmlString); int tabCount = 0; while (matcher.find()) { String str1 = (null == matcher.group(1) || "null".equals(matcher.group())) ? "" : matcher.group(1); String str2 = (null == matcher.group(2) || "null".equals(matcher.group())) ? "" : matcher.group(2); String str3 = (null == matcher.group(3) || "null".equals(matcher.group())) ? "" : matcher.group(3); String str4 = (null == matcher.group(4) || "null".equals(matcher.group())) ? "" : matcher.group(4); if (matcher.group() != null && !matcher.group().trim().equals("")) { printTabs(tabCount, prettyPrintXml); if (!str1.equals("") && str3.equals("")) { ++tabCount; } if (str1.equals("") && !str3.equals("")) { --tabCount; prettyPrintXml.deleteCharAt(prettyPrintXml.length() - 1); } prettyPrintXml.append(str1); prettyPrintXml.append(str2); prettyPrintXml.append(str3); if (!str4.equals("")) { prettyPrintXml.append(LINE_BREAK); printTabs(tabCount, prettyPrintXml); prettyPrintXml.append(str4); } prettyPrintXml.append(LINE_BREAK); } } return prettyPrintXml.toString(); } private static void printTabs(int count, StringBuffer stringBuffer) { for (int i = 0; i < count; i++) { stringBuffer.append("\t"); } } public static void main(String[] args) { String x = new String( "<soap:Envelope xmlns:soap=\"http://schemas.xmlsoap.org/soap/envelope/\"><soap:Body><soap:Fault><faultcode>soap:Client</faultcode><faultstring>INVALID_MESSAGE</faultstring><detail><ns3:XcbSoapFault xmlns=\"\" xmlns:ns3=\"http://www.someapp.eu/xcb/types/xcb/v1\"><CauseCode>20007</CauseCode><CauseText>INVALID_MESSAGE</CauseText><DebugInfo>Problems creating SAAJ object 
model</DebugInfo></ns3:XcbSoapFault></detail></soap:Fault></soap:Body></soap:Envelope>"); System.out.println(prettyPrintXMLAsString(x)); } 

here is the output:

 <soap:Envelope xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/"> <soap:Body> <soap:Fault> <faultcode>soap:Client</faultcode> <faultstring>INVALID_MESSAGE</faultstring> <detail> <ns3:XcbSoapFault xmlns="" xmlns:ns3="http://www.someapp.eu/xcb/types/xcb/v1"> <CauseCode>20007</CauseCode> <CauseText>INVALID_MESSAGE</CauseText> <DebugInfo>Problems creating SAAJ object model</DebugInfo> </ns3:XcbSoapFault> </detail> </soap:Fault> </soap:Body> </soap:Envelope> 

I saw one answer using Scala , so here is another one in Groovy , just in case someone finds it interesting. The default indentation is 2 steps, XmlNodePrinter constructor can be passed another value as well.

 def xml = "<tag><nested>hello</nested></tag>" def stringWriter = new StringWriter() def node = new XmlParser().parseText(xml); new XmlNodePrinter(new PrintWriter(stringWriter)).print(node) println stringWriter.toString() 

Usage from Java if groovy jar is in classpath

  String xml = "<tag><nested>hello</nested></tag>"; StringWriter stringWriter = new StringWriter(); Node node = new XmlParser().parseText(xml); new XmlNodePrinter(new PrintWriter(stringWriter)).print(node); System.out.println(stringWriter.toString()); 

In case you do not need indentation that much but a few line breaks, it could be sufficient to simply regex…

 String leastPrettifiedXml = uglyXml.replaceAll("><", ">\n<"); 

The code is nice, not the result because of missing indentation.


(For solutions with indentation, see other answers.)

尝试这个:

  try { TransformerFactory transFactory = TransformerFactory.newInstance(); Transformer transformer = null; transformer = transFactory.newTransformer(); StringWriter buffer = new StringWriter(); transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); transformer.transform(new DOMSource(element), new StreamResult(buffer)); String str = buffer.toString(); System.out.println("XML INSIDE IS #########################################"+str); return element; } catch (TransformerConfigurationException e) { e.printStackTrace(); } catch (TransformerException e) { e.printStackTrace(); } 

I should have looked for this page first before coming up with my own solution! Anyway, mine uses Java recursion to parse the xml page. This code is totally self-contained and does not rely on third party libraries. Also .. it uses recursion!

 // you call this method passing in the xml text public static void prettyPrint(String text){ prettyPrint(text, 0); } // "index" corresponds to the number of levels of nesting and/or the number of tabs to print before printing the tag public static void prettyPrint(String xmlText, int index){ boolean foundTagStart = false; StringBuilder tagChars = new StringBuilder(); String startTag = ""; String endTag = ""; String[] chars = xmlText.split(""); // find the next start tag for(String ch : chars){ if(ch.equalsIgnoreCase("<")){ tagChars.append(ch); foundTagStart = true; } else if(ch.equalsIgnoreCase(">") && foundTagStart){ startTag = tagChars.append(ch).toString(); String tempTag = startTag; endTag = (tempTag.contains("\"") ? (tempTag.split(" ")[0] + ">") : tempTag).replace("<", "</"); // <startTag attr1=1 attr2=2> => </startTag> break; } else if(foundTagStart){ tagChars.append(ch); } } // once start and end tag are calculated, print start tag, then content, then end tag if(foundTagStart){ int startIndex = xmlText.indexOf(startTag); int endIndex = xmlText.indexOf(endTag); // handle if matching tags NOT found if((startIndex < 0) || (endIndex < 0)){ if(startIndex < 0) { // no start tag found return; } else { // start tag found, no end tag found (handles single tags aka "<mytag/>" or "<?xml ...>") printTabs(index); System.out.println(startTag); // move on to the next tag // NOTE: "index" (not index+1) because next tag is on same level as this one prettyPrint(xmlText.substring(startIndex+startTag.length(), xmlText.length()), index); return; } // handle when matching tags found } else { String content = xmlText.substring(startIndex+startTag.length(), endIndex); boolean isTagContainsTags = content.contains("<"); // content contains tags printTabs(index); if(isTagContainsTags){ // ie: <tag1><tag2>stuff</tag2></tag1> System.out.println(startTag); prettyPrint(content, index+1); // "index+1" because "content" is nested printTabs(index); } else { System.out.print(startTag); // 
ie: <tag1>stuff</tag1> or <tag1></tag1> System.out.print(content); } System.out.println(endTag); int nextIndex = endIndex + endTag.length(); if(xmlText.length() > nextIndex){ // if there are more tags on this level, continue prettyPrint(xmlText.substring(nextIndex, xmlText.length()), index); } } } else { System.out.print(xmlText); } } private static void printTabs(int counter){ while(counter-- > 0){ System.out.print("\t"); } }