001package com.ganteater.ae.util.xml.easyparser; 002 003import java.io.File; 004import java.io.FileInputStream; 005import java.io.IOException; 006import java.io.InputStream; 007import java.io.InputStreamReader; 008import java.net.URL; 009import java.text.ParseException; 010import java.util.ArrayList; 011 012import org.apache.commons.io.IOUtils; 013import org.apache.commons.lang.StringEscapeUtils; 014import org.apache.commons.lang.StringUtils; 015 016/** 017 * @author victort 018 */ 019public class EasyParser { 020 021 final static boolean debug = false; 022 023 public EasyParser() { 024 } 025 026 public Node getObject(InputStream aInputStream) throws ParserException { 027 try { 028 String xml = IOUtils.toString(new InputStreamReader(aInputStream, "UTF-8")); 029 030 if (xml.indexOf("<?xml") == 0) { 031 xml = StringUtils.substringAfter(xml, "?>"); 032 } 033 034 return getObject(xml); 035 036 } catch (Exception e) { 037 throw new ParserException(e); 038 } 039 } 040 041 public Node getObject(String theXML) { 042 043 if (theXML == null) { 044 return null; 045 } 046 047 int start = -1; 048 do { 049 start++; 050 start = theXML.indexOf("<", start); 051 } while (theXML.charAt(start + 1) == '!' && theXML.charAt(start + 2) == '-'); 052 053 if (start < 0) 054 start = 0; 055 056 theXML = replaceComment(theXML.substring(start)); 057 058 theXML = theXML.trim(); 059 060 if (theXML.charAt(0) != '<' || theXML.charAt(theXML.length() - 1) != '>') { 061 // text block 062 int theEndText = theXML.indexOf('<'); 063 if (theEndText < 0) { 064 theEndText = theXML.length(); 065 } 066 067 Node theNode = new Node(Node.TEXT_TEAG_NAME); 068 String theText = StringEscapeUtils.unescapeXml(theXML.substring(0, theEndText)); 069 theNode.setText(theText); 070 return theNode; 071 } 072 073 theXML = theXML.substring(1).trim(); 074 075 int theEndPos; 076 077 for (theEndPos = 0; theEndPos < theXML.length() && theXML.charAt(theEndPos) != ' ' 078 && theXML.charAt(theEndPos) != '\r' && theXML.charAt(theEndPos) != '>' 079 && theXML.charAt(theEndPos) != '\n'; theEndPos++) { 080 if (theXML.charAt(theEndPos) == '/') { 081 Node theNode = new Node(theXML.substring(0, theEndPos)); 082 theNode.setNill(true); 083 return theNode; 084 } 085 } 086 087 if (theXML.charAt(theEndPos - 1) == '>') { 088 theEndPos--; 089 } 090 091 String theTagName = theXML.substring(0, theEndPos); 092 093 theXML = theXML.substring(theEndPos).trim(); 094 095 int theTagEndPos = theXML.indexOf('>'); 096 097 boolean theEmptyTag = theTagEndPos != 0 && theXML.charAt(theTagEndPos - 1) == '/'; 098 099 Node theNode = new Node(theTagName); 100 theNode.setNill(theEmptyTag); 101 102 String theAttributeText = theXML.substring(0, theTagEndPos).trim(); 103 parseAtributeText(theNode, theAttributeText); 104 105 theTagEndPos++; 106 107 theXML = theXML.substring(theTagEndPos).trim(); 108 109 if (theEmptyTag == false) { 110 111 int theInnerTagEndPos = getEndTagPosition(theTagName, theXML, 1); 112 113 if (theInnerTagEndPos < 0) { 114 throw new RuntimeException("Not found tag: " + "</" + theTagName + ">\nin text:\n" + theXML); 115 } 116 117 String theInnerText = theXML.substring(0, theInnerTagEndPos).trim(); 118 119 Node[] theNodeArray = getObjectArray(theInnerText); 120 if (theNodeArray.length > 0) { 121 for (int i = 0; i < theNodeArray.length; i++) { 122 theNode.addInnerTag(theNodeArray[i]); 123 } 124 } 125 } 126 127 return theNode; 128 } 129 130 private String replaceComment(String theXML) { 131 132 int theStartComment = 0; 133 int theEndComment = 0; 134 StringBuffer theBuffer = new StringBuffer(); 135 136 while ((theStartComment = theXML.indexOf("<!--", theEndComment)) >= 0) { 137 theBuffer.append(theXML.substring(theEndComment, theStartComment)); 138 theEndComment = theXML.indexOf("-->", theStartComment) + 3; 139 140 if (theEndComment > 0) { 141 theBuffer.append("<Comment>"); 142 theBuffer.append(theXML.substring(theStartComment + 4, theEndComment - 3).replaceAll("<", "<") 143 .replaceAll(">", ">")); 144 theBuffer.append("</Comment>"); 145 } 146 147 } 148 149 theBuffer.append(theXML.substring(theEndComment)); 150 String s = theBuffer.toString(); 151 theBuffer.setLength(0); 152 theBuffer = null; 153 154 return deleteDoctype(s); 155 156 } 157 158 private static String deleteDoctype(String theXML) { 159 int theStartComment = 0; 160 int theEndComment = 0; 161 StringBuffer theBuffer = new StringBuffer(); 162 163 while ((theStartComment = theXML.indexOf("<!DOCTYPE", theEndComment)) >= 0) { 164 theBuffer.append(theXML.substring(theEndComment, theStartComment)); 165 theEndComment = theXML.indexOf(">", theStartComment) + 1; 166 } 167 168 theBuffer.append(theXML.substring(theEndComment)); 169 String s = theBuffer.toString(); 170 theBuffer.setLength(0); 171 theBuffer = null; 172 173 return s; 174 } 175 176 private int getEndTagPosition(String theTagName, String theXML, int theLevet) { 177 178 int theTagEndPos = 0; 179 int theInnerTagEndPos = 0; 180 int theStart = 0; 181 182 int theCounter = theLevet; 183 184 String theTag = "</" + theTagName + ">"; 185 186 do { 187 theTagEndPos = theXML.indexOf(theTag, theStart); 188 189 theInnerTagEndPos = theXML.indexOf("<" + theTagName + " ", theStart); 190 if (theInnerTagEndPos < 0) { 191 theInnerTagEndPos = theXML.length(); 192 } 193 194 int theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "/", theStart); 195 if (theInnerTagEndPos1 >= 0) { 196 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 197 } 198 199 theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + ">", theStart); 200 if (theInnerTagEndPos1 >= 0) { 201 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 202 } 203 204 theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\r", theStart); 205 if (theInnerTagEndPos1 >= 0) { 206 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 207 } 208 209 theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\n", theStart); 210 if (theInnerTagEndPos1 >= 0) { 211 theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos); 212 } 213 214 if (theTagEndPos > theInnerTagEndPos && theInnerTagEndPos >= 0) { 215 // Check for an empty tag. 216 int theEndPbrk = theXML.indexOf(">", theInnerTagEndPos); 217 if (theXML.charAt(theEndPbrk - 1) != '/') { 218 theCounter++; 219 } 220 } else { 221 theCounter--; 222 } 223 224 theStart = Math.min(theTagEndPos, theInnerTagEndPos) + 1; 225 226 if (theTagEndPos < 0) { 227 throw new IllegalArgumentException("The closing tag " + theTag + " is not retrieved. "); 228 } 229 230 } while (theCounter > 0); 231 232 return theTagEndPos; 233 } 234 235 public Node[] getObjectArray(String theXML) { 236 if (theXML == null) { 237 return null; 238 } 239 240 theXML = replaceComment(theXML); 241 ArrayList<Node> theNodeArray = new ArrayList<Node>(); 242 243 theXML = theXML.trim(); 244 245 while (theXML.length() > 0) { 246 247 Node theInnerTag = getObject(theXML); 248 theNodeArray.add(theInnerTag); 249 if (theInnerTag == null) { 250 break; 251 } 252 253 if (theInnerTag.isNill()) { 254 theXML = theXML.substring(theXML.indexOf("/>") + 2); 255 } 256 if (theInnerTag.isNill() == false && theInnerTag.getText() == null) { 257 int theInnerTagEndPos = getEndTagPosition(theInnerTag.getTag(), theXML, 0); 258 String theEndTag = "</" + theInnerTag.getTag() + ">"; 259 260 theXML = theXML.substring(theInnerTagEndPos + theEndTag.length()).trim(); 261 } 262 263 String theText = theInnerTag.getText(); 264 if (theInnerTag.isNill() && theText != null) { 265 int theEndText = theXML.indexOf('<'); 266 if (theEndText < 0) { 267 theEndText = theXML.length(); 268 } 269 theXML = theXML.substring(theEndText); 270 } 271 } 272 273 int theNumberNode = theNodeArray.size(); 274 275 Node[] theResult = new Node[theNumberNode]; 276 277 for (int i = 0; i < theNumberNode; i++) { 278 theResult[i] = (Node) theNodeArray.get(i); 279 } 280 281 return theResult; 282 } 283 284 public void parseAtributeText(Node aNode, String aAttributeText) { 285 286 if (aAttributeText.length() > 0 && aAttributeText.charAt(aAttributeText.length() - 1) == '/') { 287 aAttributeText = aAttributeText.substring(0, aAttributeText.length() - 1); 288 } 289 290 while (aAttributeText.length() > 0) { 291 292 aAttributeText = aAttributeText.trim(); 293 int theEndLine = aAttributeText.indexOf('='); 294 295 String theName = aAttributeText.substring(0, theEndLine).trim(); 296 aAttributeText = aAttributeText.substring(theEndLine + 1).trim(); 297 298 char theBChar = aAttributeText.charAt(0); 299 int theBeginValue = 1; 300 int theEndValue = 0; 301 302 if (theBChar == '"' || theBChar == '\'') { 303 theEndValue = aAttributeText.indexOf(theBChar, 1); 304 } else { 305 for (theEndValue = 0; theEndValue < aAttributeText.length() && aAttributeText.charAt(theEndValue) != ' ' 306 && aAttributeText.charAt(theEndValue) != '\r' 307 && aAttributeText.charAt(theEndValue) != '\n'; theEndValue++) { 308 ; 309 } 310 theBeginValue = 0; 311 } 312 313 String theValue = aAttributeText.substring(theBeginValue, theEndValue); 314 theValue = StringEscapeUtils.unescapeXml(theValue); 315 aNode.setAttribute(theName, theValue); 316 aAttributeText = aAttributeText.substring(theEndValue + 1); 317 } 318 } 319 320 public Node load(String filePath) throws ParserException, ParseException, IOException { 321 if (new File(filePath).exists()) { 322 return new EasyParser().getObject(new File(filePath)); 323 } 324 325 URL entryUrl = new URL(filePath); 326 InputStream is = entryUrl.openStream(); 327 return new EasyParser().getObject(is); 328 } 329 330 public Node getObject(File theXMLFile) throws IOException { 331 int theLength = (int) theXMLFile.length(); 332 byte[] theBuffer = new byte[theLength]; 333 Node object = null; 334 335 try (FileInputStream theInputStream = new FileInputStream(theXMLFile)) { 336 theInputStream.read(theBuffer); 337 theInputStream.close(); 338 339 String theEncode = "UTF-8"; 340 341 int theEndHead = 0; 342 343 String theDocHead = new String(theBuffer, "UTF-8"); 344 if (theDocHead.indexOf("<?xml") == 0) { 345 theEndHead = theDocHead.indexOf("?>"); 346 Node theHead = getObject(theDocHead.substring(0, theEndHead) + "/>"); 347 theEncode = theHead.getAttribute("encoding"); 348 theEndHead += 2; 349 } 350 351 if (theEncode == null) 352 theEncode = "UTF-8"; 353 String theXML = new String(theBuffer, theEndHead, theBuffer.length - theEndHead, theEncode); 354 theBuffer = null; 355 356 object = getObject(theXML); 357 } 358 359 return object; 360 } 361 362 public static String replaceProperties(String aValue, String aFragment, String aNewFragment) { 363 int theBeginPos = 0; 364 int theEndPos = 0; 365 366 if (aValue == null) { 367 return aValue; 368 } 369 370 StringBuffer theStringBuffer = new StringBuffer(); 371 372 while (true) { 373 theBeginPos = aValue.indexOf(aFragment, theEndPos); 374 if (theBeginPos < 0) { 375 break; 376 } 377 378 theStringBuffer.append(aValue.substring(theEndPos, theBeginPos)); 379 theEndPos = theBeginPos + aFragment.length(); 380 381 theStringBuffer.append(aNewFragment); 382 } 383 384 theStringBuffer.append(aValue.substring(theEndPos)); 385 386 return theStringBuffer.toString(); 387 } 388 389}