001package com.ganteater.ae.util.xml.easyparser;
002
003import java.io.File;
004import java.io.FileInputStream;
005import java.io.IOException;
006import java.io.InputStream;
007import java.io.InputStreamReader;
008import java.net.URL;
009import java.text.ParseException;
010import java.util.ArrayList;
011
012import org.apache.commons.io.IOUtils;
013import org.apache.commons.lang.StringEscapeUtils;
014import org.apache.commons.lang.StringUtils;
015
016/**
017 * @author victort
018 */
019public class EasyParser {
020
021        final static boolean debug = false;
022
023        public EasyParser() {
024        }
025
026        public Node getObject(InputStream aInputStream) throws ParserException {
027                try {
028                        String xml = IOUtils.toString(new InputStreamReader(aInputStream, "UTF-8"));
029                        
030                        if (xml.indexOf("<?xml") == 0) {
031                                xml = StringUtils.substringAfter(xml, "?>");
032                        }
033
034                        return getObject(xml);
035
036                } catch (Exception e) {
037                        throw new ParserException(e);
038                }
039        }
040
041        public Node getObject(String theXML) {
042
043                if (theXML == null) {
044                        return null;
045                }
046
047                int start = -1;
048                do {
049                        start++;
050                        start = theXML.indexOf("<", start);
051                } while (theXML.charAt(start + 1) == '!' && theXML.charAt(start + 2) == '-');
052
053                if (start < 0)
054                        start = 0;
055
056                theXML = replaceComment(theXML.substring(start));
057
058                theXML = theXML.trim();
059
060                if (theXML.charAt(0) != '<' || theXML.charAt(theXML.length() - 1) != '>') {
061                        // text block
062                        int theEndText = theXML.indexOf('<');
063                        if (theEndText < 0) {
064                                theEndText = theXML.length();
065                        }
066
067                        Node theNode = new Node(Node.TEXT_TEAG_NAME);
068                        String theText = StringEscapeUtils.unescapeXml(theXML.substring(0, theEndText));
069                        theNode.setText(theText);
070                        return theNode;
071                }
072
073                theXML = theXML.substring(1).trim();
074
075                int theEndPos;
076
077                for (theEndPos = 0; theEndPos < theXML.length() && theXML.charAt(theEndPos) != ' '
078                                && theXML.charAt(theEndPos) != '\r' && theXML.charAt(theEndPos) != '>'
079                                && theXML.charAt(theEndPos) != '\n'; theEndPos++) {
080                        if (theXML.charAt(theEndPos) == '/') {
081                                Node theNode = new Node(theXML.substring(0, theEndPos));
082                                theNode.setNill(true);
083                                return theNode;
084                        }
085                }
086
087                if (theXML.charAt(theEndPos - 1) == '>') {
088                        theEndPos--;
089                }
090
091                String theTagName = theXML.substring(0, theEndPos);
092
093                theXML = theXML.substring(theEndPos).trim();
094
095                int theTagEndPos = theXML.indexOf('>');
096
097                boolean theEmptyTag = theTagEndPos != 0 && theXML.charAt(theTagEndPos - 1) == '/';
098
099                Node theNode = new Node(theTagName);
100                theNode.setNill(theEmptyTag);
101
102                String theAttributeText = theXML.substring(0, theTagEndPos).trim();
103                parseAtributeText(theNode, theAttributeText);
104
105                theTagEndPos++;
106
107                theXML = theXML.substring(theTagEndPos).trim();
108
109                if (theEmptyTag == false) {
110
111                        int theInnerTagEndPos = getEndTagPosition(theTagName, theXML, 1);
112
113                        if (theInnerTagEndPos < 0) {
114                                throw new RuntimeException("Not found tag: " + "</" + theTagName + ">\nin text:\n" + theXML);
115                        }
116
117                        String theInnerText = theXML.substring(0, theInnerTagEndPos).trim();
118
119                        Node[] theNodeArray = getObjectArray(theInnerText);
120                        if (theNodeArray.length > 0) {
121                                for (int i = 0; i < theNodeArray.length; i++) {
122                                        theNode.addInnerTag(theNodeArray[i]);
123                                }
124                        }
125                }
126
127                return theNode;
128        }
129
130        private String replaceComment(String theXML) {
131
132                int theStartComment = 0;
133                int theEndComment = 0;
134                StringBuffer theBuffer = new StringBuffer();
135
136                while ((theStartComment = theXML.indexOf("<!--", theEndComment)) >= 0) {
137                        theBuffer.append(theXML.substring(theEndComment, theStartComment));
138                        theEndComment = theXML.indexOf("-->", theStartComment) + 3;
139
140                        if (theEndComment > 0) {
141                                theBuffer.append("<Comment>");
142                                theBuffer.append(theXML.substring(theStartComment + 4, theEndComment - 3).replaceAll("<", "&lt;")
143                                                .replaceAll(">", "&gt;"));
144                                theBuffer.append("</Comment>");
145                        }
146
147                }
148
149                theBuffer.append(theXML.substring(theEndComment));
150                String s = theBuffer.toString();
151                theBuffer.setLength(0);
152                theBuffer = null;
153
154                return deleteDoctype(s);
155
156        }
157
158        private static String deleteDoctype(String theXML) {
159                int theStartComment = 0;
160                int theEndComment = 0;
161                StringBuffer theBuffer = new StringBuffer();
162
163                while ((theStartComment = theXML.indexOf("<!DOCTYPE", theEndComment)) >= 0) {
164                        theBuffer.append(theXML.substring(theEndComment, theStartComment));
165                        theEndComment = theXML.indexOf(">", theStartComment) + 1;
166                }
167
168                theBuffer.append(theXML.substring(theEndComment));
169                String s = theBuffer.toString();
170                theBuffer.setLength(0);
171                theBuffer = null;
172
173                return s;
174        }
175
176        private int getEndTagPosition(String theTagName, String theXML, int theLevet) {
177
178                int theTagEndPos = 0;
179                int theInnerTagEndPos = 0;
180                int theStart = 0;
181
182                int theCounter = theLevet;
183
184                String theTag = "</" + theTagName + ">";
185
186                do {
187                        theTagEndPos = theXML.indexOf(theTag, theStart);
188
189                        theInnerTagEndPos = theXML.indexOf("<" + theTagName + " ", theStart);
190                        if (theInnerTagEndPos < 0) {
191                                theInnerTagEndPos = theXML.length();
192                        }
193
194                        int theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "/", theStart);
195                        if (theInnerTagEndPos1 >= 0) {
196                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
197                        }
198
199                        theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + ">", theStart);
200                        if (theInnerTagEndPos1 >= 0) {
201                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
202                        }
203
204                        theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\r", theStart);
205                        if (theInnerTagEndPos1 >= 0) {
206                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
207                        }
208
209                        theInnerTagEndPos1 = theXML.indexOf("<" + theTagName + "\n", theStart);
210                        if (theInnerTagEndPos1 >= 0) {
211                                theInnerTagEndPos = Math.min(theInnerTagEndPos1, theInnerTagEndPos);
212                        }
213
214                        if (theTagEndPos > theInnerTagEndPos && theInnerTagEndPos >= 0) {
215                                // Check for an empty tag.
216                                int theEndPbrk = theXML.indexOf(">", theInnerTagEndPos);
217                                if (theXML.charAt(theEndPbrk - 1) != '/') {
218                                        theCounter++;
219                                }
220                        } else {
221                                theCounter--;
222                        }
223
224                        theStart = Math.min(theTagEndPos, theInnerTagEndPos) + 1;
225
226                        if (theTagEndPos < 0) {
227                                throw new IllegalArgumentException("The closing tag " + theTag + " is not retrieved. ");
228                        }
229
230                } while (theCounter > 0);
231
232                return theTagEndPos;
233        }
234
235        public Node[] getObjectArray(String theXML) {
236                if (theXML == null) {
237                        return null;
238                }
239
240                theXML = replaceComment(theXML);
241                ArrayList<Node> theNodeArray = new ArrayList<Node>();
242
243                theXML = theXML.trim();
244
245                while (theXML.length() > 0) {
246
247                        Node theInnerTag = getObject(theXML);
248                        theNodeArray.add(theInnerTag);
249                        if (theInnerTag == null) {
250                                break;
251                        }
252
253                        if (theInnerTag.isNill()) {
254                                theXML = theXML.substring(theXML.indexOf("/>") + 2);
255                        }
256                        if (theInnerTag.isNill() == false && theInnerTag.getText() == null) {
257                                int theInnerTagEndPos = getEndTagPosition(theInnerTag.getTag(), theXML, 0);
258                                String theEndTag = "</" + theInnerTag.getTag() + ">";
259
260                                theXML = theXML.substring(theInnerTagEndPos + theEndTag.length()).trim();
261                        }
262
263                        String theText = theInnerTag.getText();
264                        if (theInnerTag.isNill() && theText != null) {
265                                int theEndText = theXML.indexOf('<');
266                                if (theEndText < 0) {
267                                        theEndText = theXML.length();
268                                }
269                                theXML = theXML.substring(theEndText);
270                        }
271                }
272
273                int theNumberNode = theNodeArray.size();
274
275                Node[] theResult = new Node[theNumberNode];
276
277                for (int i = 0; i < theNumberNode; i++) {
278                        theResult[i] = (Node) theNodeArray.get(i);
279                }
280
281                return theResult;
282        }
283
284        public void parseAtributeText(Node aNode, String aAttributeText) {
285
286                if (aAttributeText.length() > 0 && aAttributeText.charAt(aAttributeText.length() - 1) == '/') {
287                        aAttributeText = aAttributeText.substring(0, aAttributeText.length() - 1);
288                }
289
290                while (aAttributeText.length() > 0) {
291
292                        aAttributeText = aAttributeText.trim();
293                        int theEndLine = aAttributeText.indexOf('=');
294
295                        String theName = aAttributeText.substring(0, theEndLine).trim();
296                        aAttributeText = aAttributeText.substring(theEndLine + 1).trim();
297
298                        char theBChar = aAttributeText.charAt(0);
299                        int theBeginValue = 1;
300                        int theEndValue = 0;
301
302                        if (theBChar == '"' || theBChar == '\'') {
303                                theEndValue = aAttributeText.indexOf(theBChar, 1);
304                        } else {
305                                for (theEndValue = 0; theEndValue < aAttributeText.length() && aAttributeText.charAt(theEndValue) != ' '
306                                                && aAttributeText.charAt(theEndValue) != '\r'
307                                                && aAttributeText.charAt(theEndValue) != '\n'; theEndValue++) {
308                                        ;
309                                }
310                                theBeginValue = 0;
311                        }
312
313                        String theValue = aAttributeText.substring(theBeginValue, theEndValue);
314                        theValue = StringEscapeUtils.unescapeXml(theValue);
315                        aNode.setAttribute(theName, theValue);
316                        aAttributeText = aAttributeText.substring(theEndValue + 1);
317                }
318        }
319
320        public Node load(String filePath) throws ParserException, ParseException, IOException {
321                if (new File(filePath).exists()) {
322                        return new EasyParser().getObject(new File(filePath));
323                }
324
325                URL entryUrl = new URL(filePath);
326                InputStream is = entryUrl.openStream();
327                return new EasyParser().getObject(is);
328        }
329
330        public Node getObject(File theXMLFile) throws IOException {
331                int theLength = (int) theXMLFile.length();
332                byte[] theBuffer = new byte[theLength];
333                Node object = null;
334
335                try (FileInputStream theInputStream = new FileInputStream(theXMLFile)) {
336                        theInputStream.read(theBuffer);
337                        theInputStream.close();
338
339                        String theEncode = "UTF-8";
340
341                        int theEndHead = 0;
342
343                        String theDocHead = new String(theBuffer, "UTF-8");
344                        if (theDocHead.indexOf("<?xml") == 0) {
345                                theEndHead = theDocHead.indexOf("?>");
346                                Node theHead = getObject(theDocHead.substring(0, theEndHead) + "/>");
347                                theEncode = theHead.getAttribute("encoding");
348                                theEndHead += 2;
349                        }
350
351                        if (theEncode == null)
352                                theEncode = "UTF-8";
353                        String theXML = new String(theBuffer, theEndHead, theBuffer.length - theEndHead, theEncode);
354                        theBuffer = null;
355
356                        object = getObject(theXML);
357                }
358
359                return object;
360        }
361
362        public static String replaceProperties(String aValue, String aFragment, String aNewFragment) {
363                int theBeginPos = 0;
364                int theEndPos = 0;
365
366                if (aValue == null) {
367                        return aValue;
368                }
369
370                StringBuffer theStringBuffer = new StringBuffer();
371
372                while (true) {
373                        theBeginPos = aValue.indexOf(aFragment, theEndPos);
374                        if (theBeginPos < 0) {
375                                break;
376                        }
377
378                        theStringBuffer.append(aValue.substring(theEndPos, theBeginPos));
379                        theEndPos = theBeginPos + aFragment.length();
380
381                        theStringBuffer.append(aNewFragment);
382                }
383
384                theStringBuffer.append(aValue.substring(theEndPos));
385
386                return theStringBuffer.toString();
387        }
388
389}