001    /**
002     * Copyright (c) 2000-2010 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.util;
016    
017    import java.io.IOException;
018    import java.io.Reader;
019    
020    import java.util.ArrayList;
021    import java.util.List;
022    
023    import javax.swing.text.MutableAttributeSet;
024    import javax.swing.text.html.HTML;
025    import javax.swing.text.html.HTMLEditorKit;
026    
/**
 * Parses an HTML document and collects the targets it references: the
 * <code>href</code> value of every <code>&lt;a&gt;</code> element and the
 * <code>src</code> value of every <code>&lt;img&gt;</code> element.
 *
 * <p>
 * The whole document is parsed inside the constructor; afterwards the
 * collected values are available through {@link #getLinks()} and
 * {@link #getImages()}, each in the order the parser reported them.
 * </p>
 *
 * @author Brian Wing Shun Chan
 */
public class HTMLParser {

	/**
	 * Parses the HTML supplied by the reader and records its link and image
	 * references.
	 *
	 * <p>
	 * This constructor does not itself close the reader.
	 * </p>
	 *
	 * @param  reader the source of the HTML to parse
	 * @throws IOException if the underlying parser fails while reading
	 */
	public HTMLParser(Reader reader) throws IOException {
		HTMLEditorKit.Parser parser = new DefaultParser().getParser();

		// The third argument (ignoreCharSet) is true so that charset
		// declarations inside the document are ignored by the parser.

		parser.parse(reader, new HTMLCallback(), true);
	}

	/**
	 * Returns the <code>src</code> values of the <code>&lt;img&gt;</code>
	 * elements found while parsing.
	 *
	 * @return the internal, mutable list of image sources (not a copy)
	 */
	public List<String> getImages() {
		return _images;
	}

	/**
	 * Returns the <code>href</code> values of the <code>&lt;a&gt;</code>
	 * elements found while parsing.
	 *
	 * @return the internal, mutable list of link targets (not a copy)
	 */
	public List<String> getLinks() {
		return _links;
	}

	private final List<String> _images = new ArrayList<String>();
	private final List<String> _links = new ArrayList<String>();

	/**
	 * Widens the visibility of {@link HTMLEditorKit#getParser()} so that the
	 * enclosing class can obtain the kit's parser. It needs no state from the
	 * enclosing instance, hence it is a static nested class.
	 */
	private static class DefaultParser extends HTMLEditorKit {

		@Override
		public HTMLEditorKit.Parser getParser() {
			return super.getParser();
		}

	}

	/**
	 * Parser callback that appends anchor targets and image sources to the
	 * enclosing instance's lists. All other parser events keep the inherited
	 * behavior of {@link HTMLEditorKit.ParserCallback}.
	 */
	private class HTMLCallback extends HTMLEditorKit.ParserCallback {

		@Override
		public void handleStartTag(
			HTML.Tag tag, MutableAttributeSet attributes, int pos) {

			collectReference(tag, attributes);
		}

		@Override
		public void handleSimpleTag(
			HTML.Tag tag, MutableAttributeSet attributes, int pos) {

			collectReference(tag, attributes);
		}

		/**
		 * Records the <code>href</code> of an anchor tag or the
		 * <code>src</code> of an image tag; every other tag is ignored.
		 * Shared by the start-tag and simple-tag handlers so that both
		 * apply exactly the same rules.
		 *
		 * @param tag the tag reported by the parser
		 * @param attributes the attributes reported together with the tag
		 */
		private void collectReference(
			HTML.Tag tag, MutableAttributeSet attributes) {

			if (HTML.Tag.A.equals(tag)) {
				addIfString(
					_links, attributes.getAttribute(HTML.Attribute.HREF));
			}
			else if (HTML.Tag.IMG.equals(tag)) {
				addIfString(
					_images, attributes.getAttribute(HTML.Attribute.SRC));
			}
		}

		/**
		 * Appends the value to the list when it is a string. A missing
		 * attribute (<code>null</code>) or a value of any other type is
		 * skipped rather than failing on a cast.
		 *
		 * @param values the list to append to
		 * @param value the raw attribute value, possibly <code>null</code>
		 */
		private void addIfString(List<String> values, Object value) {
			if (value instanceof String) {
				values.add((String)value);
			}
		}

	}

}