001    /**
002     * Copyright (c) 2000-2013 Liferay, Inc. All rights reserved.
003     *
004     * This library is free software; you can redistribute it and/or modify it under
005     * the terms of the GNU Lesser General Public License as published by the Free
006     * Software Foundation; either version 2.1 of the License, or (at your option)
007     * any later version.
008     *
009     * This library is distributed in the hope that it will be useful, but WITHOUT
010     * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
011     * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
012     * details.
013     */
014    
015    package com.liferay.util;
016    
017    import java.io.IOException;
018    import java.io.Reader;
019    
020    import java.util.ArrayList;
021    import java.util.List;
022    
023    import javax.swing.text.MutableAttributeSet;
024    import javax.swing.text.html.HTML;
025    import javax.swing.text.html.HTMLEditorKit;
026    
/**
 * Collects the hyperlink targets and image sources found in an HTML document.
 *
 * <p>
 * The document is parsed once, inside the constructor, with the Swing HTML
 * parser. Each <code>href</code> value seen on an <code>a</code> tag is added
 * to the link list and each <code>src</code> value seen on an <code>img</code>
 * tag is added to the image list, in the order the parser reports the tags.
 * Values are stored as-is; duplicates are not removed.
 * </p>
 *
 * @author Brian Wing Shun Chan
 */
public class HTMLParser {

    /**
     * Parses the markup supplied by the reader and records every link and
     * image reference it contains.
     *
     * @param  reader the source of the HTML markup. This constructor does not
     *         close it explicitly.
     * @throws IOException if the parser fails while reading from the reader
     */
    public HTMLParser(Reader reader) throws IOException {
        HTMLEditorKit.Parser parser = new DefaultParser().getParser();

        // The last argument is the parser's ignoreCharSet flag: charset
        // directives inside the document do not abort parsing.

        parser.parse(reader, new HTMLCallback(), true);
    }

    /**
     * Returns the <code>src</code> values of the <code>img</code> tags that
     * were found.
     *
     * @return the list held by this instance (not a copy); never
     *         <code>null</code>
     */
    public List<String> getImages() {
        return _images;
    }

    /**
     * Returns the <code>href</code> values of the <code>a</code> tags that
     * were found.
     *
     * @return the list held by this instance (not a copy); never
     *         <code>null</code>
     */
    public List<String> getLinks() {
        return _links;
    }

    /**
     * Appends the value of the given attribute to the list when the attribute
     * is present and holds a string. Absent or non-string values are skipped.
     */
    private static void _addAttributeValue(
        List<String> values, MutableAttributeSet attributes,
        HTML.Attribute attribute) {

        Object value = attributes.getAttribute(attribute);

        if (value instanceof String) {
            values.add((String)value);
        }
    }

    /**
     * Records the reference carried by an <code>a</code> or <code>img</code>
     * tag; every other tag is ignored.
     */
    private void _collectReference(
        HTML.Tag tag, MutableAttributeSet attributes) {

        if (HTML.Tag.A.equals(tag)) {
            _addAttributeValue(_links, attributes, HTML.Attribute.HREF);
        }
        else if (HTML.Tag.IMG.equals(tag)) {
            _addAttributeValue(_images, attributes, HTML.Attribute.SRC);
        }
    }

    private final List<String> _images = new ArrayList<String>();
    private final List<String> _links = new ArrayList<String>();

    /**
     * Widens the visibility of {@link HTMLEditorKit#getParser()} so that the
     * enclosing class can obtain the kit's parser.
     */
    private static class DefaultParser extends HTMLEditorKit {

        @Override
        public HTMLEditorKit.Parser getParser() {
            return super.getParser();
        }

    }

    /**
     * Parser callback that feeds start tags and simple (empty) tags to the
     * enclosing instance. All other callbacks keep the inherited no-op
     * behavior, so text, end tags, comments, and parse errors are ignored.
     */
    private class HTMLCallback extends HTMLEditorKit.ParserCallback {

        @Override
        public void handleStartTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _collectReference(tag, attributes);
        }

        @Override
        public void handleSimpleTag(
            HTML.Tag tag, MutableAttributeSet attributes, int pos) {

            _collectReference(tag, attributes);
        }

    }

}