1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portlet.wiki.translators;
24  
25  import com.liferay.portal.kernel.util.StringPool;
26  import com.liferay.portlet.wiki.importers.mediawiki.MediaWikiImporter;
27  
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  /**
32   * <a href="MediaWikiToCreoleTranslator.java.html"><b><i>View Source</i></b></a>
33   *
34   * @author Jorge Ferrer
35   *
36   */
37  public class MediaWikiToCreoleTranslator extends BaseTranslator {
38  
39      public static final String TABLE_OF_CONTENTS = "<<TableOfContents>>\n\n";
40  
41      public MediaWikiToCreoleTranslator() {
42          initRegexps();
43          initNowikiRegexps();
44      }
45  
46      protected void initNowikiRegexps() {
47  
48          // Preformat protected
49  
50          nowikiRegexps.add("(<nowiki>)(.*?)(</nowiki>)");
51          nowikiRegexps.add("(<pre>)(.*?)(</pre>)");
52  
53          // Escape protected
54  
55          nowikiRegexps.add(
56              "~(\\*\\*|~|//|-|#|\\{\\{|}}|\\\\|~\\[~~[|]]|----|=|\\|)");
57      }
58  
59      protected void initRegexps() {
60  
61          // Clean unnecessary header emphasis
62  
63          regexps.put("= '''([^=]+)''' =", "= $1 =");
64          regexps.put("== '''([^=]+)''' ==", "== $1 ==");
65          regexps.put("== '''([^=]+)''' ===", "=== $1 ===");
66  
67          // Unscape angle brackets
68  
69          regexps.put("&lt;", "<");
70          regexps.put("&gt;", ">");
71  
72          // Remove categories
73  
74          regexps.put("\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*", "");
75  
76          // Remove disambiguations
77  
78          regexps.put("\\{{2}OtherTopics\\|([^\\}]*)\\}{2}", StringPool.BLANK);
79  
80          // Remove work in progress
81  
82          regexps.put("\\{{2}Work in progress\\}{2}", StringPool.BLANK);
83  
84          // Bold and italics
85  
86          regexps.put(
87              "''''((?s:.)*?)(''''|(\n\n|\r\r|\r\n\r\n))", "**//$1//**$3");
88  
89          // Bold
90  
91          regexps.put("'''((?s:.)*?)('''|(\n\n|\r\r|\r\n\r\n))", "**$1**$3");
92  
93          // Italics
94  
95          regexps.put("''((?s:.)*?)(''|(\n\n|\r\r|\r\n\r\n))", "//$1//$3");
96  
97          // Normalize URLs
98  
99          regexps.put("\\[{2}((http|ftp)[^ ]*) ([^\\]]*)\\]{2}", "[$1 $3]");
100 
101         // URL
102 
103         regexps.put("\\[((http|ftp)[^ ]*)\\]", "[[$1]]");
104 
105         // URL with label
106 
107         regexps.put("\\[((http|ftp)[^ ]*) ([^\\]]*)\\]", "[[$1|$3]]");
108 
109         // Term and definition
110 
111         regexps.put("^\\t([\\w]+):\\t(.*)", "**$1**:\n$2");
112 
113         // Indented paragraph
114 
115         regexps.put("^\\t:\\t(.*)", "$1");
116 
117         // Monospace
118 
119         regexps.put("(^ (.+))(\\n (.+))*", "{{{\n$0\n}}}");
120 
121         // No wiki
122 
123         regexps.put("<nowiki>([^<]*)</nowiki>", "{{{$1}}}");
124 
125         // HTML PRE
126 
127         regexps.put("<pre>([^<]*)</pre>", "{{{$1}}}");
128 
129         // User reference
130 
131         regexps.put("[-]*\\[{2}User:([^\\]]*)\\]{2}", "$1");
132     }
133 
134     protected String postProcess(String content) {
135 
136         // LEP-6118
137 
138         Matcher matcher = Pattern.compile(
139             "^=([^=]+)=", Pattern.MULTILINE).matcher(content);
140 
141         if (matcher.find()) {
142             content = runRegexp(content, "^===([^=]+)===", "====$1====");
143             content = runRegexp(content, "^==([^=]+)==", "===$1===");
144             content = runRegexp(content, "^=([^=]+)=", "==$1==");
145         }
146 
147         // Remove HTML tags
148 
149         for (int i = 0; i < _HTML_TAGS.length; i++) {
150             content = content.replaceAll(_HTML_TAGS[i], StringPool.BLANK);
151         }
152 
153         // Images
154 
155         matcher = Pattern.compile(
156             "\\[{2}Image:([^\\]]*)\\]{2}", Pattern.DOTALL).matcher(content);
157 
158         StringBuffer sb = new StringBuffer(content);
159 
160         int offset = 0;
161 
162         while (matcher.find()) {
163             String image =
164                 "{{" + MediaWikiImporter.SHARED_IMAGES_TITLE + "/" +
165                     matcher.group(1).toLowerCase() + "}}";
166 
167             sb.replace(
168                 matcher.start(0) + offset, matcher.end(0) + offset, image);
169 
170             offset += MediaWikiImporter.SHARED_IMAGES_TITLE.length() - 5;
171         }
172 
173         content = sb.toString();
174 
175         // Remove underscores from links
176 
177         matcher = Pattern.compile(
178             "\\[{2}([^\\]]*)\\]{2}", Pattern.DOTALL).matcher(content);
179 
180         sb = new StringBuffer(content);
181 
182         while (matcher.find()) {
183             String link = matcher.group(1).replace(
184                 StringPool.UNDERLINE, StringPool.SPACE);
185 
186             sb.replace(matcher.start(1), matcher.end(1), link);
187         }
188 
189         return TABLE_OF_CONTENTS + super.postProcess(sb.toString());
190     }
191 
192     private static final String[] _HTML_TAGS = {
193         "<blockquote>", "</blockquote>", "<br>", "<br/>", "<br />",  "<center>",
194         "</center>", "<cite>", "</cite>","<code>", "</code>", "<div[^>]*>",
195         "</div>", "<font[^>]*>", "</font>", "<hr>", "<hr/>", "<hr />", "<p>",
196         "</p>", "<tt>", "</tt>", "<var>", "</var>"};
197 
198 }