1   /**
2    * Copyright (c) 2000-2009 Liferay, Inc. All rights reserved.
3    *
4    * Permission is hereby granted, free of charge, to any person obtaining a copy
5    * of this software and associated documentation files (the "Software"), to deal
6    * in the Software without restriction, including without limitation the rights
7    * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8    * copies of the Software, and to permit persons to whom the Software is
9    * furnished to do so, subject to the following conditions:
10   *
11   * The above copyright notice and this permission notice shall be included in
12   * all copies or substantial portions of the Software.
13   *
14   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15   * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16   * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17   * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18   * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20   * SOFTWARE.
21   */
22  
23  package com.liferay.portlet.wiki.importers.mediawiki;
24  
25  import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26  import com.liferay.portal.NoSuchUserException;
27  import com.liferay.portal.PortalException;
28  import com.liferay.portal.SystemException;
29  import com.liferay.portal.kernel.log.Log;
30  import com.liferay.portal.kernel.log.LogFactoryUtil;
31  import com.liferay.portal.kernel.util.ArrayUtil;
32  import com.liferay.portal.kernel.util.ObjectValuePair;
33  import com.liferay.portal.kernel.util.ProgressTracker;
34  import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
35  import com.liferay.portal.kernel.util.StringPool;
36  import com.liferay.portal.kernel.util.StringUtil;
37  import com.liferay.portal.kernel.util.Validator;
38  import com.liferay.portal.kernel.xml.Document;
39  import com.liferay.portal.kernel.xml.DocumentException;
40  import com.liferay.portal.kernel.xml.Element;
41  import com.liferay.portal.kernel.xml.SAXReaderUtil;
42  import com.liferay.portal.kernel.zip.ZipReader;
43  import com.liferay.portal.model.User;
44  import com.liferay.portal.service.ServiceContext;
45  import com.liferay.portal.service.UserLocalServiceUtil;
46  import com.liferay.portal.util.PropsValues;
47  import com.liferay.portlet.tags.NoSuchEntryException;
48  import com.liferay.portlet.tags.model.TagsEntry;
49  import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
50  import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
51  import com.liferay.portlet.tags.util.TagsUtil;
52  import com.liferay.portlet.wiki.ImportFilesException;
53  import com.liferay.portlet.wiki.NoSuchPageException;
54  import com.liferay.portlet.wiki.importers.WikiImporter;
55  import com.liferay.portlet.wiki.importers.WikiImporterKeys;
56  import com.liferay.portlet.wiki.model.WikiNode;
57  import com.liferay.portlet.wiki.model.WikiPage;
58  import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
59  import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
60  import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
61  import com.liferay.util.MapUtil;
62  
63  import java.io.BufferedReader;
64  import java.io.File;
65  import java.io.FileReader;
66  import java.io.IOException;
67  
68  import java.util.ArrayList;
69  import java.util.Collections;
70  import java.util.HashMap;
71  import java.util.Iterator;
72  import java.util.List;
73  import java.util.Map;
74  import java.util.regex.Matcher;
75  import java.util.regex.Pattern;
76  
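/**
 * <a href="MediaWikiImporter.java.html"><b><i>View Source</i></b></a>
 *
 * <p>
 * Wiki importer that reads a MediaWiki XML pages dump, plus an optional users
 * file and an optional ZIP archive of images, and loads their contents into a
 * wiki node.
 * </p>
 *
 * @author Alvaro del Castillo
 * @author Jorge Ferrer
 *
 */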
84  public class MediaWikiImporter implements WikiImporter {
85  
    /**
     * Text passed as the content argument when the importer has to create the
     * shared images page.
     */
    public static final String SHARED_IMAGES_CONTENT = "See attachments";

    /**
     * Title of the wiki page that the imported images are attached to.
     */
    public static final String SHARED_IMAGES_TITLE = "SharedImages";
89  
90      public void importPages(
91              long userId, WikiNode node, File[] files,
92              Map<String, String[]> options)
93          throws PortalException {
94  
95          if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
96              throw new PortalException("The pages file is mandatory");
97          }
98  
99          File pagesFile = files[0];
100         File usersFile = files[1];
101         File imagesFile = files[2];
102 
103         try {
104             Document doc = SAXReaderUtil.read(pagesFile);
105 
106             Map<String, String> usersMap = readUsersFile(usersFile);
107 
108             Element root = doc.getRootElement();
109 
110             List<String> specialNamespaces = readSpecialNamespaces(root);
111 
112             processSpecialPages(userId, node, root, specialNamespaces);
113             processRegularPages(
114                 userId, node, root, specialNamespaces, usersMap, imagesFile,
115                 options);
116             processImages(userId, node, imagesFile);
117 
118             moveFrontPage(userId, node, options);
119         }
120         catch (DocumentException de) {
121             throw new ImportFilesException("Invalid XML file provided");
122         }
123         catch (IOException de) {
124             throw new ImportFilesException("Error reading the files provided");
125         }
126         catch (PortalException e) {
127             throw e;
128         }
129         catch (Exception e) {
130             throw new PortalException(e);
131         }
132     }
133 
134     protected long getUserId(
135             long userId, WikiNode node, String author,
136             Map<String, String> usersMap)
137         throws PortalException, SystemException {
138 
139         User user = null;
140 
141         String emailAddress = usersMap.get(author);
142 
143         try {
144             if (Validator.isNull(emailAddress)) {
145                 user = UserLocalServiceUtil.getUserByScreenName(
146                     node.getCompanyId(), author.toLowerCase());
147             }
148             else {
149                 user = UserLocalServiceUtil.getUserByEmailAddress(
150                     node.getCompanyId(), emailAddress);
151             }
152         }
153         catch (NoSuchUserException nsue) {
154             user = UserLocalServiceUtil.getUserById(userId);
155         }
156 
157         return user.getUserId();
158     }
159 
160     protected void importPage(
161             long userId, String author, WikiNode node, String title,
162             String content, String summary, Map<String, String> usersMap)
163         throws PortalException {
164 
165         try {
166             long authorUserId = getUserId(userId, node, author, usersMap);
167             String parentTitle = readParentTitle(content);
168             String redirectTitle = readRedirectTitle(content);
169 
170             ServiceContext serviceContext = new ServiceContext();
171 
172             serviceContext.setTagsEntries(
173                 readTagsEntries(userId, node, content));
174 
175             if (Validator.isNull(redirectTitle)) {
176                 content = _translator.translate(content);
177             }
178             else {
179                 content =
180                     StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
181                         StringPool.DOUBLE_CLOSE_BRACKET;
182             }
183 
184             WikiPage page = null;
185 
186             try {
187                 page = WikiPageLocalServiceUtil.getPage(
188                     node.getNodeId(), title);
189             }
190             catch (NoSuchPageException nspe) {
191                 page = WikiPageLocalServiceUtil.addPage(
192                     authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
193                     null, true, serviceContext);
194             }
195 
196             WikiPageLocalServiceUtil.updatePage(
197                 authorUserId, node.getNodeId(), title, page.getVersion(),
198                 content, summary, true, "creole", parentTitle, redirectTitle,
199                 serviceContext);
200         }
201         catch (Exception e) {
202             throw new PortalException("Error importing page " + title, e);
203         }
204     }
205 
206     protected boolean isSpecialMediaWikiPage(
207         String title, List<String> specialNamespaces) {
208 
209         for (String namespace: specialNamespaces) {
210             if (title.startsWith(namespace + StringPool.COLON)) {
211                 return true;
212             }
213         }
214 
215         return false;
216     }
217 
218     protected boolean isValidImage(String[] paths, byte[] bytes) {
219         if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
220             return false;
221         }
222 
223         if ((paths.length > 1) &&
224             (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
225 
226             return false;
227         }
228 
229         String fileName = paths[paths.length - 1];
230 
231         try {
232             DLLocalServiceUtil.validate(fileName, bytes);
233         }
234         catch (PortalException pe) {
235             return false;
236         }
237 
238         return true;
239     }
240 
241     protected void moveFrontPage(
242         long userId, WikiNode node, Map<String, String[]> options) {
243 
244         String frontPageTitle = MapUtil.getString(
245             options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
246 
247         if (Validator.isNotNull(frontPageTitle)) {
248             frontPageTitle = normalizeTitle(frontPageTitle);
249 
250             try {
251                 if (WikiPageLocalServiceUtil.getPagesCount(
252                         node.getNodeId(), frontPageTitle, true) > 0) {
253 
254                     ServiceContext serviceContext = new ServiceContext();
255 
256                     WikiPageLocalServiceUtil.movePage(
257                         userId, node.getNodeId(), frontPageTitle,
258                         WikiPageImpl.FRONT_PAGE, false, serviceContext);
259 
260                 }
261             }
262             catch (Exception e) {
263                 if (_log.isWarnEnabled()) {
264                     StringBuilder sb = new StringBuilder();
265 
266                     sb.append("Could not move ");
267                     sb.append(WikiPageImpl.FRONT_PAGE);
268                     sb.append(" to the title provided: ");
269                     sb.append(frontPageTitle);
270 
271                     _log.warn(sb.toString(), e);
272                 }
273             }
274 
275         }
276 
277     }
278 
279     protected String normalize(String categoryName, int length) {
280         categoryName = TagsUtil.toWord(categoryName.trim());
281 
282         return StringUtil.shorten(categoryName, length);
283     }
284 
285     protected String normalizeDescription(String description) {
286         description = description.replaceAll(
287             _categoriesPattern.pattern(), StringPool.BLANK);
288 
289         return normalize(description, 300);
290     }
291 
292     protected String normalizeTitle(String title) {
293         title = title.replaceAll(
294             PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
295 
296         return StringUtil.shorten(title, 75);
297     }
298 
299     private void processImages(long userId, WikiNode node, File imagesFile)
300         throws Exception {
301 
302         if ((imagesFile == null) || (!imagesFile.exists())) {
303             return;
304         }
305 
306         ProgressTracker progressTracker =
307             ProgressTrackerThreadLocal.getProgressTracker();
308 
309         int count = 0;
310 
311         ZipReader zipReader = new ZipReader(imagesFile);
312 
313         Map<String, byte[]> entries = zipReader.getEntries();
314 
315         int total = entries.size();
316 
317         if (total > 0) {
318             try {
319                 WikiPageLocalServiceUtil.getPage(
320                     node.getNodeId(), SHARED_IMAGES_TITLE);
321             }
322             catch (NoSuchPageException nspe) {
323                 ServiceContext serviceContext = new ServiceContext();
324 
325                 WikiPageLocalServiceUtil.addPage(
326                     userId, node.getNodeId(), SHARED_IMAGES_TITLE,
327                     SHARED_IMAGES_CONTENT, null, true, serviceContext);
328             }
329         }
330 
331         List<ObjectValuePair<String, byte[]>> attachments =
332             new ArrayList<ObjectValuePair<String, byte[]>>();
333 
334         Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
335 
336         int percentage = 50;
337 
338         for (int i = 0; itr.hasNext(); i++) {
339             Map.Entry<String, byte[]> entry = itr.next();
340 
341             String key = entry.getKey();
342             byte[] value = entry.getValue();
343 
344             if (key.endsWith(StringPool.SLASH)) {
345                 if (_log.isInfoEnabled()) {
346                     _log.info("Ignoring " + key);
347                 }
348 
349                 continue;
350             }
351 
352             String[] paths = StringUtil.split(key, StringPool.SLASH);
353 
354             if (!isValidImage(paths, value)) {
355                 if (_log.isInfoEnabled()) {
356                     _log.info("Ignoring " + key);
357                 }
358 
359                 continue;
360             }
361 
362             String fileName = paths[paths.length - 1].toLowerCase();
363 
364             attachments.add(
365                 new ObjectValuePair<String, byte[]>(fileName, value));
366 
367             count++;
368 
369             if ((i % 5) == 0) {
370                 WikiPageLocalServiceUtil.addPageAttachments(
371                     node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
372 
373                 attachments.clear();
374 
375                 percentage = Math.min(50 + (i * 50) / total, 99);
376 
377                 progressTracker.updateProgress(percentage);
378             }
379         }
380 
381         if (!attachments.isEmpty()) {
382             WikiPageLocalServiceUtil.addPageAttachments(
383                 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
384         }
385 
386         if (_log.isInfoEnabled()) {
387             _log.info("Imported " + count + " images into " + node.getName());
388         }
389     }
390 
391     protected void processRegularPages(
392         long userId, WikiNode node, Element root,
393         List<String> specialNamespaces, Map<String, String> usersMap,
394         File imagesFile, Map<String, String[]> options) {
395 
396         boolean importLatestVersion = MapUtil.getBoolean(
397             options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
398 
399         ProgressTracker progressTracker =
400             ProgressTrackerThreadLocal.getProgressTracker();
401 
402         int count = 0;
403 
404         List<Element> pages = root.elements("page");
405 
406         int total = pages.size();
407 
408         Iterator<Element> itr = root.elements("page").iterator();
409 
410         int percentage = 10;
411         int maxPercentage = 50;
412 
413         if ((imagesFile == null) || (!imagesFile.exists())) {
414             maxPercentage = 99;
415         }
416 
417         int percentageRange = maxPercentage - percentage;
418 
419         for (int i = 0; itr.hasNext(); i++) {
420             Element pageEl = itr.next();
421 
422             String title = pageEl.elementText("title");
423 
424             title = normalizeTitle(title);
425 
426             percentage = Math.min(
427                 10 + (i * percentageRange) / total, maxPercentage);
428 
429             progressTracker.updateProgress(percentage);
430 
431             if (isSpecialMediaWikiPage(title, specialNamespaces)) {
432                 continue;
433             }
434 
435             List<Element> revisionEls = pageEl.elements("revision");
436 
437             if (importLatestVersion) {
438                 Element lastRevisionEl = revisionEls.get(
439                     revisionEls.size() - 1);
440 
441                 revisionEls = new ArrayList<Element>();
442 
443                 revisionEls.add(lastRevisionEl);
444             }
445 
446             for (Element curRevisionEl : revisionEls) {
447                 String author = curRevisionEl.element(
448                     "contributor").elementText("username");
449                 String content = curRevisionEl.elementText("text");
450                 String summary = curRevisionEl.elementText("comment");
451 
452                 try {
453                     importPage(
454                         userId, author, node, title, content, summary,
455                         usersMap);
456                 }
457                 catch (Exception e) {
458                     if (_log.isWarnEnabled()) {
459                         StringBuilder sb = new StringBuilder();
460 
461                         sb.append("Page with title ");
462                         sb.append(title);
463                         sb.append(" could not be imported");
464 
465                         _log.warn(sb.toString(), e);
466                     }
467                 }
468             }
469 
470             count++;
471         }
472 
473         if (_log.isInfoEnabled()) {
474             _log.info("Imported " + count + " pages into " + node.getName());
475         }
476     }
477 
478     protected void processSpecialPages(
479             long userId, WikiNode node, Element root,
480             List<String> specialNamespaces)
481         throws PortalException {
482 
483         ProgressTracker progressTracker =
484             ProgressTrackerThreadLocal.getProgressTracker();
485 
486         List<Element> pages = root.elements("page");
487 
488         int total = pages.size();
489 
490         Iterator<Element> itr = pages.iterator();
491 
492         for (int i = 0; itr.hasNext(); i++) {
493             Element page = itr.next();
494 
495             String title = page.elementText("title");
496 
497             if (!title.startsWith("Category:")) {
498                 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
499                     root.remove(page);
500                 }
501 
502                 continue;
503             }
504 
505             String categoryName = title.substring("Category:".length());
506 
507             categoryName = normalize(categoryName, 75);
508 
509             String description = page.element("revision").elementText("text");
510 
511             description = normalizeDescription(description);
512 
513             try {
514                 TagsEntry tagsEntry = null;
515 
516                 try {
517                     tagsEntry = TagsEntryLocalServiceUtil.getEntry(
518                         node.getCompanyId(), categoryName);
519                 }
520                 catch (NoSuchEntryException nsee) {
521                     ServiceContext serviceContext = new ServiceContext();
522 
523                     serviceContext.setAddCommunityPermissions(true);
524                     serviceContext.setAddGuestPermissions(true);
525                     serviceContext.setScopeGroupId(node.getGroupId());
526 
527                     tagsEntry = TagsEntryLocalServiceUtil.addEntry(
528                         userId, null, categoryName, null, null, serviceContext);
529                 }
530 
531                 if (Validator.isNotNull(description)) {
532                     TagsPropertyLocalServiceUtil.addProperty(
533                         userId, tagsEntry.getEntryId(), "description",
534                         description);
535                 }
536             }
537             catch (SystemException se) {
538                  _log.error(se, se);
539             }
540 
541             if ((i % 5) == 0) {
542                 progressTracker.updateProgress((i * 10) / total);
543             }
544         }
545     }
546 
547     protected String readParentTitle(String content) {
548         Matcher matcher = _parentPattern.matcher(content);
549 
550         String redirectTitle = StringPool.BLANK;
551 
552         if (matcher.find()) {
553             redirectTitle = matcher.group(1);
554 
555             redirectTitle = normalizeTitle(redirectTitle);
556 
557             redirectTitle += " (disambiguation)";
558         }
559 
560         return redirectTitle;
561     }
562 
563     protected String readRedirectTitle(String content) {
564         Matcher matcher = _redirectPattern.matcher(content);
565 
566         String redirectTitle = StringPool.BLANK;
567 
568         if (matcher.find()) {
569             redirectTitle = matcher.group(1);
570 
571             redirectTitle = normalizeTitle(redirectTitle);
572         }
573 
574         return redirectTitle;
575     }
576 
577     protected List<String> readSpecialNamespaces(Element root)
578         throws ImportFilesException {
579 
580         List<String> namespaces = new ArrayList<String>();
581 
582         Element siteinfoEl = root.element("siteinfo");
583 
584         if (siteinfoEl == null) {
585             throw new ImportFilesException("Invalid pages XML file");
586         }
587 
588         Iterator<Element> itr = siteinfoEl.element(
589             "namespaces").elements("namespace").iterator();
590 
591         while (itr.hasNext()) {
592             Element namespace = itr.next();
593 
594             if (!namespace.attribute("key").getData().equals("0")) {
595                 namespaces.add(namespace.getText());
596             }
597         }
598 
599         return namespaces;
600     }
601 
602     protected String[] readTagsEntries(
603             long userId, WikiNode node, String content)
604         throws PortalException, SystemException {
605 
606         Matcher matcher = _categoriesPattern.matcher(content);
607 
608         List<String> tagsEntries = new ArrayList<String>();
609 
610         while (matcher.find()) {
611             String categoryName = matcher.group(1);
612 
613             categoryName = normalize(categoryName, 75);
614 
615             TagsEntry tagsEntry = null;
616 
617             try {
618                 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
619                     node.getCompanyId(), categoryName);
620             }
621             catch (NoSuchEntryException nsee) {
622                 ServiceContext serviceContext = new ServiceContext();
623 
624                 serviceContext.setAddCommunityPermissions(true);
625                 serviceContext.setAddGuestPermissions(true);
626                 serviceContext.setScopeGroupId(node.getGroupId());
627 
628                 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
629                     userId, null, categoryName, null, null, serviceContext);
630             }
631 
632             tagsEntries.add(tagsEntry.getName());
633         }
634 
635         if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
636             tagsEntries.add(_WORK_IN_PROGRESS_TAG);
637         }
638 
639         return tagsEntries.toArray(new String[tagsEntries.size()]);
640     }
641 
642     protected Map<String, String> readUsersFile(File usersFile)
643         throws IOException {
644 
645         if ((usersFile == null) || (!usersFile.exists())) {
646             return Collections.EMPTY_MAP;
647         }
648 
649         Map<String, String> usersMap = new HashMap<String, String>();
650 
651         BufferedReader reader = new BufferedReader(new FileReader(usersFile));
652 
653         String line = reader.readLine();
654 
655         while (line != null) {
656             String[] array = StringUtil.split(line);
657 
658             if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
659                 (Validator.isNotNull(array[1]))) {
660 
661                 usersMap.put(array[0], array[1]);
662             }
663             else {
664                 if (_log.isInfoEnabled()) {
665                     _log.info(
666                         "Ignoring line " + line +
667                             " because it does not contain exactly 2 columns");
668                 }
669             }
670 
671             line = reader.readLine();
672         }
673 
674         return usersMap;
675     }
676 
677     private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[]{
678         "thumb", "temp", "archive"};
679 
680     private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
681 
682     private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
683 
684     private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
685 
686     private static Pattern _categoriesPattern = Pattern.compile(
687         "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
688 
689     private static Pattern _parentPattern = Pattern.compile(
690         "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
691 
692     private static Pattern _redirectPattern = Pattern.compile(
693         "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
694 
695     private MediaWikiToCreoleTranslator _translator =
696         new MediaWikiToCreoleTranslator();
697 
698 }