001
014
015 package com.liferay.portlet.wiki.importers.mediawiki;
016
017 import com.liferay.portal.kernel.exception.PortalException;
018 import com.liferay.portal.kernel.exception.SystemException;
019 import com.liferay.portal.kernel.io.unsync.UnsyncBufferedReader;
020 import com.liferay.portal.kernel.log.Log;
021 import com.liferay.portal.kernel.log.LogFactoryUtil;
022 import com.liferay.portal.kernel.util.CharPool;
023 import com.liferay.portal.kernel.util.MapUtil;
024 import com.liferay.portal.kernel.util.ObjectValuePair;
025 import com.liferay.portal.kernel.util.ProgressTracker;
026 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
027 import com.liferay.portal.kernel.util.SetUtil;
028 import com.liferay.portal.kernel.util.StreamUtil;
029 import com.liferay.portal.kernel.util.StringBundler;
030 import com.liferay.portal.kernel.util.StringPool;
031 import com.liferay.portal.kernel.util.StringUtil;
032 import com.liferay.portal.kernel.util.Validator;
033 import com.liferay.portal.kernel.xml.Attribute;
034 import com.liferay.portal.kernel.xml.Document;
035 import com.liferay.portal.kernel.xml.DocumentException;
036 import com.liferay.portal.kernel.xml.Element;
037 import com.liferay.portal.kernel.xml.SAXReaderUtil;
038 import com.liferay.portal.kernel.zip.ZipReader;
039 import com.liferay.portal.kernel.zip.ZipReaderFactoryUtil;
040 import com.liferay.portal.model.User;
041 import com.liferay.portal.service.ServiceContext;
042 import com.liferay.portal.service.UserLocalServiceUtil;
043 import com.liferay.portal.util.PropsValues;
044 import com.liferay.portlet.asset.NoSuchTagException;
045 import com.liferay.portlet.asset.model.AssetTag;
046 import com.liferay.portlet.asset.service.AssetTagLocalServiceUtil;
047 import com.liferay.portlet.asset.service.AssetTagPropertyLocalServiceUtil;
048 import com.liferay.portlet.asset.util.AssetUtil;
049 import com.liferay.portlet.documentlibrary.store.DLStoreUtil;
050 import com.liferay.portlet.wiki.ImportFilesException;
051 import com.liferay.portlet.wiki.NoSuchPageException;
052 import com.liferay.portlet.wiki.importers.WikiImporter;
053 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
054 import com.liferay.portlet.wiki.model.WikiNode;
055 import com.liferay.portlet.wiki.model.WikiPage;
056 import com.liferay.portlet.wiki.model.WikiPageConstants;
057 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
058 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
059
060 import java.io.IOException;
061 import java.io.InputStream;
062 import java.io.InputStreamReader;
063
064 import java.util.ArrayList;
065 import java.util.Collections;
066 import java.util.HashMap;
067 import java.util.List;
068 import java.util.Map;
069 import java.util.Set;
070 import java.util.regex.Matcher;
071 import java.util.regex.Pattern;
072
073
077 public class MediaWikiImporter implements WikiImporter {
078
/**
 * Content given to the shared images page when the image import has to
 * create that page.
 */
public static final String SHARED_IMAGES_CONTENT = "See attachments";

/**
 * Title of the wiki page that imported images are attached to.
 */
public static final String SHARED_IMAGES_TITLE = "SharedImages";
082
083 @Override
084 public void importPages(
085 long userId, WikiNode node, InputStream[] inputStreams,
086 Map<String, String[]> options)
087 throws PortalException {
088
089 if ((inputStreams.length < 1) || (inputStreams[0] == null)) {
090 throw new PortalException("The pages file is mandatory");
091 }
092
093 InputStream pagesInputStream = inputStreams[0];
094 InputStream usersInputStream = inputStreams[1];
095 InputStream imagesInputStream = inputStreams[2];
096
097 try {
098 Document document = SAXReaderUtil.read(pagesInputStream);
099
100 Map<String, String> usersMap = readUsersFile(usersInputStream);
101
102 Element rootElement = document.getRootElement();
103
104 List<String> specialNamespaces = readSpecialNamespaces(rootElement);
105
106 processSpecialPages(userId, node, rootElement, specialNamespaces);
107 processRegularPages(
108 userId, node, rootElement, specialNamespaces, usersMap,
109 imagesInputStream, options);
110 processImages(userId, node, imagesInputStream);
111
112 moveFrontPage(userId, node, options);
113 }
114 catch (DocumentException de) {
115 throw new ImportFilesException("Invalid XML file provided");
116 }
117 catch (IOException ioe) {
118 throw new ImportFilesException("Error reading the files provided");
119 }
120 catch (PortalException pe) {
121 throw pe;
122 }
123 catch (Exception e) {
124 throw new PortalException(e);
125 }
126 }
127
128 protected long getUserId(
129 long userId, WikiNode node, String author,
130 Map<String, String> usersMap)
131 throws SystemException {
132
133 User user = null;
134
135 String emailAddress = usersMap.get(author);
136
137 if (Validator.isNotNull(emailAddress)) {
138 user = UserLocalServiceUtil.fetchUserByEmailAddress(
139 node.getCompanyId(), emailAddress);
140 }
141 else {
142 user = UserLocalServiceUtil.fetchUserByScreenName(
143 node.getCompanyId(), StringUtil.toLowerCase(author));
144 }
145
146 if (user != null) {
147 return user.getUserId();
148 }
149
150 return userId;
151 }
152
153 protected void importPage(
154 long userId, String author, WikiNode node, String title,
155 String content, String summary, Map<String, String> usersMap,
156 boolean strictImportMode)
157 throws PortalException {
158
159 try {
160 long authorUserId = getUserId(userId, node, author, usersMap);
161 String parentTitle = readParentTitle(content);
162 String redirectTitle = readRedirectTitle(content);
163
164 ServiceContext serviceContext = new ServiceContext();
165
166 serviceContext.setAddGroupPermissions(true);
167 serviceContext.setAddGuestPermissions(true);
168 serviceContext.setAssetTagNames(
169 readAssetTagNames(userId, node, content));
170
171 if (Validator.isNull(redirectTitle)) {
172 _translator.setStrictImportMode(strictImportMode);
173
174 content = _translator.translate(content);
175 }
176 else {
177 content =
178 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
179 StringPool.DOUBLE_CLOSE_BRACKET;
180 }
181
182 WikiPage page = null;
183
184 try {
185 page = WikiPageLocalServiceUtil.getPage(
186 node.getNodeId(), title);
187 }
188 catch (NoSuchPageException nspe) {
189 page = WikiPageLocalServiceUtil.addPage(
190 authorUserId, node.getNodeId(), title,
191 WikiPageConstants.NEW, null, true, serviceContext);
192 }
193
194 WikiPageLocalServiceUtil.updatePage(
195 authorUserId, node.getNodeId(), title, page.getVersion(),
196 content, summary, true, "creole", parentTitle, redirectTitle,
197 serviceContext);
198 }
199 catch (Exception e) {
200 throw new PortalException("Error importing page " + title, e);
201 }
202 }
203
204 protected boolean isSpecialMediaWikiPage(
205 String title, List<String> specialNamespaces) {
206
207 for (String namespace : specialNamespaces) {
208 if (title.startsWith(namespace + StringPool.COLON)) {
209 return true;
210 }
211 }
212
213 return false;
214 }
215
216 protected boolean isValidImage(String[] paths, InputStream inputStream) {
217 if (_specialMediaWikiDirs.contains(paths[0])) {
218 return false;
219 }
220
221 if ((paths.length > 1) && _specialMediaWikiDirs.contains(paths[1])) {
222 return false;
223 }
224
225 String fileName = paths[paths.length - 1];
226
227 try {
228 DLStoreUtil.validate(fileName, true, inputStream);
229 }
230 catch (PortalException pe) {
231 return false;
232 }
233 catch (SystemException se) {
234 return false;
235 }
236
237 return true;
238 }
239
240 protected void moveFrontPage(
241 long userId, WikiNode node, Map<String, String[]> options) {
242
243 String frontPageTitle = MapUtil.getString(
244 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
245
246 if (Validator.isNotNull(frontPageTitle)) {
247 frontPageTitle = normalizeTitle(frontPageTitle);
248
249 try {
250 if (WikiPageLocalServiceUtil.getPagesCount(
251 node.getNodeId(), frontPageTitle, true) > 0) {
252
253 ServiceContext serviceContext = new ServiceContext();
254
255 serviceContext.setAddGroupPermissions(true);
256 serviceContext.setAddGuestPermissions(true);
257
258 WikiPageLocalServiceUtil.movePage(
259 userId, node.getNodeId(), frontPageTitle,
260 WikiPageConstants.FRONT_PAGE, false, serviceContext);
261 }
262 }
263 catch (Exception e) {
264 if (_log.isWarnEnabled()) {
265 StringBundler sb = new StringBundler(4);
266
267 sb.append("Could not move ");
268 sb.append(WikiPageConstants.FRONT_PAGE);
269 sb.append(" to the title provided: ");
270 sb.append(frontPageTitle);
271
272 _log.warn(sb.toString(), e);
273 }
274 }
275 }
276 }
277
278 protected String normalize(String categoryName, int length) {
279 categoryName = AssetUtil.toWord(categoryName.trim());
280
281 return StringUtil.shorten(categoryName, length);
282 }
283
284 protected String normalizeDescription(String description) {
285 description = description.replaceAll(
286 _categoriesPattern.pattern(), StringPool.BLANK);
287
288 return normalize(description, 255);
289 }
290
291 protected String normalizeTitle(String title) {
292 title = title.replaceAll(
293 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
294
295 return StringUtil.shorten(title, 75);
296 }
297
298 protected void processImages(
299 long userId, WikiNode node, InputStream imagesInputStream)
300 throws Exception {
301
302 if (imagesInputStream == null) {
303 return;
304 }
305
306 ProgressTracker progressTracker =
307 ProgressTrackerThreadLocal.getProgressTracker();
308
309 int count = 0;
310
311 ZipReader zipReader = ZipReaderFactoryUtil.getZipReader(
312 imagesInputStream);
313
314 List<String> entries = zipReader.getEntries();
315
316 int total = entries.size();
317
318 if (total > 0) {
319 try {
320 WikiPageLocalServiceUtil.getPage(
321 node.getNodeId(), SHARED_IMAGES_TITLE);
322 }
323 catch (NoSuchPageException nspe) {
324 ServiceContext serviceContext = new ServiceContext();
325
326 serviceContext.setAddGroupPermissions(true);
327 serviceContext.setAddGuestPermissions(true);
328
329 WikiPageLocalServiceUtil.addPage(
330 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
331 SHARED_IMAGES_CONTENT, null, true, serviceContext);
332 }
333 }
334
335 List<ObjectValuePair<String, InputStream>> inputStreamOVPs =
336 new ArrayList<ObjectValuePair<String, InputStream>>();
337
338 try {
339 int percentage = 50;
340
341 for (int i = 0; i < entries.size(); i++) {
342 String entry = entries.get(i);
343
344 String key = entry;
345
346 InputStream inputStream = zipReader.getEntryAsInputStream(
347 entry);
348
349 String[] paths = StringUtil.split(key, CharPool.SLASH);
350
351 if (!isValidImage(paths, inputStream)) {
352 if (_log.isInfoEnabled()) {
353 _log.info("Ignoring " + key);
354 }
355
356 continue;
357 }
358
359 String fileName = StringUtil.toLowerCase(
360 paths[paths.length - 1]);
361
362 ObjectValuePair<String, InputStream> inputStreamOVP =
363 new ObjectValuePair<String, InputStream>(
364 fileName, inputStream);
365
366 inputStreamOVPs.add(inputStreamOVP);
367
368 count++;
369
370 if ((i % 5) == 0) {
371 WikiPageLocalServiceUtil.addPageAttachments(
372 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
373 inputStreamOVPs);
374
375 inputStreamOVPs.clear();
376
377 percentage = Math.min(50 + (i * 50) / total, 99);
378
379 progressTracker.setPercent(percentage);
380 }
381 }
382
383 if (!inputStreamOVPs.isEmpty()) {
384 WikiPageLocalServiceUtil.addPageAttachments(
385 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
386 inputStreamOVPs);
387 }
388 }
389 finally {
390 for (ObjectValuePair<String, InputStream> inputStreamOVP :
391 inputStreamOVPs) {
392
393 InputStream inputStream = inputStreamOVP.getValue();
394
395 StreamUtil.cleanUp(inputStream);
396 }
397 }
398
399 zipReader.close();
400
401 if (_log.isInfoEnabled()) {
402 _log.info("Imported " + count + " images into " + node.getName());
403 }
404 }
405
406 protected void processRegularPages(
407 long userId, WikiNode node, Element rootElement,
408 List<String> specialNamespaces, Map<String, String> usersMap,
409 InputStream imagesInputStream, Map<String, String[]> options) {
410
411 boolean importLatestVersion = MapUtil.getBoolean(
412 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
413 boolean strictImportMode = MapUtil.getBoolean(
414 options, WikiImporterKeys.OPTIONS_STRICT_IMPORT_MODE);
415
416 ProgressTracker progressTracker =
417 ProgressTrackerThreadLocal.getProgressTracker();
418
419 int count = 0;
420
421 int percentage = 10;
422
423 int maxPercentage = 50;
424
425 if (imagesInputStream == null) {
426 maxPercentage = 99;
427 }
428
429 List<Element> pageElements = rootElement.elements("page");
430
431 for (int i = 0; i < pageElements.size(); i++) {
432 Element pageElement = pageElements.get(i);
433
434 String title = pageElement.elementText("title");
435
436 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
437 continue;
438 }
439
440 title = normalizeTitle(title);
441
442 percentage = Math.min(
443 10 + (i * (maxPercentage - percentage)) / pageElements.size(),
444 maxPercentage);
445
446 progressTracker.setPercent(percentage);
447
448 List<Element> revisionElements = pageElement.elements("revision");
449
450 if (importLatestVersion) {
451 Element lastRevisionElement = revisionElements.get(
452 revisionElements.size() - 1);
453
454 revisionElements = new ArrayList<Element>();
455
456 revisionElements.add(lastRevisionElement);
457 }
458
459 for (Element revisionElement : revisionElements) {
460 Element contributorElement = revisionElement.element(
461 "contributor");
462
463 String author = contributorElement.elementText("username");
464
465 String content = revisionElement.elementText("text");
466 String summary = revisionElement.elementText("comment");
467
468 try {
469 importPage(
470 userId, author, node, title, content, summary, usersMap,
471 strictImportMode);
472 }
473 catch (Exception e) {
474 if (_log.isWarnEnabled()) {
475 _log.warn(
476 "Page with title " + title +
477 " could not be imported",
478 e);
479 }
480 }
481 }
482
483 count++;
484 }
485
486 if (_log.isInfoEnabled()) {
487 _log.info("Imported " + count + " pages into " + node.getName());
488 }
489 }
490
491 protected void processSpecialPages(
492 long userId, WikiNode node, Element rootElement,
493 List<String> specialNamespaces)
494 throws PortalException {
495
496 ProgressTracker progressTracker =
497 ProgressTrackerThreadLocal.getProgressTracker();
498
499 List<Element> pageElements = rootElement.elements("page");
500
501 for (int i = 0; i < pageElements.size(); i++) {
502 Element pageElement = pageElements.get(i);
503
504 String title = pageElement.elementText("title");
505
506 if (!title.startsWith("Category:")) {
507 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
508 rootElement.remove(pageElement);
509 }
510
511 continue;
512 }
513
514 String categoryName = title.substring("Category:".length());
515
516 categoryName = normalize(categoryName, 75);
517
518 Element revisionElement = pageElement.element("revision");
519
520 String description = revisionElement.elementText("text");
521
522 description = normalizeDescription(description);
523
524 try {
525 AssetTag assetTag = null;
526
527 try {
528 assetTag = AssetTagLocalServiceUtil.getTag(
529 node.getGroupId(), categoryName);
530 }
531 catch (NoSuchTagException nste) {
532 ServiceContext serviceContext = new ServiceContext();
533
534 serviceContext.setAddGroupPermissions(true);
535 serviceContext.setAddGuestPermissions(true);
536 serviceContext.setScopeGroupId(node.getGroupId());
537
538 assetTag = AssetTagLocalServiceUtil.addTag(
539 userId, categoryName, null, serviceContext);
540
541 if (PropsValues.ASSET_TAG_PROPERTIES_ENABLED &&
542 Validator.isNotNull(description)) {
543
544 AssetTagPropertyLocalServiceUtil.addTagProperty(
545 userId, assetTag.getTagId(), "description",
546 description);
547 }
548 }
549 }
550 catch (SystemException se) {
551 _log.error(se, se);
552 }
553
554 if ((i % 5) == 0) {
555 progressTracker.setPercent((i * 10) / pageElements.size());
556 }
557 }
558 }
559
560 protected String[] readAssetTagNames(
561 long userId, WikiNode node, String content)
562 throws PortalException, SystemException {
563
564 Matcher matcher = _categoriesPattern.matcher(content);
565
566 List<String> assetTagNames = new ArrayList<String>();
567
568 while (matcher.find()) {
569 String categoryName = matcher.group(1);
570
571 categoryName = normalize(categoryName, 75);
572
573 AssetTag assetTag = null;
574
575 try {
576 assetTag = AssetTagLocalServiceUtil.getTag(
577 node.getGroupId(), categoryName);
578 }
579 catch (NoSuchTagException nste) {
580 ServiceContext serviceContext = new ServiceContext();
581
582 serviceContext.setAddGroupPermissions(true);
583 serviceContext.setAddGuestPermissions(true);
584 serviceContext.setScopeGroupId(node.getGroupId());
585
586 assetTag = AssetTagLocalServiceUtil.addTag(
587 userId, categoryName, null, serviceContext);
588 }
589
590 assetTagNames.add(assetTag.getName());
591 }
592
593 if (content.contains(_WORK_IN_PROGRESS)) {
594 assetTagNames.add(_WORK_IN_PROGRESS_TAG);
595 }
596
597 return assetTagNames.toArray(new String[assetTagNames.size()]);
598 }
599
600 protected String readParentTitle(String content) {
601 Matcher matcher = _parentPattern.matcher(content);
602
603 String redirectTitle = StringPool.BLANK;
604
605 if (matcher.find()) {
606 redirectTitle = matcher.group(1);
607
608 redirectTitle = normalizeTitle(redirectTitle);
609
610 redirectTitle += " (disambiguation)";
611 }
612
613 return redirectTitle;
614 }
615
616 protected String readRedirectTitle(String content) {
617 Matcher matcher = _redirectPattern.matcher(content);
618
619 String redirectTitle = StringPool.BLANK;
620
621 if (matcher.find()) {
622 redirectTitle = matcher.group(1);
623
624 redirectTitle = normalizeTitle(redirectTitle);
625 }
626
627 return redirectTitle;
628 }
629
630 protected List<String> readSpecialNamespaces(Element root)
631 throws ImportFilesException {
632
633 List<String> namespaces = new ArrayList<String>();
634
635 Element siteinfoElement = root.element("siteinfo");
636
637 if (siteinfoElement == null) {
638 throw new ImportFilesException("Invalid pages XML file");
639 }
640
641 Element namespacesElement = siteinfoElement.element("namespaces");
642
643 List<Element> namespaceElements = namespacesElement.elements(
644 "namespace");
645
646 for (Element namespaceElement : namespaceElements) {
647 Attribute attribute = namespaceElement.attribute("key");
648
649 String value = attribute.getValue();
650
651 if (!value.equals("0")) {
652 namespaces.add(namespaceElement.getText());
653 }
654 }
655
656 return namespaces;
657 }
658
659 protected Map<String, String> readUsersFile(InputStream usersInputStream)
660 throws IOException {
661
662 if (usersInputStream == null) {
663 return Collections.emptyMap();
664 }
665
666 Map<String, String> usersMap = new HashMap<String, String>();
667
668 UnsyncBufferedReader unsyncBufferedReader = new UnsyncBufferedReader(
669 new InputStreamReader(usersInputStream));
670
671 String line = unsyncBufferedReader.readLine();
672
673 while (line != null) {
674 String[] array = StringUtil.split(line);
675
676 if ((array.length == 2) && Validator.isNotNull(array[0]) &&
677 Validator.isNotNull(array[1])) {
678
679 usersMap.put(array[0], array[1]);
680 }
681 else {
682 if (_log.isInfoEnabled()) {
683 _log.info(
684 "Ignoring line " + line +
685 " because it does not contain exactly 2 columns");
686 }
687 }
688
689 line = unsyncBufferedReader.readLine();
690 }
691
692 return usersMap;
693 }
694
695 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
696
697 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
698
699 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
700
701 private static Pattern _categoriesPattern = Pattern.compile(
702 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
703 private static Pattern _parentPattern = Pattern.compile(
704 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
705 private static Pattern _redirectPattern = Pattern.compile(
706 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
707 private static Set<String> _specialMediaWikiDirs = SetUtil.fromArray(
708 new String[] {"archive", "temp", "thumb"});
709
710 private MediaWikiToCreoleTranslator _translator =
711 new MediaWikiToCreoleTranslator();
712
713 }