1
22
23 package com.liferay.portlet.wiki.importers.mediawiki;
24
25 import com.liferay.documentlibrary.service.DLLocalServiceUtil;
26 import com.liferay.portal.NoSuchUserException;
27 import com.liferay.portal.PortalException;
28 import com.liferay.portal.SystemException;
29 import com.liferay.portal.kernel.log.Log;
30 import com.liferay.portal.kernel.log.LogFactoryUtil;
31 import com.liferay.portal.kernel.util.ArrayUtil;
32 import com.liferay.portal.kernel.util.ObjectValuePair;
33 import com.liferay.portal.kernel.util.ProgressTracker;
34 import com.liferay.portal.kernel.util.ProgressTrackerThreadLocal;
35 import com.liferay.portal.kernel.util.StringPool;
36 import com.liferay.portal.kernel.util.StringUtil;
37 import com.liferay.portal.kernel.util.Validator;
38 import com.liferay.portal.kernel.xml.Document;
39 import com.liferay.portal.kernel.xml.DocumentException;
40 import com.liferay.portal.kernel.xml.Element;
41 import com.liferay.portal.kernel.xml.SAXReaderUtil;
42 import com.liferay.portal.kernel.zip.ZipReader;
43 import com.liferay.portal.model.User;
44 import com.liferay.portal.service.ServiceContext;
45 import com.liferay.portal.service.UserLocalServiceUtil;
46 import com.liferay.portal.util.PropsValues;
47 import com.liferay.portlet.tags.NoSuchEntryException;
48 import com.liferay.portlet.tags.model.TagsEntry;
49 import com.liferay.portlet.tags.service.TagsEntryLocalServiceUtil;
50 import com.liferay.portlet.tags.service.TagsPropertyLocalServiceUtil;
51 import com.liferay.portlet.tags.util.TagsUtil;
52 import com.liferay.portlet.wiki.ImportFilesException;
53 import com.liferay.portlet.wiki.NoSuchPageException;
54 import com.liferay.portlet.wiki.importers.WikiImporter;
55 import com.liferay.portlet.wiki.importers.WikiImporterKeys;
56 import com.liferay.portlet.wiki.model.WikiNode;
57 import com.liferay.portlet.wiki.model.WikiPage;
58 import com.liferay.portlet.wiki.model.impl.WikiPageImpl;
59 import com.liferay.portlet.wiki.service.WikiPageLocalServiceUtil;
60 import com.liferay.portlet.wiki.translators.MediaWikiToCreoleTranslator;
61 import com.liferay.util.MapUtil;
62
63 import java.io.BufferedReader;
64 import java.io.File;
65 import java.io.FileReader;
66 import java.io.IOException;
67
68 import java.util.ArrayList;
69 import java.util.Collections;
70 import java.util.HashMap;
71 import java.util.Iterator;
72 import java.util.List;
73 import java.util.Map;
74 import java.util.regex.Matcher;
75 import java.util.regex.Pattern;
76
77
84 public class MediaWikiImporter implements WikiImporter {
85
86 public static final String SHARED_IMAGES_CONTENT = "See attachments";
87
88 public static final String SHARED_IMAGES_TITLE = "SharedImages";
89
90 public void importPages(
91 long userId, WikiNode node, File[] files,
92 Map<String, String[]> options)
93 throws PortalException {
94
95 if ((files.length < 1) || (files[0] == null) || (!files[0].exists())) {
96 throw new PortalException("The pages file is mandatory");
97 }
98
99 File pagesFile = files[0];
100 File usersFile = files[1];
101 File imagesFile = files[2];
102
103 try {
104 Document doc = SAXReaderUtil.read(pagesFile);
105
106 Map<String, String> usersMap = readUsersFile(usersFile);
107
108 Element root = doc.getRootElement();
109
110 List<String> specialNamespaces = readSpecialNamespaces(root);
111
112 processSpecialPages(userId, node, root, specialNamespaces);
113 processRegularPages(
114 userId, node, root, specialNamespaces, usersMap, imagesFile,
115 options);
116 processImages(userId, node, imagesFile);
117
118 moveFrontPage(userId, node, options);
119 }
120 catch (DocumentException de) {
121 throw new ImportFilesException("Invalid XML file provided");
122 }
123 catch (IOException de) {
124 throw new ImportFilesException("Error reading the files provided");
125 }
126 catch (PortalException e) {
127 throw e;
128 }
129 catch (Exception e) {
130 throw new PortalException(e);
131 }
132 }
133
134 protected long getUserId(
135 long userId, WikiNode node, String author,
136 Map<String, String> usersMap)
137 throws PortalException, SystemException {
138
139 User user = null;
140
141 String emailAddress = usersMap.get(author);
142
143 try {
144 if (Validator.isNull(emailAddress)) {
145 user = UserLocalServiceUtil.getUserByScreenName(
146 node.getCompanyId(), author.toLowerCase());
147 }
148 else {
149 user = UserLocalServiceUtil.getUserByEmailAddress(
150 node.getCompanyId(), emailAddress);
151 }
152 }
153 catch (NoSuchUserException nsue) {
154 user = UserLocalServiceUtil.getUserById(userId);
155 }
156
157 return user.getUserId();
158 }
159
160 protected void importPage(
161 long userId, String author, WikiNode node, String title,
162 String content, String summary, Map<String, String> usersMap)
163 throws PortalException {
164
165 try {
166 long authorUserId = getUserId(userId, node, author, usersMap);
167 String parentTitle = readParentTitle(content);
168 String redirectTitle = readRedirectTitle(content);
169
170 ServiceContext serviceContext = new ServiceContext();
171
172 serviceContext.setTagsEntries(
173 readTagsEntries(userId, node, content));
174
175 if (Validator.isNull(redirectTitle)) {
176 content = _translator.translate(content);
177 }
178 else {
179 content =
180 StringPool.DOUBLE_OPEN_BRACKET + redirectTitle +
181 StringPool.DOUBLE_CLOSE_BRACKET;
182 }
183
184 WikiPage page = null;
185
186 try {
187 page = WikiPageLocalServiceUtil.getPage(
188 node.getNodeId(), title);
189 }
190 catch (NoSuchPageException nspe) {
191 page = WikiPageLocalServiceUtil.addPage(
192 authorUserId, node.getNodeId(), title, WikiPageImpl.NEW,
193 null, true, serviceContext);
194 }
195
196 WikiPageLocalServiceUtil.updatePage(
197 authorUserId, node.getNodeId(), title, page.getVersion(),
198 content, summary, true, "creole", parentTitle, redirectTitle,
199 serviceContext);
200 }
201 catch (Exception e) {
202 throw new PortalException("Error importing page " + title, e);
203 }
204 }
205
206 protected boolean isSpecialMediaWikiPage(
207 String title, List<String> specialNamespaces) {
208
209 for (String namespace: specialNamespaces) {
210 if (title.startsWith(namespace + StringPool.COLON)) {
211 return true;
212 }
213 }
214
215 return false;
216 }
217
218 protected boolean isValidImage(String[] paths, byte[] bytes) {
219 if (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[0])) {
220 return false;
221 }
222
223 if ((paths.length > 1) &&
224 (ArrayUtil.contains(_SPECIAL_MEDIA_WIKI_DIRS, paths[1]))) {
225
226 return false;
227 }
228
229 String fileName = paths[paths.length - 1];
230
231 try {
232 DLLocalServiceUtil.validate(fileName, bytes);
233 }
234 catch (PortalException pe) {
235 return false;
236 }
237
238 return true;
239 }
240
241 protected void moveFrontPage(
242 long userId, WikiNode node, Map<String, String[]> options) {
243
244 String frontPageTitle = MapUtil.getString(
245 options, WikiImporterKeys.OPTIONS_FRONT_PAGE);
246
247 if (Validator.isNotNull(frontPageTitle)) {
248 frontPageTitle = normalizeTitle(frontPageTitle);
249
250 try {
251 if (WikiPageLocalServiceUtil.getPagesCount(
252 node.getNodeId(), frontPageTitle, true) > 0) {
253
254 ServiceContext serviceContext = new ServiceContext();
255
256 WikiPageLocalServiceUtil.movePage(
257 userId, node.getNodeId(), frontPageTitle,
258 WikiPageImpl.FRONT_PAGE, false, serviceContext);
259
260 }
261 }
262 catch (Exception e) {
263 if (_log.isWarnEnabled()) {
264 StringBuilder sb = new StringBuilder();
265
266 sb.append("Could not move ");
267 sb.append(WikiPageImpl.FRONT_PAGE);
268 sb.append(" to the title provided: ");
269 sb.append(frontPageTitle);
270
271 _log.warn(sb.toString(), e);
272 }
273 }
274
275 }
276
277 }
278
279 protected String normalize(String categoryName, int length) {
280 categoryName = TagsUtil.toWord(categoryName.trim());
281
282 return StringUtil.shorten(categoryName, length);
283 }
284
285 protected String normalizeDescription(String description) {
286 description = description.replaceAll(
287 _categoriesPattern.pattern(), StringPool.BLANK);
288
289 return normalize(description, 300);
290 }
291
292 protected String normalizeTitle(String title) {
293 title = title.replaceAll(
294 PropsValues.WIKI_PAGE_TITLES_REMOVE_REGEXP, StringPool.BLANK);
295
296 return StringUtil.shorten(title, 75);
297 }
298
299 private void processImages(long userId, WikiNode node, File imagesFile)
300 throws Exception {
301
302 if ((imagesFile == null) || (!imagesFile.exists())) {
303 return;
304 }
305
306 ProgressTracker progressTracker =
307 ProgressTrackerThreadLocal.getProgressTracker();
308
309 int count = 0;
310
311 ZipReader zipReader = new ZipReader(imagesFile);
312
313 Map<String, byte[]> entries = zipReader.getEntries();
314
315 int total = entries.size();
316
317 if (total > 0) {
318 try {
319 WikiPageLocalServiceUtil.getPage(
320 node.getNodeId(), SHARED_IMAGES_TITLE);
321 }
322 catch (NoSuchPageException nspe) {
323 ServiceContext serviceContext = new ServiceContext();
324
325 WikiPageLocalServiceUtil.addPage(
326 userId, node.getNodeId(), SHARED_IMAGES_TITLE,
327 SHARED_IMAGES_CONTENT, null, true, serviceContext);
328 }
329 }
330
331 List<ObjectValuePair<String, byte[]>> attachments =
332 new ArrayList<ObjectValuePair<String, byte[]>>();
333
334 Iterator<Map.Entry<String, byte[]>> itr = entries.entrySet().iterator();
335
336 int percentage = 50;
337
338 for (int i = 0; itr.hasNext(); i++) {
339 Map.Entry<String, byte[]> entry = itr.next();
340
341 String key = entry.getKey();
342 byte[] value = entry.getValue();
343
344 if (key.endsWith(StringPool.SLASH)) {
345 if (_log.isInfoEnabled()) {
346 _log.info("Ignoring " + key);
347 }
348
349 continue;
350 }
351
352 String[] paths = StringUtil.split(key, StringPool.SLASH);
353
354 if (!isValidImage(paths, value)) {
355 if (_log.isInfoEnabled()) {
356 _log.info("Ignoring " + key);
357 }
358
359 continue;
360 }
361
362 String fileName = paths[paths.length - 1].toLowerCase();
363
364 attachments.add(
365 new ObjectValuePair<String, byte[]>(fileName, value));
366
367 count++;
368
369 if ((i % 5) == 0) {
370 WikiPageLocalServiceUtil.addPageAttachments(
371 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
372
373 attachments.clear();
374
375 percentage = Math.min(50 + (i * 50) / total, 99);
376
377 progressTracker.updateProgress(percentage);
378 }
379 }
380
381 if (!attachments.isEmpty()) {
382 WikiPageLocalServiceUtil.addPageAttachments(
383 node.getNodeId(), SHARED_IMAGES_TITLE, attachments);
384 }
385
386 if (_log.isInfoEnabled()) {
387 _log.info("Imported " + count + " images into " + node.getName());
388 }
389 }
390
391 protected void processRegularPages(
392 long userId, WikiNode node, Element root,
393 List<String> specialNamespaces, Map<String, String> usersMap,
394 File imagesFile, Map<String, String[]> options) {
395
396 boolean importLatestVersion = MapUtil.getBoolean(
397 options, WikiImporterKeys.OPTIONS_IMPORT_LATEST_VERSION);
398
399 ProgressTracker progressTracker =
400 ProgressTrackerThreadLocal.getProgressTracker();
401
402 int count = 0;
403
404 List<Element> pages = root.elements("page");
405
406 int total = pages.size();
407
408 Iterator<Element> itr = root.elements("page").iterator();
409
410 int percentage = 10;
411 int maxPercentage = 50;
412
413 if ((imagesFile == null) || (!imagesFile.exists())) {
414 maxPercentage = 99;
415 }
416
417 int percentageRange = maxPercentage - percentage;
418
419 for (int i = 0; itr.hasNext(); i++) {
420 Element pageEl = itr.next();
421
422 String title = pageEl.elementText("title");
423
424 title = normalizeTitle(title);
425
426 percentage = Math.min(
427 10 + (i * percentageRange) / total, maxPercentage);
428
429 progressTracker.updateProgress(percentage);
430
431 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
432 continue;
433 }
434
435 List<Element> revisionEls = pageEl.elements("revision");
436
437 if (importLatestVersion) {
438 Element lastRevisionEl = revisionEls.get(
439 revisionEls.size() - 1);
440
441 revisionEls = new ArrayList<Element>();
442
443 revisionEls.add(lastRevisionEl);
444 }
445
446 for (Element curRevisionEl : revisionEls) {
447 String author = curRevisionEl.element(
448 "contributor").elementText("username");
449 String content = curRevisionEl.elementText("text");
450 String summary = curRevisionEl.elementText("comment");
451
452 try {
453 importPage(
454 userId, author, node, title, content, summary,
455 usersMap);
456 }
457 catch (Exception e) {
458 if (_log.isWarnEnabled()) {
459 StringBuilder sb = new StringBuilder();
460
461 sb.append("Page with title ");
462 sb.append(title);
463 sb.append(" could not be imported");
464
465 _log.warn(sb.toString(), e);
466 }
467 }
468 }
469
470 count++;
471 }
472
473 if (_log.isInfoEnabled()) {
474 _log.info("Imported " + count + " pages into " + node.getName());
475 }
476 }
477
478 protected void processSpecialPages(
479 long userId, WikiNode node, Element root,
480 List<String> specialNamespaces)
481 throws PortalException {
482
483 ProgressTracker progressTracker =
484 ProgressTrackerThreadLocal.getProgressTracker();
485
486 List<Element> pages = root.elements("page");
487
488 int total = pages.size();
489
490 Iterator<Element> itr = pages.iterator();
491
492 for (int i = 0; itr.hasNext(); i++) {
493 Element page = itr.next();
494
495 String title = page.elementText("title");
496
497 if (!title.startsWith("Category:")) {
498 if (isSpecialMediaWikiPage(title, specialNamespaces)) {
499 root.remove(page);
500 }
501
502 continue;
503 }
504
505 String categoryName = title.substring("Category:".length());
506
507 categoryName = normalize(categoryName, 75);
508
509 String description = page.element("revision").elementText("text");
510
511 description = normalizeDescription(description);
512
513 try {
514 TagsEntry tagsEntry = null;
515
516 try {
517 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
518 node.getCompanyId(), categoryName);
519 }
520 catch (NoSuchEntryException nsee) {
521 ServiceContext serviceContext = new ServiceContext();
522
523 serviceContext.setAddCommunityPermissions(true);
524 serviceContext.setAddGuestPermissions(true);
525 serviceContext.setScopeGroupId(node.getGroupId());
526
527 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
528 userId, null, categoryName, null, null, serviceContext);
529 }
530
531 if (Validator.isNotNull(description)) {
532 TagsPropertyLocalServiceUtil.addProperty(
533 userId, tagsEntry.getEntryId(), "description",
534 description);
535 }
536 }
537 catch (SystemException se) {
538 _log.error(se, se);
539 }
540
541 if ((i % 5) == 0) {
542 progressTracker.updateProgress((i * 10) / total);
543 }
544 }
545 }
546
547 protected String readParentTitle(String content) {
548 Matcher matcher = _parentPattern.matcher(content);
549
550 String redirectTitle = StringPool.BLANK;
551
552 if (matcher.find()) {
553 redirectTitle = matcher.group(1);
554
555 redirectTitle = normalizeTitle(redirectTitle);
556
557 redirectTitle += " (disambiguation)";
558 }
559
560 return redirectTitle;
561 }
562
563 protected String readRedirectTitle(String content) {
564 Matcher matcher = _redirectPattern.matcher(content);
565
566 String redirectTitle = StringPool.BLANK;
567
568 if (matcher.find()) {
569 redirectTitle = matcher.group(1);
570
571 redirectTitle = normalizeTitle(redirectTitle);
572 }
573
574 return redirectTitle;
575 }
576
577 protected List<String> readSpecialNamespaces(Element root)
578 throws ImportFilesException {
579
580 List<String> namespaces = new ArrayList<String>();
581
582 Element siteinfoEl = root.element("siteinfo");
583
584 if (siteinfoEl == null) {
585 throw new ImportFilesException("Invalid pages XML file");
586 }
587
588 Iterator<Element> itr = siteinfoEl.element(
589 "namespaces").elements("namespace").iterator();
590
591 while (itr.hasNext()) {
592 Element namespace = itr.next();
593
594 if (!namespace.attribute("key").getData().equals("0")) {
595 namespaces.add(namespace.getText());
596 }
597 }
598
599 return namespaces;
600 }
601
602 protected String[] readTagsEntries(
603 long userId, WikiNode node, String content)
604 throws PortalException, SystemException {
605
606 Matcher matcher = _categoriesPattern.matcher(content);
607
608 List<String> tagsEntries = new ArrayList<String>();
609
610 while (matcher.find()) {
611 String categoryName = matcher.group(1);
612
613 categoryName = normalize(categoryName, 75);
614
615 TagsEntry tagsEntry = null;
616
617 try {
618 tagsEntry = TagsEntryLocalServiceUtil.getEntry(
619 node.getCompanyId(), categoryName);
620 }
621 catch (NoSuchEntryException nsee) {
622 ServiceContext serviceContext = new ServiceContext();
623
624 serviceContext.setAddCommunityPermissions(true);
625 serviceContext.setAddGuestPermissions(true);
626 serviceContext.setScopeGroupId(node.getGroupId());
627
628 tagsEntry = TagsEntryLocalServiceUtil.addEntry(
629 userId, null, categoryName, null, null, serviceContext);
630 }
631
632 tagsEntries.add(tagsEntry.getName());
633 }
634
635 if (content.indexOf(_WORK_IN_PROGRESS) != -1) {
636 tagsEntries.add(_WORK_IN_PROGRESS_TAG);
637 }
638
639 return tagsEntries.toArray(new String[tagsEntries.size()]);
640 }
641
642 protected Map<String, String> readUsersFile(File usersFile)
643 throws IOException {
644
645 if ((usersFile == null) || (!usersFile.exists())) {
646 return Collections.EMPTY_MAP;
647 }
648
649 Map<String, String> usersMap = new HashMap<String, String>();
650
651 BufferedReader reader = new BufferedReader(new FileReader(usersFile));
652
653 String line = reader.readLine();
654
655 while (line != null) {
656 String[] array = StringUtil.split(line);
657
658 if ((array.length == 2) && (Validator.isNotNull(array[0])) &&
659 (Validator.isNotNull(array[1]))) {
660
661 usersMap.put(array[0], array[1]);
662 }
663 else {
664 if (_log.isInfoEnabled()) {
665 _log.info(
666 "Ignoring line " + line +
667 " because it does not contain exactly 2 columns");
668 }
669 }
670
671 line = reader.readLine();
672 }
673
674 return usersMap;
675 }
676
677 private static final String[] _SPECIAL_MEDIA_WIKI_DIRS = new String[]{
678 "thumb", "temp", "archive"};
679
680 private static final String _WORK_IN_PROGRESS = "{{Work in progress}}";
681
682 private static final String _WORK_IN_PROGRESS_TAG = "work in progress";
683
684 private static Log _log = LogFactoryUtil.getLog(MediaWikiImporter.class);
685
686 private static Pattern _categoriesPattern = Pattern.compile(
687 "\\[\\[[Cc]ategory:([^\\]]*)\\]\\][\\n]*");
688
689 private static Pattern _parentPattern = Pattern.compile(
690 "\\{{2}OtherTopics\\|([^\\}]*)\\}{2}");
691
692 private static Pattern _redirectPattern = Pattern.compile(
693 "#REDIRECT \\[\\[([^\\]]*)\\]\\]");
694
695 private MediaWikiToCreoleTranslator _translator =
696 new MediaWikiToCreoleTranslator();
697
698 }