001 /*
002
003 This is Textile
004 A Humane Web Text Generator
005
006
007 Original PHP Version
008 Version 1.0
009 21 Feb, 2003
010
011 Copyright (c) 2003, Dean Allen, www.textism.com
012 All rights reserved.
013
014 Version 1.0 of this java version
015 Gareth Simpson April 2003
016
017 _______
018 LICENSE
019
020 Redistribution and use in source and binary forms, with or without
021 modification, are permitted provided that the following conditions are met:
022
023 * Redistributions of source code must retain the above copyright notice,
024 this list of conditions and the following disclaimer.
025
026 * Redistributions in binary form must reproduce the above copyright notice,
027 this list of conditions and the following disclaimer in the documentation
028 and/or other materials provided with the distribution.
029
030 * Neither the name Textile nor the names of its contributors may be used to
031 endorse or promote products derived from this software without specific
032 prior written permission.
033
034 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
035 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
036 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
037 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
038 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
039 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
040 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
041 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
042 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
043 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
044 POSSIBILITY OF SUCH DAMAGE.
045
046 _____________
047 USING TEXTILE
048
049 Block modifier syntax:
050
051 Header: hn.
052 Paragraphs beginning with 'hn. ' (where n is 1-6) are wrapped in header tags.
053 Example: <h1>Text</h1>
054
055 Header with CSS class: hn(class).
056 Paragraphs beginning with 'hn(class). ' receive a CSS class attribute.
057 Example: <h1 class="class">Text</h1>
058
059 Paragraph: p. (applied by default)
060 Paragraphs beginning with 'p. ' are wrapped in paragraph tags.
061 Example: <p>Text</p>
062
063 Paragraph with CSS class: p(class).
064 Paragraphs beginning with 'p(class). ' receive a CSS class attribute.
065 Example: <p class="class">Text</p>
066
067 Blockquote: bq.
068 Paragraphs beginning with 'bq. ' are wrapped in block quote tags.
069 Example: <blockquote>Text</blockquote>
070
071 Blockquote with citation: bq(citeurl).
072 Paragraphs beginning with 'bq(citeurl). ' recieve a citation attribute.
073 Example: <blockquote cite="citeurl">Text</blockquote>
074
075 Numeric list: #
076 Consecutive paragraphs beginning with # are wrapped in ordered list tags.
077 Example: <ol><li>ordered list</li></ol>
078
079 Bulleted list: *
080 Consecutive paragraphs beginning with * are wrapped in unordered list tags.
081 Example: <ul><li>unordered list</li></ul>
082
083
084 Phrase modifier syntax:
085
086 _emphasis_ <em>emphasis</em>
087 __italic__ <i>italic</i>
088 *strong* <strong>strong</strong>
089 **bold** <b>bold</b>
090 ??citation?? <cite>citation</cite>
091 -deleted text- <del>deleted</del>
092 +inserted text+ <ins>inserted</ins>
093 ^superscript^ <sup>superscript</sup>
094 ~subscript~ <sub>subscript</sub>
095 @code@ <code>computer code</code>
096
097 ==notextile== leave text alone (do not format)
098
099 "linktext":url <a href="url">linktext</a>
100 "linktext(title)":url <a href="url" title="title">linktext</a>
101
102 !imageurl! <img src="imageurl">
103 !imageurl(alt text)! <img src="imageurl" alt="alt text" />
104 !imageurl!:linkurl <a href="linkurl"><img src="imageurl" /></a>
105
106 ABC(Always Be Closing) <acronym title="Always Be Closing">ABC</acronym>
107
108 */
109
110 import gnu.regexp.*;
111
112 import java.util.StringTokenizer;
113 import java.util.ArrayList;
114
115 public class JTextile
116 {
117 private static final int ENT_COMPAT = 0;
118 private static final int ENT_NOQUOTES = 2;
119 private static final int ENT_QUOTES = 3;
120
121
122
123 public JTextile()
124 {
125 }
126
127
128 public static String textile(String text) throws Exception
129 {
130
131 //$text = stripslashes($text);
132
133 //# turn any incoming ampersands into a dummy character for now.
134 //# This uses a negative lookahead for alphanumerics followed by a semicolon,
135 //# implying an incoming html entity, to be skipped
136 text = preg_replace("&(?![#a-zA-Z0-9]+;)","x%x%",text);
137
138 //# unentify angle brackets and ampersands
139 text = replace(text,">", ">");
140 text = replace(text,"<", "<");
141 text = replace(text,"&", "&");
142
143
144 //# zap carriage returns
145 text = replace(text,"\r\n", "\n");
146
147
148 //# zap tabs
149 text = replace(text,"\t", "" );
150
151 // trim each line
152 StringBuffer splitBuffer = new StringBuffer();
153
154 String[] sList = text.split("/\n/");
155 for(int i = 0; i < sList.length; i++)
156 {
157 splitBuffer.append(sList[i].trim());
158 splitBuffer.append("\n");
159 }
160
161 text = splitBuffer.toString();
162
163 //### Find and replace quick tags
164
165 //# double equal signs mean <notextile>
166 text = preg_replace("(^|\\s)==(.*?)==([^\\w]{0,2})","$1<notextile>$2</notextile>$3$4",text);
167
168 //# image qtag
169 text = preg_replace("!([^!\\s\\(=]+?)\\s?(\\(([^\\)]+?)\\))?!","<img src=\"$1\" alt=\"$3\" />",text);
170
171 //# image with hyperlink
172 text = preg_replace("(<img.+ \\/>):(\\S+)","<a href=\"$2\">$1</a>",text);
173
174 //# hyperlink qtag
175 text = preg_replace("\"([^\"\\(]+)\\s?(\\(([^\\)]+)\\))?\":(\\S+?)([^\\w\\s\\/;]|[1-9]*?)(\\s|$)","<a href=\"$4\" title=\"$3\">$1</a>$5$6",text);
176
177 //# arrange qtag delineators and replacements in an array
178 String[] srcTags = {"\\*\\*","\\*","\\?\\?","-","\\+","~","@"};
179 String[] replaceTags = {"b","strong","cite","del","ins","sub","code"};
180
181 //# loop through the array, replacing qtags with html
182 for(int i = 0; i < srcTags.length; i++)
183 {
184 //text = preg_replace("(^|\\s|>)" + srcTags[i] + "\\b(.+?)\\b([^\\w\\s]*?)" + srcTags[i] + "([^\\w\\s]{0,2})(\\s|$)","$1<" + replaceTags[i] + ">$2$3</" + replaceTags[i] + ">$4$5",text);
185 text = preg_replace("(^|\\s|>)" + srcTags[i] + "([^ ])(.+?)?([^\\w\\s]*?)([^ ])" + srcTags[i] + "([^\\w\\s]{0,2})(\\s|$)","$1<" + replaceTags[i] + ">$2$3$4$5</" + replaceTags[i] + ">$6$7",text);
186 }
187
188 //# some weird bs with underscores and \b word boundaries,
189 //# so we'll do those on their own
190
191 text = preg_replace("(^|\\s)__(.*?)__([^\\w\\s]{0,2})","$1<i>$2</i>$3",text);
192
193 text = preg_replace("(^|\\s)_(.*?)_([^\\w\\s]{0,2})","$1<em>$2</em>$3",text);
194
195 text = preg_replace("\\^(.*?)\\^","<sup>$1</sup>",text);
196
197 // ### Find and replace typographic chars and special tags
198
199 //# small problem with double quotes at the end of a string
200
201 text = preg_replace("\"$","\" ",text);
202
203 //# NB: all these will wreak havoc inside <html> tags
204
205 String[] glyph_search = {
206 "([^\\s[{<])?\\'([dmst]\\b|ll\\b|ve\\b|\\s|$)", // single closing
207 "\\'", // single opening
208 "([^\\s[{])?\"(\\s|$)", // # double closing
209 "\"", // double opening
210 "\\b( )?\\.{3}", // # ellipsis
211 "\\b([A-Z][A-Z0-9]{2,})\\b(\\(([^\\)]+)\\))", // # 3+ uppercase acronym
212 "(^|[^\"][>\\s])([A-Z][A-Z0-9 ]{2,})([^<a-z0-9]|$)", // # 3+ uppercase caps
213 "\\s?--\\s?", // # em dash
214 "\\s-\\s", // # en dash
215 "(\\d+)-(\\d+)", // # en dash
216 "(\\d+) ?x ?(\\d+)", //# dimension sign
217 "\\b ?(\\((tm|TM)\\))", // trademark
218 "\\b ?(\\([rR]\\))", // # registered
219 "\\b ?(\\([cC]\\))" // # registered
220 };
221
222
223 String[] glyph_replace = {
224 "$1’$2", //# single closing
225 "‘", //# single opening
226 "$1”$2", //# double closing
227 "“", //# double opening
228 "$1…", //# ellipsis
229 "<acronym title=\"$2\">$1</acronym>", //# 3+ uppercase acronym
230 //"$1<span class=\"caps\">$2</span>$3", //# 3+ uppercase caps
231 "$1$2$3", //# 3+ uppercase caps
232 "—", //# em dash
233 " – ", //# en dash
234 "$1–$2", //# en dash
235 "$1×$2", //# dimension sign
236 "™", //# trademark
237 "®", //# registered
238 "©" //# copyright
239 };
240
241
242
243
244 // # set toggle for turning off replacements between <code> or <pre>
245 boolean codepre = false;
246 boolean notextile = false;
247
248 //# if there is no html, do a simple search and replace
249
250 if(!preg_match("<.[^<]*>",text))
251 {
252 text = preg_replace(glyph_search,glyph_replace,text);
253 }
254 else
255 {
256
257 StringBuffer out = new StringBuffer();
258 //# else split the text into an array at <.*>
259 //$text = preg_split("/(<.*>)/U",$text,-1,PREG_SPLIT_DELIM_CAPTURE);
260 String[] textSplit = preg_split("<.[^<]*>",text);
261 for(int i = 0; i < textSplit.length; i++)
262 {
263
264 // # matches are off if we're between <code>, <pre> etc.
265 if(preg_match("<(code|pre|kbd)>",textSplit[i].toLowerCase()))
266 {
267 codepre = true;
268 }
269 if(preg_match("<notextile>",textSplit[i].toLowerCase()))
270 {
271 codepre = true;
272 notextile = true;
273 }
274 else if(preg_match("</(code|pre|kbd)>",textSplit[i].toLowerCase()))
275 {
276 codepre = false;
277 }
278 else if(preg_match("</notextile>",textSplit[i].toLowerCase()))
279 {
280 codepre = false;
281 notextile = false;
282 }
283
284 if(!preg_match("<.[^<]*?>",textSplit[i]) && codepre == false)
285 {
286 textSplit[i] = preg_replace(glyph_search,glyph_replace,textSplit[i]);
287 }
288
289 //# convert htmlspecial if between <code>
290 if (codepre == true && notextile == false){
291 textSplit[i] = htmlspecialchars(textSplit[i],ENT_NOQUOTES);
292 textSplit[i] = replace(textSplit[i],"<pre>","<pre>");
293 textSplit[i] = replace(textSplit[i],"<code>","<code>");
294 textSplit[i] = replace(textSplit[i],"<notextile>","<notextile>");
295 }
296
297 if(notextile == true)
298 {
299 textSplit[i] = replace(textSplit[i],"\n","({)(})");
300 }
301
302 //# each line gets pushed to a new array
303 out.append( textSplit[i]);
304 }
305
306 text = out.toString();
307
308
309 }
310
311 //### Block level formatting
312
313 //# deal with forced breaks; this is going to be a problem between
314 //# <pre> tags, but we'll clean them later
315
316
317 //////!!! not working
318 //text = preg_replace("(\\S)(_*)([[:punct:]]*) *\n([^#*\\s])", "$1$2$3<br />$4", text);
319 //text = preg_replace("(\\S)(_*)([:punct:]*) *\\n([^#*\\s])", "$1$2$3<br />$4", text);
320
321
322 text = preg_replace("(\\S)(_*)([:punct:]*) *\\n([^#*\\s])", "$1$2$3<br />$4", text);
323
324
325 //# might be a problem with lists
326 text = replace(text,"l><br />", "l>\n");
327
328 boolean pre = false;
329
330
331 String[] block_find = {
332 "^\\s?\\*\\s(.*)", //# bulleted list *
333 "^\\s?#\\s(.*)", //# numeric list #
334 "^bq\\((\\S+?)\\). (.*)", //# blockquote bq.
335 "^h(\\d)\\(([\\w]+)\\)\\.\\s(.*)", //# header hn(class). w/ css class
336 "^h(\\d)\\. (.*)", //# plain header hn.
337 "^p\\(([[:alnum:]]+)\\)\\.\\s(.*)", //# para p(class). w/ css class
338 "^p\\. (.*)", //# plain paragraph
339 "^([^\\t ]+.*)" //# remaining plain paragraph
340 };
341
342 /*
343 String[] block_find = {
344 "/^\\s?\\*\\s(.*)/", // # bulleted list *
345 "/^\\s?#\\s(.*)/", // # numeric list #
346 "/^bq\\. (.*)/", // # blockquote bq.
347 "/^h(\\d)\\(([[:alnum:]]+)\\)\\.\\s(.*)/", // # header hn(class). w/ css class
348 "/^h(\\d)\\. (.*)/", // # plain header hn.
349 "/^p\\(([[:alnum:]]+)\\)\\.\\s(.*)/", // # para p(class). w/ css class
350 "/^p\\. (.*)/i", // # plain paragraph
351 "/^([^\\t ]+.*)/i" // # remaining plain paragraph
352 };
353 */
354 String[] block_replace = {
355 "\t<liu>$1</liu>$2",
356 "\t<lio>$1</lio>$2",
357 "\t<blockquote cite=\"$1\">$2</blockquote>$3",
358 "\t<h$1 class=\"$2\">$3</h$1>$4",
359 "\t<h$1>$2</h$1>$3",
360 "\t<p class=\"$1\">$2</p>$3",
361 "\t<p>$1</p>",
362 "\t<p>$1</p>$2"
363 };
364
365
366 StringBuffer blockBuffer = new StringBuffer();
367
368 String list = "";
369
370 // This done to ensure that lists close after themselves
371 text += " \n";
372
373
374 //# split the text into an array by newlines
375 String[] bList = text.split("\n");
376 for(int i = 0; i <= bList.length; i++)
377 {
378 String line = " ";
379 if(i < bList.length)
380 line = bList[i];
381
382
383 //#make sure the line isn't blank
384 if (true || line.length() > 0 ) // actually i think we want blank lines
385 {
386
387 //# matches are off if we're between <pre> or <code> tags
388 if(line.toLowerCase().indexOf("<pre>") > -1)
389 {
390 pre = true;
391 }
392
393 //# deal with block replacements first, then see if we're in a list
394 if (!pre)
395 {
396 line = preg_replace(block_find,block_replace,line);
397 }
398
399 //# kill any br tags that slipped in earlier
400 if (pre == true)
401 {
402 line = replace(line,"<br />","\n");
403 }
404
405 //# matches back on after </pre>
406 if(line.toLowerCase().indexOf("</pre>") > -1)
407 {
408 pre = false;
409 }
410
411 //# at the beginning of a list, $line switches to a value
412 if (list.length() == 0 && preg_match("\\t<li",line))
413 {
414 line = preg_replace("^(\\t<li)(o|u)","\n<$2l>\n$1$2",line);
415 list = line.substring(2,3);
416 }
417 //# at the end of a list, $line switches to empty
418 else if (list.length() > 0 && !preg_match("\\t<li" + list,line))
419 {
420 line = preg_replace("^(.*)$","</" + list + "l>\n$1",line);
421 list = "";
422 }
423 }
424 // push each line to a new array once it's processed
425 blockBuffer.append(line);
426 blockBuffer.append("\n");
427
428 }
429 text = blockBuffer.toString();
430
431
432
433 //#clean up <notextile>
434 text = preg_replace("<\\/?notextile>", "",text);
435
436 //#clean up <notextile>
437 text = replace(text,"({)(})", "\n");
438
439 //# clean up liu and lio
440 text = preg_replace("<(\\/?)li(u|o)>", "<$1li>",text);
441
442 //# turn the temp char back to an ampersand entity
443 text = replace(text,"x%x%","&");
444
445 //# Newline linebreaks, just for markup tidiness
446 text = replace(text,"<br />","<br />\n");
447
448 return text;
449 }
450
451
452
453 /**
454 * Does just that.
455 *
456 * @param source The string to start with
457 * @param searchFor The string we are looking for
458 * @param replaceWith The replacement
459 *
460 * @return The reformatted string
461 *
462 */
463 private static String replace ( String source , String searchFor , String replaceWith )
464 {
465 if (source == null || "".equals(source)) {
466 return source;
467 }
468
469 if (replaceWith == null) {
470 return source;
471 }
472
473 if ("".equals(searchFor)) {
474 return source;
475 }
476
477 int s = 0;
478 int e = 0;
479 StringBuffer result = new StringBuffer();
480
481 while ((e = source.indexOf(searchFor, s)) >= 0)
482 {
483 result.append(source.substring(s, e));
484 result.append(replaceWith);
485 s = e + searchFor.length();
486 }
487 result.append(source.substring(s));
488 return result.toString();
489
490 }
491
492 private static String htmlspecialchars(String text, int mode)
493 {
494 text = replace(text,"&", "&");
495 if (mode != ENT_NOQUOTES)
496 text = replace(text,"\"", """);
497 if (mode == ENT_QUOTES)
498 text = replace(text,"'", "'");
499 text = replace(text,"<", "<");
500 text = replace(text,">", ">");
501 return text ;
502 }
503
504 private static String preg_replace(String pattern,String replace,String text) throws Exception
505 {
506 gnu.regexp.RE r = new gnu.regexp.RE(pattern);
507 return r.substituteAll(text,replace);
508 }
509
510 private static String preg_replace(String[] pattern,String[] replace,String text) throws Exception
511 {
512 for(int i = 0; i < pattern.length; i++)
513 {
514 text = preg_replace(pattern[i],replace[i],text);
515 }
516 return text;
517 }
518
519 private static boolean preg_match(String pattern,String text) throws Exception
520 {
521 gnu.regexp.RE r = new gnu.regexp.RE(pattern);
522 return r.getMatch(text) != null;
523 }
524
525 private static String[] preg_split(String pattern,String text) throws Exception
526 {
527
528 int startAt = 0;
529 ArrayList tempList = new ArrayList();
530
531 gnu.regexp.RE r = new gnu.regexp.RE(pattern);
532
533 gnu.regexp.REMatch match = r.getMatch(text);
534
535 while(match != null)
536 {
537 String beforeMatch = text.substring(startAt,match.getStartIndex());
538 tempList.add(beforeMatch);
539 tempList.add(match.toString());
540 startAt = match.getEndIndex();
541 match = r.getMatch(text,startAt);
542 }
543
544 tempList.add(text.substring(startAt));
545
546 // copy out our templist to an array of strings which is what we return
547 String[] ret = new String[tempList.size()];
548
549 for(int i = 0; i < ret.length; i++)
550 {
551 ret[i] = (String)tempList.get(i);
552 }
553
554 return ret;
555
556 }
557
558 }
|