<html>
<body>
<span>This is sample text, and I want to <b>truncate</b> it, can you please help me!
</body>
</html>
and you want to truncate it so that the text displayed on the HTML page is limited to 50 characters, without counting the length of the HTML tags toward that limit. In this situation, the code below will help you.
import com.localmatters.util.StringUtils;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Stack;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Truncates HTML fragments to a visible-text length while keeping the emitted
 * start and end tags balanced with respect to the input.
 *
 * <p>Tags are recognised with a simple regular expression ({@code <...>} with no
 * nested angle brackets); this is not a full HTML parser. Character entities such
 * as {@code &amp;} are counted by their literal length.
 */
public class HtmlTextTruncator {

    /** Any tag-like token: start tag, end tag, comment or declaration. */
    private static final Pattern TAG = Pattern.compile("<[^<>]*>");

    /** HTML void elements, which never have a matching end tag. */
    private static final Pattern VOID_TAG = Pattern.compile(
            "<(?:area|base|br|col|embed|hr|img|input|link|meta|param|source|track|wbr)"
                    + "(?:[\\s/][^<>]*)?>",
            Pattern.CASE_INSENSITIVE);

    /**
     * Returns a prefix of the given HTML in which the visible text is cut off once it
     * reaches {@code limit} characters. The length of HTML tags is not counted.
     *
     * <p>Text is appended in whole space-separated words, so the visible text of the
     * result may exceed {@code limit} by the remainder of the last word. Once the limit
     * is reached, later text and later start tags (with their matching end tags) are
     * dropped, while end tags belonging to already emitted start tags are kept.
     *
     * @param inputString the HTML to truncate; may be {@code null}
     * @param limit maximum number of visible text characters; a value of zero or less
     *        disables truncation
     * @return the truncated HTML; the unchanged input when it is empty or {@code limit}
     *         is not positive; an empty string when the input is {@code null}
     */
    public static String htmlSubString(String inputString, int limit) {
        if (inputString == null) {
            return "";
        }
        if (inputString.length() == 0 || limit <= 0) {
            return inputString;
        }

        StringBuilder message = new StringBuilder();
        int actualTextLength = 0; // visible text emitted so far, tags excluded
        int skippedOpenTags = 0;  // start tags dropped after the limit and not yet closed
        int textStart = 0;        // index just past the previous tag

        Matcher tagMatcher = TAG.matcher(inputString);
        while (tagMatcher.find()) {
            String text = inputString.substring(textStart, tagMatcher.start());
            actualTextLength = appendWords(message, text, limit, actualTextLength);
            textStart = tagMatcher.end();

            String tag = tagMatcher.group();
            boolean underLimit = actualTextLength < limit;
            if (tag.startsWith("</")) {
                if (skippedOpenTags > 0) {
                    // Closes a start tag that was dropped, so drop it as well.
                    skippedOpenTags--;
                } else {
                    message.append(tag);
                }
            } else if (isStandalone(tag)) {
                // No end tag will follow, so it must not affect the skip depth.
                if (underLimit) {
                    message.append(tag);
                }
            } else if (underLimit) {
                message.append(tag);
            } else {
                skippedOpenTags++;
            }
        }
        appendWords(message, inputString.substring(textStart), limit, actualTextLength);
        return message.toString();
    }

    /** Tells whether the tag is self-closing, a void element, a comment or a declaration. */
    private static boolean isStandalone(String tag) {
        return tag.endsWith("/>")
                || tag.startsWith("<!")
                || tag.startsWith("<?")
                || VOID_TAG.matcher(tag).matches();
    }

    /**
     * Appends whole words (and the single spaces between them) from {@code text} until
     * the running length reaches {@code limit}, and returns the updated running length.
     */
    private static int appendWords(StringBuilder out, String text, int limit, int length) {
        StringTokenizer pieces = new StringTokenizer(text, " ", true);
        while (length < limit && pieces.hasMoreTokens()) {
            String piece = pieces.nextToken();
            out.append(piece);
            length += piece.length();
        }
        return length;
    }
}
/**
 * Iterates over the matches of a regular expression in a character sequence and,
 * optionally, over the text segments that lie between those matches.
 *
 * <p>With {@code returnDelims} set, every match is preceded by the (possibly empty)
 * text between the previous match and this one, and any non-empty text after the
 * last match is returned at the end. Without it, only the matches are returned.
 */
class RETokenizer implements Iterator<String> {
    private final CharSequence input;
    private final boolean returnDelims;
    private Matcher matcher; // set to null once the input is exhausted
    private String delim;    // pending text segment preceding the pending match
    private String match;    // pending pattern match
    private int lastEnd = 0; // end index of the most recent match

    /**
     * @param input the text to scan
     * @param patternStr the regular expression to search for
     * @param returnDelims whether the text between matches is returned as well
     */
    public RETokenizer(CharSequence input, String patternStr, boolean returnDelims) {
        this.input = input;
        this.returnDelims = returnDelims;
        this.matcher = Pattern.compile(patternStr).matcher(input);
    }

    /** Returns true if there are more matches or text segments to return. */
    @Override
    public boolean hasNext() {
        // Pending items are checked first so that repeated calls stay true until
        // next() has consumed them, even after the matcher has been released.
        if (delim != null || match != null) {
            return true;
        }
        if (matcher == null) {
            return false;
        }
        if (matcher.find()) {
            if (returnDelims) {
                delim = input.subSequence(lastEnd, matcher.start()).toString();
            }
            match = matcher.group();
            lastEnd = matcher.end();
            return true;
        }
        if (returnDelims && lastEnd < input.length()) {
            delim = input.subSequence(lastEnd, input.length()).toString();
            lastEnd = input.length();
        }
        // Release the matcher on every exhausted path so find() is never called
        // again after it has failed.
        matcher = null;
        return delim != null;
    }

    /**
     * Returns the next text segment (when {@code returnDelims} is true) or match.
     *
     * @throws NoSuchElementException if nothing is left to return
     */
    @Override
    public String next() {
        if (!hasNext()) {
            throw new NoSuchElementException();
        }
        String result;
        if (delim != null) {
            result = delim;
            delim = null;
        } else {
            result = match;
            match = null;
        }
        return result;
    }

    /**
     * Returns true if the item already fetched by {@link #hasNext()} and due to be
     * returned by the following {@link #next()} call is a pattern match rather than
     * a text segment.
     */
    public boolean isNextToken() {
        return delim == null && match != null;
    }

    /** Removal is not supported. */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}