1.
Split a String
public class Main {
    /**
     * Splits a comma-separated literal and prints every piece on its own line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String csv = "one,two,three,four,five";
        for (String piece : csv.split(",")) {
            System.out.println(piece);
        }
    }
}
/*
one
two
three
four
five
*/
2. Using split() with a space can be a problem
public class Main {
    /**
     * Splits a literal on single spaces and prints each word on its own line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) throws Exception {
        String sentence = "A B C";
        String[] parts = sentence.split(" ");
        int index = 0;
        while (index < parts.length) {
            System.out.println(parts[index]);
            index++;
        }
    }
}
/*
A
B
C
*/
3. " ".split(" ") returns an empty array, so reading element 0 throws an ArrayIndexOutOfBoundsException
public class Main {
    /**
     * Demonstrates that splitting a lone space on a space yields an empty
     * array: reading its first element fails before anything is printed.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) throws Exception {
        String[] tokens = " ".split(" ");
        // tokens has no elements here, so this index access throws
        // ArrayIndexOutOfBoundsException.
        System.out.println(tokens[0]);
    }
}
/*
Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 0
at Main.main(Main.java:5)
*/
4. String.split() is based on regular expression
public class Main {
    /**
     * Splits a brace-delimited literal with a regular-expression character
     * class and prints every resulting token on its own line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) throws Exception {
        String braced = "{A}{this is a test}{1234}";
        // "[{}]" is a character class: either brace acts as a delimiter.
        String[] tokens = braced.split("[{}]");
        for (int i = 0; i < tokens.length; i++) {
            System.out.println(tokens[i]);
        }
    }
}
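/*
Output (lines shown as <empty> are printed as empty lines: the leading "{"
and every adjacent "}{" pair produce an empty token; only trailing empty
tokens are dropped):
<empty>
A
<empty>
this is a test
<empty>
1234
*/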
5. Split String
import java.util.HashMap;
import java.util.regex.Pattern;
public class StringHelper {
    /**
     * Split the given String into tokens.
     *
     * This method is meant to be similar to the split
     * function in other programming languages but it does
     * not use regular expressions. Rather the String is
     * split on a single String literal.
     *
     * Unlike java.util.StringTokenizer which accepts
     * multiple character tokens as delimiters, the delimiter
     * here is a single String literal.
     *
     * Each null token is returned as an empty String.
     * Delimiters are never returned as tokens.
     *
     * If there is no delimiter because it is either empty or
     * null, the only element in the result is the original String.
     *
     * StringHelper.split("1-2-3", "-");<br>
     * result: {"1","2","3"}<br>
     * StringHelper.split("-1--2-", "-");<br>
     * result: {"","1","","2",""}<br>
     * StringHelper.split("123", "");<br>
     * result: {"123"}<br>
     * StringHelper.split("1-2---3----4", "--");<br>
     * result: {"1-2","-3","","4"}<br>
     *
     * @param s String to be split.
     * @param delimiter String literal on which to split.
     * @return an array of tokens.
     * @throws NullPointerException if s is null.
     *
     * @since ostermillerutils 1.00.00
     */
    public static String[] split(String s, String delimiter) {
        return tokenize(s, delimiter, false);
    }

    /**
     * Split the given String into tokens. Delimiters will
     * be returned as tokens.
     *
     * This method is meant to be similar to the split
     * function in other programming languages but it does
     * not use regular expressions. Rather the String is
     * split on a single String literal.
     *
     * Unlike java.util.StringTokenizer which accepts
     * multiple character tokens as delimiters, the delimiter
     * here is a single String literal.
     *
     * Each null token is returned as an empty String.
     * Every occurrence of the delimiter is returned as its own
     * token, between the tokens it separates.
     *
     * If there is no delimiter because it is either empty or
     * null, the only element in the result is the original String.
     *
     * StringHelper.splitIncludeDelimiters("1-2-3", "-");<br>
     * result: {"1","-","2","-","3"}<br>
     * StringHelper.splitIncludeDelimiters("-1--2-", "-");<br>
     * result: {"","-","1","-","","-","2","-",""}<br>
     * StringHelper.splitIncludeDelimiters("123", "");<br>
     * result: {"123"}<br>
     * StringHelper.splitIncludeDelimiters("1-2--3---4----5", "--");<br>
     * result: {"1-2","--","3","--","-4","--","","--","5"}<br>
     *
     * @param s String to be split.
     * @param delimiter String literal on which to split.
     * @return an array of tokens, with the delimiters interleaved.
     * @throws NullPointerException if s is null.
     *
     * @since ostermillerutils 1.05.00
     */
    public static String[] splitIncludeDelimiters(String s, String delimiter) {
        return tokenize(s, delimiter, true);
    }

    /**
     * Shared implementation of both split variants.
     *
     * @param s String to be split.
     * @param delimiter String literal on which to split.
     * @param includeDelimiters whether each delimiter occurrence is emitted
     *        as a token of its own.
     * @return an array of tokens.
     * @throws NullPointerException if s is null.
     */
    private static String[] tokenize(String s, String delimiter, boolean includeDelimiters) {
        // Evaluated first on purpose: a null s must raise a
        // NullPointerException even when there is no usable delimiter.
        int stringLength = s.length();
        if (delimiter == null || delimiter.length() == 0) {
            // It is not inherently clear what to do without a delimiter.
            // The empty String can be found between every pair of
            // characters (any number of times), so instead of returning
            // multiple empty tokens the whole String is returned.
            return new String[] {s};
        }
        int delimiterLength = delimiter.length();

        // Two passes are used so the result array can be allocated with its
        // final size instead of being repeatedly resized and copied.
        // Pass 1: count the non-overlapping delimiter occurrences.
        int delimiterCount = 0;
        for (int at = s.indexOf(delimiter); at != -1;
                at = s.indexOf(delimiter, at + delimiterLength)) {
            delimiterCount++;
        }

        int tokenCount = includeDelimiters ? 2 * delimiterCount + 1 : delimiterCount + 1;
        String[] result = new String[tokenCount];

        // Pass 2: slice out the tokens.
        int next = 0;
        int start = 0;
        int end;
        while ((end = s.indexOf(delimiter, start)) != -1) {
            result[next++] = s.substring(start, end);
            if (includeDelimiters) {
                result[next++] = delimiter;
            }
            start = end + delimiterLength;
        }
        // Whatever follows the last delimiter (possibly nothing) is the
        // final token.
        result[next] = s.substring(start, stringLength);
        return result;
    }
}
6. String split on multicharacter delimiter
import java.util.List;
import java.util.ArrayList;
/**
* Utility methods for strings.
*
* @author <a href="mailto:[email protected]">Jonas Bonér </a>
*/
public class Strings {
    /**
     * String split on multicharacter delimiter. <p/>Written by Tim Quinn
     * ([email protected])
     *
     * The delimiter is matched literally (no regular expressions) and
     * occurrences are consumed left to right without overlapping. Empty
     * fields, including leading and trailing ones, are kept.
     *
     * @param stringToSplit the text to split
     * @param delimiter the literal separator, one or more characters long
     * @return the fields in order; an empty array when
     *         <code>stringToSplit</code> is empty; a single-element array
     *         holding <code>stringToSplit</code> when the delimiter is
     *         empty or does not occur
     * @throws NullPointerException if either argument is null
     */
    public static final String[] splitString(String stringToSplit, String delimiter) {
        // return Blank Array if stringToSplit == ""
        if (stringToSplit.equals("")) {
            return new String[0];
        }
        int delimiterLength = delimiter.length();
        // An empty delimiter matches at every index without consuming any
        // input, so scanning for it would never advance. Treat it as
        // "no delimiter present".
        if (delimiterLength == 0) {
            return new String[] {stringToSplit};
        }

        // count Field Entries (one more than the number of delimiters)
        int fieldCount = 1;
        for (int at = stringToSplit.indexOf(delimiter); at != -1;
                at = stringToSplit.indexOf(delimiter, at + delimiterLength)) {
            fieldCount++;
        }

        // populate the result
        String[] fields = new String[fieldCount];
        int from = 0;
        for (int i = 0; i < fieldCount - 1; i++) {
            int found = stringToSplit.indexOf(delimiter, from);
            fields[i] = stringToSplit.substring(from, found);
            from = found + delimiterLength;
        }
        // the last field runs from the final delimiter to the end
        fields[fieldCount - 1] = stringToSplit.substring(from);
        return fields;
    }
}
7. Split by dot/dollar
public class Main {
    /**
     * Splits a dot-separated literal and prints each part on its own line.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String args[]) throws Exception {
        String dotted = "A.BB.CCC";
        // The dot is escaped because split() interprets its argument as a
        // regular expression.
        String[] parts = dotted.split("\\.");
        for (int i = 0; i < parts.length; i++) {
            System.out.println(parts[i]);
        }
    }
}
/*
A
BB
CCC
*/
8. Split up a string into multiple strings based on a
delimiter
public class Main {
    /** An empty string constant */
    public static final String EMPTY = "";

    /**
     * Split up a string into multiple strings based on a delimiter.
     *
     * The delimiter is matched literally and may be longer than one
     * character; occurrences are consumed left to right without
     * overlapping. When a limit applies, the last element holds the
     * unsplit remainder of the string.
     *
     * @param string String to split up.
     * @param delim Delimiter.
     * @param limit Limit the number of strings to split into
     *              (zero or negative for no limit).
     * @return Array of strings; a single-element array holding
     *         <code>string</code> when <code>delim</code> is empty.
     * @throws NullPointerException if string or delim is null.
     */
    public static String[] split(final String string, final String delim,
                                 final int limit)
    {
        final int delimLength = delim.length();
        // An empty delimiter matches everywhere without consuming input;
        // treat it as "nothing to split on".
        if (delimLength == 0)
        {
            return new String[] {string};
        }

        // The number of elements is one more than the number of
        // (non-overlapping) delimiters, capped by limit when one is given.
        int count = countDelimiters(string, delim) + 1;
        if (limit > 0 && count > limit)
        {
            count = limit;
        }

        final String[] strings = new String[count];
        int begin = 0;
        for (int i = 0; i < count; i++)
        {
            // get the next index of delim
            int end = string.indexOf(delim, begin);

            // if there is no further delim or this is the last element
            // then the element runs to the end of the string
            if (end == -1 || i + 1 == count)
            {
                end = string.length();
            }

            strings[i] = (end == begin) ? EMPTY : string.substring(begin, end);

            // skip the whole delimiter, not just its first character
            begin = end + delimLength;
        }
        return strings;
    }

    /**
     * Split up a string into multiple strings based on a delimiter.
     *
     * @param string String to split up.
     * @param delim Delimiter.
     * @return Array of strings.
     * @throws NullPointerException if string or delim is null.
     */
    public static String[] split(final String string, final String delim)
    {
        return split(string, delim, -1);
    }

    /**
     * Count the number of instances of substring within a string.
     * Overlapping instances are counted individually, so
     * <code>count("aaa", "aa")</code> is 2.
     *
     * @param string String to look for substring in.
     * @param substring Sub-string to look for.
     * @return Count of substrings in string; 0 when substring is empty.
     * @throws NullPointerException if string or substring is null.
     */
    public static int count(final String string, final String substring)
    {
        // indexOf("") succeeds at every index, which would loop forever.
        if (substring.length() == 0)
        {
            return 0;
        }
        int count = 0;
        int idx = 0;
        while ((idx = string.indexOf(substring, idx)) != -1)
        {
            idx++;
            count++;
        }
        return count;
    }

    /**
     * Count the number of instances of character within a string.
     *
     * @param string String to look for the character in.
     * @param c Character to look for.
     * @return Count of occurrences of c in string.
     */
    public static int count(final String string, final char c)
    {
        return count(string, String.valueOf(c));
    }

    /**
     * Count the non-overlapping occurrences of a non-empty delimiter,
     * scanning left to right exactly the way split consumes them.
     */
    private static int countDelimiters(final String string, final String delim)
    {
        int found = 0;
        for (int at = string.indexOf(delim); at != -1;
                at = string.indexOf(delim, at + delim.length()))
        {
            found++;
        }
        return found;
    }
}
9. Splits a string around matches of the given delimiter
character.
import java.util.StringTokenizer;
public class Main {
    /**
     * Splits a string around matches of the given delimiter character.
     *
     * Where applicable, this method can be used as a substitute for
     * <code>String.split(String regex)</code>, which is not available
     * on a JSR169/Java ME platform.
     *
     * Tokens are produced by <code>java.util.StringTokenizer</code>, so
     * runs of adjacent delimiters, as well as leading and trailing
     * delimiters, never yield empty tokens.
     *
     * @param str the string to be split
     * @param delim the delimiter
     * @return the non-empty tokens of <code>str</code> in order; an empty
     *         array when <code>str</code> contains no token
     * @throws NullPointerException if str is null
     */
    public static String[] split(String str, char delim)
    {
        if (str == null) {
            throw new NullPointerException("str can't be null");
        }

        // Note the javadoc on StringTokenizer:
        //     StringTokenizer is a legacy class that is retained for
        //     compatibility reasons although its use is discouraged in
        //     new code.
        // In other words, if StringTokenizer is ever removed from the JDK,
        // we need to have a look at String.split() (or java.util.regex)
        // if it is supported on a JSR169/Java ME platform by then.
        StringTokenizer st = new StringTokenizer(str, String.valueOf(delim));
        int n = st.countTokens();
        String[] s = new String[n];
        for (int i = 0; i < n; i++) {
            s[i] = st.nextToken();
        }
        return s;
    }
}
10. Splits the provided text into an array, separator
string specified. Returns a maximum of max substrings
import java.util.ArrayList;
import java.util.List;
public class Main {
    /**
     * Splits the provided text into an array, separator string specified.
     * Returns a maximum of <code>max</code> substrings.
     *
     * The separator(s) will not be included in the returned String array.
     * Adjacent separators are treated as one separator.
     *
     * A <code>null</code> input String returns <code>null</code>.
     * A <code>null</code> separator splits on whitespace.
     *
     * <pre>
     * StringUtils.splitByWholeSeparator(null, *, *)               = null
     * StringUtils.splitByWholeSeparator("", *, *)                 = []
     * StringUtils.splitByWholeSeparator("ab de fg", null, 0)      = ["ab", "de", "fg"]
     * StringUtils.splitByWholeSeparator("ab   de fg", null, 0)    = ["ab", "de", "fg"]
     * StringUtils.splitByWholeSeparator("ab:cd:ef", ":", 2)       = ["ab", "cd:ef"]
     * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-", 5) = ["ab", "cd", "ef"]
     * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-", 2) = ["ab", "cd-!-ef"]
     * </pre>
     *
     * @param str  the String to parse, may be null
     * @param separator  String containing the String to be used as a delimiter,
     *  <code>null</code> splits on whitespace
     * @param max  the maximum number of elements to include in the returned
     *  array. A zero or negative value implies no limit.
     * @return an array of parsed Strings, <code>null</code> if null String was input
     */
    public static String[] splitByWholeSeparator(String str, String separator, int max) {
        return splitByWholeSeparatorWorker(str, separator, max, false);
    }

    /**
     * Performs the logic for the <code>splitByWholeSeparatorPreserveAllTokens</code> methods.
     *
     * @param str  the String to parse, may be <code>null</code>
     * @param separator  String containing the String to be used as a delimiter,
     *  <code>null</code> (or the empty String) splits on whitespace
     * @param max  the maximum number of elements to include in the returned
     *  array. A zero or negative value implies no limit.
     * @param preserveAllTokens if <code>true</code>, adjacent separators are
     *  treated as empty token separators; if <code>false</code>, adjacent
     *  separators are treated as one separator.
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.4
     */
    private static String[] splitByWholeSeparatorWorker(String str, String separator, int max,
                                                        boolean preserveAllTokens) {
        if (str == null) {
            return null;
        }

        int len = str.length();
        if (len == 0) {
            return new String[0];
        }

        if ((separator == null) || ("".equals(separator))) {
            // Split on whitespace.
            return splitWorker(str, null, max, preserveAllTokens);
        }

        int separatorLength = separator.length();

        List<String> substrings = new ArrayList<String>();
        int numberOfSubstrings = 0;
        int beg = 0;
        int end = 0;
        while (end < len) {
            end = str.indexOf(separator, beg);

            if (end > -1) {
                if (end > beg) {
                    numberOfSubstrings += 1;

                    if (numberOfSubstrings == max) {
                        // Limit reached: the rest of the String is the last token.
                        end = len;
                        substrings.add(str.substring(beg));
                    } else {
                        // The following is OK, because String.substring( beg, end )
                        // excludes the character at the position 'end'.
                        substrings.add(str.substring(beg, end));

                        // Set the starting point for the next search.
                        // The following is equivalent to
                        // beg = end + (separatorLength - 1) + 1,
                        // which is the right calculation:
                        beg = end + separatorLength;
                    }
                } else {
                    // We found a consecutive occurrence of the separator, so skip it.
                    if (preserveAllTokens) {
                        numberOfSubstrings += 1;
                        if (numberOfSubstrings == max) {
                            end = len;
                            substrings.add(str.substring(beg));
                        } else {
                            substrings.add("");
                        }
                    }
                    beg = end + separatorLength;
                }
            } else {
                // String.substring( beg ) goes from 'beg' to the end of the String.
                substrings.add(str.substring(beg));
                end = len;
            }
        }

        return substrings.toArray(new String[substrings.size()]);
    }

    /**
     * Performs the logic for the <code>split</code> and
     * <code>splitPreserveAllTokens</code> methods that return a maximum array
     * length.
     *
     * @param str  the String to parse, may be <code>null</code>
     * @param separatorChars the characters that each act as a separator;
     *  <code>null</code> means any whitespace character
     * @param max  the maximum number of elements to include in the
     *  array. A zero or negative value implies no limit.
     * @param preserveAllTokens if <code>true</code>, adjacent separators are
     *  treated as empty token separators; if <code>false</code>, adjacent
     *  separators are treated as one separator.
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    private static String[] splitWorker(String str, String separatorChars,
                                        int max, boolean preserveAllTokens) {
        // Direct code is used instead of StringTokenizer.
        // Also, StringTokenizer uses isSpace() not isWhitespace()

        if (str == null) {
            return null;
        }
        int len = str.length();
        if (len == 0) {
            return new String[0];
        }
        List<String> list = new ArrayList<String>();
        int sizePlus1 = 1;
        int i = 0;
        int start = 0;
        boolean match = false;      // a non-separator char was seen since the last token
        boolean lastMatch = false;  // the most recent char closed a token
        while (i < len) {
            if (isSeparator(str.charAt(i), separatorChars)) {
                if (match || preserveAllTokens) {
                    lastMatch = true;
                    if (sizePlus1++ == max) {
                        // Limit reached: swallow the remainder into this token.
                        i = len;
                        lastMatch = false;
                    }
                    list.add(str.substring(start, i));
                    match = false;
                }
                start = ++i;
                continue;
            }
            lastMatch = false;
            match = true;
            i++;
        }
        if (match || (preserveAllTokens && lastMatch)) {
            list.add(str.substring(start, i));
        }
        return list.toArray(new String[list.size()]);
    }

    /**
     * Tells whether <code>c</code> separates tokens: any whitespace when
     * <code>separatorChars</code> is null, otherwise any character
     * contained in <code>separatorChars</code>.
     */
    private static boolean isSeparator(char c, String separatorChars) {
        if (separatorChars == null) {
            return Character.isWhitespace(c);
        }
        return separatorChars.indexOf(c) >= 0;
    }
}
11. Splits the provided text into an array, using
whitespace as the separator, preserving all tokens,
including empty tokens created by adjacent separators.
import java.util.ArrayList;
import java.util.List;
public class Main {
    /**
     * Splits the provided text into an array, using whitespace as the
     * separator, preserving all tokens, including empty tokens created by
     * adjacent separators. This is an alternative to using StringTokenizer.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     *
     * The separator is not included in the returned String array.
     * Adjacent separators are treated as separators for empty tokens.
     * For more control over the split use the StrTokenizer class.
     *
     * A <code>null</code> input String returns <code>null</code>.
     *
     * <pre>
     * StringUtils.splitPreserveAllTokens(null)       = null
     * StringUtils.splitPreserveAllTokens("")         = []
     * StringUtils.splitPreserveAllTokens("abc def")  = ["abc", "def"]
     * StringUtils.splitPreserveAllTokens("abc  def") = ["abc", "", "def"]
     * StringUtils.splitPreserveAllTokens(" abc ")    = ["", "abc", ""]
     * </pre>
     *
     * @param str  the String to parse, may be <code>null</code>
     * @return an array of parsed Strings, <code>null</code> if null String input
     * @since 2.1
     */
    public static String[] splitPreserveAllTokens(String str) {
        return splitWorker(str, null, -1, true);
    }

    /**
     * Performs the logic for the <code>split</code> and
     * <code>splitPreserveAllTokens</code> methods that return a maximum array
     * length.
     *
     * @param str  the String to parse, may be <code>null</code>
     * @param separatorChars the characters that each act as a separator;
     *  <code>null</code> means any whitespace character
     * @param max  the maximum number of elements to include in the
     *  array. A zero or negative value implies no limit.
     * @param preserveAllTokens if <code>true</code>, adjacent separators are
     *  treated as empty token separators; if <code>false</code>, adjacent
     *  separators are treated as one separator.
     * @return an array of parsed Strings, <code>null</code> if null String input
     */
    private static String[] splitWorker(String str, String separatorChars, int max,
                                        boolean preserveAllTokens) {
        // Direct code is used instead of StringTokenizer.
        // Also, StringTokenizer uses isSpace() not isWhitespace()

        if (str == null) {
            return null;
        }
        int len = str.length();
        if (len == 0) {
            return new String[0];
        }
        List<String> list = new ArrayList<String>();
        int sizePlus1 = 1;
        int i = 0;
        int start = 0;
        boolean match = false;      // a non-separator char was seen since the last token
        boolean lastMatch = false;  // the most recent char closed a token
        while (i < len) {
            if (isSeparator(str.charAt(i), separatorChars)) {
                if (match || preserveAllTokens) {
                    lastMatch = true;
                    if (sizePlus1++ == max) {
                        // Limit reached: swallow the remainder into this token.
                        i = len;
                        lastMatch = false;
                    }
                    list.add(str.substring(start, i));
                    match = false;
                }
                start = ++i;
                continue;
            }
            lastMatch = false;
            match = true;
            i++;
        }
        if (match || (preserveAllTokens && lastMatch)) {
            list.add(str.substring(start, i));
        }
        return list.toArray(new String[list.size()]);
    }

    /**
     * Tells whether <code>c</code> separates tokens: any whitespace when
     * <code>separatorChars</code> is null, otherwise any character
     * contained in <code>separatorChars</code>.
     */
    private static boolean isSeparator(char c, String separatorChars) {
        if (separatorChars == null) {
            return Character.isWhitespace(c);
        }
        return separatorChars.indexOf(c) >= 0;
    }
}