001/*
002 * Copyright 2010-2015 Institut Pasteur.
003 * 
004 * This file is part of Icy.
005 * 
006 * Icy is free software: you can redistribute it and/or modify
007 * it under the terms of the GNU General Public License as published by
008 * the Free Software Foundation, either version 3 of the License, or
009 * (at your option) any later version.
010 * 
011 * Icy is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 * 
016 * You should have received a copy of the GNU General Public License
017 * along with Icy. If not, see <http://www.gnu.org/licenses/>.
018 */
019package icy.util;
020
021import icy.math.MathUtil;
022
023import java.util.ArrayList;
024import java.util.Comparator;
025import java.util.List;
026import java.util.regex.Matcher;
027import java.util.regex.Pattern;
028
029/**
030 * @author stephane
031 */
032public class StringUtil
033{
034    /*
035     * The Alphanum Algorithm is an improved sorting algorithm for strings
036     * containing numbers. Instead of sorting numbers in ASCII order like
037     * a standard sort, this algorithm sorts numbers in numeric order.
038     * 
039     * The Alphanum Algorithm is discussed at http://www.DaveKoelle.com
040     * 
041     * This library is free software; you can redistribute it and/or
042     * modify it under the terms of the GNU Lesser General Public
043     * License as published by the Free Software Foundation; either
044     * version 2.1 of the License, or any later version.
045     * 
046     * This library is distributed in the hope that it will be useful,
047     * but WITHOUT ANY WARRANTY; without even the implied warranty of
048     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
049     * Lesser General Public License for more details.
050     * 
051     * You should have received a copy of the GNU Lesser General Public
052     * License along with this library; if not, write to the Free Software
053     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
054     */
055    /**
056     * This is an updated version of Alphanum Algorithm Comparator
057     * with enhancements made by Daniel Migowski, Andre Bogus, and David Koelle
058     */
059    public static class AlphanumComparator implements Comparator<String>
060    {
061        /** Length of string is passed in for improved efficiency (only need to calculate it once) **/
062        private final static String getChunk(String s, int slength, int index)
063        {
064            int marker = index;
065            StringBuilder chunk = new StringBuilder();
066            char c = s.charAt(marker);
067            chunk.append(c);
068            marker++;
069            if (Character.isDigit(c))
070            {
071                while (marker < slength)
072                {
073                    c = s.charAt(marker);
074                    if (!Character.isDigit(c))
075                        break;
076                    chunk.append(c);
077                    marker++;
078                }
079            }
080            else
081            {
082                while (marker < slength)
083                {
084                    c = s.charAt(marker);
085                    if (Character.isDigit(c))
086                        break;
087                    chunk.append(c);
088                    marker++;
089                }
090            }
091            return chunk.toString();
092        }
093
094        @Override
095        public int compare(String s1, String s2)
096        {
097            int thisMarker = 0;
098            int thatMarker = 0;
099            int s1Length = s1.length();
100            int s2Length = s2.length();
101
102            while (thisMarker < s1Length && thatMarker < s2Length)
103            {
104                String thisChunk = getChunk(s1, s1Length, thisMarker);
105                thisMarker += thisChunk.length();
106
107                String thatChunk = getChunk(s2, s2Length, thatMarker);
108                thatMarker += thatChunk.length();
109
110                // If both chunks contain numeric characters, sort them numerically
111                int result = 0;
112                if (Character.isDigit(thisChunk.charAt(0)) && Character.isDigit(thatChunk.charAt(0)))
113                {
114                    // Simple chunk comparison by length.
115                    int thisChunkLength = thisChunk.length();
116                    result = thisChunkLength - thatChunk.length();
117                    // If equal, the first different number counts
118                    if (result == 0)
119                    {
120                        for (int i = 0; i < thisChunkLength; i++)
121                        {
122                            result = thisChunk.charAt(i) - thatChunk.charAt(i);
123
124                            if (result != 0)
125                                return result;
126                        }
127                    }
128                }
129                else
130                    result = thisChunk.compareTo(thatChunk);
131
132                if (result != 0)
133                    return result;
134            }
135
136            return s1Length - s2Length;
137        }
138    }
139
140    /**
141     * Return defaultValue if value is empty
142     */
143    public static String getValue(String value, String defaultValue)
144    {
145        if (StringUtil.isEmpty(value))
146            return defaultValue;
147
148        return value;
149    }
150
151    /**
152     * Returns the next number found from specified <code>startIndex</code> in specified string.<br>
153     * Returns an empty string if no number was found.
154     */
155    public static CharSequence getNextNumber(CharSequence text, int index)
156    {
157        final int len = text.length();
158
159        // get starting digit char index
160        final int st = getNextDigitCharIndex(text, index);
161
162        // we find a digit char ?
163        if (st >= 0)
164        {
165            // get ending digit char index
166            int end = StringUtil.getNextNonDigitCharIndex(text, st);
167            if (end < 0)
168                end = len;
169
170            // get value
171            return text.subSequence(st, end);
172        }
173
174        return "";
175    }
176
177    /**
178     * Return the index of previous digit char from specified index in specified string<br>
179     * return -1 if not found
180     */
181    public static int getPreviousDigitCharIndex(CharSequence value, int from)
182    {
183        final int len = value.length();
184
185        if (from >= len)
186            return -1;
187
188        int index = from;
189        while (index >= 0)
190        {
191            if (Character.isDigit(value.charAt(index)))
192                return index;
193            index--;
194        }
195
196        return -1;
197    }
198
199    /**
200     * Return the index of previous letter char from specified index in specified string<br>
201     * return -1 if not found
202     */
203    public static int getPreviousLetterCharIndex(CharSequence value, int from)
204    {
205        final int len = value.length();
206
207        if (from >= len)
208            return -1;
209
210        int index = from;
211        while (index >= 0)
212        {
213            if (Character.isLetter(value.charAt(index)))
214                return index;
215            index--;
216        }
217
218        return -1;
219    }
220
221    /**
222     * Return the index of previous non digit char from specified index in specified string<br>
223     * return -1 if not found
224     */
225    public static int getPreviousNonDigitCharIndex(CharSequence value, int from)
226    {
227        final int len = value.length();
228
229        if (from >= len)
230            return -1;
231
232        int index = from;
233        while (index >= 0)
234        {
235            if (!Character.isDigit(value.charAt(index)))
236                return index;
237            index--;
238        }
239
240        return -1;
241    }
242
243    /**
244     * Return the index of previous non letter char from specified index in specified string<br>
245     * Return -1 if not found.
246     */
247    public static int getPreviousNonLetterCharIndex(CharSequence value, int from)
248    {
249        final int len = value.length();
250
251        if (from >= len)
252            return -1;
253
254        int index = from;
255        while (index >= 0)
256        {
257            if (!Character.isLetter(value.charAt(index)))
258                return index;
259            index--;
260        }
261
262        return -1;
263    }
264
265    /**
266     * Return the index of next digit char from specified index in specified string<br>
267     * return -1 if not found
268     */
269    public static int getNextDigitCharIndex(CharSequence value, int from)
270    {
271        final int len = value.length();
272
273        if (from < 0)
274            return -1;
275
276        int index = from;
277        while (index < len)
278        {
279            if (Character.isDigit(value.charAt(index)))
280                return index;
281            index++;
282        }
283
284        return -1;
285    }
286
287    /**
288     * Return the index of next letter char from specified index in specified string<br>
289     * return -1 if not found
290     */
291    public static int getNextLetterCharIndex(CharSequence value, int from)
292    {
293        final int len = value.length();
294
295        if (from < 0)
296            return -1;
297
298        int index = from;
299        while (index < len)
300        {
301            if (Character.isDigit(value.charAt(index)))
302                return index;
303            index++;
304        }
305
306        return -1;
307    }
308
309    /**
310     * Return the index of next non digit char from specified index in specified string<br>
311     * return -1 if not found
312     */
313    public static int getNextNonDigitCharIndex(CharSequence value, int from)
314    {
315        final int len = value.length();
316
317        if (from < 0)
318            return -1;
319
320        int index = from;
321        while (index < len)
322        {
323            if (!Character.isDigit(value.charAt(index)))
324                return index;
325            index++;
326        }
327
328        return -1;
329    }
330
331    /**
332     * Return the index of next non letter char from specified index in specified string<br>
333     * return -1 if not found
334     */
335    public static int getNextNonLetterCharIndex(CharSequence value, int from)
336    {
337        final int len = value.length();
338
339        if (from < 0)
340            return -1;
341
342        int index = from;
343        while (index < len)
344        {
345            if (!Character.isLetter(value.charAt(index)))
346                return index;
347            index++;
348        }
349
350        return -1;
351    }
352
353    /**
354     * Return the index of next control char from specified <code>startIndex</code> in specified
355     * string.<br>
356     * return -1 if no control character found.
357     */
358    public static int getNextCtrlCharIndex(CharSequence value, int startIndex)
359    {
360        final int len = value.length();
361
362        if (startIndex < 0)
363            return -1;
364
365        int index = startIndex;
366        while (index < len)
367        {
368            if (Character.isISOControl(value.charAt(index)))
369                return index;
370            index++;
371        }
372
373        return -1;
374    }
375
376    /**
377     * Limit the length of the specified string to maxlen.
378     */
379    public static String limit(String value, int maxlen, boolean tailLimit)
380    {
381        if (value == null)
382            return null;
383
384        final int len = value.length();
385
386        if (len > maxlen)
387        {
388            // simple truncation
389            if (tailLimit || (maxlen <= 8))
390                return value.substring(0, maxlen - 2).trim() + "...";
391
392            // cut center
393            final int cut = (maxlen - 3) / 2;
394            return value.substring(0, cut).trim() + "..." + value.substring(len - cut).trim();
395        }
396
397        return value;
398    }
399
400    /**
401     * Limit the length of the specified string to maxlen.
402     */
403    public static String limit(String value, int maxlen)
404    {
405        return limit(value, maxlen, false);
406    }
407
408    /**
409     * Truncate the text to a specific size, according a keyword.<br>
410     * The text will be truncated around the place where the keyword is found.<br>
411     * If the string is found at the beginning, the text will be like this:<br/>
412     * <b><center>Lorem ipsum dolor sit amet, consec...</center><b/>
413     * 
414     * @param fullText
415     *        : text to be truncated.
416     * @param keyword
417     *        : string to be found in the text and truncated around.
418     * @param maxSize
419     *        : max size of the string
420     */
421    public static String trunc(String fullText, String keyword, int maxSize)
422    {
423        int idx = fullText.toLowerCase().indexOf(keyword.toLowerCase());
424
425        // key not found
426        if (idx == -1)
427            return "";
428
429        String toReturn = fullText;
430        int fullTextSize = fullText.length();
431
432        if (fullTextSize > maxSize)
433        {
434            int firstSpaceAfter;
435            String textBeforeWord;
436            int lastSpaceBefore;
437
438            // extract the full word from the text
439            firstSpaceAfter = fullText.indexOf(' ', idx);
440            firstSpaceAfter = firstSpaceAfter == -1 ? fullTextSize : firstSpaceAfter;
441
442            textBeforeWord = fullText.substring(0, idx);
443            lastSpaceBefore = textBeforeWord.lastIndexOf(' ');
444            lastSpaceBefore = lastSpaceBefore == -1 ? 0 : lastSpaceBefore;
445
446            // determine if we are at the beginning, the end, or at the middle
447            if (idx <= maxSize / 2)
448            {
449                toReturn = fullText.substring(0, maxSize);
450                toReturn = toReturn.trim() + "...";
451            }
452            else if ((fullTextSize - idx) <= maxSize / 2)
453            {
454                toReturn = fullText.substring(fullTextSize - maxSize, fullTextSize);
455                toReturn = "..." + toReturn.trim();
456            }
457            else
458            {
459                int beginIndex = idx - maxSize / 2;
460                int endIndex = idx + maxSize / 2;
461                if (endIndex > fullTextSize)
462                    System.out.println(endIndex);
463                // beginIndex = beginIndex < 0 ? 0 : beginIndex;
464                // endIndex = endIndex > fullTextSize ? fullTextSize : endIndex;
465                toReturn = "..." + fullText.substring(beginIndex, endIndex).trim() + "...";
466            }
467        }
468
469        return toReturn;
470    }
471
472    /**
473     * Return true if the specified String are exactly the same.
474     * 
475     * @param trim
476     *        if true then string are trimmed before comparison
477     */
478    public static boolean equals(String s1, String s2, boolean trim)
479    {
480        if (isEmpty(s1, trim))
481            return isEmpty(s2, trim);
482        else if (isEmpty(s2, trim))
483            return false;
484
485        if (trim)
486            return s1.trim().equals(s2.trim());
487
488        return s1.equals(s2);
489    }
490
491    /**
492     * Return true if the specified String are exactly the same
493     */
494    public static boolean equals(String s1, String s2)
495    {
496        return equals(s1, s2, false);
497    }
498
499    /**
500     * Return true if the specified String is empty.
501     * 
502     * @param trim
503     *        trim the String before doing the empty test
504     */
505    public static boolean isEmpty(String value, boolean trim)
506    {
507        if (value != null)
508        {
509            if (trim)
510                return value.trim().length() == 0;
511
512            return value.length() == 0;
513        }
514
515        return true;
516    }
517
518    /**
519     * Return true if the specified String is empty.
520     * The String is trimed by default before doing the test
521     */
522    public static boolean isEmpty(String value)
523    {
524        return isEmpty(value, true);
525    }
526
527    /**
528     * Try to parse a boolean from the specified String and return it.
529     * Return 'def' is we can't parse any boolean from the string.
530     */
531    public static boolean parseBoolean(String s, boolean def)
532    {
533        if (s == null)
534            return def;
535
536        final String value = s.toLowerCase();
537
538        if (value.equals(Boolean.toString(true)))
539            return true;
540        if (value.equals(Boolean.toString(false)))
541            return false;
542
543        return def;
544    }
545
546    /**
547     * Try to parse a integer from the specified String and return it.
548     * Return 'def' is we can't parse any integer from the string.
549     */
550    public static int parseInt(String s, int def)
551    {
552        try
553        {
554            return Integer.parseInt(s);
555        }
556        catch (NumberFormatException E)
557        {
558            return def;
559        }
560    }
561
562    /**
563     * Try to parse a long integer from the specified String and return it.
564     * Return 'def' is we can't parse any integer from the string.
565     */
566    public static long parseLong(String s, long def)
567    {
568        try
569        {
570            return Long.parseLong(s);
571        }
572        catch (NumberFormatException E)
573        {
574            return def;
575        }
576    }
577
578    /**
579     * Try to parse a float from the specified String and return it.
580     * Return 'def' is we can't parse any float from the string.
581     */
582    public static float parseFloat(String s, float def)
583    {
584        try
585        {
586            return Float.parseFloat(s);
587        }
588        catch (NumberFormatException E)
589        {
590            return def;
591        }
592    }
593
594    /**
595     * Try to parse a double from the specified String and return it.
596     * Return 'def' is we can't parse any double from the string.
597     */
598    public static double parseDouble(String s, double def)
599    {
600        try
601        {
602            return Double.parseDouble(s);
603        }
604        catch (NumberFormatException E)
605        {
606            return def;
607        }
608    }
609
610    /**
611     * Try to parse a array of byte from the specified String and return it.
612     * Return 'def' is we can't parse any array of byte from the string.
613     */
614    public static byte[] parseBytes(String s, byte[] def)
615    {
616        if (s == null)
617            return def;
618
619        return s.getBytes();
620    }
621
622    /**
623     * Returns a <tt>String</tt> object representing the specified
624     * boolean. If the specified boolean is <code>true</code>, then
625     * the string {@code "true"} will be returned, otherwise the
626     * string {@code "false"} will be returned.
627     */
628    public static String toString(boolean value)
629    {
630        return Boolean.toString(value);
631    }
632
633    /**
634     * Returns a <code>String</code> object representing the specified integer.
635     */
636    public static String toString(int value)
637    {
638        return Integer.toString(value);
639    }
640
641    /**
642     * Returns a <code>String</code> object representing the specified integer.<br>
643     * If the returned String is shorter than specified length<br>
644     * then leading '0' are added to the string.
645     */
646    public static String toString(int value, int minSize)
647    {
648        String result = Integer.toString(value);
649
650        while (result.length() < minSize)
651            result = "0" + result;
652
653        return result;
654    }
655
656    /**
657     * Returns a <code>String</code> object representing the specified <code>long</code>.
658     */
659    public static String toString(long value)
660    {
661        return Long.toString(value);
662    }
663
664    /**
665     * Returns a string representation of the <code>float</code> argument.
666     */
667    public static String toString(float value)
668    {
669        return Float.toString(value);
670    }
671
672    /**
673     * Returns a string representation of the <code>double</code> argument.
674     */
675    public static String toString(double value)
676    {
677        final int i = (int) value;
678
679        if (i == value)
680            return toString(i);
681
682        return Double.toString(value);
683    }
684
685    /**
686     * Returns a string representation of the <code>double</code> argument
687     * with specified number of decimal.
688     */
689    public static String toString(double value, int numDecimal)
690    {
691        return Double.toString(MathUtil.round(value, numDecimal));
692    }
693
694    /**
695     * Returns a string representation of the <code>double</code> argument with specified size :<br>
696     * <code>toString(1.23456, 5)</code> --> <code>"1.2345"</code><br>
697     * <code>toString(123.4567, 4)</code> --> <code>"123.4"</code><br>
698     * <code>toString(1234.567, 2)</code> --> <code>"1234"</code> as we never trunk integer part.<br>
699     * <code>toString(1234.5, 10)</code> --> <code>"1234.5"</code> as we never trunk integer part.<br>
700     */
701    public static String toStringEx(double value, int size)
702    {
703        final int i = (int) value;
704
705        if (i == value)
706            return toString(i);
707
708        return Double.toString(MathUtil.roundSignificant(value, size, true));
709    }
710
711    /**
712     * Return a string representation of the byte array argument.
713     */
714    public static String toString(byte[] value)
715    {
716        return new String(value);
717    }
718
719    /**
720     * Returns a string representation of the integer argument as an
721     * unsigned integer in base 16.
722     */
723    public static String toHexaString(int value)
724    {
725        return Integer.toHexString(value);
726    }
727
728    /**
729     * Returns a string representation of the integer argument as an
730     * unsigned integer in base 16.<br>
731     * Force the returned string to have the specified size :<br>
732     * If the string is longer then only last past is kept.<br>
733     * If the string is shorter then leading 0 are added to the string.
734     */
735    public static String toHexaString(int value, int size)
736    {
737        String result = Integer.toHexString(value);
738
739        if (result.length() > size)
740            return result.substring(result.length() - size);
741
742        while (result.length() < size)
743            result = "0" + result;
744        return result;
745    }
746
747    /**
748     * Remove <code>count</code> characters from the end of specified string.
749     */
750    public static String removeLast(String value, int count)
751    {
752        if (value == null)
753            return null;
754
755        final int l = value.length();
756
757        if (l < 2)
758            return "";
759
760        return value.substring(0, l - count);
761    }
762
763    /**
764     * Creates a flattened version of the provided String. The flattening operation splits the
765     * string by inserting spaces between words starting with an upper case letter, and converts
766     * upper case letters to lower case (with the exception of the first word). Note that
767     * <b>consecutive upper case letters will remain grouped</b>, as they are considered to
768     * represent an acronym.<br/>
769     * <br/>
770     * <u>NOTE:</u> This method is optimized for class names that follow the Java naming convention. <br/>
771     * Examples:<br/>
772     * MyGreatClass -> "My great class"<br/>
773     * MyXYZClass -> "My XYZ class"
774     * 
775     * @param string
776     *        the string to flatten
777     * @return a flattened (i.e. pretty-printed) String based on the name of the string
778     */
779    public static String getFlattened(String string)
780    {
781        String[] words = string.split("(?=[A-Z])");
782
783        String output = words[0];
784        if (words.length > 1)
785        {
786            // words[0] is always empty here
787            output = words[1];
788
789            for (int i = 2; i < words.length; i++)
790            {
791                String word = words[i];
792                if (word.length() == 1)
793                {
794                    // single letter
795                    if (words[i - 1].length() == 1)
796                    {
797                        // append to the previous letter (acronym)
798                        output += word;
799                    }
800                    else
801                    {
802                        // new isolated letter or acronym
803                        output += " " + word;
804                    }
805                }
806                else
807                    output += " " + word.toLowerCase();
808            }
809        }
810
811        return output;
812    }
813
814    /**
815     * Replace all C line break sequence : <code>"\n", "\r", "\r\n"</code><br>
816     * from the specified <code>text</code> by <code>str</code>.
817     */
818    public static String replaceCR(String text, String str)
819    {
820        return text.replaceAll("(\r\n|\n\r|\r|\n)", str);
821    }
822
823    /**
824     * Remove all C line break sequence : <code>"\n", "\r", "\r\n"</code><br>
825     * from the specified text.
826     */
827    public static String removeCR(String text)
828    {
829        return replaceCR(text, "");
830    }
831
832    /**
833     * Convert the C line break sequence : <code>"\n", "\r", "\r\n"</code><br>
834     * to HTML line break sequence.
835     */
836    public static String toHtmlCR(String text)
837    {
838        return replaceCR(text, "<br>").replaceAll("(<BR>|<br/>|<BR/>)", "<br>");
839    }
840
841    /**
842     * Return true if the specified text contains HTML line break sequence.
843     */
844    public static boolean containHtmlCR(String text)
845    {
846        return (text.indexOf("<br>") != -1) || (text.indexOf("<BR>") != -1) || (text.indexOf("<br/>") != -1)
847                || (text.indexOf("<BR/>") != -1);
848    }
849
850    /**
851     * Bold (inserting HTML bold tag) the specified keyword in the text.
852     */
853    public static String htmlBoldSubstring(String text, String keyword, boolean ignoreCase)
854    {
855        // right now we just ignore 'b' keyword with produce error because of the <b> sequence.
856        if (!isEmpty(text) && !isEmpty(keyword) && !keyword.toLowerCase().equals("b"))
857        {
858            final int keywordLen = keyword.length();
859            final String key;
860
861            if (ignoreCase)
862                key = keyword.toLowerCase();
863            else
864                key = keyword;
865
866            String result = text;
867            int index;
868
869            if (ignoreCase)
870                index = result.toLowerCase().indexOf(key);
871            else
872                index = result.indexOf(key);
873
874            while (index != -1)
875            {
876                result = result.substring(0, index) + "<b>" + result.substring(index, index + keywordLen) + "</b>"
877                        + result.substring(index + keywordLen);
878
879                if (ignoreCase)
880                    index = result.toLowerCase().indexOf(key, index + keywordLen + 6);
881                else
882                    index = result.indexOf(key, index + keywordLen + 6);
883            }
884
885            return result;
886        }
887
888        return text;
889    }
890
891    /**
892     * Split a text into word based on space character while preserving quoted sentences.
893     * 
894     * @param text
895     *        text to split into word.<br>
896     *        Example:<br>
897     *        <i>this book is named "the red cat"</i> --> <br>
898     *        <li>this</li>
899     *        <li>book</li>
900     *        <li>is</li>
901     *        <li>named</li>
902     *        <li>the red cat</li>
903     * @return String array representing words
904     */
905    public static List<String> split(String text)
906    {
907        // want to preserve quoted string as single words
908        final List<String> result = new ArrayList<String>();
909        final Matcher m = Pattern.compile("([^\"]\\S*|\".+?\")\\s*").matcher(text);
910
911        while (m.find())
912            result.add(m.group(1).replace("\"", ""));
913
914        return result;
915    }
916
917    /**
918     * Converts wildcard to regular expression.
919     * 
920     * @param wildcard
921     * @return regex
922     */
923    public static String wildcardToRegex(String wildcard)
924    {
925        final StringBuffer s = new StringBuffer(wildcard.length());
926
927        s.append('^');
928        for (int i = 0, is = wildcard.length(); i < is; i++)
929        {
930            char c = wildcard.charAt(i);
931            switch (c)
932            {
933                case '*':
934                    s.append(".*");
935                    break;
936                case '?':
937                    s.append(".");
938                    break;
939                case '(':
940                case ')':
941                case '[':
942                case ']':
943                case '$':
944                case '^':
945                case '.':
946                case '{':
947                case '}':
948                case '|':
949                case '\\':
950                    s.append("\\");
951                    s.append(c);
952                    break;
953                default:
954                    s.append(c);
955                    break;
956            }
957        }
958        s.append('$');
959
960        return (s.toString());
961    }
962}