diff --git a/lib/jcharset-2.1/CHANGES.txt b/lib/jcharset-2.1/CHANGES.txt new file mode 100644 index 0000000..1edc83d --- /dev/null +++ b/lib/jcharset-2.1/CHANGES.txt @@ -0,0 +1,110 @@ + + +CHANGES IN THE 2.1 RELEASE +-------------------------- +- Added CR padding support to PackedGSMCharset. +- Added CRCCPackedGSMCharset and CRSCPackedGSMCharset packed GSM variants with CR padding enabled. +- Added KZ-1048 charset, with aliases STRK1048-2002, RK1048, csKZ1048. +- Improved javadocs. + + + + +CHANGES IN THE 2.0 RELEASE +-------------------------- +- Added 32 national variants of the ISO/IEC 646 charset. +- Moved GSM classes to separate sub-package. +- Changed UTF-7 decoding to be lenient in accepting trailing zero bits in shift sequences. +- Changed UTF7Charset.contains to reflect full Unicode equivalency. +- Added a command-line utility supporting file charset conversion. +- Added ByteLookupCharset.createTable utility method. +- Generalized createInverseLookupTableDefinition to Utils.toInverseLookupTableDefinition. +- Applied many refactorings, simplifications, clarifications and clean-ups. +- Applied various optimizations to encode/decode loops. +- Improved docs. + + + + +CHANGES IN THE 1.6 RELEASE +-------------------------- +- Migrated to Maven build system, directory structure and artifact conventions. +- Added OSGi headers to jar manifest. +- Fixed javadoc errors when building with JDK 8. +- Improved javadocs and misc. minor refactorings. + + + + +CHANGES IN THE 1.5 RELEASE +-------------------------- +- Fixed GSMCharset encoding of non-breakable space character (0x00A0), which shouldn't be encoded. +- Fixed PackedGSMCharset decoder edge case of handling overflow continuation for large strings (>256) + when calling decoder directly (not via String methods). +- Fixed PackedGSMCharset decoder edge case of string size which is a multiple of internal buffer size (256) + greater than 256 and has escaped characters on decoded buffer boundaries. 
+- Simplified CharsetProvider.charsetForName flow. + + + + +CHANGES IN THE 1.4 RELEASE +-------------------------- +- Dropped support for JDK 1.4 and earlier. +- Added MIK charset. +- Added KOI8_U as a KOI8-U alias. +- Optimized EscapedByteLookupCharset encoding buffer allocation for strings with no escape chars. +- Added ByteLookupCharset.updateInverseLookupTable convenience method. +- Improved docs. + + + + +CHANGES IN THE 1.3 RELEASE +-------------------------- +- Added X-roman8 as an hp-roman8 alias. +- Added the generic EscapedByteLookupCharset to simplify implementation of single-escape-byte charsets. +- Created two flavors of the GSM charset: CCGSMCharset (mapping the Latin capital letter C with cedilla) + and SCGSMCharset (mapping the Latin small letter c with cedilla). See javadocs for details. +- Added support for Packed GSM charset, with the two flavors as well. +- Renamed the canonical charset name for the new GSM family, to make the flavor choices explicit. + + + + +CHANGES IN THE 1.2.1 RELEASE +---------------------------- +- Fixed a combined JavaMail-JCharset bug that could cause an infinite loop on some inputs. +- Updated the ISO-8859-8-i/e mapping for the MACRON character. + The incorrect mapping in the JDK's implementation of ISO-8859-8 is fixed as of JDK 1.5 + (see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4760496). We now determine the + running JDK version, and if it's JDK 1.5 or higher we use the correct mapping. This + way we remain consistent with the running JDK ISO-8859-8 charset implementation. + + + + +CHANGES IN THE 1.2 RELEASE +-------------------------- +- Added KOI8-U charset. + + + + +CHANGES IN THE 1.1 RELEASE +-------------------------- + +- Added ByteLookupCharset class to simplify implementation of single byte charsets. +- Added GSM-default-alphabet charset (used in SMPP). +- Added hp-roman8 charset. +- Added ISO-8859-8-i/e charset. +- Added ISO-8859-6-i/e charset. 
+ + + + +CHANGES IN THE 1.0 RELEASE +-------------------------- + +- This is the first release of the Java Charset package. + diff --git a/lib/jcharset-2.1/LICENSE.commercial.txt b/lib/jcharset-2.1/LICENSE.commercial.txt new file mode 100644 index 0000000..bd21975 --- /dev/null +++ b/lib/jcharset-2.1/LICENSE.commercial.txt @@ -0,0 +1,4 @@ + +This software is dual-licensed - a commercial licensing option is available for those who need it. + +For details, please contact support@freeutils.net. diff --git a/lib/jcharset-2.1/LICENSE.gpl.txt b/lib/jcharset-2.1/LICENSE.gpl.txt new file mode 100644 index 0000000..89e08fb --- /dev/null +++ b/lib/jcharset-2.1/LICENSE.gpl.txt @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. 
+ + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. 
The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/lib/jcharset-2.1/README.txt b/lib/jcharset-2.1/README.txt new file mode 100644 index 0000000..0f7ce63 --- /dev/null +++ b/lib/jcharset-2.1/README.txt @@ -0,0 +1,153 @@ + + +JCharset - Java Charset package 2.1 +=================================== + +Copyright © 2005-2019 Amichai Rothman + + + +1. What is the Java Charset package? + + The Java Charset package is an open-source implementation of character + sets that were missing from the standard Java platform. + + It has been in use in many production systems around the world for over a + decade, including products by small start-ups, large open-source service + providers, and well-known multinational corporations. + + +2. How do I use the Java Charset package? + + The Java Charset package is written in pure Java, runs on JDK 1.5 or later, + and requires no special installation - just add the jar file to your + classpath, or place it in any of the usual extension directories. + + It is also available on Maven Central at the artifact coordinates + net.freeutils:jcharset:2.1. + + The JVM will recognize the supported character sets automatically, and they + will be available anywhere character sets are used in the Java platform. + + As an example, you can take a look at java.lang.String's constructor and + getBytes() method, both of which have an overloaded version that receives + a charset name as an argument. + + A command-line utility is included which supports converting files + between charsets. For help on usage and available options, run it using + the command 'java -jar jcharset-2.1.jar -h'. + + Note: Some web/mail containers run each application in its own JVM context. + In this case check the container documentation for information on where and + how to configure the classpath, such as in WEB-INF/lib, shared/lib, + jre/lib/ext, etc. You may need to restart the server for changes to take + effect. 
However, if you use Oracle's JRE, it will work only if you put it in + the jre/lib/ext extension directory, or in the container's classpath. + This is due to a bug in Oracle's JRE implementation + (http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4619777). + +3. Which charsets are supported? + + "UTF-7" (a.k.a. "UTF7", "UNICODE-1-1-UTF-7", "csUnicode11UTF7", + "UNICODE-2-0-UTF-7") + The 7-bit Unicode character encoding defined in RFC 2152. + The O-set characters are encoded as a shift sequence. + Both O-set flavors (direct and shifted) are decoded. + + "UTF-7-OPTIONAL" (a.k.a. "UTF-7O", "UTF7O", "UTF-7-O") + The 7-bit Unicode character encoding defined in RFC 2152. + The O-set characters are directly encoded. + Both O-set flavors (direct and shifted) are decoded. + + "SCGSM" (a.k.a. "GSM-default-alphabet", "GSM_0338", "GSM_DEFAULT", + "GSM7", "GSM-7BIT") + The GSM default charset as specified in GSM 03.38, used in SMPP for + encoding SMS text messages. + + Additional flavors of the GSM charset are "CCGSM", "SCPGSM", "CCPGSM", + "CRSCPGSM" and "CRCCPGSM": The CC prefix signifies mapping the Latin + capital letter C with cedilla character, the SC prefix signifies + mapping the Latin small letter c with cedilla character, the P prefix + signifies the packed form (8 characters packed in 7 bytes), and the + CR prefix signifies padding with CR instead of zeros to avoid ambiguity, + all as specified by the spec. See javadocs for details. + + "hp-roman8" (a.k.a. "roman8", "r8", "csHPRoman8", "X-roman8") + The HP Roman-8 charset, as provided in RFC 1345. 
+ + ISO/IEC 646 National Variants: + "ISO646-CA" ("ISO-IR-121") + "ISO646-CA2" ("ISO-IR-122") + "ISO646-CH" + "ISO646-CN" ("ISO-IR-57") + "ISO646-CU" ("ISO-IR-151") + "ISO646-DE" ("ISO-IR-21", "DIN_66003") + "ISO646-DK" + "ISO646-ES" ("ISO-IR-17") + "ISO646-ES2" ("ISO-IR-85") + "ISO646-FI" ("ISO646-SE", "ISO-IR-10") + "ISO646-FR" ("ISO-IR-69") + "ISO646-FR1" ("ISO-IR-25") + "ISO646-GB" ("ISO-IR-4") + "ISO646-HU" ("ISO-IR-86") + "ISO646-IE" ("ISO-IR-207") + "ISO646-INV" ("ISO-IR-170") + "ISO646-IRV" ("ISO-IR-2", "ISO_646.IRV:1983") + "ISO646-IS" + "ISO646-IT" ("ISO-IR-15") + "ISO646-JAO" ("ISO646-JP-OCR-B", "ISO-IR-92") + "ISO646-JP" ("ISO-IR-14") + "ISO646-KR" + "ISO646-MT" + "ISO646-NO" ("ISO-IR-60") + "ISO646-NO2" ("ISO-IR-61") + "ISO646-PT" ("ISO-IR-16") + "ISO646-PT2" ("ISO-IR-84") + "ISO646-SE2" ("ISO-IR-11") + "ISO646-T61" ("ISO-IR-102") + "ISO646-TW" + "ISO646-US" ("ISO-IR-6", "ISO_646.irv:1991") + "ISO646-YU" ("ISO-IR-141") + + "ISO-8859-8-BIDI" (a.k.a. "csISO88598I", "ISO-8859-8-I", "ISO_8859-8-I", + "csISO88598E", "ISO-8859-8-E", "ISO_8859-8-E") + The ISO 8859-8 charset implementation exists in the standard JRE. + However, it is lacking the i/e aliases, which specify whether + bidirectionality is implicit or explicit. The charsets conversions + themselves are similar. This charset complements the standard one. + + "ISO-8859-6-BIDI" (a.k.a. "csISO88596I", "ISO-8859-6-I", "ISO_8859-6-I", + "csISO88596E", "ISO-8859-6-E", "ISO_8859-6-E") + The ISO 8859-6 charset implementation exists in the standard JRE. + However, it is lacking the i/e aliases, which specify whether + bidirectionality is implicit or explicit. The charsets conversions + themselves are similar. This charset complements the standard one. + + "KOI8-U" (a.k.a. "KOI8-RU", "KOI8_U") + The KOI8-U Ukrainian charset, as defined in RFC 2319. + + "KZ-1048" (a.k.a. "STRK1048-2002", "RK1048", "csKZ1048") + The KZ-1048 charset, which is the Kazakhstan national standard. 
+ + "MIK" + The MIK cyrillic code page, commonly used by DOS applications + in Bulgaria. + + +4. License + + The Java Charset package is provided under the GNU General Public + License agreement. Please read the full license agreement in the + included LICENSE.gpl.txt file. + + For non-GPL commercial licensing please contact the address below. + + +5. Contact + + Please write to support@freeutils.net with any bugs, suggestions, fixes, + contributions, or just to drop a good word and let me know you've found + this package useful and you'd like it to keep being maintained. + + Updates and additional info can be found at + http://www.freeutils.net/source/jcharset/ diff --git a/lib/jcharset-2.1/lib/jcharset-2.1.jar b/lib/jcharset-2.1/lib/jcharset-2.1.jar new file mode 100644 index 0000000..f2166e4 Binary files /dev/null and b/lib/jcharset-2.1/lib/jcharset-2.1.jar differ diff --git a/lib/jcharset-2.1/pom.xml b/lib/jcharset-2.1/pom.xml new file mode 100644 index 0000000..365b14c --- /dev/null +++ b/lib/jcharset-2.1/pom.xml @@ -0,0 +1,84 @@ + + + 4.0.0 + + + net.freeutils + parent + 1.0 + ../parent/pom.xml + + + jcharset + 2.1 + jar + + JCharset + The Java Charset package + http://www.freeutils.net/source/jcharset/ + + + GNU General Public License (GPL), Version 2.0 + http://www.gnu.org/licenses/gpl-2.0.html + + + Commercial License + LICENSE.commercial.txt + + + + + 1.5 + java15 + false + + + + + + + org.apache.felix + maven-bundle-plugin + + + bundle-manifest + process-classes + + manifest + + + + ${bundle.symbolicName} + ${project.version} + ${bundle.namespace}.*;version="${project.version}" + ${bundle.namespace}.* + + + osgi.serviceloader; osgi.serviceloader=java.nio.charset.spi.CharsetProvider + + + jar + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + + true + + + net.freeutils.charset.Utils + + + + + + + + diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/ByteLookupCharset.java 
b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/ByteLookupCharset.java new file mode 100644 index 0000000..374f31c --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/ByteLookupCharset.java @@ -0,0 +1,293 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>. + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; +import java.util.Arrays; + +/** + * The ByteLookupCharset class handles the encoding and decoding of + * single-byte charsets where the byte-to-char conversion is performed + * using a simple lookup table. + * + * @author Amichai Rothman + * @since 2005-06-30 + */ +public abstract class ByteLookupCharset extends Charset { + + final int[] byteToChar; /* char value for each byte value (0-255); -1 marks a byte with no mapping */ + final int[][] charToByte; /* inverse table: [char high byte][char low byte] gives the byte value; a null row or -1 marks an unmappable char */ + + /** + * Creates a new lookup table with 256 elements, all initialized to -1. + * + * @return the new table + */ + public static int[] createTable() { + int[] table = new int[256]; + Arrays.fill(table, -1); + return table; + } + + /** + * Returns a copy of the given array in which several items + * are modified. 
+ * + * @param src the array to copy (it is left unmodified) + * @param indices the array of indices at which the values will be modified + * @param values the respective values to place in these indices (read for every entry of indices) + * @return the mutated copy + */ + public static int[] mutate(int[] src, int[] indices, int[] values) { + int[] mutated = new int[src.length]; + System.arraycopy(src, 0, mutated, 0, src.length); + for (int i = 0; i < indices.length; i++) + mutated[indices[i]] = values[i]; + return mutated; + } + + /** + * Creates an inverse lookup table for the given byte-to-char lookup table. + * <p>

+ * The returned table contains 256 tables, one per high-order byte of a + * potential character to be converted (unused ones are null), and each + * such table can be indexed using the character's low-order byte, to + * obtain the actual converted byte value. + * A null table in the top level table, or a -1 within a lower level table, + * both indicate that there is no legal mapping for the given character. + * + * @param chars a lookup table which holds the character value + * that each byte value (0-255) is converted to + * @return the created inverse lookup (char-to-byte) table + */ + public static int[][] createInverseLookupTable(int[] chars) { + int[][] tables = new int[256][]; + for (int i = 0; i < 256; i++) { + int c = chars[i]; + if (c > -1) + updateInverseLookupTable(tables, c, i); + } + return tables; + } + + /** + * Updates an inverse lookup table with an additional mapping, + * replacing a previous mapping of the same value if it exists. + * + * @param tables the inverse lookup table to update + * (see {@link #createInverseLookupTable}) + * @param c the character to map + * @param b the byte value to which c is mapped, or -1 to mark an illegal mapping + * @return the updated inverse lookup (char-to-byte) table + */ + public static int[][] updateInverseLookupTable(int[][] tables, int c, int b) { + int high = (c >>> 8) & 0xFF; + int low = c & 0xFF; + int[] table = tables[high]; + if (table != null) { + table[low] = b; + } else if (b > -1) { + table = createTable(); + tables[high] = table; + table[low] = b; + } + return tables; + } + + /** + * Updates an inverse lookup table with additional mappings, + * replacing previous mappings of the same values if they exist. 
+ * + * @param tables the inverse lookup table to update + * (see {@link #createInverseLookupTable}) + * @param chars the characters to map + * @param bytes the respective byte values to which the chars are mapped, + * or -1 to mark an illegal mapping + * @return the updated inverse lookup (char-to-byte) table + */ + public static int[][] updateInverseLookupTable(int[][] tables, int[] chars, int[] bytes) { + for (int i = 0; i < chars.length; i++) + updateInverseLookupTable(tables, chars[i], bytes[i]); + return tables; + } + + /** + * Initializes a new charset with the given canonical name and alias + * set, and byte-to-char/char-to-byte lookup tables. + * + * @param canonicalName the canonical name of this charset + * @param aliases an array of this charset's aliases, or null if it has no aliases + * @param byteToChar a byte-to-char conversion table for this charset (stored by reference, not copied) + * @param charToByte a char-to-byte conversion table for this charset. It can + * be generated on-the-fly by calling createInverseLookupTable(byteToChar). + * @throws java.nio.charset.IllegalCharsetNameException + * if the canonical name or any of the aliases are illegal + */ + protected ByteLookupCharset(String canonicalName, String[] aliases, + int[] byteToChar, int[][] charToByte) { + super(canonicalName, aliases); + this.byteToChar = byteToChar; + this.charToByte = charToByte; + } + + /** + * Tells whether or not this charset contains the given charset. + * + * <p>

A charset C is said to contain a charset D if, + * and only if, every character representable in D is also + * representable in C. If this relationship holds then it is + * guaranteed that every string that can be encoded in D can also be + * encoded in C without performing any replacements. + * + * <p>

That C contains D does not imply that each character + * representable in C by a particular byte sequence is represented + * in D by the same byte sequence, although sometimes this is the + * case. + * + * <p>

Every charset contains itself. + * + * <p>

This method computes an approximation of the containment relation: + * If it returns true then the given charset is known to be + * contained by this charset; if it returns false, however, then + * it is not necessarily the case that the given charset is not contained + * in this charset. + * + * @param charset the given charset + * + * @return true if the given charset is contained in this charset (implemented as: it is an instance of this charset's runtime class) + */ + @Override + public boolean contains(Charset charset) { + return this.getClass().isInstance(charset); + } + + /** + * Constructs a new decoder for this charset. + * + * @return a new decoder for this charset + */ + @Override + public CharsetDecoder newDecoder() { + return new Decoder(this); + } + + /** + * Constructs a new encoder for this charset. + * + * @return a new encoder for this charset + */ + @Override + public CharsetEncoder newEncoder() { + return new Encoder(this); + } + + /** + * The Encoder inner class handles the encoding of the + * charset using the inverse (char-to-byte) lookup table. + */ + protected class Encoder extends CharsetEncoder { + + /** + * Constructs an Encoder. + * + * @param charset the charset that created this encoder + */ + protected Encoder(Charset charset) { + super(charset, 1f, 1f); + } + + /** + * Encodes one or more characters into one or more bytes. + * + * @param in the input character buffer + * @param out the output byte buffer + * @return a coder-result object describing the reason for termination (OVERFLOW when out is full, unmappable-for-length-1 with the char left unread, or UNDERFLOW when all input is consumed) + */ + @Override + protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { + int[][] lookup = charToByte; // getfield bytecode optimization + int remainingIn = in.remaining(); + int remainingOut = out.remaining(); + while (remainingIn-- > 0) { + if (remainingOut-- < 1) + return CoderResult.OVERFLOW; // we need exactly one byte per char + int c = in.get(); + int[] table = lookup[c >>> 8]; + int b = table == null ? 
-1 : table[c & 0xFF]; + if (b == -1) { + in.position(in.position() - 1); + return CoderResult.unmappableForLength(1); + } + out.put((byte)(b & 0xFF)); + } + return CoderResult.UNDERFLOW; + } + + } + + /** + * The Decoder inner class handles the decoding of the + * charset using the byte-to-char lookup table. + */ + protected class Decoder extends CharsetDecoder { + + /** + * Constructs a Decoder. + * + * @param charset the charset that created this decoder + */ + protected Decoder(Charset charset) { + super(charset, 1f, 1f); + } + + /** + * Decodes one or more bytes into one or more characters. + * + * @param in the input byte buffer + * @param out the output character buffer + * @return a coder-result object describing the reason for termination + */ + @Override + protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + int[] lookup = byteToChar; // getfield bytecode optimization + int remainingIn = in.remaining(); + int remainingOut = out.remaining(); + while (remainingIn-- > 0) { + if (remainingOut-- < 1) + return CoderResult.OVERFLOW; // we need exactly one char per byte + int c = lookup[in.get() & 0xFF]; + if (c == -1) { + in.position(in.position() - 1); + return CoderResult.malformedForLength(1); + } + out.put((char)c); + } + return CoderResult.UNDERFLOW; + } + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/CharsetProvider.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/CharsetProvider.java new file mode 100644 index 0000000..274701f --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/CharsetProvider.java @@ -0,0 +1,150 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. 
+ * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +import java.nio.charset.Charset; +import java.util.*; +import net.freeutils.charset.gsm.*; +import net.freeutils.charset.iso646.*; + +/** + * The CharsetProvider class is a Charset Provider implementation. + * + * @author Amichai Rothman + * @since 2005-06-10 + */ +public class CharsetProvider extends java.nio.charset.spi.CharsetProvider { + + static Map nameToCharset; + static Collection charsets; + + /** + * Retrieves a charset for the given charset name. + * + * @param charsetName the name of the requested charset; + * may be either a canonical name or an alias + * + * @return a charset object for the named charset, + * or null if the named charset + * is not supported by this provider + */ + @Override + public Charset charsetForName(String charsetName) { + if (nameToCharset == null) + init(); + + // get charset instance for given name (case insensitive) + Charset charset = nameToCharset.get(charsetName.toLowerCase()); + if (charset != null) { + try { + return charset.getClass().newInstance(); + } catch (Exception ignore) { + // if we can't create an instance, we don't + } + } + return null; + } + + /** + * Creates an iterator that iterates over the charsets supported by this + * provider. This method is used in the implementation of the {@link + * java.nio.charset.Charset#availableCharsets Charset.availableCharsets} + * method. 
+ * + * @return the new iterator + */ + @Override + public Iterator charsets() { + if (charsets == null) + init(); + + return charsets.iterator(); + } + + /** + * Initializes this charset provider's data. + */ + void init() { + // prepare supported charsets + Charset[] allCharsets = { + new UTF7Charset(), + new UTF7OptionalCharset(), + new SCGSMCharset(), + new CCGSMCharset(), + new SCPackedGSMCharset(), + new CCPackedGSMCharset(), + new CRSCPackedGSMCharset(), + new CRCCPackedGSMCharset(), + new HPRoman8Charset(), + new KOI8UCharset(), + new KZ1048Charset(), + new ISO88598Charset(), + new ISO88596Charset(), + new MIKCharset(), + new ISO646CACharset(), + new ISO646CA2Charset(), + new ISO646CHCharset(), + new ISO646CNCharset(), + new ISO646CUCharset(), + new ISO646DECharset(), + new ISO646DKCharset(), + new ISO646ESCharset(), + new ISO646ES2Charset(), + new ISO646FISECharset(), + new ISO646FRCharset(), + new ISO646FR1Charset(), + new ISO646GBCharset(), + new ISO646HUCharset(), + new ISO646IECharset(), + new ISO646INVCharset(), + new ISO646IRVCharset(), + new ISO646ISCharset(), + new ISO646ITCharset(), + new ISO646JAOCharset(), + new ISO646JPCharset(), + new ISO646KRCharset(), + new ISO646MTCharset(), + new ISO646NO2Charset(), + new ISO646NOCharset(), + new ISO646PTCharset(), + new ISO646PT2Charset(), + new ISO646SE2Charset(), + new ISO646T61Charset(), + new ISO646TWCharset(), + new ISO646USCharset(), + new ISO646YUCharset(), + }; + + // initialize charset collection + charsets = Collections.unmodifiableCollection(Arrays.asList(allCharsets)); + + // initialize name to charset map + Map map = new HashMap(); + for (Charset charset : allCharsets) { + map.put(charset.name().toLowerCase(), charset); + for (String alias : charset.aliases()) + map.put(alias.toLowerCase(), charset); + } + nameToCharset = map; + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/EscapedByteLookupCharset.java 
b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/EscapedByteLookupCharset.java new file mode 100644 index 0000000..887ba3f --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/EscapedByteLookupCharset.java @@ -0,0 +1,285 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>. + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +/** + * The EscapedByteLookupCharset class handles the encoding and + * decoding of simple charsets where the byte-to-char conversion + * is performed using a simple lookup table, with the addition of a special + * escape byte, such that the single byte following it is converted using + * an alternate lookup table. 
+ * + * @author Amichai Rothman + * @since 2007-03-26 + */ +public abstract class EscapedByteLookupCharset extends Charset { + + final int[] byteToChar; + final int[] byteToCharEscaped; + final int[][] charToByte; + final int[][] charToByteEscaped; + final byte escapeByte; + + /** + * Initializes a new charset with the given canonical name and alias + * set, and byte-to-char/char-to-byte lookup tables. + * + * @param canonicalName the canonical name of this charset + * @param aliases an array of this charset's aliases, or null if it has no aliases + * @param escapeByte the special escape byte value + * @param byteToChar a byte-to-char conversion table for this charset + * @param byteToCharEscaped a byte-to-char conversion table for this charset + * for the escaped characters + * @param charToByte a char-to-byte conversion table for this charset. It can + * be generated on-the-fly by calling + * {@link ByteLookupCharset#createInverseLookupTable + * createInverseLookupTable(byteToChar)}. + * @param charToByteEscaped a char-to-byte conversion table for this charset + * for the escaped characters + * @throws java.nio.charset.IllegalCharsetNameException + * if the canonical name or any of the aliases are illegal + */ + protected EscapedByteLookupCharset(String canonicalName, String[] aliases, + byte escapeByte, int[] byteToChar, int[] byteToCharEscaped, + int[][] charToByte, int[][] charToByteEscaped) { + super(canonicalName, aliases); + this.escapeByte = escapeByte; + this.byteToChar = byteToChar; + this.charToByte = charToByte; + this.byteToCharEscaped = byteToCharEscaped; + this.charToByteEscaped = charToByteEscaped; + } + + /** + * Tells whether or not this charset contains the given charset. + * + * <p>

A charset C is said to contain a charset D if, + * and only if, every character representable in D is also + * representable in C. If this relationship holds then it is + * guaranteed that every string that can be encoded in D can also be + * encoded in C without performing any replacements. + * + * <p>

That C contains D does not imply that each character + * representable in C by a particular byte sequence is represented + * in D by the same byte sequence, although sometimes this is the + * case. + * + * <p>

Every charset contains itself. + * + * <p>

This method computes an approximation of the containment relation: + * If it returns true then the given charset is known to be + * contained by this charset; if it returns false, however, then + * it is not necessarily the case that the given charset is not contained + * in this charset. + * + * @param charset the given charset + * + * @return true if the given charset is contained in this charset (implemented as: it is an instance of this charset's runtime class) + */ + @Override + public boolean contains(Charset charset) { + return this.getClass().isInstance(charset); + } + + /** + * Constructs a new decoder for this charset. + * + * @return a new decoder for this charset + */ + @Override + public CharsetDecoder newDecoder() { + return new Decoder(this); + } + + /** + * Constructs a new encoder for this charset. + * + * @return a new encoder for this charset + */ + @Override + public CharsetEncoder newEncoder() { + return new Encoder(this); + } + + /** + * The Encoder inner class handles the encoding of the + * charset using the inverse (char-to-byte) lookup tables. + */ + protected class Encoder extends CharsetEncoder { + + /** + * Constructs an Encoder with an average of 1 and a maximum of 2 bytes per char. + * + * @param charset the charset that created this encoder + */ + protected Encoder(Charset charset) { + super(charset, 1f, 2f); + } + + /** + * Constructs an Encoder. + * + * @param charset the charset that created this encoder + * @param averageBytesPerChar a positive float value indicating the expected + * number of bytes that will be produced for each input character + * + * @param maxBytesPerChar a positive float value indicating the maximum + * number of bytes that will be produced for each input character + */ + protected Encoder(Charset charset, float averageBytesPerChar, float maxBytesPerChar) { + super(charset, averageBytesPerChar, maxBytesPerChar); + } + + /** + * Encodes one or more characters into one or more bytes. 
+ * + * @param in the input character buffer + * @param out the output byte buffer + * @return a coder-result object describing the reason for termination + */ + @Override + protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { + byte escape = escapeByte; // getfield bytecode optimization + int[][] lookup = charToByte; // getfield bytecode optimization + int[][] lookupEscaped = charToByteEscaped; // getfield bytecode optimization + int remainingIn = in.remaining(); + int remainingOut = out.remaining(); + while (remainingIn-- > 0) { + // make sure we have room for output + if (remainingOut-- < 1) + return CoderResult.OVERFLOW; + // get next char + int c = in.get(); + // look for corresponding regular byte + int[] table = lookup[c >> 8]; + int b = table == null ? -1 : table[c & 0xFF]; + if (b == -1) { + // look for corresponding escaped byte + table = lookupEscaped[c >> 8]; + b = table == null ? -1 : table[c & 0xFF]; + if (b == -1) { + // there's no regular nor escaped byte - it's unmappable + in.position(in.position() - 1); // unread the char + return CoderResult.unmappableForLength(1); + } + // it's an escapable char, make sure we have room for extra output + if (remainingOut-- < 1) { + in.position(in.position() - 1); // unread the char + return CoderResult.OVERFLOW; + } + // write the escape byte (output byte will follow) + out.put(escape); + } + // write the output byte + out.put((byte)(b & 0xFF)); + } + // no more input available + return CoderResult.UNDERFLOW; + } + + } + + /** + * The Decoder inner class handles the decoding of the + * charset using the byte-to-char lookup tables. + */ + protected class Decoder extends CharsetDecoder { + + /** + * Constructs a Decoder. + * + * @param charset the charset that created this decoder + */ + protected Decoder(Charset charset) { + super(charset, 1f, 1f); + } + + /** + * Constructs a Decoder. 
+ * + * @param charset the charset that created this decoder + * @param averageCharsPerByte a positive float value indicating the expected + * number of characters that will be produced for each input byte + * @param maxCharsPerByte a positive float value indicating the maximum + * number of characters that will be produced for each input byte + */ + protected Decoder(Charset charset, float averageCharsPerByte, float maxCharsPerByte) { + super(charset, averageCharsPerByte, maxCharsPerByte); + } + + /** + * Decodes one or more bytes into one or more characters. + * + * @param in the input byte buffer + * @param out the output character buffer + * @return a coder-result object describing the reason for termination + */ + @Override + protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + byte escape = escapeByte; // getfield bytecode optimization + int[] lookup = byteToChar; // getfield bytecode optimization + int[] lookupEscaped = byteToCharEscaped; // getfield bytecode optimization + int remainingIn = in.remaining(); + int remainingOut = out.remaining(); + while (remainingIn-- > 0) { + // make sure we have room for output + if (remainingOut-- < 1) + return CoderResult.OVERFLOW; + // get next byte + int c; + int b = in.get(); + if (b == escape) { + // it's the escape byte - make sure we have the next byte + if (remainingIn-- < 1) { + in.position(in.position() - 1); // unread the byte + return CoderResult.UNDERFLOW; + } + // get next byte + b = in.get(); + // look for corresponding escaped char + c = lookupEscaped[b & 0xFF]; + } else { + // look for corresponding regular char + c = lookup[b & 0xFF]; + } + + if (c == -1) { + // there's no regular nor escaped char - it's malformed. NOTE(review): after an escape byte only the second byte is unread and reported, the escape byte itself stays consumed - confirm this is intended + in.position(in.position() - 1); // unread the byte + return CoderResult.malformedForLength(1); + } + // write the output char + out.put((char)c); + } + // no more input available + return CoderResult.UNDERFLOW; + } + } + +} diff --git 
a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/HPRoman8Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/HPRoman8Charset.java new file mode 100644 index 0000000..3cc03a2 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/HPRoman8Charset.java @@ -0,0 +1,82 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>. + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +/** + * The HPRoman8Charset class handles the encoding and decoding of the + * HP Roman-8 charset, as provided in RFC 1345. The last table entry (byte 0xFF) is -1, i.e. it has no mapping. 
+ * + * @author Amichai Rothman + * @since 2005-06-30 + */ +public class HPRoman8Charset extends ByteLookupCharset { + + static final String NAME = "hp-roman8"; + + static final String[] ALIASES = { + "roman8", "r8", "csHPRoman8", "X-roman8" }; + + static final int[] BYTE_TO_CHAR = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F, + 0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF, + 0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4, + 0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1, + 0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2, + 0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA, + 0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 
0x00FC, + 0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6, + 0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4, + 0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3, + 0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF, + 0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC, + 0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, -1, + }; + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the HPRoman8Charset. + */ + public HPRoman8Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/ISO88596Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/ISO88596Charset.java new file mode 100644 index 0000000..dd56a8b --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/ISO88596Charset.java @@ -0,0 +1,88 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>. + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +/** + * The ISO88596Charset class handles the encoding and decoding of the + * ISO 8859-6 charset. 
Although the JRE includes an implementation of this + * charset, it does not recognize two aliases for this charset: + * ISO-8859-6-i and ISO-8859-6-e. These signify whether bidirectionality + * is implicit or explicit. However, this is the displayer's responsibility + * in any case, and the character conversion is the same. + * See RFC 1556. Byte values marked -1 in the table below have no mapping. + * + * @author Amichai Rothman + * @since 2005-06-30 + */ +public class ISO88596Charset extends ByteLookupCharset { + + static final String NAME = "ISO-8859-6-BIDI"; + + static final String[] ALIASES = { + "csISO88596I", "ISO-8859-6-I", "ISO_8859-6-I", + "csISO88596E", "ISO-8859-6-E", "ISO_8859-6-E" }; + + static final int[] BYTE_TO_CHAR = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 
0x009e, 0x009f, + 0x00a0, -1, -1, -1, 0x00a4, -1, -1, -1, + -1, -1, -1, -1, 0x060c, 0x00ad, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, 0x061b, -1, -1, -1, 0x061f, + -1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627, + 0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f, + 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637, + 0x0638, 0x0639, 0x063a, -1, -1, -1, -1, -1, + 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647, + 0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, + 0x0650, 0x0651, 0x0652, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + }; + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO88596Charset. + */ + public ISO88596Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/ISO88598Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/ISO88598Charset.java new file mode 100644 index 0000000..e081e4f --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/ISO88598Charset.java @@ -0,0 +1,98 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>. 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +/** + * The ISO88598Charset class handles the encoding and decoding of the + * ISO 8859-8 charset. Although the JRE includes an implementation of this + * charset, it does not recognize two aliases for this charset: + * ISO-8859-8-i and ISO-8859-8-e. These signify whether bidirectionality + * is implicit or explicit. However, this is the displayer's responsibility + * in any case, and the character conversion is the same. + * See RFC 1556. Byte values marked -1 in the table below have no mapping. + * + * @author Amichai Rothman + * @since 2005-06-30 + */ +public class ISO88598Charset extends ByteLookupCharset { + + static final String NAME = "ISO-8859-8-BIDI"; + + static final String[] ALIASES = { + "csISO88598I", "ISO-8859-8-I", "ISO_8859-8-I", + "csISO88598E", "ISO-8859-8-E", "ISO_8859-8-E" }; + + static final int[] BYTE_TO_CHAR = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + 0x0080, 0x0081, 0x0082, 0x0083, 
0x0084, 0x0085, 0x0086, 0x0087, + 0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, + 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, + 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f, + 0x00a0, -1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, + 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x203e, // byte 0xaf: 0x203e here; the static block below replaces it with 0x00af when Utils.isJDK15() is true + 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, + 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, 0x2017, + 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7, + 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df, + 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7, + 0x05e8, 0x05e9, 0x05ea, -1, -1, 0x200e, 0x200f, -1, // 0x200e/0x200f added to spec + }; + + static { + // update the mapping for the MACRON character + // (see http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4760496). + // apply the fix only if we're running in JDK 1.5 or higher, + // so that we remain consistent with the JDK ISO-8859-8 charset + // implementation. This block precedes CHAR_TO_BYTE below, whose initializer reads BYTE_TO_CHAR. + if (Utils.isJDK15()) + BYTE_TO_CHAR[0xaf] = 0x00af; + } + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO88598Charset. + */ + public ISO88598Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/KOI8UCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/KOI8UCharset.java new file mode 100644 index 0000000..2d53ae9 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/KOI8UCharset.java @@ -0,0 +1,81 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. 
+ * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +/** + * The KOI8UCharset class handles the encoding and decoding of the + * KOI8-U charset, as provided in RFC 2319. + * + * @author Amichai Rothman + * @since 2005-08-02 + */ +public class KOI8UCharset extends ByteLookupCharset { + + static final String NAME = "KOI8-U"; + + static final String[] ALIASES = { "KOI8-RU", "KOI8_U" }; + + static final int[] BYTE_TO_CHAR = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006a, 0x006b, 
0x006c, 0x006d, 0x006e, 0x006f, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f, + 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524, + 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590, + 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248, + 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7, + 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457, + 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E, + 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407, + 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9, + 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433, + 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, + 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432, + 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A, + 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413, + 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, + 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412, + 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A, + }; + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the KOI8UCharset. + */ + public KOI8UCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/KZ1048Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/KZ1048Charset.java new file mode 100644 index 0000000..03f1643 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/KZ1048Charset.java @@ -0,0 +1,87 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. 
+ * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>. + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +/** + * The KZ1048Charset class handles the encoding and decoding of the + * KZ-1048 charset, created as Kazakhstan national standard STRK1048-2002 + * by modifying windows-1251, the Windows Cyrillic code page, with + * 16 modified mappings. It is registered in IANA as KZ-1048. + *

+ * The encoding and decoding are based on the mapping at + * https://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KZ1048.TXT + * and https://www.iana.org/assignments/charset-reg/KZ-1048 + * + * @author Amichai Rothman + * @since 2019-03-31 + */ +public class KZ1048Charset extends ByteLookupCharset { + + static final String NAME = "KZ-1048"; + + static final String[] ALIASES = { "STRK1048-2002", "RK1048", "csKZ1048" }; + + static final int[] BYTE_TO_CHAR = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021, + 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x049A, 0x04BA, 0x040F, + 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014, + -1, 0x2122, 0x0459, 0x203A, 0x045A, 0x049B, 0x04BB, 0x045F, + 0x00A0, 0x04B0, 0x04B1, 0x04D8, 0x00A4, 0x04E8, 0x00A6, 0x00A7, + 0x0401, 0x00A9, 0x0492, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x04AE, + 0x00B0, 0x00B1, 0x0406, 0x0456, 0x04E9, 0x00B5, 0x00B6, 0x00B7, + 0x0451, 0x2116, 
0x0493, 0x00BB, 0x04D9, 0x04A2, 0x04A3, 0x04AF, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, + 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + }; + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the KZ1048Charset. + */ + public KZ1048Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/MIKCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/MIKCharset.java new file mode 100644 index 0000000..b7dc51d --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/MIKCharset.java @@ -0,0 +1,87 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +/** + * The MIKCharset class handles the encoding and decoding of the + * MIK cyrillic code page, commonly used by DOS applications in Bulgaria. + * + * @author Amichai Rothman + * @since 2009-12-16 + */ +public class MIKCharset extends ByteLookupCharset { + + static final String NAME = "MIK"; + + static final String[] ALIASES = {}; + + static final int[] BYTE_TO_CHAR = { + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F, + 0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017, + 0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F, + 0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027, + 0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F, + 0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037, + 0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F, + 0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, + 0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, + 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, + 0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F, + 0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, + 0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, + 0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F, + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, + 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, + 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 
0x0446, 0x0447, + 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, + 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x2563, 0x2551, + 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2510, + 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2116, 0x00A7, 0x2557, + 0x255D, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580, + 0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, + 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229, + 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, + 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0, + }; + + static final int[][] CHAR_TO_BYTE = // updated with ambiguous mappings + updateInverseLookupTable( + createInverseLookupTable(BYTE_TO_CHAR), + new int[] { 0x00DF, 0x2211, 0x00B5, 0x2126, 0x2205, 0x2208, + 0x03B2, 0x03A3, 0x03BC, 0x03A9, 0x03C6, 0x03B5 }, + new int[] { 0xE1, 0xE4, 0xE6, 0xEA, 0xED, 0xEE, + 0xE1, 0xE4, 0xE6, 0xEA, 0xED, 0xEE }); + + /** + * Constructs an instance of the MIKCharset. + */ + public MIKCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/UTF7Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/UTF7Charset.java new file mode 100644 index 0000000..fe89d27 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/UTF7Charset.java @@ -0,0 +1,482 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>. + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.*; + +/** + * The UTF7Charset class handles the encoding and decoding of the + * UTF-7 charset. + *

/**
 * The UTF7Charset class handles the encoding and decoding of the
 * UTF-7 charset.
 *
 * The encoding and decoding are based on RFC 2152
 * (http://www.ietf.org/rfc/rfc2152.txt)
 *
 * @author Amichai Rothman
 * @since  2005-06-10
 */
public class UTF7Charset extends Charset {

    static final String NAME = "UTF-7";

    static final String[] ALIASES = {
        "UTF7", "UNICODE-1-1-UTF-7", "csUnicode11UTF7", "UNICODE-2-0-UTF-7" };

    // a lookup table for characters that are part of the D Set
    static final boolean[] D_SET = {
        false, false, false, false, false, false, false, false,
        false, true,  true,  false, false, true,  false, false,
        false, false, false, false, false, false, false, false,
        false, false, false, false, false, false, false, false,
        true,  false, false, false, false, false, false, true,
        true,  true,  false, false, true,  true,  true,  true,
        true,  true,  true,  true,  true,  true,  true,  true,
        true,  true,  true,  false, false, false, false, true,
        false, true,  true,  true,  true,  true,  true,  true,
        true,  true,  true,  true,  true,  true,  true,  true,
        true,  true,  true,  true,  true,  true,  true,  true,
        true,  true,  true,  false, false, false, false, false,
        false, true,  true,  true,  true,  true,  true,  true,
        true,  true,  true,  true,  true,  true,  true,  true,
        true,  true,  true,  true,  true,  true,  true,  true,
        true,  true,  true,  false, false, false, false, false,
    };

    // a lookup table for characters that are part of the O Set
    static final boolean[] O_SET = {
        false, false, false, false, false, false, false, false,
        false, false, false, false, false, false, false, false,
        false, false, false, false, false, false, false, false,
        false, false, false, false, false, false, false, false,
        false, true,  true,  true,  true,  true,  true,  false,
        false, false, true,  false, false, false, false, false,
        false, false, false, false, false, false, false, false,
        false, false, false, true,  true,  true,  true,  false,
        true,  false, false, false, false, false, false, false,
        false, false, false, false, false, false, false, false,
        false, false, false, false, false, false, false, false,
        false, false, false, true,  false, true,  true,  true,
        true,  false, false, false, false, false, false, false,
        false, false, false, false, false, false, false, false,
        false, false, false, false, false, false, false, false,
        false, false, false, true,  true,  true,  false, false,
    };

    // a lookup table for characters that are part of the B Set
    // (maps an ASCII value to its 6-bit base64 value, or -1 if it has none)
    static final int[] B_SET = {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
        52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
        -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    };

    // an inverse lookup table for characters that are part of the B Set
    // (maps a 6-bit base64 value to its ASCII value)
    static final int[] B_SET_INVERSE = {
         65,  66,  67,  68,  69,  70,  71,  72,
         73,  74,  75,  76,  77,  78,  79,  80,
         81,  82,  83,  84,  85,  86,  87,  88,
         89,  90,  97,  98,  99, 100, 101, 102,
        103, 104, 105, 106, 107, 108, 109, 110,
        111, 112, 113, 114, 115, 116, 117, 118,
        119, 120, 121, 122,  48,  49,  50,  51,
         52,  53,  54,  55,  56,  57,  43,  47,
    };

    // the RFC specifies that the O-set characters may
    // optionally be directly encoded. Whether they are
    // encoded directly or using a shift sequence depends
    // on the value of the optionalDirect flag.
    final boolean optionalDirect;

    static boolean isDSet(byte b) {
        return b >= 0 && D_SET[b];
    }

    static boolean isDSet(char c) {
        return c < 0x80 && D_SET[c];
    }

    static boolean isOSet(byte b) {
        return b >= 0 && O_SET[b];
    }

    static boolean isOSet(char c) {
        return c < 0x80 && O_SET[c];
    }

    static boolean isDorOSet(byte b) {
        return b >= 0 && (D_SET[b] || O_SET[b]);
    }

    static boolean isDorOSet(char c) {
        return c < 0x80 && (D_SET[c] || O_SET[c]);
    }

    static boolean isBSet(byte b) {
        return b >= 0 && B_SET[b] != -1;
    }

    static boolean isBSet(char c) {
        return c < 0x80 && B_SET[c] != -1;
    }

    // returns the 6-bit value of the given base64 byte, or -1 if it is not one
    static byte fromBase64(byte b) {
        return (byte)(b < 0 ? -1 : B_SET[b]);
    }

    // returns the base64 byte of the given 6-bit value, or -1 if out of range
    static byte toBase64(byte b) {
        return (byte)(b < 0 || b >= 64 ? -1 : B_SET_INVERSE[b]);
    }

    /**
     * Constructs an instance of the UTF7Charset.
     *
     * O-set characters are not directly encoded.
     */
    public UTF7Charset() {
        this(NAME, ALIASES, false);
    }

    /**
     * Constructs an instance of the UTF7Charset, specifying whether the
     * O-set characters are to be encoded directly or using a shift sequence.
     *
     * @param canonicalName the canonical name of this charset
     * @param aliases an array of this charset's aliases, or null if it has no aliases
     * @param optionalDirect if true, O-set characters are encoded directly,
     *        otherwise they are encoded using a shift sequence
     * @throws IllegalCharsetNameException
     *         if the canonical name or any of the aliases are illegal
     */
    public UTF7Charset(String canonicalName, String[] aliases, boolean optionalDirect) {
        super(canonicalName, aliases);
        this.optionalDirect = optionalDirect;
    }

    /**
     * Returns whether the given character is encoded directly
     * or using a shift sequence.
     *
     * @param c the character to check
     * @return true if the character is encoded directly,
     *         false if it is encoded using a shift sequence
     */
    boolean isDirect(char c) {
        return c < 0x80 && (D_SET[c] || (optionalDirect && O_SET[c]));
    }

    /**
     * Tells whether or not this charset contains the given charset.
     *
     * A charset C is said to contain a charset D if, and only if,
     * every character representable in D is also representable in C.
     * If this relationship holds then it is guaranteed that every string
     * that can be encoded in D can also be encoded in C without
     * performing any replacements.
     *
     * That C contains D does not imply that each character representable
     * in C by a particular byte sequence is represented in D by the same
     * byte sequence, although sometimes this is the case.
     *
     * Every charset contains itself. All UTF-7 flavors (with or without
     * directly encoded O-set characters) represent the same characters,
     * so any UTF7Charset contains any other UTF7Charset, regardless of
     * which one is the subclass.
     *
     * This method computes an approximation of the containment relation:
     * If it returns true then the given charset is known to be contained
     * by this charset; if it returns false, however, then it is not
     * necessarily the case that the given charset is not contained in
     * this charset.
     *
     * @param charset the given charset
     * @return true if, and only if, the given charset
     *         is contained in this charset
     */
    @Override
    public boolean contains(Charset charset) {
        // the flavor only affects how characters are encoded, not which
        // ones can be, so test against the base class rather than against
        // this instance's runtime class
        return charset instanceof UTF7Charset
            || Charset.forName("UTF-16").contains(charset);
    }

    /**
     * Constructs a new decoder for this charset.
     *
     * @return a new decoder for this charset
     */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    /**
     * Constructs a new encoder for this charset.
     *
     * @return a new encoder for this charset
     */
    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * The Encoder inner class handles the encoding of the UTF7 charset.
     */
    protected class Encoder extends CharsetEncoder {

        boolean shifted;  // flags whether we are currently in a shift sequence
        char encodedChar; // holds the bits of previous partially encoded char
        int requiredBits; // number of bits required to complete a 6-bit value

        /**
         * Constructs an Encoder.
         *
         * @param charset the charset that created this encoder
         */
        protected Encoder(Charset charset) {
            super(charset, 1f, 5f);
        }

        /**
         * Resets this encoder, clearing any charset-specific internal state.
         */
        @Override
        protected void implReset() {
            shifted = false;
        }

        /**
         * Flushes this encoder, terminating a pending shift sequence.
         *
         * @param out the output byte buffer
         * @return a coder-result object, either {@link CoderResult#UNDERFLOW} or
         *         {@link CoderResult#OVERFLOW}
         */
        @Override
        protected CoderResult implFlush(ByteBuffer out) {
            if (shifted) {
                // at most one byte of left-over bits plus the terminator
                if (out.remaining() < 2)
                    return CoderResult.OVERFLOW;
                flushBase64Char(out);
                out.put((byte)'-'); // terminate shift sequence explicitly
            }
            return CoderResult.UNDERFLOW;
        }

        /**
         * Encodes one or more characters into one or more bytes.
         *
         * @param in the input character buffer
         * @param out the output byte buffer
         * @return a coder-result object describing the reason for termination
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            while (in.hasRemaining()) {
                // make sure we have output space (the max we might need)
                if (out.remaining() < 3)
                    return CoderResult.OVERFLOW;

                // get next char
                char c = in.get();

                // if not in shift sequence
                if (!shifted) {
                    // if char is directly encoded, write it as byte
                    if (isDirect(c)) {
                        out.put((byte)c);
                    } else if (c == '+') { // specially encoded char
                        out.put((byte)'+').put((byte)'-');
                    } else { // start shift sequence
                        out.put((byte)'+');
                        shifted = true;
                        requiredBits = 6;
                        writeBase64Char(out, c);
                    }
                } else { // shifted
                    if (isDirect(c)) { // direct char
                        // terminate shift sequence
                        shifted = false;
                        flushBase64Char(out);
                        if (isBSet(c) || c == '-') // requires explicit termination
                            out.put((byte)'-');
                        // write direct char
                        out.put((byte)c);
                    } else { // another encoded char
                        writeBase64Char(out, c);
                    }
                }
            }

            return CoderResult.UNDERFLOW;
        }

        /**
         * Writes the base64 bytes representing the given character
         * to the given output ByteBuffer. Bits left over from
         * previously written characters are written first, followed
         * by this character's bits. Similarly, bits left over from
         * this character are saved until the next call to this method.
         *
         * @param out the ByteBuffer to which the base64 bytes are written
         * @param c the character to be written
         */
        void writeBase64Char(ByteBuffer out, char c) {
            int bits = requiredBits; // getfield bytecode optimization
            // complete the pending 6-bit value with this char's top bits
            byte b = (byte)(((encodedChar << bits) & 0x3F) | (c >>> (16 - bits)));
            out.put(toBase64(b));
            b = (byte)((c >>> (10 - bits)) & 0x3F);
            out.put(toBase64(b));

            if (bits != 6) {
                // enough bits remain for a third full 6-bit value
                b = (byte)((c >>> (4 - bits)) & 0x3F);
                out.put(toBase64(b));
                requiredBits += 2;
            } else {
                requiredBits = 2;
            }
            encodedChar = c;
        }

        /**
         * Writes any left-over base64 bits.
         *
         * @param out the ByteBuffer to which the base64 bytes are written
         */
        void flushBase64Char(ByteBuffer out) {
            if (requiredBits != 6) { // dump last encoded byte, zero-bit padded
                byte b = (byte)((encodedChar << requiredBits) & 0x3F);
                out.put(toBase64(b));
            }
        }

    } // Encoder class

    /**
     * The Decoder inner class handles the decoding of the UTF7 charset.
     */
    protected class Decoder extends CharsetDecoder {

        boolean shifted;    // flags whether we are currently in a shift sequence
        boolean emptyShift; // flags whether the current shift sequence is empty
        char decodedChar;   // holds the bits of previous partially decoded char
        int requiredBits;   // number of bits required to complete a 16-bit char

        /**
         * Constructs a Decoder.
         *
         * @param charset the charset that created this decoder
         */
        protected Decoder(Charset charset) {
            super(charset, 1f, 1f);
        }

        /**
         * Resets this decoder, clearing any charset-specific internal state.
         */
        @Override
        protected void implReset() {
            shifted = false;
        }

        /**
         * Decodes one or more bytes into one or more characters.
         *
         * @param in the input byte buffer
         * @param out the output character buffer
         * @return a coder-result object describing the reason for termination
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            int val;

            while (in.hasRemaining()) {
                // make sure we have output space
                if (out.remaining() < 1)
                    return CoderResult.OVERFLOW;

                // get next byte
                byte b = in.get();

                // if not in shift sequence
                if (!shifted) {
                    // if byte is in set D or O, write it as char
                    if (isDorOSet(b)) {
                        out.put((char)b);
                    } else if (b == '+') { // start shift sequence
                        shifted = true;
                        emptyShift = true;
                        requiredBits = 16;
                    } else { // invalid byte
                        in.position(in.position() - 1); // position input at error byte
                        return CoderResult.malformedForLength(1); // invalid byte
                    }
                } else if ((val = fromBase64(b)) != -1) { // valid base64 byte
                    // get bits from shift sequence byte
                    emptyShift = false;
                    // 6 is the max number of bits we can get from a single input byte
                    int bits = requiredBits >= 6 ? 6 : requiredBits;
                    // add new bits to currently decoded char
                    decodedChar = (char)((decodedChar << bits) | (val >> (6 - bits)));
                    requiredBits -= bits;
                    // check if we're done decoding a full 16-bit char
                    if (requiredBits == 0) {
                        // output it
                        out.put(decodedChar);
                        // and start off next char with remaining bits
                        requiredBits = 10 + bits; // 16 - (6 - bits)
                        decodedChar = (char)val;  // save the extra bits for later
                    }
                } else { // terminating a shift sequence
                    shifted = false;
                    // any leftover bits when terminating the shift sequence
                    // are discarded if they are zero, or invalid if they are nonzero
                    if ((char)(decodedChar << requiredBits) != 0) {
                        in.position(in.position() - 1); // position input at error byte
                        return CoderResult.malformedForLength(1); // invalid byte
                    }
                    // process implicit or explicit shift sequence termination
                    if (b == '-') {
                        if (emptyShift) // a "+-" sequence outputs a '+'
                            out.put('+');
                        // otherwise shift ends, and '-' is absorbed
                    } else {
                        // process regular char that ended base64 sequence
                        if (isDorOSet(b)) { // output regular char
                            out.put((char)b);
                        } else {
                            in.position(in.position() - 1); // position input at error byte
                            return CoderResult.malformedForLength(1); // invalid byte
                        }
                    }
                }
            }

            return CoderResult.UNDERFLOW;
        }
    } // Decoder class

}
+ * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>. + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset; + +/** + * The UTF7OptionalCharset class handles the encoding and decoding of the + * UTF-7 charset. + *

+ * The O-Set characters are encoded directly + * (this is optional according to the RFC). + *

+ * The encoding and decoding are based on RFC 2152 + * (http://www.ietf.org/rfc/rfc2152.txt) + * + * @author Amichai Rothman + * @since 2005-06-10 + */ +public class UTF7OptionalCharset extends UTF7Charset { + + static final String NAME = "UTF-7-OPTIONAL"; + + static final String[] ALIASES = { "UTF-7O", "UTF7O", "UTF-7-O" }; + + /** + * Constructs an instance of the UTF7OptionalCharset. + *

/**
 * The Utils class contains utility methods used at runtime
 * by charsets, as well as development tools for creating new Charsets.
 *
 * @author Amichai Rothman
 * @since 2015-10-14
 */
public class Utils {

    // prevents instantiation
    private Utils() {}

    /**
     * Returns whether the running JDK version is at least 1.5.
     *
     * @return true if running in JDK 1.5 or higher, false otherwise
     */
    static boolean isJDK15() {
        try {
            float version = Float.parseFloat(System.getProperty("java.class.version"));
            return version >= 49.0; // 49.0 is the class version of JDK 1.5
        } catch (Exception e) {
            // deliberate best-effort: a missing, unreadable or unparsable
            // property is treated as "older than 1.5"
            return false;
        }
    }

    /**
     * Returns a string containing the Java definitions of the
     * given inverse lookup (char-to-byte) table.
     * <p>
     * This is a convenient utility method for design-time building
     * of charsets based on a lookup table mapping, as an alternative
     * to creating these inverse lookup tables on-the-fly.
     *
     * @param tables the inverse lookup (char-to-byte) table; entries
     *        may be null, and are then emitted as {@code null}
     * @return the Java definitions of the inverse lookup
     *         (char-to-byte) table
     */
    public static String toInverseLookupTableDefinition(int[][] tables) {
        StringBuilder sb = new StringBuilder();
        int nulls = 0; // number of consecutive null tables emitted so far
        sb.append("static final int[][] CHAR_TO_BYTE = {\n\t");
        for (int i = 0; i < tables.length; i++) {
            int[] table = tables[i];
            if (table == null) {
                // wrap the line after every 8 consecutive nulls
                if (nulls++ % 8 == 0 && nulls > 1)
                    sb.append("\n\t");
                sb.append("null, ");
            } else {
                if (nulls > 0)
                    sb.append("\n\t");
                nulls = 0;
                sb.append("{ // high byte = 0x");
                appendHex(sb, i);
                sb.append("\n\t");
                for (int j = 0; j < table.length; j++) {
                    if (table[j] == -1) {
                        sb.append(" -1, ");
                    } else {
                        sb.append("0x");
                        appendHex(sb, table[j]);
                        sb.append(", ");
                    }
                    // 8 values per output line
                    if ((j + 1) % 8 == 0)
                        sb.append("\n\t");
                }
                sb.append("}, \n\t");
            }
        }
        sb.append("\n\t};");
        return sb.toString();
    }

    /**
     * Appends the hexadecimal form of the given value, left-padded
     * with a single zero when the value is below 0x10.
     *
     * @param sb the builder to append to
     * @param value the value to append
     */
    private static void appendHex(StringBuilder sb, int value) {
        if (value < 0x10)
            sb.append('0');
        sb.append(Integer.toHexString(value));
    }

    /**
     * Main entry point for command-line utility.
     * <p>
     * Exits with status 0 after listing charsets, 1 after printing
     * the help text and 2 on an invalid argument.
     *
     * @param args the command line arguments
     * @throws IOException if an error occurs
     */
    public static void main(String[] args) throws IOException {
        // parse arguments
        String from = "UTF-8";
        String to = "UTF-8";
        List<String> inputFiles = new ArrayList<String>();
        String outputFile = null;
        PrintStream so = System.out;
        try {
            for (int i = 0; i < args.length; i++) {
                String arg = args[i];
                // options are only recognized before the first input file
                if (arg.startsWith("-") && arg.length() > 1 && inputFiles.isEmpty()) {
                    if (arg.equals("-o")) {
                        outputFile = args[++i]; // throws IOOBE
                    } else if (arg.equals("-f")) {
                        from = args[++i]; // throws IOOBE
                    } else if (arg.equals("-t")) {
                        to = args[++i]; // throws IOOBE
                    } else if (arg.equals("-l") || arg.equals("-ll")) {
                        String filter = arg.equals("-ll") ? Utils.class.getPackage().getName() : "";
                        listCharsets(so, filter);
                        System.exit(0);
                    } else if (arg.equals("-?") || arg.equals("-h")) {
                        printUsage(so);
                        System.exit(1);
                    } else {
                        throw new IndexOutOfBoundsException();
                    }
                } else {
                    inputFiles.add(arg); // all remaining args are input files
                }
            }
        } catch (IndexOutOfBoundsException ioobe) {
            System.err.println("Error: invalid argument");
            System.err.println("Use the -h option for help");
            System.exit(2);
        }
        if (inputFiles.isEmpty())
            inputFiles.add("-");
        // perform conversion
        OutputStream os = outputFile == null ? so : new FileOutputStream(outputFile);
        convert(inputFiles, from, os, to);
    }

    /**
     * Prints the name and aliases of every available charset whose
     * implementation class name starts with the given prefix.
     *
     * @param so the stream to print to
     * @param filter the class name prefix to match (empty matches all)
     */
    private static void listCharsets(PrintStream so, String filter) {
        for (Charset charset : Charset.availableCharsets().values())
            if (charset.getClass().getName().startsWith(filter))
                so.println(charset.name() + " " + charset.aliases());
    }

    /**
     * Prints the command-line help text.
     *
     * @param so the stream to print to
     */
    private static void printUsage(PrintStream so) {
        so.println("Usage: java -jar jcharset.jar [options] [inputFiles...]\n");
        so.println("Converts the charset encoding of one or more (concatenated) input files.");
        so.println("If no files or '-' (dash) is specified, input is read from stdin.");
        so.println("\nOptions:");
        so.println(" -f <inputCharset>\tthe name of the input charset [default UTF-8]");
        so.println(" -t <outputCharset>\tthe name of the output charset [default UTF-8]");
        so.println(" -o <outputFile>\tthe output file name [default stdout]");
        so.println(" -l\t\t\tlist all available charset names and aliases");
        so.println(" -ll\t\t\tlist all JCharset charset names and aliases");
        so.println(" -h, -?\t\tshow this help information");
    }

    /**
     * Reads the given input files in order, decoding them with one charset
     * and writing them, concatenated, to the output stream in another.
     * The output stream and every opened input stream are closed
     * before returning, whether or not an error occurs.
     *
     * @param inputFiles the input file names, where "-" denotes stdin
     * @param from the name of the input charset
     * @param os the output stream to write to
     * @param to the name of the output charset
     * @throws IOException if an error occurs
     */
    private static void convert(List<String> inputFiles, String from,
            OutputStream os, String to) throws IOException {
        OutputStreamWriter writer = null;
        try {
            char[] buf = new char[16384];
            writer = new OutputStreamWriter(os, to);
            for (String inputFile : inputFiles) {
                InputStream is = inputFile.equals("-") ? System.in : new FileInputStream(inputFile);
                try {
                    int count;
                    // a fresh reader (and thus decoder) is used for every file
                    Reader reader = new InputStreamReader(is, from);
                    while ((count = reader.read(buf)) > -1)
                        writer.write(buf, 0, count);
                } finally {
                    is.close();
                }
            }
        } finally {
            try {
                if (writer != null)
                    writer.close(); // also flushes encoder
            } finally {
                os.close(); // released even if closing the writer fails
            }
        }
    }
}
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.gsm; + +/** + * The CCGSMCharset class handles the encoding and decoding of the + * GSM default encoding charset. In this variant, byte 0x09 is mapped + * to the LATIN CAPITAL LETTER C WITH CEDILLA character. + *

+ * The encoding and decoding are based on the mapping at + * http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT + * + * @author Amichai Rothman + * @since 2007-03-26 + */ +public class CCGSMCharset extends GSMCharset { + + static final String NAME = "CCGSM"; + + static final String[] ALIASES = {}; + + /** + * Constructs an instance of the CCGSMCharset. + */ + public CCGSMCharset() { + super(NAME, ALIASES, + BYTE_TO_CHAR_CAPITAL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT, + CHAR_TO_BYTE_CAPITAL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/CCPackedGSMCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/CCPackedGSMCharset.java new file mode 100644 index 0000000..5cebdfb --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/CCPackedGSMCharset.java @@ -0,0 +1,51 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.gsm; + +/** + * The CCPackedGSMCharset class handles the encoding and decoding of the + * GSM default encoding charset. In this variant, byte 0x09 is mapped + * to the LATIN CAPITAL LETTER C WITH CEDILLA character. + *

+ * The encoding and decoding are based on the mapping at + * http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT + * + * @author Amichai Rothman + * @since 2007-03-26 + */ +public class CCPackedGSMCharset extends PackedGSMCharset { + + static final String NAME = "CCPGSM"; + + static final String[] ALIASES = {}; + + /** + * Constructs an instance of the CCPackedGSMCharset. + */ + public CCPackedGSMCharset() { + super(NAME, ALIASES, + BYTE_TO_CHAR_CAPITAL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT, + CHAR_TO_BYTE_CAPITAL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT, + false); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/CRCCPackedGSMCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/CRCCPackedGSMCharset.java new file mode 100644 index 0000000..0f8bd19 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/CRCCPackedGSMCharset.java @@ -0,0 +1,53 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.gsm; + +/** + * The CRCCPackedGSMCharset class handles the encoding and decoding of the + * GSM default encoding charset. In this variant, byte 0x09 is mapped + * to the LATIN CAPITAL LETTER C WITH CEDILLA character. 
+ * It also uses {@link PackedGSMCharset CR-padding} instead of + * zero-padding to avoid ambiguous interpretation of an '@' character. + *

+ * The encoding and decoding are based on the mapping at + * http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT + * + * @author Amichai Rothman + * @since 2019-03-31 + */ +public class CRCCPackedGSMCharset extends PackedGSMCharset { + + static final String NAME = "CRCCPGSM"; + + static final String[] ALIASES = {}; + + /** + * Constructs an instance of the CRCCPackedGSMCharset. + */ + public CRCCPackedGSMCharset() { + super(NAME, ALIASES, + BYTE_TO_CHAR_CAPITAL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT, + CHAR_TO_BYTE_CAPITAL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT, + true); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/CRSCPackedGSMCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/CRSCPackedGSMCharset.java new file mode 100644 index 0000000..b7205b8 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/CRSCPackedGSMCharset.java @@ -0,0 +1,53 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.gsm; + +/** + * The CRSCPackedGSMCharset class handles the encoding and decoding of the + * GSM default encoding charset. In this variant, byte 0x09 is mapped + * to the LATIN SMALL LETTER C WITH CEDILLA character. 
+ * It also uses {@link PackedGSMCharset CR-padding} instead of + * zero-padding to avoid ambiguous interpretation of an '@' character. + *

+ * The encoding and decoding are based on the mapping at + * http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT + * + * @author Amichai Rothman + * @since 2019-03-31 + */ +public class CRSCPackedGSMCharset extends PackedGSMCharset { + + static final String NAME = "CRSCPGSM"; + + static final String[] ALIASES = {}; + + /** + * Constructs an instance of the CRSCPackedGSMCharset. + */ + public CRSCPackedGSMCharset() { + super(NAME, ALIASES, + BYTE_TO_CHAR_SMALL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT, + CHAR_TO_BYTE_SMALL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT, + true); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/GSMCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/GSMCharset.java new file mode 100644 index 0000000..04957e2 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/GSMCharset.java @@ -0,0 +1,147 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.gsm; + +import static net.freeutils.charset.ByteLookupCharset.createInverseLookupTable; +import static net.freeutils.charset.ByteLookupCharset.mutate; +import net.freeutils.charset.EscapedByteLookupCharset; + +/** + * The GSMCharset class handles the encoding and decoding of the + * GSM default encoding charset. + *

/**
 * The GSMCharset class handles the encoding and decoding of the
 * GSM default encoding charset.
 * <p>
 * The encoding and decoding are based on the mapping at
 * http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
 * <p>
 * The class itself only holds the lookup tables and passes them,
 * together with the {@link #ESCAPE} byte, to its superclass.
 *
 * @author Amichai Rothman
 * @since 2005-05-26
 */
public class GSMCharset extends EscapedByteLookupCharset {

    /** The escape byte passed to the superclass constructor. */
    static final byte ESCAPE = 0x1B;

    /**
     * The main byte-to-char table, in the flavor where index 0x09 holds
     * 0x00E7 (LATIN SMALL LETTER C WITH CEDILLA). Entries of -1 carry no
     * character: index 0x1B (the {@link #ESCAPE} position) and all
     * indices 0x80-0xFF.
     */
    static final int[] BYTE_TO_CHAR_SMALL_C_CEDILLA = {
        0x0040, 0x00A3, 0x0024, 0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC,
        0x00F2, 0x00E7, 0x000A, 0x00D8, 0x00F8, 0x000D, 0x00C5, 0x00E5,
        0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8,
        0x03A3, 0x0398, 0x039E,     -1, 0x00C6, 0x00E6, 0x00DF, 0x00C9,
        0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027,
        0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
        0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
        0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
        0x00A1, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
        0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
        0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
        0x0058, 0x0059, 0x005A, 0x00C4, 0x00D6, 0x00D1, 0x00DC, 0x00A7,
        0x00BF, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
        0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
        0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
        0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
    };

    /**
     * The byte-to-char table for escaped characters. Only ten indices
     * are populated (0x0A, 0x14, 0x28, 0x29, 0x2F, 0x3C, 0x3D, 0x3E,
     * 0x40 and 0x65); every other entry is -1.
     */
    static final int[] BYTE_TO_CHAR_ESCAPED_DEFAULT = {
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1, 0x000C,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1, 0x005E,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
        0x007B, 0x007D,     -1,     -1,     -1,     -1,     -1, 0x005C,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1, 0x005B, 0x007E, 0x005D,     -1,
        0x007C,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1, 0x20AC,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
            -1,     -1,     -1,     -1,     -1,     -1,     -1,     -1,
    };

    // NOTE: the derived tables below are initialized from the tables above,
    // so the declaration order of these static fields must be preserved.

    /** The char-to-byte table derived from {@link #BYTE_TO_CHAR_SMALL_C_CEDILLA}. */
    static final int[][] CHAR_TO_BYTE_SMALL_C_CEDILLA =
        createInverseLookupTable(BYTE_TO_CHAR_SMALL_C_CEDILLA);

    /** The char-to-byte table derived from {@link #BYTE_TO_CHAR_ESCAPED_DEFAULT}. */
    static final int[][] CHAR_TO_BYTE_ESCAPED_DEFAULT =
        createInverseLookupTable(BYTE_TO_CHAR_ESCAPED_DEFAULT);

    /**
     * The main byte-to-char table in the capital-C-cedilla flavor.
     * NOTE(review): presumably mutate() returns a copy of the small-c table
     * with index 9 replaced by 0x00C7 (LATIN CAPITAL LETTER C WITH CEDILLA),
     * leaving the source table untouched - confirm against ByteLookupCharset.
     */
    static final int[] BYTE_TO_CHAR_CAPITAL_C_CEDILLA =
        mutate(BYTE_TO_CHAR_SMALL_C_CEDILLA, new int[] { 9 }, new int[] { 0x00C7 });

    /** The char-to-byte table derived from {@link #BYTE_TO_CHAR_CAPITAL_C_CEDILLA}. */
    static final int[][] CHAR_TO_BYTE_CAPITAL_C_CEDILLA =
        createInverseLookupTable(BYTE_TO_CHAR_CAPITAL_C_CEDILLA);

    /**
     * Initializes a new charset with the given canonical name and alias
     * set, and byte-to-char/char-to-byte lookup tables. All arguments are
     * passed through to the superclass, with {@link #ESCAPE} as the
     * escape byte.
     *
     * @param canonicalName the canonical name of this charset
     * @param aliases an array of this charset's aliases, or null if it has no aliases
     * @param byteToChar a byte-to-char conversion table for this charset
     * @param byteToCharEscaped a byte-to-char conversion table for this charset
     *        for the escaped characters
     * @param charToByte a char-to-byte conversion table for this charset. It can
     *        be generated on-the-fly by calling createInverseLookupTable(byteToChar).
     * @param charToByteEscaped a char-to-byte conversion table for this charset
     *        for the escaped characters
     * @throws java.nio.charset.IllegalCharsetNameException
     *         if the canonical name or any of the aliases are illegal
     */
    protected GSMCharset(String canonicalName, String[] aliases,
            int[] byteToChar, int[] byteToCharEscaped,
            int[][] charToByte, int[][] charToByteEscaped) {
        super(canonicalName, aliases, ESCAPE,
            byteToChar, byteToCharEscaped, charToByte, charToByteEscaped);
    }

}
+ * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>. + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.gsm; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CoderResult; + +/** + * The PackedGSMCharset class handles the encoding and decoding of the + * GSM default encoding charset, with packing as per GSM 03.38 / ETSI TS 123 038 spec. + * <p>

+ * When there are 8*n-1 encoded bytes, there is ambiguity + * since it's impossible to distinguish whether the final byte + * contains a trailing '@' character (which is mapped to 0) + * or 7 zero bits of padding following 7 data bytes. + *

+ * When decoding, we opt for the latter interpretation + * since it's far more likely, at the cost of losing a + * trailing '@' character in strings whose unpacked size + * is a multiple of 8, and whose last character is '@'. + *

+ * An application that wishes to handle this rare case + * properly must disambiguate this case externally, such + * as by obtaining the original string length, and + * appending the trailing '@' if the length + * shows that there is one character missing. + *

+ * Alternatively, the spec supports replacing the zero + * padding in such a case with a CR character, which is + * then removed by the receiver, but is harmless also on + * devices that display it as-is since a CR is invisible. + * This implementation has configurable support for CR padding. + *

+ * However, this CR padding introduces a new ambiguity, with + * a string that really does end with a CR character on an + * 8-byte boundary, so in this case an extra CR is appended + * to it, and due to the semantics of CR in the spec, a double + * CR is equivalent to a single CR, so this is harmless as well. + *

+ * The encoding and decoding are based on the mapping at + * http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT + * + * @author Amichai Rothman + * @since 2007-03-20 + */ +public class PackedGSMCharset extends GSMCharset { + + static final int BUFFER_SIZE = 256; + static final byte CR = 0x0D; + + /** + * Specifies whether to use CR padding instead of zero padding + * when encoding/decoding in order to disambiguate the 7 padding + * zero bits in strings whose length is 8*n-1 bytes from a + * trailing '@' character in strings of length 8*n. + */ + final boolean padWithCR; + + /** + * Initializes a new charset with the given canonical name and alias + * set, and byte-to-char/char-to-byte lookup tables. + * + * @param canonicalName the canonical name of this charset + * @param aliases an array of this charset's aliases, or null if it has no aliases + * @param byteToChar a byte-to-char conversion table for this charset + * @param byteToCharEscaped a byte-to-char conversion table for this charset + * for the escaped characters + * @param charToByte a char-to-byte conversion table for this charset. It can + * be generated on-the-fly by calling createInverseLookupTable(byteToChar). + * @param charToByteEscaped a char-to-byte conversion table for this charset + * for the escaped characters + * @param padWithCR specifies whether to apply {@link PackedGSMCharset CR padding} + * or the original (but ambiguous) zero padding + * @throws java.nio.charset.IllegalCharsetNameException + * if the canonical name or any of the aliases are illegal + */ + protected PackedGSMCharset(String canonicalName, String[] aliases, + int[] byteToChar, int[] byteToCharEscaped, + int[][] charToByte, int[][] charToByteEscaped, + boolean padWithCR) { + super(canonicalName, aliases, + byteToChar, byteToCharEscaped, charToByte, charToByteEscaped); + this.padWithCR = padWithCR; + } + + /** + * Constructs a new decoder for this charset. 
+ * + * @return a new decoder for this charset + */ + @Override + public CharsetDecoder newDecoder() { + return new Decoder(this); + } + + /** + * Constructs a new encoder for this charset. + * + * @return a new encoder for this charset + */ + @Override + public CharsetEncoder newEncoder() { + return new Encoder(this); + } + + /** + * The Encoder inner class handles the encoding of the + * Packed GSM default encoding charset. + */ + protected class Encoder extends GSMCharset.Encoder { + + int bitpos; + byte current; + ByteBuffer buf; + + /** + * Constructs an Encoder. + * + * @param charset the charset that created this encoder + */ + protected Encoder(Charset charset) { + super(charset, 7 / 8f, 2f); + buf = ByteBuffer.allocate(BUFFER_SIZE); + implReset(); + } + + /** + * Resets this encoder, clearing any charset-specific internal state. + */ + @Override + protected void implReset() { + bitpos = 0; + current = 0; + buf.limit(0); + } + + /** + * Flushes this encoder. + * + * @param out the output byte buffer + * + * @return a coder-result object, either {@link CoderResult#UNDERFLOW} or + * {@link CoderResult#OVERFLOW} + */ + @Override + protected CoderResult implFlush(ByteBuffer out) { + // flush buffer + CoderResult result = pack(buf, out); + // handle CR padding if necessary + if (padWithCR && bitpos <= 1) { // bitpos is 0 or 1 + if (bitpos == 1) { + // if the output is 8*n-1 bytes long, the last byte has 7 padding zero + // bits which may be ambiguously interpreted as an '@' character, + // so in this case we replace the padding with a harmless CR + current |= (CR << 1); + } else if (out.position() > 0 && out.get(out.position() - 1) >>> 1 == CR) { + // if the output is 8*n bytes long and really does end with a CR, + // we need to disambiguate this from the CR padding, + // so we add an extra CR (due to the spec's definition of CR, + // this is equivalent to a single CR and thus also harmless) + current = CR; + bitpos = 7; + } + } + // flush last (current) partial 
byte if it exists + if (bitpos != 0) { + if (!out.hasRemaining()) + return CoderResult.OVERFLOW; + out.put(current); // write final leftover byte + } + return result; + } + + /** + * Encodes one or more characters into one or more bytes. + * + * @param in the input character buffer + * @param out the output byte buffer + * @return a coder-result object describing the reason for termination + */ + @Override + protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) { + CoderResult result; + while (true) { + // output buffered data + if (buf.hasRemaining()) { + result = pack(buf, out); + if (result == CoderResult.OVERFLOW) + return result; + } + // process new data into buffer + buf.clear(); + result = super.encodeLoop(in, buf); + buf.flip(); + // stop if out of input or error + if (!buf.hasRemaining() || result.isError()) + return result; + } + } + + /** + * Packs the given data into full bytes. + * + * @param in the input byte buffer + * @param out the output byte buffer + * @return a coder-result object, either {@link CoderResult#UNDERFLOW} or + * {@link CoderResult#OVERFLOW} + */ + protected CoderResult pack(ByteBuffer in, ByteBuffer out) { + int remaining = in.remaining(); + while (remaining-- > 0) { + if (!out.hasRemaining()) + return CoderResult.OVERFLOW; + byte b = (byte)(in.get() & 0x7F); // remove top bit + // assign first group of partial bits + current |= b << bitpos; + // assign second group of partial bits (if exist) + if (bitpos > 0) { // if packed byte is full + out.put(current); + current = (byte)(b >> (8 - bitpos)); // keep left-over bits (if any) + } + bitpos = (bitpos + 7) % 8; + } + return CoderResult.UNDERFLOW; + } + + } + + /** + * The Decoder inner class handles the decoding of the + * Packed GSM default encoding charset. + */ + protected class Decoder extends GSMCharset.Decoder { + + int bitpos; + byte current; + byte prev; + int unpackedCount; + ByteBuffer buf; + + /** + * Constructs a Decoder. 
+ * + * @param charset the charset that created this decoder + */ + protected Decoder(Charset charset) { + super(charset, 8 / 7f, 2f); + buf = ByteBuffer.allocate(BUFFER_SIZE); + implReset(); + } + + /** + * Resets this decoder, clearing any charset-specific internal state. + */ + @Override + protected void implReset() { + bitpos = 0; + current = 0; + prev = 0; + unpackedCount = 0; + buf.limit(0); + } + + /** + * Flushes this decoder. + * + * @param out the output character buffer + * + * @return a coder-result object, either {@link CoderResult#UNDERFLOW} or + * {@link CoderResult#OVERFLOW} + */ + @Override + protected CoderResult implFlush(CharBuffer out) { + // fix output edge cases caused by ambiguous padding, + // depending on the CR padding configuration: + // either remove a trailing '@' character if the string length is 8*n, + // or remove a trailing CR character if the string length is 8*n + // or if the string length is 8*n+1 and it ends with two CR characters + int mod = unpackedCount % 8; + if (mod <= 1) { // mod is 0 or 1 + int pos = out.position() - 1; + if (pos > 0) { + char c = out.get(pos); + if (c == '@' && !padWithCR && mod == 0 || + c == CR && padWithCR && (mod == 0 || out.get(pos - 1) == CR)) + out.position(pos); // remove last character + } + } + return CoderResult.UNDERFLOW; + } + + /** + * Decodes one or more bytes into one or more characters. 
+ * + * @param in the input byte buffer + * @param out the output character buffer + * @return a coder-result object describing the reason for termination + */ + @Override + protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) { + while (true) { + // unpack input data into buffer + unpackedCount -= buf.remaining(); // it will be counted again after unpack + buf.compact(); // move data to beginning and prepare to write more + CoderResult unpackResult = unpack(in, buf); + buf.flip(); // prepare to read + if (!buf.hasRemaining()) + return unpackResult; // underflow + unpackedCount += buf.remaining(); + // decode buffered unpacked data to output + CoderResult decodeResult = super.decodeLoop(buf, out); + // handle out of output space and buffer still has data in it + if (buf.hasRemaining() || decodeResult.isError()) { + if (decodeResult.isUnderflow()) { // last byte is escape byte + // if there's more input or at least another unpacked byte + // (the 8th doesn't require reading from input), just continue + if (in.hasRemaining() || unpackResult.isOverflow()) + continue; + // otherwise we really need more input, so undo the last byte + // (escape sequence which was cut in middle) so caller can + // properly handle malformed input if there is no more input + in.position(in.position() - 1); // unread the byte + bitpos = (bitpos + 9) % 8; // undo its unpacking too + current = prev; + buf.limit(buf.position()); + unpackedCount--; + } + return decodeResult; + } + } + } + + /** + * Unpacks the given data into original bytes. 
+ * + * @param in the input byte buffer + * @param out the output byte buffer + * @return a coder-result object, either {@link CoderResult#UNDERFLOW} or + * {@link CoderResult#OVERFLOW} + */ + protected CoderResult unpack(ByteBuffer in, ByteBuffer out) { + int remaining = out.remaining(); + while (remaining-- > 0) { + if (!in.hasRemaining() && bitpos != 1) + return CoderResult.UNDERFLOW; + if (bitpos == 0) { + prev = current; + current = in.get(); + } + // remove top bit and assign first group of partial bits + byte b = (byte)(((current & 0xFF) >> bitpos) & 0x7F); + // remove top bit and assign second group of partial bits (if exist) + if (bitpos >= 2) { + prev = current; + current = in.get(); + b |= (byte)((current << (8 - bitpos)) & 0x7F); + } + bitpos = (bitpos + 7) % 8; + out.put(b); + } + return CoderResult.OVERFLOW; + } + } + + /** + * Unpacks the given data into original bytes. + *

+ * This is an external utility method and is not used + * internally by the Charset implementation. + * + * @param in the input bytes + * @return the unpacked output bytes + */ + public static byte[] unpack(byte[] in) { + byte[] out = new byte[(in.length * 8) / 7]; + int len = out.length; + int current = 0; + int bitpos = 0; + for (int i = 0; i < len; i++) { + // remove top bit and assign first group of partial bits + out[i] = (byte)(((in[current] & 0xFF) >> bitpos) & 0x7F); + // remove top bit and assign second group of partial bits (if exist) + if (bitpos > 1) + out[i] |= (byte)((in[++current] << (8 - bitpos)) & 0x7F); + else if (bitpos == 1) + current++; + bitpos = (bitpos + 7) % 8; + } + // this fixes an ambiguity bug in the specs + // where the last of 8 packed bytes is 0 + // and it's impossible to distinguish whether it is a + // trailing '@' character (which is mapped to 0) + // or extra zero-bit padding for 7 actual data bytes. + // + // we opt for the latter, since it's far more likely, + // at the cost of losing a trailing '@' character + // in strings whose unpacked size modulo 8 is 0, + // and whose last character is '@'. + // + // an application that wishes to handle this rare case + // properly must disambiguate this case externally, such + // as by obtaining the original string length, and + // appending the trailing '@' if the length + // shows that there is one character missing. + if (len % 8 == 0 && len > 0 && out[len - 1] == 0) { + byte[] fixed = new byte[len - 1]; + System.arraycopy(out, 0, fixed, 0, len - 1); + out = fixed; + } + return out; + } + + /** + * Packs the given data into full bytes. + *

+ * This is an external utility method and is not used + * internally by the Charset implementation. + * + * @param in the input bytes + * @return the packed output bytes + */ + public static byte[] pack(byte[] in) { + byte[] out = new byte[(int)Math.ceil((in.length * 7) / 8f)]; + int current = 0; + int bitpos = 0; + for (byte b : in) { + b &= 0x7F; // remove top bit + // assign first group of partial bits + out[current] |= b << bitpos; + // assign second group of partial bits (if exist) + if (bitpos > 1) + out[++current] |= b >> 8 - bitpos; + else if (bitpos == 1) // packed byte is full (but no left-over bits) + current++; + bitpos = (bitpos + 7) % 8; + } + return out; + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/SCGSMCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/SCGSMCharset.java new file mode 100644 index 0000000..efc534d --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/SCGSMCharset.java @@ -0,0 +1,51 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.gsm; + +/** + * The SCGSMCharset class handles the encoding and decoding of the + * GSM default encoding charset. 
In this variant, byte 0x09 is mapped + * to the LATIN SMALL LETTER C WITH CEDILLA character. + *

+ * The encoding and decoding are based on the mapping at + * http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT + * + * @author Amichai Rothman + * @since 2007-03-26 + */ +public class SCGSMCharset extends GSMCharset { + + static final String NAME = "SCGSM"; + + static final String[] ALIASES = { + "GSM-DEFAULT-ALPHABET", "GSM_0338", "GSM_DEFAULT", "GSM7", "GSM-7BIT" }; + + /** + * Constructs an instance of the SCGSMCharset. + */ + public SCGSMCharset() { + super(NAME, ALIASES, + BYTE_TO_CHAR_SMALL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT, + CHAR_TO_BYTE_SMALL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/SCPackedGSMCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/SCPackedGSMCharset.java new file mode 100644 index 0000000..2f94a87 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/gsm/SCPackedGSMCharset.java @@ -0,0 +1,51 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.gsm; + +/** + * The SCPackedGSMCharset class handles the encoding and decoding of the + * GSM default encoding charset. 
In this variant, byte 0x09 is mapped + * to the LATIN SMALL LETTER C WITH CEDILLA character. + *

+ * The encoding and decoding are based on the mapping at + * http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT + * + * @author Amichai Rothman + * @since 2007-03-26 + */ +public class SCPackedGSMCharset extends PackedGSMCharset { + + static final String NAME = "SCPGSM"; + + static final String[] ALIASES = {}; + + /** + * Constructs an instance of the SCPackedGSMCharset. + * The small-c-cedilla lookup tables and the default escape tables are passed + * to the PackedGSMCharset constructor under the name NAME with no aliases. + * NOTE(review): the trailing false presumably leaves CR padding disabled + * (release 2.1 added separate CR-padded variants) - confirm against the + * PackedGSMCharset constructor. + */ + public SCPackedGSMCharset() { + super(NAME, ALIASES, + BYTE_TO_CHAR_SMALL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT, + CHAR_TO_BYTE_SMALL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT, + false); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CA2Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CA2Charset.java new file mode 100644 index 0000000..e119b91 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CA2Charset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646CA2Charset class handles the encoding and decoding of the + * ISO646-CA2 national variant of the ISO/IEC 646 charset.
+ * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646CA2Charset extends ByteLookupCharset { + + static final String NAME = "ISO646-CA2"; + + static final String[] ALIASES = { "ISO-IR-122" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xE0, 0xE2, 0xE7, 0xEA, 0xC9, 0xF4, 0xE9, 0xF9, 0xE8, 0xFB }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646CA2Charset. + */ + public ISO646CA2Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CACharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CACharset.java new file mode 100644 index 0000000..89a3e5e --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CACharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646CACharset class handles the encoding and decoding of the + * ISO646-CA national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646CACharset extends ByteLookupCharset { + + static final String NAME = "ISO646-CA"; + + static final String[] ALIASES = { "ISO-IR-121" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xE0, 0xE2, 0xE7, 0xEA, 0xEE, 0xF4, 0xE9, 0xF9, 0xE8, 0xFB }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646CACharset. + */ + public ISO646CACharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CHCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CHCharset.java new file mode 100644 index 0000000..7f9e2ed --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CHCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646CHCharset class handles the encoding and decoding of the + * ISO646-CH national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646CHCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-CH"; + + static final String[] ALIASES = {}; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xF9, 0xE0, 0xE9, 0xE7, 0xEA, 0xEE, 0xE8, 0xF4, 0xE4, 0xF6, 0xFC, 0xFB }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646CHCharset. + */ + public ISO646CHCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CNCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CNCharset.java new file mode 100644 index 0000000..b7b07ef --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CNCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. 
+ * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646CNCharset class handles the encoding and decoding of the + * ISO646-CN national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646CNCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-CN"; + + static final String[] ALIASES = { "ISO-IR-57" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x24, 0x7E }, + new int[] { 0xA5, 0xAF }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646CNCharset. + */ + public ISO646CNCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CUCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CUCharset.java new file mode 100644 index 0000000..8d1357b --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646CUCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. 
+ * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646CUCharset class handles the encoding and decoding of the + * ISO646-CU national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646CUCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-CU"; + + static final String[] ALIASES = { "ISO-IR-151" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x24, 0x5B, 0x5C, 0x5E, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA4, 0xA1, 0xD1, 0xBF, 0xB4, 0xF1, 0x5B, 0xA8 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646CUCharset. + */ + public ISO646CUCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646DECharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646DECharset.java new file mode 100644 index 0000000..439a560 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646DECharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. 
+ * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646DECharset class handles the encoding and decoding of the + * ISO646-DE national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646DECharset extends ByteLookupCharset { + + static final String NAME = "ISO646-DE"; + + static final String[] ALIASES = { "ISO-IR-21", "DIN_66003" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA7, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0xDF }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646DECharset. 
+ */ + public ISO646DECharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646DKCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646DKCharset.java new file mode 100644 index 0000000..e89aa76 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646DKCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646DKCharset class handles the encoding and decoding of the + * ISO646-DK national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646DKCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-DK"; + + static final String[] ALIASES = {}; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D }, + new int[] { 0xC6, 0xD8, 0xC5, 0xE6, 0xF8, 0xE5 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646DKCharset. 
+ */ + public ISO646DKCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ES2Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ES2Charset.java new file mode 100644 index 0000000..24c2ec5 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ES2Charset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646ES2Charset class handles the encoding and decoding of the + * ISO646-ES2 national variant of the ISO/IEC 646 charset. 
+ * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646ES2Charset extends ByteLookupCharset { + + static final String NAME = "ISO646-ES2"; + + static final String[] ALIASES = { "ISO-IR-85" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0x2022, 0xA1, 0xD1, 0xC7, 0xBF, 0xB4, 0xF1, 0xE7, 0xA8 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646ES2Charset. + */ + public ISO646ES2Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ESCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ESCharset.java new file mode 100644 index 0000000..0772d18 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ESCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646ESCharset class handles the encoding and decoding of the + * ISO646-ES national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646ESCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-ES"; + + static final String[] ALIASES = { "ISO-IR-17" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D }, + new int[] { 0xA3, 0xA7, 0xA1, 0xD1, 0xBF, 0xB0, 0xF1, 0xE7 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646ESCharset. + */ + public ISO646ESCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646FISECharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646FISECharset.java new file mode 100644 index 0000000..862560b --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646FISECharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646FISECharset class handles the encoding and decoding of the + * ISO646-FI/ISO646-SE national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646FISECharset extends ByteLookupCharset { + + static final String NAME = "ISO646-FI"; + + static final String[] ALIASES = { "ISO646-SE", "ISO-IR-10" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x24, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA4, 0xC4, 0xD6, 0xC5, 0xE4, 0xF6, 0xE5, 0xAF }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646FISECharset. + */ + public ISO646FISECharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646FR1Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646FR1Charset.java new file mode 100644 index 0000000..cf5cbc4 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646FR1Charset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. 
+ * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646FR1Charset class handles the encoding and decoding of the + * ISO646-FR1 national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646FR1Charset extends ByteLookupCharset { + + static final String NAME = "ISO646-FR1"; + + static final String[] ALIASES = { "ISO-IR-25" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA3, 0xE0, 0xB0, 0xE7, 0xA7, 0xE9, 0xF9, 0xE8, 0xA8 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646FR1Charset. + */ + public ISO646FR1Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646FRCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646FRCharset.java new file mode 100644 index 0000000..8656373 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646FRCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. 
+ * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646FRCharset class handles the encoding and decoding of the + * ISO646-FR national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646FRCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-FR"; + + static final String[] ALIASES = { "ISO-IR-69" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA3, 0xE0, 0xB0, 0xE7, 0xA7, 0xB5, 0xE9, 0xF9, 0xE8, 0xA8 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646FRCharset. 
+ */ + public ISO646FRCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646GBCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646GBCharset.java new file mode 100644 index 0000000..39516a8 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646GBCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646GBCharset class handles the encoding and decoding of the + * ISO646-GB national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646GBCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-GB"; + + static final String[] ALIASES = { "ISO-IR-4" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x23, 0x7E }, + new int[] { 0xA3, 0xAF }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646GBCharset. 
+ */ + public ISO646GBCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646HUCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646HUCharset.java new file mode 100644 index 0000000..986bc6c --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646HUCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646HUCharset class handles the encoding and decoding of the + * ISO646-HU national variant of the ISO/IEC 646 charset. 
+ * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646HUCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-HU"; + + static final String[] ALIASES = { "ISO-IR-86" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x24, 0x40, 0x5B, 0x5C, 0x5D, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA4, 0xC1, 0xC9, 0xD6, 0xDC, 0xE1, 0xE9, 0xF6, 0xFC, 0x02DD }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646HUCharset. + */ + public ISO646HUCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646IECharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646IECharset.java new file mode 100644 index 0000000..2f48713 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646IECharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646IECharset class handles the encoding and decoding of the + * ISO646-IE national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646IECharset extends ByteLookupCharset { + + static final String NAME = "ISO646-IE"; + + static final String[] ALIASES = { "ISO-IR-207" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA3, 0xD3, 0xC9, 0xCD, 0xDA, 0xC1, 0xF3, 0xE9, 0xED, 0xFA, 0xE1 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646IECharset. + */ + public ISO646IECharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646INVCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646INVCharset.java new file mode 100644 index 0000000..1f8e088 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646INVCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646INVCharset class handles the encoding and decoding of the + * ISO646-INV national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646INVCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-INV"; + + static final String[] ALIASES = { "ISO-IR-170" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x23, 0x24, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646INVCharset. + */ + public ISO646INVCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646IRVCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646IRVCharset.java new file mode 100644 index 0000000..9f93d0b --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646IRVCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. 
+ * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646IRVCharset class handles the encoding and decoding of the + * ISO646-IRV national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646IRVCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-IRV"; + + static final String[] ALIASES = { "ISO-IR-2", "ISO_646.IRV:1983" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x24 }, + new int[] { 0xA4 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646IRVCharset. + */ + public ISO646IRVCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ISCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ISCharset.java new file mode 100644 index 0000000..19cc052 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ISCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. 
+ * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646ISCharset class handles the encoding and decoding of the + * ISO646-IS national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646ISCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-IS"; + + static final String[] ALIASES = {}; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x40, 0x5B, 0x5D, 0x5E, 0x60, 0x7B, 0x7D, 0x7E }, + new int[] { 0xD0, 0xDE, 0xC6, 0xD6, 0xF0, 0xFE, 0xE6, 0xF6 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646ISCharset. + */ + public ISO646ISCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ITCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ITCharset.java new file mode 100644 index 0000000..310b909 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646ITCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. 
+ * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646ITCharset class handles the encoding and decoding of the + * ISO646-IT national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646ITCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-IT"; + + static final String[] ALIASES = { "ISO-IR-15" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA3, 0xA7, 0xB0, 0xE7, 0xE9, 0xF9, 0xE0, 0xF2, 0xE8, 0xEC }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646ITCharset. 
+ */ + public ISO646ITCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646JAOCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646JAOCharset.java new file mode 100644 index 0000000..96fcbeb --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646JAOCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646JAOCharset class handles the encoding and decoding of the + * ISO646-JAO national variant of the ISO/IEC 646 charset. 
+ * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646JAOCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-JAO"; + + static final String[] ALIASES = { "ISO646-JP-OCR-B", "ISO-IR-92" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x5B, 0x5C, 0x5D, 0x60, 0x7E }, + new int[] { 0x2329, 0xA5, 0x232A, -1, -1 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646JAOCharset. + */ + public ISO646JAOCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646JPCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646JPCharset.java new file mode 100644 index 0000000..f705c2e --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646JPCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646JPCharset class handles the encoding and decoding of the + * ISO646-JP national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646JPCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-JP"; + + static final String[] ALIASES = { "ISO-IR-14" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x5C, 0x7E }, + new int[] { 0xA5, 0xAF }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646JPCharset. + */ + public ISO646JPCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646KRCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646KRCharset.java new file mode 100644 index 0000000..53defcf --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646KRCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646KRCharset class handles the encoding and decoding of the + * ISO646-KR national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646KRCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-KR"; + + static final String[] ALIASES = {}; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x5C }, + new int[] { 0x20A9 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646KRCharset. + */ + public ISO646KRCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646MTCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646MTCharset.java new file mode 100644 index 0000000..ddf89af --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646MTCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646MTCharset class handles the encoding and decoding of the + * ISO646-MT national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646MTCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-MT"; + + static final String[] ALIASES = {}; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x5B, 0x5C, 0x5D, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0x0121, 0x017C, 0x0127, 0x010B, 0x0120, 0x017B, 0x0126, 0x010A }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646MTCharset. + */ + public ISO646MTCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646NO2Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646NO2Charset.java new file mode 100644 index 0000000..c09b3c7 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646NO2Charset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646NO2Charset class handles the encoding and decoding of the + * ISO646-NO2 national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646NO2Charset extends ByteLookupCharset { + + static final String NAME = "ISO646-NO2"; + + static final String[] ALIASES = { "ISO-IR-61" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x23, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA7, 0xC6, 0xD8, 0xC5, 0xE6, 0xF8, 0xE5, 0x7C }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646NO2Charset. + */ + public ISO646NO2Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646NOCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646NOCharset.java new file mode 100644 index 0000000..44ef949 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646NOCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646NOCharset class handles the encoding and decoding of the + * ISO646-NO national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646NOCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-NO"; + + static final String[] ALIASES = { "ISO-IR-60" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xC6, 0xD8, 0xC5, 0xE6, 0xF8, 0xE5, 0xAF }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646NOCharset. + */ + public ISO646NOCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646PT2Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646PT2Charset.java new file mode 100644 index 0000000..76410c2 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646PT2Charset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. 
+ * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646PT2Charset class handles the encoding and decoding of the + * ISO646-PT2 national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646PT2Charset extends ByteLookupCharset { + + static final String NAME = "ISO646-PT2"; + + static final String[] ALIASES = { "ISO-IR-84" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D }, + new int[] { 0xB4, 0xC3, 0xC7, 0xD5, 0xE3, 0xE7, 0xF5 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646PT2Charset. + */ + public ISO646PT2Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646PTCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646PTCharset.java new file mode 100644 index 0000000..fe22aa6 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646PTCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. 
+ * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646PTCharset class handles the encoding and decoding of the + * ISO646-PT national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646PTCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-PT"; + + static final String[] ALIASES = { "ISO-IR-16" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA7, 0xC3, 0xC7, 0xD5, 0xE3, 0xE7, 0xF5, 0xB0 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646PTCharset. 
+ */ + public ISO646PTCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646SE2Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646SE2Charset.java new file mode 100644 index 0000000..c96706a --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646SE2Charset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646SE2Charset class handles the encoding and decoding of the + * ISO646-SE2 national variant of the ISO/IEC 646 charset. 
+ * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646SE2Charset extends ByteLookupCharset { + + static final String NAME = "ISO646-SE2"; + + static final String[] ALIASES = { "ISO-IR-11" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x24, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0xA4, 0xC9, 0xC4, 0xD6, 0xC5, 0xDC, 0xE9, 0xE4, 0xF6, 0xE5, 0xFC }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646SE2Charset. + */ + public ISO646SE2Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646T61Charset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646T61Charset.java new file mode 100644 index 0000000..3e4f5b3 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646T61Charset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646T61Charset class handles the encoding and decoding of the + * ISO646-T61 national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646T61Charset extends ByteLookupCharset { + + static final String NAME = "ISO646-T61"; + + static final String[] ALIASES = { "ISO-IR-102" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x24, 0x5C, 0x5E, 0x60, 0x7B, 0x7D, 0x7E }, + new int[] { 0xA4, -1, -1, -1, -1, -1, -1 }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646T61Charset. + */ + public ISO646T61Charset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646TWCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646TWCharset.java new file mode 100644 index 0000000..fb5aecd --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646TWCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. 
If not, see . + * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646TWCharset class handles the encoding and decoding of the + * ISO646-TW national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646TWCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-TW"; + + static final String[] ALIASES = {}; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x7E }, + new int[] { 0xAF }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646TWCharset. + */ + public ISO646TWCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646USCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646USCharset.java new file mode 100644 index 0000000..462dea3 --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646USCharset.java @@ -0,0 +1,56 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see . 
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646USCharset class handles the encoding and decoding of the + * ISO646-US national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646USCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-US"; + + static final String[] ALIASES = { "ISO-IR-6", "ISO_646.irv:1991" }; + + static final int[] BYTE_TO_CHAR; + + static { + BYTE_TO_CHAR = createTable(); + for (int i = 0; i < 128; i++) + BYTE_TO_CHAR[i] = i; + } + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646USCharset. + */ + public ISO646USCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646YUCharset.java b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646YUCharset.java new file mode 100644 index 0000000..0dac92f --- /dev/null +++ b/lib/jcharset-2.1/src/main/java/net/freeutils/charset/iso646/ISO646YUCharset.java @@ -0,0 +1,52 @@ +/* + * Copyright © 2005-2019 Amichai Rothman + * + * This file is part of JCharset - the Java Charset package. + * + * JCharset is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * JCharset is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with JCharset. If not, see <http://www.gnu.org/licenses/>.
+ * + * For additional info see http://www.freeutils.net/source/jcharset/ + */ + +package net.freeutils.charset.iso646; + +import net.freeutils.charset.ByteLookupCharset; + +/** + * The ISO646YUCharset class handles the encoding and decoding of the + * ISO646-YU national variant of the ISO/IEC 646 charset. + * + * @author Amichai Rothman + * @since 2015-08-18 + */ +public class ISO646YUCharset extends ByteLookupCharset { + + static final String NAME = "ISO646-YU"; + + static final String[] ALIASES = { "ISO-IR-141" }; + + static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, + new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, + new int[] { 0x017D, 0x0160, 0x0110, 0x0106, 0x010C, 0x017E, 0x0161, 0x0111, 0x0107, 0x010D }); + + static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); + + /** + * Constructs an instance of the ISO646YUCharset. + */ + public ISO646YUCharset() { + super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE); + } + +} diff --git a/lib/jcharset-2.1/src/main/resources/META-INF/services/java.nio.charset.spi.CharsetProvider b/lib/jcharset-2.1/src/main/resources/META-INF/services/java.nio.charset.spi.CharsetProvider new file mode 100644 index 0000000..e97a0bc --- /dev/null +++ b/lib/jcharset-2.1/src/main/resources/META-INF/services/java.nio.charset.spi.CharsetProvider @@ -0,0 +1 @@ +net.freeutils.charset.CharsetProvider diff --git a/src/Backend/Runtime/Testfile.java b/src/Backend/Runtime/Testfile.java index d4cae74..1b5d6c7 100644 --- a/src/Backend/Runtime/Testfile.java +++ b/src/Backend/Runtime/Testfile.java @@ -4,6 +4,5 @@ import java.util.ArrayList; public class Testfile { ArrayList lsTestfile; - } \ No newline at end of file