jcharset-2.1 hinzugefügt
This commit is contained in:
110
lib/jcharset-2.1/CHANGES.txt
Normal file
110
lib/jcharset-2.1/CHANGES.txt
Normal file
@@ -0,0 +1,110 @@
|
||||
|
||||
|
||||
CHANGES IN THE 2.1 RELEASE
|
||||
--------------------------
|
||||
- Added CR padding support to PackedGSMCharset.
|
||||
- Added CRCCPackedGSMCharset and CRSCPackedGSMCharset packed GSM variants with CR padding enabled.
|
||||
- Added KZ-1048 charset, with aliases STRK1048-2002, RK1048, csKZ1048.
|
||||
- Improved javadocs.
|
||||
|
||||
|
||||
|
||||
|
||||
CHANGES IN THE 2.0 RELEASE
|
||||
--------------------------
|
||||
- Added 32 national variants of the ISO/IEC 646 charset.
|
||||
- Moved GSM classes to separate sub-package.
|
||||
- Changed UTF-7 decoding to be lenient in accepting trailing zero bits in shift sequences.
|
||||
- Changed UTF7Charset.contains to reflect full Unicode equivalency.
|
||||
- Added a command-line utility supporting file charset conversion.
|
||||
- Added ByteLookupCharset.createTable utility method.
|
||||
- Generalized createInverseLookupTableDefinition to Utils.toInverseLookupTableDefinition.
|
||||
- Applied many refactorings, simplifications, clarifications and clean-ups.
|
||||
- Applied various optimizations to encode/decode loops.
|
||||
- Improved docs.
|
||||
|
||||
|
||||
|
||||
|
||||
CHANGES IN THE 1.6 RELEASE
|
||||
--------------------------
|
||||
- Migrated to Maven build system, directory structure and artifact conventions.
|
||||
- Added OSGi headers to jar manifest.
|
||||
- Fixed javadoc errors when building with JDK 8.
|
||||
- Improved javadocs and misc. minor refactorings.
|
||||
|
||||
|
||||
|
||||
|
||||
CHANGES IN THE 1.5 RELEASE
|
||||
--------------------------
|
||||
- Fixed GSMCharset encoding of non-breakable space character (0x00A0), which shouldn't be encoded.
|
||||
- Fixed PackedGSMCharset decoder edge case of handling overflow continuation for large strings (>256)
|
||||
when calling decoder directly (not via String methods).
|
||||
- Fixed PackedGSMCharset decoder edge case where the string size is a multiple of the internal buffer size (256)
|
||||
that is greater than 256 and the string has escaped characters on decoded buffer boundaries.
|
||||
- Simplified CharsetProvider.charsetForName flow.
|
||||
|
||||
|
||||
|
||||
|
||||
CHANGES IN THE 1.4 RELEASE
|
||||
--------------------------
|
||||
- Dropped support for JDK 1.4 and earlier.
|
||||
- Added MIK charset.
|
||||
- Added KOI8_U as a KOI8-U alias.
|
||||
- Optimized EscapedByteLookupCharset encoding buffer allocation for strings with no escape chars.
|
||||
- Added ByteLookupCharset.updateInverseLookupTable convenience method.
|
||||
- Improved docs.
|
||||
|
||||
|
||||
|
||||
|
||||
CHANGES IN THE 1.3 RELEASE
|
||||
--------------------------
|
||||
- Added X-roman8 as an hp-roman8 alias.
|
||||
- Added the generic EscapedByteLookupCharset to simplify implementation of single-escape-byte charsets.
|
||||
- Created two flavors of the GSM charset: CCGSMCharset (mapping the Latin capital letter C with cedilla)
|
||||
and SCGSMCharset (mapping the Latin small letter c with cedilla). See javadocs for details.
|
||||
- Added support for Packed GSM charset, with the two flavors as well.
|
||||
- Renamed the canonical charset name for the new GSM family, to make the flavor choices explicit.
|
||||
|
||||
|
||||
|
||||
|
||||
CHANGES IN THE 1.2.1 RELEASE
|
||||
----------------------------
|
||||
- Fixed a combined JavaMail-JCharset bug that could cause an infinite loop on some inputs.
|
||||
- Updated the ISO-8859-8-i/e mapping for the MACRON character.
|
||||
The incorrect mapping in the JDK's implementation of ISO-8859-8 is fixed as of JDK 1.5
|
||||
(see http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=4760496). We now determine the
|
||||
running JDK version, and if it's JDK 1.5 or higher we use the correct mapping. This
|
||||
way we remain consistent with the running JDK ISO-8859-8 charset implementation.
|
||||
|
||||
|
||||
|
||||
|
||||
CHANGES IN THE 1.2 RELEASE
|
||||
--------------------------
|
||||
- Added KOI8-U charset.
|
||||
|
||||
|
||||
|
||||
|
||||
CHANGES IN THE 1.1 RELEASE
|
||||
--------------------------
|
||||
|
||||
- Added ByteLookupCharset class to simplify implementation of single byte charsets.
|
||||
- Added GSM-default-alphabet charset (used in SMPP).
|
||||
- Added hp-roman8 charset.
|
||||
- Added ISO-8859-8-i/e charset.
|
||||
- Added ISO-8859-6-i/e charset.
|
||||
|
||||
|
||||
|
||||
|
||||
CHANGES IN THE 1.0 RELEASE
|
||||
--------------------------
|
||||
|
||||
- This is the first release of the Java Charset package.
|
||||
|
||||
4
lib/jcharset-2.1/LICENSE.commercial.txt
Normal file
4
lib/jcharset-2.1/LICENSE.commercial.txt
Normal file
@@ -0,0 +1,4 @@
|
||||
|
||||
This software is dual-licensed - a commercial licensing option is available for those who need it.
|
||||
|
||||
For details, please contact support@freeutils.net.
|
||||
339
lib/jcharset-2.1/LICENSE.gpl.txt
Normal file
339
lib/jcharset-2.1/LICENSE.gpl.txt
Normal file
@@ -0,0 +1,339 @@
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
Version 2, June 1991
|
||||
|
||||
Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
Everyone is permitted to copy and distribute verbatim copies
|
||||
of this license document, but changing it is not allowed.
|
||||
|
||||
Preamble
|
||||
|
||||
The licenses for most software are designed to take away your
|
||||
freedom to share and change it. By contrast, the GNU General Public
|
||||
License is intended to guarantee your freedom to share and change free
|
||||
software--to make sure the software is free for all its users. This
|
||||
General Public License applies to most of the Free Software
|
||||
Foundation's software and to any other program whose authors commit to
|
||||
using it. (Some other Free Software Foundation software is covered by
|
||||
the GNU Lesser General Public License instead.) You can apply it to
|
||||
your programs, too.
|
||||
|
||||
When we speak of free software, we are referring to freedom, not
|
||||
price. Our General Public Licenses are designed to make sure that you
|
||||
have the freedom to distribute copies of free software (and charge for
|
||||
this service if you wish), that you receive source code or can get it
|
||||
if you want it, that you can change the software or use pieces of it
|
||||
in new free programs; and that you know you can do these things.
|
||||
|
||||
To protect your rights, we need to make restrictions that forbid
|
||||
anyone to deny you these rights or to ask you to surrender the rights.
|
||||
These restrictions translate to certain responsibilities for you if you
|
||||
distribute copies of the software, or if you modify it.
|
||||
|
||||
For example, if you distribute copies of such a program, whether
|
||||
gratis or for a fee, you must give the recipients all the rights that
|
||||
you have. You must make sure that they, too, receive or can get the
|
||||
source code. And you must show them these terms so they know their
|
||||
rights.
|
||||
|
||||
We protect your rights with two steps: (1) copyright the software, and
|
||||
(2) offer you this license which gives you legal permission to copy,
|
||||
distribute and/or modify the software.
|
||||
|
||||
Also, for each author's protection and ours, we want to make certain
|
||||
that everyone understands that there is no warranty for this free
|
||||
software. If the software is modified by someone else and passed on, we
|
||||
want its recipients to know that what they have is not the original, so
|
||||
that any problems introduced by others will not reflect on the original
|
||||
authors' reputations.
|
||||
|
||||
Finally, any free program is threatened constantly by software
|
||||
patents. We wish to avoid the danger that redistributors of a free
|
||||
program will individually obtain patent licenses, in effect making the
|
||||
program proprietary. To prevent this, we have made it clear that any
|
||||
patent must be licensed for everyone's free use or not licensed at all.
|
||||
|
||||
The precise terms and conditions for copying, distribution and
|
||||
modification follow.
|
||||
|
||||
GNU GENERAL PUBLIC LICENSE
|
||||
TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
|
||||
|
||||
0. This License applies to any program or other work which contains
|
||||
a notice placed by the copyright holder saying it may be distributed
|
||||
under the terms of this General Public License. The "Program", below,
|
||||
refers to any such program or work, and a "work based on the Program"
|
||||
means either the Program or any derivative work under copyright law:
|
||||
that is to say, a work containing the Program or a portion of it,
|
||||
either verbatim or with modifications and/or translated into another
|
||||
language. (Hereinafter, translation is included without limitation in
|
||||
the term "modification".) Each licensee is addressed as "you".
|
||||
|
||||
Activities other than copying, distribution and modification are not
|
||||
covered by this License; they are outside its scope. The act of
|
||||
running the Program is not restricted, and the output from the Program
|
||||
is covered only if its contents constitute a work based on the
|
||||
Program (independent of having been made by running the Program).
|
||||
Whether that is true depends on what the Program does.
|
||||
|
||||
1. You may copy and distribute verbatim copies of the Program's
|
||||
source code as you receive it, in any medium, provided that you
|
||||
conspicuously and appropriately publish on each copy an appropriate
|
||||
copyright notice and disclaimer of warranty; keep intact all the
|
||||
notices that refer to this License and to the absence of any warranty;
|
||||
and give any other recipients of the Program a copy of this License
|
||||
along with the Program.
|
||||
|
||||
You may charge a fee for the physical act of transferring a copy, and
|
||||
you may at your option offer warranty protection in exchange for a fee.
|
||||
|
||||
2. You may modify your copy or copies of the Program or any portion
|
||||
of it, thus forming a work based on the Program, and copy and
|
||||
distribute such modifications or work under the terms of Section 1
|
||||
above, provided that you also meet all of these conditions:
|
||||
|
||||
a) You must cause the modified files to carry prominent notices
|
||||
stating that you changed the files and the date of any change.
|
||||
|
||||
b) You must cause any work that you distribute or publish, that in
|
||||
whole or in part contains or is derived from the Program or any
|
||||
part thereof, to be licensed as a whole at no charge to all third
|
||||
parties under the terms of this License.
|
||||
|
||||
c) If the modified program normally reads commands interactively
|
||||
when run, you must cause it, when started running for such
|
||||
interactive use in the most ordinary way, to print or display an
|
||||
announcement including an appropriate copyright notice and a
|
||||
notice that there is no warranty (or else, saying that you provide
|
||||
a warranty) and that users may redistribute the program under
|
||||
these conditions, and telling the user how to view a copy of this
|
||||
License. (Exception: if the Program itself is interactive but
|
||||
does not normally print such an announcement, your work based on
|
||||
the Program is not required to print an announcement.)
|
||||
|
||||
These requirements apply to the modified work as a whole. If
|
||||
identifiable sections of that work are not derived from the Program,
|
||||
and can be reasonably considered independent and separate works in
|
||||
themselves, then this License, and its terms, do not apply to those
|
||||
sections when you distribute them as separate works. But when you
|
||||
distribute the same sections as part of a whole which is a work based
|
||||
on the Program, the distribution of the whole must be on the terms of
|
||||
this License, whose permissions for other licensees extend to the
|
||||
entire whole, and thus to each and every part regardless of who wrote it.
|
||||
|
||||
Thus, it is not the intent of this section to claim rights or contest
|
||||
your rights to work written entirely by you; rather, the intent is to
|
||||
exercise the right to control the distribution of derivative or
|
||||
collective works based on the Program.
|
||||
|
||||
In addition, mere aggregation of another work not based on the Program
|
||||
with the Program (or with a work based on the Program) on a volume of
|
||||
a storage or distribution medium does not bring the other work under
|
||||
the scope of this License.
|
||||
|
||||
3. You may copy and distribute the Program (or a work based on it,
|
||||
under Section 2) in object code or executable form under the terms of
|
||||
Sections 1 and 2 above provided that you also do one of the following:
|
||||
|
||||
a) Accompany it with the complete corresponding machine-readable
|
||||
source code, which must be distributed under the terms of Sections
|
||||
1 and 2 above on a medium customarily used for software interchange; or,
|
||||
|
||||
b) Accompany it with a written offer, valid for at least three
|
||||
years, to give any third party, for a charge no more than your
|
||||
cost of physically performing source distribution, a complete
|
||||
machine-readable copy of the corresponding source code, to be
|
||||
distributed under the terms of Sections 1 and 2 above on a medium
|
||||
customarily used for software interchange; or,
|
||||
|
||||
c) Accompany it with the information you received as to the offer
|
||||
to distribute corresponding source code. (This alternative is
|
||||
allowed only for noncommercial distribution and only if you
|
||||
received the program in object code or executable form with such
|
||||
an offer, in accord with Subsection b above.)
|
||||
|
||||
The source code for a work means the preferred form of the work for
|
||||
making modifications to it. For an executable work, complete source
|
||||
code means all the source code for all modules it contains, plus any
|
||||
associated interface definition files, plus the scripts used to
|
||||
control compilation and installation of the executable. However, as a
|
||||
special exception, the source code distributed need not include
|
||||
anything that is normally distributed (in either source or binary
|
||||
form) with the major components (compiler, kernel, and so on) of the
|
||||
operating system on which the executable runs, unless that component
|
||||
itself accompanies the executable.
|
||||
|
||||
If distribution of executable or object code is made by offering
|
||||
access to copy from a designated place, then offering equivalent
|
||||
access to copy the source code from the same place counts as
|
||||
distribution of the source code, even though third parties are not
|
||||
compelled to copy the source along with the object code.
|
||||
|
||||
4. You may not copy, modify, sublicense, or distribute the Program
|
||||
except as expressly provided under this License. Any attempt
|
||||
otherwise to copy, modify, sublicense or distribute the Program is
|
||||
void, and will automatically terminate your rights under this License.
|
||||
However, parties who have received copies, or rights, from you under
|
||||
this License will not have their licenses terminated so long as such
|
||||
parties remain in full compliance.
|
||||
|
||||
5. You are not required to accept this License, since you have not
|
||||
signed it. However, nothing else grants you permission to modify or
|
||||
distribute the Program or its derivative works. These actions are
|
||||
prohibited by law if you do not accept this License. Therefore, by
|
||||
modifying or distributing the Program (or any work based on the
|
||||
Program), you indicate your acceptance of this License to do so, and
|
||||
all its terms and conditions for copying, distributing or modifying
|
||||
the Program or works based on it.
|
||||
|
||||
6. Each time you redistribute the Program (or any work based on the
|
||||
Program), the recipient automatically receives a license from the
|
||||
original licensor to copy, distribute or modify the Program subject to
|
||||
these terms and conditions. You may not impose any further
|
||||
restrictions on the recipients' exercise of the rights granted herein.
|
||||
You are not responsible for enforcing compliance by third parties to
|
||||
this License.
|
||||
|
||||
7. If, as a consequence of a court judgment or allegation of patent
|
||||
infringement or for any other reason (not limited to patent issues),
|
||||
conditions are imposed on you (whether by court order, agreement or
|
||||
otherwise) that contradict the conditions of this License, they do not
|
||||
excuse you from the conditions of this License. If you cannot
|
||||
distribute so as to satisfy simultaneously your obligations under this
|
||||
License and any other pertinent obligations, then as a consequence you
|
||||
may not distribute the Program at all. For example, if a patent
|
||||
license would not permit royalty-free redistribution of the Program by
|
||||
all those who receive copies directly or indirectly through you, then
|
||||
the only way you could satisfy both it and this License would be to
|
||||
refrain entirely from distribution of the Program.
|
||||
|
||||
If any portion of this section is held invalid or unenforceable under
|
||||
any particular circumstance, the balance of the section is intended to
|
||||
apply and the section as a whole is intended to apply in other
|
||||
circumstances.
|
||||
|
||||
It is not the purpose of this section to induce you to infringe any
|
||||
patents or other property right claims or to contest validity of any
|
||||
such claims; this section has the sole purpose of protecting the
|
||||
integrity of the free software distribution system, which is
|
||||
implemented by public license practices. Many people have made
|
||||
generous contributions to the wide range of software distributed
|
||||
through that system in reliance on consistent application of that
|
||||
system; it is up to the author/donor to decide if he or she is willing
|
||||
to distribute software through any other system and a licensee cannot
|
||||
impose that choice.
|
||||
|
||||
This section is intended to make thoroughly clear what is believed to
|
||||
be a consequence of the rest of this License.
|
||||
|
||||
8. If the distribution and/or use of the Program is restricted in
|
||||
certain countries either by patents or by copyrighted interfaces, the
|
||||
original copyright holder who places the Program under this License
|
||||
may add an explicit geographical distribution limitation excluding
|
||||
those countries, so that distribution is permitted only in or among
|
||||
countries not thus excluded. In such case, this License incorporates
|
||||
the limitation as if written in the body of this License.
|
||||
|
||||
9. The Free Software Foundation may publish revised and/or new versions
|
||||
of the General Public License from time to time. Such new versions will
|
||||
be similar in spirit to the present version, but may differ in detail to
|
||||
address new problems or concerns.
|
||||
|
||||
Each version is given a distinguishing version number. If the Program
|
||||
specifies a version number of this License which applies to it and "any
|
||||
later version", you have the option of following the terms and conditions
|
||||
either of that version or of any later version published by the Free
|
||||
Software Foundation. If the Program does not specify a version number of
|
||||
this License, you may choose any version ever published by the Free Software
|
||||
Foundation.
|
||||
|
||||
10. If you wish to incorporate parts of the Program into other free
|
||||
programs whose distribution conditions are different, write to the author
|
||||
to ask for permission. For software which is copyrighted by the Free
|
||||
Software Foundation, write to the Free Software Foundation; we sometimes
|
||||
make exceptions for this. Our decision will be guided by the two goals
|
||||
of preserving the free status of all derivatives of our free software and
|
||||
of promoting the sharing and reuse of software generally.
|
||||
|
||||
NO WARRANTY
|
||||
|
||||
11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
|
||||
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
|
||||
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
|
||||
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
|
||||
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
|
||||
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
|
||||
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
|
||||
REPAIR OR CORRECTION.
|
||||
|
||||
12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
|
||||
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
|
||||
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
|
||||
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
|
||||
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
|
||||
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
|
||||
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
|
||||
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGES.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
How to Apply These Terms to Your New Programs
|
||||
|
||||
If you develop a new program, and you want it to be of the greatest
|
||||
possible use to the public, the best way to achieve this is to make it
|
||||
free software which everyone can redistribute and change under these terms.
|
||||
|
||||
To do so, attach the following notices to the program. It is safest
|
||||
to attach them to the start of each source file to most effectively
|
||||
convey the exclusion of warranty; and each file should have at least
|
||||
the "copyright" line and a pointer to where the full notice is found.
|
||||
|
||||
<one line to give the program's name and a brief idea of what it does.>
|
||||
Copyright (C) <year> <name of author>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
Also add information on how to contact you by electronic and paper mail.
|
||||
|
||||
If the program is interactive, make it output a short notice like this
|
||||
when it starts in an interactive mode:
|
||||
|
||||
Gnomovision version 69, Copyright (C) year name of author
|
||||
Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
|
||||
This is free software, and you are welcome to redistribute it
|
||||
under certain conditions; type `show c' for details.
|
||||
|
||||
The hypothetical commands `show w' and `show c' should show the appropriate
|
||||
parts of the General Public License. Of course, the commands you use may
|
||||
be called something other than `show w' and `show c'; they could even be
|
||||
mouse-clicks or menu items--whatever suits your program.
|
||||
|
||||
You should also get your employer (if you work as a programmer) or your
|
||||
school, if any, to sign a "copyright disclaimer" for the program, if
|
||||
necessary. Here is a sample; alter the names:
|
||||
|
||||
Yoyodyne, Inc., hereby disclaims all copyright interest in the program
|
||||
`Gnomovision' (which makes passes at compilers) written by James Hacker.
|
||||
|
||||
<signature of Ty Coon>, 1 April 1989
|
||||
Ty Coon, President of Vice
|
||||
|
||||
This General Public License does not permit incorporating your program into
|
||||
proprietary programs. If your program is a subroutine library, you may
|
||||
consider it more useful to permit linking proprietary applications with the
|
||||
library. If this is what you want to do, use the GNU Lesser General
|
||||
Public License instead of this License.
|
||||
153
lib/jcharset-2.1/README.txt
Normal file
153
lib/jcharset-2.1/README.txt
Normal file
@@ -0,0 +1,153 @@
|
||||
|
||||
|
||||
JCharset - Java Charset package 2.1
|
||||
===================================
|
||||
|
||||
Copyright © 2005-2019 Amichai Rothman
|
||||
|
||||
|
||||
|
||||
1. What is the Java Charset package?
|
||||
|
||||
The Java Charset package is an open-source implementation of character
|
||||
sets that were missing from the standard Java platform.
|
||||
|
||||
It has been in use in many production systems around the world for over a
|
||||
decade, including products by small start-ups, large open-source service
|
||||
providers, and well-known multinational corporations.
|
||||
|
||||
|
||||
2. How do I use the Java Charset package?
|
||||
|
||||
The Java Charset package is written in pure Java, runs on JDK 1.5 or later,
|
||||
and requires no special installation - just add the jar file to your
|
||||
classpath, or place it in any of the usual extension directories.
|
||||
|
||||
It is also available on Maven Central at the artifact coordinates
|
||||
net.freeutils:jcharset:2.1.
|
||||
|
||||
The JVM will recognize the supported character sets automatically, and they
|
||||
will be available anywhere character sets are used in the Java platform.
|
||||
|
||||
As an example, you can take a look at java.lang.String's constructor and
|
||||
getBytes() method, both of which have an overloaded version that receives
|
||||
a charset name as an argument.
|
||||
|
||||
A command-line utility is included which supports converting files
|
||||
between charsets. For help on usage and available options, run it using
|
||||
the command 'java -jar jcharset-2.1.jar -h'.
|
||||
|
||||
Note: Some web/mail containers run each application in its own JVM context.
|
||||
In this case check the container documentation for information on where and
|
||||
how to configure the classpath, such as in WEB-INF/lib, shared/lib,
|
||||
jre/lib/ext, etc. You may need to restart the server for changes to take
|
||||
effect. However, if you use Oracle's JRE, it will work only if you put it in
|
||||
the jre/lib/ext extension directory, or in the container's classpath.
|
||||
This is due to a bug in Oracle's JRE implementation
|
||||
(http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4619777).
|
||||
|
||||
3. Which charsets are supported?
|
||||
|
||||
"UTF-7" (a.k.a. "UTF7", "UNICODE-1-1-UTF-7", "csUnicode11UTF7",
|
||||
"UNICODE-2-0-UTF-7")
|
||||
The 7-bit Unicode character encoding defined in RFC 2152.
|
||||
The O-set characters are encoded as a shift sequence.
|
||||
Both O-set flavors (direct and shifted) are decoded.
|
||||
|
||||
"UTF-7-OPTIONAL" (a.k.a. "UTF-7O", "UTF7O", "UTF-7-O")
|
||||
The 7-bit Unicode character encoding defined in RFC 2152.
|
||||
The O-set characters are directly encoded.
|
||||
Both O-set flavors (direct and shifted) are decoded.
|
||||
|
||||
"SCGSM" (a.k.a. "GSM-default-alphabet", "GSM_0338", "GSM_DEFAULT",
|
||||
"GSM7", "GSM-7BIT")
|
||||
The GSM default charset as specified in GSM 03.38, used in SMPP for
|
||||
encoding SMS text messages.
|
||||
|
||||
Additional flavors of the GSM charset are "CCGSM", "SCPGSM", "CCPGSM",
|
||||
"CRSCPGSM" and "CRCCPGSM": The CC prefix signifies mapping the Latin
|
||||
capital letter C with cedilla character, the SC prefix signifies
|
||||
mapping the Latin small letter c with cedilla character, the P prefix
|
||||
signifies the packed form (8 characters packed in 7 bytes), and the
|
||||
CR prefix signifies padding with CR instead of zeros to avoid ambiguity,
|
||||
all as specified by the spec. See javadocs for details.
|
||||
|
||||
"hp-roman8" (a.k.a. "roman8", "r8", "csHPRoman8", "X-roman8")
|
||||
The HP Roman-8 charset, as provided in RFC 1345.
|
||||
|
||||
ISO/IEC 646 National Variants:
|
||||
"ISO646-CA" ("ISO-IR-121")
|
||||
"ISO646-CA2" ("ISO-IR-122")
|
||||
"ISO646-CH"
|
||||
"ISO646-CN" ("ISO-IR-57")
|
||||
"ISO646-CU" ("ISO-IR-151")
|
||||
"ISO646-DE" ("ISO-IR-21", "DIN_66003")
|
||||
"ISO646-DK"
|
||||
"ISO646-ES" ("ISO-IR-17")
|
||||
"ISO646-ES2" ("ISO-IR-85")
|
||||
"ISO646-FI" ("ISO646-SE", "ISO-IR-10")
|
||||
"ISO646-FR" ("ISO-IR-69")
|
||||
"ISO646-FR1" ("ISO-IR-25")
|
||||
"ISO646-GB" ("ISO-IR-4")
|
||||
"ISO646-HU" ("ISO-IR-86")
|
||||
"ISO646-IE" ("ISO-IR-207")
|
||||
"ISO646-INV" ("ISO-IR-170")
|
||||
"ISO646-IRV" ("ISO-IR-2", "ISO_646.IRV:1983")
|
||||
"ISO646-IS"
|
||||
"ISO646-IT" ("ISO-IR-15")
|
||||
"ISO646-JAO" ("ISO646-JP-OCR-B", "ISO-IR-92")
|
||||
"ISO646-JP" ("ISO-IR-14")
|
||||
"ISO646-KR"
|
||||
"ISO646-MT"
|
||||
"ISO646-NO" ("ISO-IR-60")
|
||||
"ISO646-NO2" ("ISO-IR-61")
|
||||
"ISO646-PT" ("ISO-IR-16")
|
||||
"ISO646-PT2" ("ISO-IR-84")
|
||||
"ISO646-SE2" ("ISO-IR-11")
|
||||
"ISO646-T61" ("ISO-IR-102")
|
||||
"ISO646-TW"
|
||||
"ISO646-US" ("ISO-IR-6", "ISO_646.irv:1991")
|
||||
"ISO646-YU" ("ISO-IR-141")
|
||||
|
||||
"ISO-8859-8-BIDI" (a.k.a. "csISO88598I", "ISO-8859-8-I", "ISO_8859-8-I",
|
||||
"csISO88598E", "ISO-8859-8-E", "ISO_8859-8-E")
|
||||
The ISO 8859-8 charset implementation exists in the standard JRE.
|
||||
However, it is lacking the i/e aliases, which specify whether
|
||||
bidirectionality is implicit or explicit. The charset conversions
|
||||
themselves are similar. This charset complements the standard one.
|
||||
|
||||
"ISO-8859-6-BIDI" (a.k.a. "csISO88596I", "ISO-8859-6-I", "ISO_8859-6-I",
|
||||
"csISO88596E", "ISO-8859-6-E", "ISO_8859-6-E")
|
||||
The ISO 8859-6 charset implementation exists in the standard JRE.
|
||||
However, it is lacking the i/e aliases, which specify whether
|
||||
bidirectionality is implicit or explicit. The charset conversions
|
||||
themselves are similar. This charset complements the standard one.
|
||||
|
||||
"KOI8-U" (a.k.a. "KOI8-RU", "KOI8_U")
|
||||
The KOI8-U Ukrainian charset, as defined in RFC 2319.
|
||||
|
||||
"KZ-1048" (a.k.a. "STRK1048-2002", "RK1048", "csKZ1048")
|
||||
The KZ-1048 charset, which is the Kazakhstan national standard.
|
||||
|
||||
"MIK"
|
||||
The MIK Cyrillic code page, commonly used by DOS applications
|
||||
in Bulgaria.
|
||||
|
||||
|
||||
4. License
|
||||
|
||||
The Java Charset package is provided under the GNU General Public
|
||||
License agreement. Please read the full license agreement in the
|
||||
included LICENSE.gpl.txt file.
|
||||
|
||||
For non-GPL commercial licensing please contact the address below.
|
||||
|
||||
|
||||
5. Contact
|
||||
|
||||
Please write to support@freeutils.net with any bugs, suggestions, fixes,
|
||||
contributions, or just to drop a good word and let me know you've found
|
||||
this package useful and you'd like it to keep being maintained.
|
||||
|
||||
Updates and additional info can be found at
|
||||
http://www.freeutils.net/source/jcharset/
|
||||
BIN
lib/jcharset-2.1/lib/jcharset-2.1.jar
Normal file
BIN
lib/jcharset-2.1/lib/jcharset-2.1.jar
Normal file
Binary file not shown.
84
lib/jcharset-2.1/pom.xml
Normal file
84
lib/jcharset-2.1/pom.xml
Normal file
@@ -0,0 +1,84 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<parent>
|
||||
<groupId>net.freeutils</groupId>
|
||||
<artifactId>parent</artifactId>
|
||||
<version>1.0</version>
|
||||
<relativePath>../parent/pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>jcharset</artifactId>
|
||||
<version>2.1</version>
|
||||
<packaging>jar</packaging>
|
||||
|
||||
<name>JCharset</name>
|
||||
<description>The Java Charset package</description>
|
||||
<url>http://www.freeutils.net/source/jcharset/</url>
|
||||
<licenses>
|
||||
<license>
|
||||
<name>GNU General Public License (GPL), Version 2.0</name>
|
||||
<url>http://www.gnu.org/licenses/gpl-2.0.html</url>
|
||||
</license>
|
||||
<license>
|
||||
<name>Commercial License</name>
|
||||
<url>LICENSE.commercial.txt</url>
|
||||
</license>
|
||||
</licenses>
|
||||
|
||||
<properties>
|
||||
<jdk.version>1.5</jdk.version>
|
||||
<animal.sniffer.jdk.version>java15</animal.sniffer.jdk.version>
|
||||
<skip.assembly>false</skip.assembly>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<!-- make it an OSGi bundle (override parent config to add SPI capability) -->
|
||||
<plugin>
|
||||
<groupId>org.apache.felix</groupId>
|
||||
<artifactId>maven-bundle-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>bundle-manifest</id>
|
||||
<phase>process-classes</phase>
|
||||
<goals>
|
||||
<goal>manifest</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<instructions>
|
||||
<Bundle-SymbolicName>${bundle.symbolicName}</Bundle-SymbolicName>
|
||||
<Bundle-Version>${project.version}</Bundle-Version>
|
||||
<Export-Package>${bundle.namespace}.*;version="${project.version}"</Export-Package>
|
||||
<Private-Package>${bundle.namespace}.*</Private-Package>
|
||||
<!-- specify provided SPI capability -->
|
||||
<!-- (no Require-Capability since the package can also be used without SPI) -->
|
||||
<Provide-Capability>osgi.serviceloader; osgi.serviceloader=java.nio.charset.spi.CharsetProvider</Provide-Capability>
|
||||
</instructions>
|
||||
<supportedProjectTypes>
|
||||
<supportedProjectType>jar</supportedProjectType>
|
||||
</supportedProjectTypes>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<configuration>
|
||||
<!-- use the generated bundle manifest -->
|
||||
<useDefaultManifestFile>true</useDefaultManifestFile>
|
||||
<archive>
|
||||
<manifest>
|
||||
<mainClass>net.freeutils.charset.Utils</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
</project>
|
||||
@@ -0,0 +1,293 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
|
||||
* The <b>ByteLookupCharset</b> class handles the encoding and decoding of
|
||||
* single-byte charsets where the byte-to-char conversion is performed
|
||||
* using a simple lookup table.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2005-06-30
|
||||
*/
|
||||
public abstract class ByteLookupCharset extends Charset {

    /** Maps each byte value (0-255) to its character, or -1 if the byte is illegal. */
    final int[] byteToChar;

    /**
     * Two-level inverse table: indexed first by a character's high-order byte
     * (null row if nothing maps there), then by its low-order byte; -1 marks
     * a character that has no byte mapping.
     */
    final int[][] charToByte;

    /**
     * Allocates a lookup table of 256 entries, each preset to -1
     * (i.e. "no mapping").
     *
     * @return the new table
     */
    public static int[] createTable() {
        final int[] fresh = new int[256];
        Arrays.fill(fresh, -1);
        return fresh;
    }

    /**
     * Produces a modified copy of an array; the source array itself
     * is left untouched.
     *
     * @param src the array to copy
     * @param indices the positions in the copy that are to be overwritten
     * @param values the value to store at each respective position
     * @return the modified copy
     */
    public static int[] mutate(int[] src, int[] indices, int[] values) {
        final int[] copy = src.clone();
        int k = 0;
        while (k < indices.length) {
            copy[indices[k]] = values[k];
            k++;
        }
        return copy;
    }

    /**
     * Builds the inverse (char-to-byte) lookup table of a byte-to-char
     * lookup table.
     * <p>
     * The result holds 256 rows, one per possible high-order byte of a
     * character; rows that are not needed stay null. A row is indexed by
     * the character's low-order byte and yields the byte value the
     * character converts to. A null row, or a -1 entry within a row,
     * means that the character has no legal mapping.
     *
     * @param chars a table holding, for each byte value (0-255),
     *        the character it is converted to (or -1 if none)
     * @return the created inverse lookup (char-to-byte) table
     */
    public static int[][] createInverseLookupTable(int[] chars) {
        final int[][] inverse = new int[256][];
        for (int b = 0; b < 256; b++) {
            final int ch = chars[b];
            if (ch < 0)
                continue; // this byte value has no character
            updateInverseLookupTable(inverse, ch, b);
        }
        return inverse;
    }

    /**
     * Adds a single mapping to an inverse lookup table, overwriting
     * any mapping previously stored for the same character.
     *
     * @param tables the inverse lookup table to update
     *        (see {@link #createInverseLookupTable})
     * @param c the character to map
     * @param b the byte value to which c is mapped, or -1 to mark an illegal mapping
     * @return the updated inverse lookup (char-to-byte) table
     */
    public static int[][] updateInverseLookupTable(int[][] tables, int c, int b) {
        final int rowIndex = (c >>> 8) & 0xFF;
        int[] row = tables[rowIndex];
        if (row == null) {
            // a missing row already means "unmapped", so an illegal
            // mapping needs no row to be allocated
            if (b < 0)
                return tables;
            row = createTable();
            tables[rowIndex] = row;
        }
        row[c & 0xFF] = b;
        return tables;
    }

    /**
     * Adds several mappings to an inverse lookup table, overwriting
     * any mappings previously stored for the same characters.
     *
     * @param tables the inverse lookup table to update
     *        (see {@link #createInverseLookupTable})
     * @param chars the characters to map
     * @param bytes the respective byte values to which the chars are mapped,
     *        or -1 to mark an illegal mapping
     * @return the updated inverse lookup (char-to-byte) table
     */
    public static int[][] updateInverseLookupTable(int[][] tables, int[] chars, int[] bytes) {
        int k = 0;
        while (k < chars.length) {
            updateInverseLookupTable(tables, chars[k], bytes[k]);
            k++;
        }
        return tables;
    }

    /**
     * Initializes a new charset with the given canonical name, aliases
     * and byte-to-char/char-to-byte lookup tables. The tables are stored
     * as given, without being copied.
     *
     * @param canonicalName the canonical name of this charset
     * @param aliases an array of this charset's aliases, or null if it has no aliases
     * @param byteToChar a byte-to-char conversion table for this charset
     * @param charToByte a char-to-byte conversion table for this charset. It can
     *        be generated on-the-fly by calling createInverseLookupTable(byteToChar).
     * @throws java.nio.charset.IllegalCharsetNameException
     *         if the canonical name or any of the aliases are illegal
     */
    protected ByteLookupCharset(String canonicalName, String[] aliases,
            int[] byteToChar, int[][] charToByte) {
        super(canonicalName, aliases);
        this.charToByte = charToByte;
        this.byteToChar = byteToChar;
    }

    /**
     * Tells whether or not this charset contains the given charset.
     *
     * <p> This implementation is an approximation of the containment
     * relation: it reports {@code true} exactly when the given charset is
     * an instance of this charset's own runtime class. A {@code false}
     * result therefore does not prove that the given charset is not
     * contained in this one.
     *
     * @param charset the given charset
     * @return {@code true} if the given charset is known to be contained
     *         in this charset
     */
    @Override
    public boolean contains(Charset charset) {
        final Class<?> ownType = getClass();
        return ownType.isInstance(charset);
    }

    /**
     * Constructs a new decoder for this charset.
     *
     * @return a new decoder for this charset
     */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    /**
     * Constructs a new encoder for this charset.
     *
     * @return a new encoder for this charset
     */
    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * The <b>Encoder</b> inner class handles the encoding of the
     * charset using the char-to-byte (inverse) lookup table.
     */
    protected class Encoder extends CharsetEncoder {

        /**
         * Constructs an Encoder producing exactly one byte per character.
         *
         * @param charset the charset that created this encoder
         */
        protected Encoder(Charset charset) {
            super(charset, 1f, 1f);
        }

        /**
         * Encodes one or more characters into one or more bytes.
         *
         * @param in the input character buffer
         * @param out the output byte buffer
         * @return a coder-result object describing the reason for termination
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            final int[][] rows = charToByte; // read the field only once
            int room = out.remaining();
            for (int left = in.remaining(); left > 0; left--) {
                // every character needs exactly one output byte
                if (room < 1)
                    return CoderResult.OVERFLOW;
                room--;
                final char ch = in.get();
                final int[] row = rows[ch >>> 8];
                final int value = (row != null) ? row[ch & 0xFF] : -1;
                if (value == -1) {
                    in.position(in.position() - 1); // unread the character
                    return CoderResult.unmappableForLength(1);
                }
                out.put((byte)value);
            }
            return CoderResult.UNDERFLOW;
        }

    }

    /**
     * The <b>Decoder</b> inner class handles the decoding of the
     * charset using the byte-to-char lookup table.
     */
    protected class Decoder extends CharsetDecoder {

        /**
         * Constructs a Decoder producing exactly one character per byte.
         *
         * @param charset the charset that created this decoder
         */
        protected Decoder(Charset charset) {
            super(charset, 1f, 1f);
        }

        /**
         * Decodes one or more bytes into one or more characters.
         *
         * @param in the input byte buffer
         * @param out the output character buffer
         * @return a coder-result object describing the reason for termination
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            final int[] table = byteToChar; // read the field only once
            int room = out.remaining();
            for (int left = in.remaining(); left > 0; left--) {
                // every byte needs exactly one output character
                if (room < 1)
                    return CoderResult.OVERFLOW;
                room--;
                final int value = table[in.get() & 0xFF];
                if (value == -1) {
                    in.position(in.position() - 1); // unread the byte
                    return CoderResult.malformedForLength(1);
                }
                out.put((char)value);
            }
            return CoderResult.UNDERFLOW;
        }

    }

}
|
||||
@@ -0,0 +1,150 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.*;
|
||||
import net.freeutils.charset.gsm.*;
|
||||
import net.freeutils.charset.iso646.*;
|
||||
|
||||
/**
|
||||
* The <b>CharsetProvider</b> class is a Charset Provider implementation.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2005-06-10
|
||||
*/
|
||||
public class CharsetProvider extends java.nio.charset.spi.CharsetProvider {
|
||||
|
||||
static Map<String, Charset> nameToCharset;
|
||||
static Collection<Charset> charsets;
|
||||
|
||||
/**
|
||||
* Retrieves a charset for the given charset name.
|
||||
*
|
||||
* @param charsetName the name of the requested charset;
|
||||
* may be either a canonical name or an alias
|
||||
*
|
||||
* @return a charset object for the named charset,
|
||||
* or <tt>null</tt> if the named charset
|
||||
* is not supported by this provider
|
||||
*/
|
||||
@Override
|
||||
public Charset charsetForName(String charsetName) {
|
||||
if (nameToCharset == null)
|
||||
init();
|
||||
|
||||
// get charset instance for given name (case insensitive)
|
||||
Charset charset = nameToCharset.get(charsetName.toLowerCase());
|
||||
if (charset != null) {
|
||||
try {
|
||||
return charset.getClass().newInstance();
|
||||
} catch (Exception ignore) {
|
||||
// if we can't create an instance, we don't
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates an iterator that iterates over the charsets supported by this
|
||||
* provider. This method is used in the implementation of the {@link
|
||||
* java.nio.charset.Charset#availableCharsets Charset.availableCharsets}
|
||||
* method.
|
||||
*
|
||||
* @return the new iterator
|
||||
*/
|
||||
@Override
|
||||
public Iterator<Charset> charsets() {
|
||||
if (charsets == null)
|
||||
init();
|
||||
|
||||
return charsets.iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Initializes this charset provider's data.
|
||||
*/
|
||||
void init() {
|
||||
// prepare supported charsets
|
||||
Charset[] allCharsets = {
|
||||
new UTF7Charset(),
|
||||
new UTF7OptionalCharset(),
|
||||
new SCGSMCharset(),
|
||||
new CCGSMCharset(),
|
||||
new SCPackedGSMCharset(),
|
||||
new CCPackedGSMCharset(),
|
||||
new CRSCPackedGSMCharset(),
|
||||
new CRCCPackedGSMCharset(),
|
||||
new HPRoman8Charset(),
|
||||
new KOI8UCharset(),
|
||||
new KZ1048Charset(),
|
||||
new ISO88598Charset(),
|
||||
new ISO88596Charset(),
|
||||
new MIKCharset(),
|
||||
new ISO646CACharset(),
|
||||
new ISO646CA2Charset(),
|
||||
new ISO646CHCharset(),
|
||||
new ISO646CNCharset(),
|
||||
new ISO646CUCharset(),
|
||||
new ISO646DECharset(),
|
||||
new ISO646DKCharset(),
|
||||
new ISO646ESCharset(),
|
||||
new ISO646ES2Charset(),
|
||||
new ISO646FISECharset(),
|
||||
new ISO646FRCharset(),
|
||||
new ISO646FR1Charset(),
|
||||
new ISO646GBCharset(),
|
||||
new ISO646HUCharset(),
|
||||
new ISO646IECharset(),
|
||||
new ISO646INVCharset(),
|
||||
new ISO646IRVCharset(),
|
||||
new ISO646ISCharset(),
|
||||
new ISO646ITCharset(),
|
||||
new ISO646JAOCharset(),
|
||||
new ISO646JPCharset(),
|
||||
new ISO646KRCharset(),
|
||||
new ISO646MTCharset(),
|
||||
new ISO646NO2Charset(),
|
||||
new ISO646NOCharset(),
|
||||
new ISO646PTCharset(),
|
||||
new ISO646PT2Charset(),
|
||||
new ISO646SE2Charset(),
|
||||
new ISO646T61Charset(),
|
||||
new ISO646TWCharset(),
|
||||
new ISO646USCharset(),
|
||||
new ISO646YUCharset(),
|
||||
};
|
||||
|
||||
// initialize charset collection
|
||||
charsets = Collections.unmodifiableCollection(Arrays.asList(allCharsets));
|
||||
|
||||
// initialize name to charset map
|
||||
Map<String, Charset> map = new HashMap<String, Charset>();
|
||||
for (Charset charset : allCharsets) {
|
||||
map.put(charset.name().toLowerCase(), charset);
|
||||
for (String alias : charset.aliases())
|
||||
map.put(alias.toLowerCase(), charset);
|
||||
}
|
||||
nameToCharset = map;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
* The <b>EscapedByteLookupCharset</b> class handles the encoding and
|
||||
* decoding of simple charsets where the byte-to-char conversion
|
||||
* is performed using a simple lookup table, with the addition of a special
|
||||
* escape byte, such that the single byte following it is converted using
|
||||
* an alternate lookup table.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2007-03-26
|
||||
*/
|
||||
public abstract class EscapedByteLookupCharset extends Charset {

    /** Maps each unescaped byte value (0-255) to its character, or -1 if none. */
    final int[] byteToChar;

    /** Maps each byte value that follows the escape byte to its character, or -1 if none. */
    final int[] byteToCharEscaped;

    /** Two-level char-to-byte table for characters encoded as a single byte. */
    final int[][] charToByte;

    /** Two-level char-to-byte table for characters encoded as escape byte + byte. */
    final int[][] charToByteEscaped;

    /** The byte value that introduces an escaped (two-byte) sequence. */
    final byte escapeByte;

    /**
     * Initializes a new charset with the given canonical name, aliases,
     * escape byte and regular/escaped lookup tables. The tables are stored
     * as given, without being copied.
     *
     * @param canonicalName the canonical name of this charset
     * @param aliases an array of this charset's aliases, or null if it has no aliases
     * @param escapeByte the special escape byte value
     * @param byteToChar a byte-to-char conversion table for this charset
     * @param byteToCharEscaped a byte-to-char conversion table for this charset
     *        for the escaped characters
     * @param charToByte a char-to-byte conversion table for this charset. It can
     *        be generated on-the-fly by calling
     *        {@link ByteLookupCharset#createInverseLookupTable
     *        createInverseLookupTable(byteToChar)}.
     * @param charToByteEscaped a char-to-byte conversion table for this charset
     *        for the escaped characters
     * @throws java.nio.charset.IllegalCharsetNameException
     *         if the canonical name or any of the aliases are illegal
     */
    protected EscapedByteLookupCharset(String canonicalName, String[] aliases,
            byte escapeByte, int[] byteToChar, int[] byteToCharEscaped,
            int[][] charToByte, int[][] charToByteEscaped) {
        super(canonicalName, aliases);
        this.byteToChar = byteToChar;
        this.byteToCharEscaped = byteToCharEscaped;
        this.charToByte = charToByte;
        this.charToByteEscaped = charToByteEscaped;
        this.escapeByte = escapeByte;
    }

    /**
     * Tells whether or not this charset contains the given charset.
     *
     * <p> This implementation is an approximation of the containment
     * relation: it reports {@code true} exactly when the given charset is
     * an instance of this charset's own runtime class. A {@code false}
     * result therefore does not prove that the given charset is not
     * contained in this one.
     *
     * @param charset the given charset
     * @return {@code true} if the given charset is known to be contained
     *         in this charset
     */
    @Override
    public boolean contains(Charset charset) {
        final Class<?> ownType = getClass();
        return ownType.isInstance(charset);
    }

    /**
     * Constructs a new decoder for this charset.
     *
     * @return a new decoder for this charset
     */
    @Override
    public CharsetDecoder newDecoder() {
        return new Decoder(this);
    }

    /**
     * Constructs a new encoder for this charset.
     *
     * @return a new encoder for this charset
     */
    @Override
    public CharsetEncoder newEncoder() {
        return new Encoder(this);
    }

    /**
     * The <b>Encoder</b> inner class handles the encoding of the
     * charset using the regular and escaped char-to-byte lookup tables.
     */
    protected class Encoder extends CharsetEncoder {

        /**
         * Constructs an Encoder averaging one byte, and producing
         * at most two bytes, per character.
         *
         * @param charset the charset that created this encoder
         */
        protected Encoder(Charset charset) {
            super(charset, 1f, 2f);
        }

        /**
         * Constructs an Encoder with explicit output size estimates.
         *
         * @param charset the charset that created this encoder
         * @param averageBytesPerChar a positive float value indicating the expected
         *        number of bytes that will be produced for each input character
         * @param maxBytesPerChar a positive float value indicating the maximum
         *        number of bytes that will be produced for each input character
         */
        protected Encoder(Charset charset, float averageBytesPerChar, float maxBytesPerChar) {
            super(charset, averageBytesPerChar, maxBytesPerChar);
        }

        /**
         * Encodes one or more characters into one or more bytes.
         *
         * @param in the input character buffer
         * @param out the output byte buffer
         * @return a coder-result object describing the reason for termination
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
            // read the fields only once
            final byte esc = escapeByte;
            final int[][] regular = charToByte;
            final int[][] escaped = charToByteEscaped;
            int room = out.remaining();
            for (int left = in.remaining(); left > 0; left--) {
                // at least one output byte is needed per character
                if (room < 1)
                    return CoderResult.OVERFLOW;
                room--;
                final char ch = in.get();
                final int high = ch >> 8;
                final int low = ch & 0xFF;
                // try the regular (single byte) table first
                int[] row = regular[high];
                int value = (row != null) ? row[low] : -1;
                if (value == -1) {
                    // fall back to the escaped (two byte) table
                    row = escaped[high];
                    value = (row != null) ? row[low] : -1;
                    if (value == -1) {
                        // neither table knows this character
                        in.position(in.position() - 1); // unread the char
                        return CoderResult.unmappableForLength(1);
                    }
                    // the escape byte takes up a second output byte
                    if (room < 1) {
                        in.position(in.position() - 1); // unread the char
                        return CoderResult.OVERFLOW;
                    }
                    room--;
                    out.put(esc);
                }
                out.put((byte)value);
            }
            // all input was consumed
            return CoderResult.UNDERFLOW;
        }

    }

    /**
     * The <b>Decoder</b> inner class handles the decoding of the
     * charset using the regular and escaped byte-to-char lookup tables.
     */
    protected class Decoder extends CharsetDecoder {

        /**
         * Constructs a Decoder averaging, and producing at most,
         * one character per byte.
         *
         * @param charset the charset that created this decoder
         */
        protected Decoder(Charset charset) {
            super(charset, 1f, 1f);
        }

        /**
         * Constructs a Decoder with explicit output size estimates.
         *
         * @param charset the charset that created this decoder
         * @param averageCharsPerByte a positive float value indicating the expected
         *        number of characters that will be produced for each input byte
         * @param maxCharsPerByte a positive float value indicating the maximum
         *        number of characters that will be produced for each input byte
         */
        protected Decoder(Charset charset, float averageCharsPerByte, float maxCharsPerByte) {
            super(charset, averageCharsPerByte, maxCharsPerByte);
        }

        /**
         * Decodes one or more bytes into one or more characters.
         *
         * @param in the input byte buffer
         * @param out the output character buffer
         * @return a coder-result object describing the reason for termination
         */
        @Override
        protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
            // read the fields only once
            final byte esc = escapeByte;
            final int[] regular = byteToChar;
            final int[] escaped = byteToCharEscaped;
            int left = in.remaining();
            int room = out.remaining();
            while (left > 0) {
                left--;
                // each (possibly escaped) sequence yields one character
                if (room < 1)
                    return CoderResult.OVERFLOW;
                room--;
                final byte first = in.get();
                final int value;
                if (first != esc) {
                    // a regular byte
                    value = regular[first & 0xFF];
                } else {
                    // an escape byte: the byte after it selects the character
                    if (left < 1) {
                        in.position(in.position() - 1); // unread the escape byte
                        return CoderResult.UNDERFLOW;
                    }
                    left--;
                    value = escaped[in.get() & 0xFF];
                }

                if (value == -1) {
                    // the last byte read has no character in the table used
                    in.position(in.position() - 1); // unread that byte
                    return CoderResult.malformedForLength(1);
                }
                out.put((char)value);
            }
            // all input was consumed
            return CoderResult.UNDERFLOW;
        }

    }

}
|
||||
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
/**
|
||||
* The <b>HPRoman8Charset</b> class handles the encoding and decoding of the
|
||||
* HP Roman-8 charset, as provided in RFC 1345.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2005-06-30
|
||||
*/
|
||||
public class HPRoman8Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "hp-roman8";
|
||||
|
||||
static final String[] ALIASES = {
|
||||
"roman8", "r8", "csHPRoman8", "X-roman8" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = {
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
|
||||
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||||
0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
||||
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||||
0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
||||
0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF,
|
||||
0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4,
|
||||
0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1,
|
||||
0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2,
|
||||
0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA,
|
||||
0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC,
|
||||
0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6,
|
||||
0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4,
|
||||
0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3,
|
||||
0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF,
|
||||
0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC,
|
||||
0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, -1,
|
||||
};
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the HPRoman8Charset.
|
||||
*/
|
||||
public HPRoman8Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
/**
|
||||
* The <b>ISO88596Charset</b> class handles the encoding and decoding of the
|
||||
* ISO 8859-6 charset. Although the JRE includes an implementation of this
|
||||
* charset, it does not recognize two aliases for this charset:
|
||||
* ISO-8859-6-i and ISO-8859-6-e. These signify whether bidirectionality
|
||||
* is implicit or explicit. However, this is the displayer's responsibility
|
||||
* in any case, and the character conversion is the same.
|
||||
* See RFC 1556.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2005-06-30
|
||||
*/
|
||||
public class ISO88596Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO-8859-6-BIDI";
|
||||
|
||||
static final String[] ALIASES = {
|
||||
"csISO88596I", "ISO-8859-6-I", "ISO_8859-6-I",
|
||||
"csISO88596E", "ISO-8859-6-E", "ISO_8859-6-E" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = {
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
|
||||
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||||
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
|
||||
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||||
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
|
||||
0x00a0, -1, -1, -1, 0x00a4, -1, -1, -1,
|
||||
-1, -1, -1, -1, 0x060c, 0x00ad, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, 0x061b, -1, -1, -1, 0x061f,
|
||||
-1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
|
||||
0x0628, 0x0629, 0x062a, 0x062b, 0x062c, 0x062d, 0x062e, 0x062f,
|
||||
0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
|
||||
0x0638, 0x0639, 0x063a, -1, -1, -1, -1, -1,
|
||||
0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
|
||||
0x0648, 0x0649, 0x064a, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f,
|
||||
0x0650, 0x0651, 0x0652, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO88596Charset.
|
||||
*/
|
||||
public ISO88596Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
/**
|
||||
* The <b>ISO88598Charset</b> class handles the encoding and decoding of the
|
||||
* ISO 8859-8 charset. Although the JRE includes an implementation of this
|
||||
* charset, it does not recognize two aliases for this charset:
|
||||
* ISO-8859-8-i and ISO-8859-8-e. These signify whether bidirectionality
|
||||
* is implicit or explicit. However, this is the displayer's responsibility
|
||||
* in any case, and the character conversion is the same.
|
||||
* See RFC 1556.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2005-06-30
|
||||
*/
|
||||
public class ISO88598Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO-8859-8-BIDI";
|
||||
|
||||
static final String[] ALIASES = {
|
||||
"csISO88598I", "ISO-8859-8-I", "ISO_8859-8-I",
|
||||
"csISO88598E", "ISO-8859-8-E", "ISO_8859-8-E" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = {
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
|
||||
0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
||||
0x0088, 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f,
|
||||
0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
||||
0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 0x009d, 0x009e, 0x009f,
|
||||
0x00a0, -1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
|
||||
0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x203e, // 0x00af or 0x203e?
|
||||
0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
|
||||
0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, 0x2017,
|
||||
0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
|
||||
0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
|
||||
0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
|
||||
0x05e8, 0x05e9, 0x05ea, -1, -1, 0x200e, 0x200f, -1, // 0x200e/0x200f added to spec
|
||||
};
|
||||
|
||||
static {
|
||||
// update the mapping for the MACRON character
|
||||
// (see http://bugs.java.com/bugdatabase/view_bug.do?bug_id=4760496).
|
||||
// apply the fix only if we're running in JDK 1.5 or higher,
|
||||
// so that we remain consistent with the JDK ISO-8859-8 charset
|
||||
// implementation.
|
||||
if (Utils.isJDK15())
|
||||
BYTE_TO_CHAR[0xaf] = 0x00af;
|
||||
}
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO88598Charset.
|
||||
*/
|
||||
public ISO88598Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
/**
|
||||
* The <b>KOI8UCharset</b> class handles the encoding and decoding of the
|
||||
* KOI8-U charset, as provided in RFC 2319.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2005-08-02
|
||||
*/
|
||||
public class KOI8UCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "KOI8-U";
|
||||
|
||||
static final String[] ALIASES = { "KOI8-RU", "KOI8_U" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = {
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, 0x002e, 0x002f,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005a, 0x005b, 0x005c, 0x005d, 0x005e, 0x005f,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007a, 0x007b, 0x007c, 0x007d, 0x007e, 0x007f,
|
||||
0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
|
||||
0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
|
||||
0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
|
||||
0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
|
||||
0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
|
||||
0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
|
||||
0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
|
||||
0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
|
||||
0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
|
||||
0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
|
||||
0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
|
||||
0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
|
||||
0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
|
||||
0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
|
||||
0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
|
||||
0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A,
|
||||
};
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the KOI8UCharset.
|
||||
*/
|
||||
public KOI8UCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
/**
|
||||
* The <b>KZ1048Charset</b> class handles the encoding and decoding of the
|
||||
* KZ-1048 charset, created as Kazakhstan national standard STRK1048-2002
|
||||
* by modifying windows-1251, the Windows Cyrillic code page, with
|
||||
* 16 modified mappings. It is registered in IANA as KZ-1048.
|
||||
* <p>
|
||||
* The encoding and decoding are based on the mapping at
|
||||
* https://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KZ1048.TXT
|
||||
* and https://www.iana.org/assignments/charset-reg/KZ-1048
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2019-03-31
|
||||
*/
|
||||
public class KZ1048Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "KZ-1048";
|
||||
|
||||
static final String[] ALIASES = { "STRK1048-2002", "RK1048", "csKZ1048" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = {
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
|
||||
0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
|
||||
0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x049A, 0x04BA, 0x040F,
|
||||
0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
||||
-1, 0x2122, 0x0459, 0x203A, 0x045A, 0x049B, 0x04BB, 0x045F,
|
||||
0x00A0, 0x04B0, 0x04B1, 0x04D8, 0x00A4, 0x04E8, 0x00A6, 0x00A7,
|
||||
0x0401, 0x00A9, 0x0492, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x04AE,
|
||||
0x00B0, 0x00B1, 0x0406, 0x0456, 0x04E9, 0x00B5, 0x00B6, 0x00B7,
|
||||
0x0451, 0x2116, 0x0493, 0x00BB, 0x04D9, 0x04A2, 0x04A3, 0x04AF,
|
||||
0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
|
||||
0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
|
||||
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
|
||||
0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
|
||||
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
|
||||
0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
|
||||
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
|
||||
0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
|
||||
};
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the KZ1048Charset.
|
||||
*/
|
||||
public KZ1048Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
/**
|
||||
* The <b>MIKCharset</b> class handles the encoding and decoding of the
|
||||
* MIK cyrillic code page, commonly used by DOS applications in Bulgaria.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2009-12-16
|
||||
*/
|
||||
public class MIKCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "MIK";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
static final int[] BYTE_TO_CHAR = {
|
||||
0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
|
||||
0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
|
||||
0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
|
||||
0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
|
||||
0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
|
||||
0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
|
||||
0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
|
||||
0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
|
||||
0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
|
||||
0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
|
||||
0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
|
||||
0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
|
||||
0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
|
||||
0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
|
||||
0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x2563, 0x2551,
|
||||
0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2510,
|
||||
0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2116, 0x00A7, 0x2557,
|
||||
0x255D, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
|
||||
0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
|
||||
0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229,
|
||||
0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
|
||||
0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0,
|
||||
};
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = // updated with ambiguous mappings
|
||||
updateInverseLookupTable(
|
||||
createInverseLookupTable(BYTE_TO_CHAR),
|
||||
new int[] { 0x00DF, 0x2211, 0x00B5, 0x2126, 0x2205, 0x2208,
|
||||
0x03B2, 0x03A3, 0x03BC, 0x03A9, 0x03C6, 0x03B5 },
|
||||
new int[] { 0xE1, 0xE4, 0xE6, 0xEA, 0xED, 0xEE,
|
||||
0xE1, 0xE4, 0xE6, 0xEA, 0xED, 0xEE });
|
||||
|
||||
/**
|
||||
* Constructs an instance of the MIKCharset.
|
||||
*/
|
||||
public MIKCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,482 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.*;
|
||||
|
||||
/**
|
||||
* The <b>UTF7Charset</b> class handles the encoding and decoding of the
|
||||
* UTF-7 charset.
|
||||
* <p>
|
||||
* The encoding and decoding are based on RFC 2152
|
||||
* (http://www.ietf.org/rfc/rfc2152.txt)
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2005-06-10
|
||||
*/
|
||||
public class UTF7Charset extends Charset {
|
||||
|
||||
static final String NAME = "UTF-7";
|
||||
|
||||
static final String[] ALIASES = {
|
||||
"UTF7", "UNICODE-1-1-UTF-7", "csUnicode11UTF7", "UNICODE-2-0-UTF-7" };
|
||||
|
||||
// a lookup table for characters that are part of the D Set
|
||||
static final boolean[] D_SET = {
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, true, true, false, false, true, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
true, false, false, false, false, false, false, true,
|
||||
true, true, false, false, true, true, true, true,
|
||||
true, true, true, true, true, true, true, true,
|
||||
true, true, true, false, false, false, false, true,
|
||||
false, true, true, true, true, true, true, true,
|
||||
true, true, true, true, true, true, true, true,
|
||||
true, true, true, true, true, true, true, true,
|
||||
true, true, true, false, false, false, false, false,
|
||||
false, true, true, true, true, true, true, true,
|
||||
true, true, true, true, true, true, true, true,
|
||||
true, true, true, true, true, true, true, true,
|
||||
true, true, true, false, false, false, false, false,
|
||||
};
|
||||
|
||||
// a lookup table for characters that are part of the O Set
|
||||
static final boolean[] O_SET = {
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, true, true, true, true, true, true, false,
|
||||
false, false, true, false, false, false, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, false, false, true, true, true, true, false,
|
||||
true, false, false, false, false, false, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, false, false, true, false, true, true, true,
|
||||
true, false, false, false, false, false, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, false, false, false, false, false, false, false,
|
||||
false, false, false, true, true, true, false, false,
|
||||
};
|
||||
|
||||
// a lookup table for characters that are part of the B Set
|
||||
static final int[] B_SET = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
|
||||
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
|
||||
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
|
||||
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
|
||||
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
// an inverse lookup table for characters that are part of the B Set
|
||||
static final int[] B_SET_INVERSE = {
|
||||
65, 66, 67, 68, 69, 70, 71, 72,
|
||||
73, 74, 75, 76, 77, 78, 79, 80,
|
||||
81, 82, 83, 84, 85, 86, 87, 88,
|
||||
89, 90, 97, 98, 99, 100, 101, 102,
|
||||
103, 104, 105, 106, 107, 108, 109, 110,
|
||||
111, 112, 113, 114, 115, 116, 117, 118,
|
||||
119, 120, 121, 122, 48, 49, 50, 51,
|
||||
52, 53, 54, 55, 56, 57, 43, 47,
|
||||
};
|
||||
|
||||
// the RFC specifies that the O-set characters may
|
||||
// optionally be directly encoded. Whether they are
|
||||
// encoded directly or using a shift sequence depends
|
||||
// on the value of the optionalDirect flag.
|
||||
final boolean optionalDirect;
|
||||
|
||||
static boolean isDSet(byte b) {
|
||||
return b >= 0 && D_SET[b];
|
||||
}
|
||||
|
||||
static boolean isDSet(char c) {
|
||||
return c < 0x80 && D_SET[c];
|
||||
}
|
||||
|
||||
static boolean isOSet(byte b) {
|
||||
return b >= 0 && O_SET[b];
|
||||
}
|
||||
|
||||
static boolean isOSet(char c) {
|
||||
return c < 0x80 && O_SET[c];
|
||||
}
|
||||
|
||||
static boolean isDorOSet(byte b) {
|
||||
return b >= 0 && (D_SET[b] || O_SET[b]);
|
||||
}
|
||||
|
||||
static boolean isDorOSet(char c) {
|
||||
return c < 0x80 && (D_SET[c] || O_SET[c]);
|
||||
}
|
||||
|
||||
static boolean isBSet(byte b) {
|
||||
return b >= 0 && B_SET[b] != -1;
|
||||
}
|
||||
|
||||
static boolean isBSet(char c) {
|
||||
return c < 0x80 && B_SET[c] != -1;
|
||||
}
|
||||
|
||||
static byte fromBase64(byte b) {
|
||||
return (byte)(b < 0 ? -1 : B_SET[b]);
|
||||
}
|
||||
|
||||
static byte toBase64(byte b) {
|
||||
return (byte)(b < 0 || b >= 64 ? -1 : B_SET_INVERSE[b]);
|
||||
}
|
||||
|
||||
/**
 * Creates a UTF7Charset with the default name and aliases.
 * <p>
 * With this constructor the O-set characters are not encoded
 * directly (the optionalDirect flag is false).
 */
public UTF7Charset() {
    this(NAME, ALIASES, false);
}
|
||||
|
||||
/**
 * Creates a UTF7Charset with the given name and aliases, and with an
 * explicit choice of how the O-set characters are encoded.
 *
 * @param canonicalName the canonical name of this charset
 * @param aliases an array of this charset's aliases,
 *        or null if it has no aliases
 * @param optionalDirect true to encode the O-set characters directly,
 *        false to encode them using a shift sequence
 * @throws IllegalCharsetNameException
 *         if the canonical name or any of the aliases are illegal
 */
public UTF7Charset(String canonicalName, String[] aliases, boolean optionalDirect) {
    super(canonicalName, aliases);
    this.optionalDirect = optionalDirect;
}
|
||||
|
||||
/**
|
||||
* Returns whether the given character is encoded directly
|
||||
* or using a shift sequence.
|
||||
*
|
||||
* @param c the character to check
|
||||
* @return true if the character is encoded directly,
|
||||
* false if it is encoded using a shift sequence
|
||||
*/
|
||||
boolean isDirect(char c) {
|
||||
return c < 0x80 && (D_SET[c] || (optionalDirect && O_SET[c]));
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether or not this charset contains the given charset.
|
||||
*
|
||||
* <p> A charset <i>C</i> is said to <i>contain</i> a charset <i>D</i> if,
|
||||
* and only if, every character representable in <i>D</i> is also
|
||||
* representable in <i>C</i>. If this relationship holds then it is
|
||||
* guaranteed that every string that can be encoded in <i>D</i> can also be
|
||||
* encoded in <i>C</i> without performing any replacements.
|
||||
*
|
||||
* <p> That <i>C</i> contains <i>D</i> does not imply that each character
|
||||
* representable in <i>C</i> by a particular byte sequence is represented
|
||||
* in <i>D</i> by the same byte sequence, although sometimes this is the
|
||||
* case.
|
||||
*
|
||||
* <p> Every charset contains itself.
|
||||
*
|
||||
* <p> This method computes an approximation of the containment relation:
|
||||
* If it returns <tt>true</tt> then the given charset is known to be
|
||||
* contained by this charset; if it returns <tt>false</tt>, however, then
|
||||
* it is not necessarily the case that the given charset is not contained
|
||||
* in this charset.
|
||||
*
|
||||
* @return <tt>true</tt> if, and only if, the given charset
|
||||
* is contained in this charset
|
||||
*/
|
||||
@Override
|
||||
public boolean contains(Charset charset) {
|
||||
return getClass().isInstance(charset) || Charset.forName("UTF-16").contains(charset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new decoder for this charset.
|
||||
*
|
||||
* @return a new decoder for this charset
|
||||
*/
|
||||
@Override
|
||||
public CharsetDecoder newDecoder() {
|
||||
return new Decoder(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new encoder for this charset.
|
||||
*
|
||||
* @return a new encoder for this charset
|
||||
*/
|
||||
@Override
|
||||
public CharsetEncoder newEncoder() {
|
||||
return new Encoder(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* The <b>Encoder</b> inner class handles the encoding of the UTF7 charset.
|
||||
*/
|
||||
protected class Encoder extends CharsetEncoder {
|
||||
|
||||
boolean shifted; // flags whether we are currently in a shift sequence
|
||||
char encodedChar; // holds the bits of previous partially encoded char
|
||||
int requiredBits; // number of bits required to complete a 6-bit value
|
||||
|
||||
/**
|
||||
* Constructs an Encoder.
|
||||
*
|
||||
* @param charset the charset that created this encoder
|
||||
*/
|
||||
protected Encoder(Charset charset) {
|
||||
super(charset, 1f, 5f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this decoder, clearing any charset-specific internal state.
|
||||
*/
|
||||
@Override
|
||||
protected void implReset() {
|
||||
shifted = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Flushes this encoder.
|
||||
*
|
||||
* @param out the output byte buffer
|
||||
* @return a coder-result object, either {@link CoderResult#UNDERFLOW} or
|
||||
* {@link CoderResult#OVERFLOW}
|
||||
*/
|
||||
@Override
|
||||
protected CoderResult implFlush(ByteBuffer out) {
|
||||
if (shifted) {
|
||||
if (out.remaining() < 2)
|
||||
return CoderResult.OVERFLOW;
|
||||
flushBase64Char(out);
|
||||
out.put((byte)'-'); // terminate shift sequence explicitly
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes one or more characters into one or more bytes.
|
||||
*
|
||||
* @param in the input character buffer
|
||||
* @param out the output byte buffer
|
||||
* @return a coder-result object describing the reason for termination
|
||||
*/
|
||||
@Override
|
||||
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
|
||||
int remaining = in.remaining();
|
||||
|
||||
while (remaining-- > 0) {
|
||||
// make sure we have output space (the max we might need)
|
||||
if (out.remaining() < 3)
|
||||
return CoderResult.OVERFLOW;
|
||||
|
||||
// get next byte
|
||||
char c = in.get();
|
||||
|
||||
// if not in shift sequence
|
||||
if (!shifted) {
|
||||
// if char is in set D, write it as byte directly
|
||||
if (isDirect(c)) {
|
||||
out.put((byte)c);
|
||||
} else if (c == '+') { // specially encoded char
|
||||
out.put((byte)'+').put((byte)'-');
|
||||
} else { // start shift sequence
|
||||
out.put((byte)'+');
|
||||
shifted = true;
|
||||
requiredBits = 6;
|
||||
writeBase64Char(out, c);
|
||||
}
|
||||
} else { // shifted
|
||||
if (isDirect(c)) { // direct char
|
||||
// terminate shift sequence
|
||||
shifted = false;
|
||||
flushBase64Char(out);
|
||||
if (isBSet(c) || c == '-') // requires explicit termination
|
||||
out.put((byte)'-');
|
||||
// write direct char
|
||||
out.put((byte)c);
|
||||
} else { // another encoded char
|
||||
writeBase64Char(out, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes the base64 bytes representing the given character
|
||||
* to the given output ByteBuffer. Bits left over from
|
||||
* previously written characters are written first, followed
|
||||
* by this character's bits. Similarly, bits left over from
|
||||
* this character are saved until the next call to this method.
|
||||
*
|
||||
* @param out the ByteBuffer to which the base64 bytes are written
|
||||
* @param c the character to be written
|
||||
*/
|
||||
void writeBase64Char(ByteBuffer out, char c) {
|
||||
int bits = requiredBits; // getfield bytecode optimization
|
||||
byte b = (byte)(((encodedChar << bits) & 0x3F) | (c >>> (16 - bits)));
|
||||
out.put(toBase64(b));
|
||||
b = (byte)((c >>> (10 - bits)) & 0x3F);
|
||||
out.put(toBase64(b));
|
||||
|
||||
if (bits != 6) {
|
||||
b = (byte)((c >>> (4 - bits)) & 0x3F);
|
||||
out.put(toBase64(b));
|
||||
requiredBits += 2;
|
||||
} else {
|
||||
requiredBits = 2;
|
||||
}
|
||||
encodedChar = c;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes any left-over base64 bits.
|
||||
*
|
||||
* @param out the ByteBuffer to which the base64 bytes are written
|
||||
*/
|
||||
void flushBase64Char(ByteBuffer out) {
|
||||
if (requiredBits != 6) { // dump last encoded byte, zero-bit padded
|
||||
byte b = (byte)((encodedChar << requiredBits) & 0x3F);
|
||||
out.put(toBase64(b));
|
||||
}
|
||||
}
|
||||
|
||||
} // Encoder class
|
||||
|
||||
/**
|
||||
* The <b>Decoder</b> inner class handles the decoding of the UTF7 charset.
|
||||
*/
|
||||
protected class Decoder extends CharsetDecoder {
|
||||
|
||||
boolean shifted; // flags whether we are currently in a shift sequence
|
||||
boolean emptyShift; // flags whether the current shift sequence is empty
|
||||
char decodedChar; // holds the bits of previous partially decoded char
|
||||
int requiredBits; // number of bits required to complete a 16-bit char
|
||||
|
||||
/**
|
||||
* Constructs a Decoder.
|
||||
*
|
||||
* @param charset the charset that created this decoder
|
||||
*/
|
||||
protected Decoder(Charset charset) {
|
||||
super(charset, 1f, 1f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this decoder, clearing any charset-specific internal state.
|
||||
*/
|
||||
@Override
|
||||
protected void implReset() {
|
||||
shifted = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes one or more bytes into one or more characters.
|
||||
*
|
||||
* @param in the input byte buffer
|
||||
* @param out the output character buffer
|
||||
* @return a coder-result object describing the reason for termination
|
||||
*/
|
||||
@Override
|
||||
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
|
||||
int val;
|
||||
int remaining = in.remaining();
|
||||
|
||||
while (remaining-- > 0) {
|
||||
// make sure we have output space
|
||||
if (out.remaining() < 1)
|
||||
return CoderResult.OVERFLOW;
|
||||
|
||||
// get next byte
|
||||
byte b = in.get();
|
||||
|
||||
// if not in shift sequence
|
||||
if (!shifted) {
|
||||
// if byte is in set D or O, write it as char
|
||||
if (isDorOSet(b)) {
|
||||
out.put((char)b);
|
||||
} else if (b == '+') { // start shift sequence
|
||||
shifted = true;
|
||||
emptyShift = true;
|
||||
requiredBits = 16;
|
||||
} else { // invalid byte
|
||||
in.position(in.position() - 1); // position input at error byte
|
||||
return CoderResult.malformedForLength(1); // invalid byte
|
||||
}
|
||||
} else if ((val = fromBase64(b)) != -1) { // valid base64 byte
|
||||
// get bits from shift sequence byte
|
||||
emptyShift = false;
|
||||
// 6 is the max number of bits we can get from a single input byte
|
||||
int bits = requiredBits >= 6 ? 6 : requiredBits;
|
||||
// add new bits to currently decoded char
|
||||
decodedChar = (char)((decodedChar << bits) | (val >> (6 - bits)));
|
||||
requiredBits -= bits;
|
||||
// check if we're done decoding a full 16-bit char
|
||||
if (requiredBits == 0) {
|
||||
// output it
|
||||
out.put(decodedChar);
|
||||
// and start off next char with remaining bits
|
||||
requiredBits = 10 + bits; // 16 - (6 - bits)
|
||||
decodedChar = (char)val; // save the extra bits for later
|
||||
}
|
||||
} else { // terminating a shift sequence
|
||||
shifted = false;
|
||||
// any leftover bits when terminating the shift sequence
|
||||
// are discarded if they are zero, or invalid if they are nonzero
|
||||
if ((char)(decodedChar << requiredBits) != 0) {
|
||||
in.position(in.position() - 1); // position input at error byte
|
||||
return CoderResult.malformedForLength(1); // invalid byte
|
||||
}
|
||||
// process implicit or explicit shift sequence termination
|
||||
if (b == '-') {
|
||||
if (emptyShift) // a "+-" sequence outputs a '+'
|
||||
out.put('+');
|
||||
// otherwise shift ends, and '-' is absorbed
|
||||
} else {
|
||||
// process regular char that ended base64 sequence
|
||||
if (isDorOSet(b)) { // output regular char
|
||||
out.put((char)b);
|
||||
} else {
|
||||
in.position(in.position() - 1); // position input at error byte
|
||||
return CoderResult.malformedForLength(1); // invalid byte
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
} // Decoder class
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
/**
|
||||
* The <b>UTF7OptionalCharset</b> class handles the encoding and decoding of the
|
||||
* UTF-7 charset.
|
||||
* <p>
|
||||
* The O-Set characters are encoded directly
|
||||
* (this is optional according to the RFC).
|
||||
* <p>
|
||||
* The encoding and decoding are based on RFC 2152
|
||||
* (http://www.ietf.org/rfc/rfc2152.txt)
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2005-06-10
|
||||
*/
|
||||
public class UTF7OptionalCharset extends UTF7Charset {
|
||||
|
||||
static final String NAME = "UTF-7-OPTIONAL";
|
||||
|
||||
static final String[] ALIASES = { "UTF-7O", "UTF7O", "UTF-7-O" };
|
||||
|
||||
/**
|
||||
* Constructs an instance of the UTF7OptionalCharset.
|
||||
* <p>
|
||||
* O-set characters are directly encoded.
|
||||
*/
|
||||
public UTF7OptionalCharset() {
|
||||
super(NAME, ALIASES, true);
|
||||
}
|
||||
|
||||
}
|
||||
183
lib/jcharset-2.1/src/main/java/net/freeutils/charset/Utils.java
Normal file
183
lib/jcharset-2.1/src/main/java/net/freeutils/charset/Utils.java
Normal file
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * The <b>Utils</b> class is a static-only holder for helpers used by the
 * charsets at runtime, a design-time generator for inverse lookup table
 * source code, and the command-line charset conversion tool.
 *
 * @author Amichai Rothman
 * @since 2015-10-14
 */
public class Utils {

    // static-only class: no instances
    private Utils() {}

    /**
     * Returns whether the running JDK version is at least 1.5, judged
     * by the {@code java.class.version} system property.
     *
     * @return true if running in JDK 1.5 or higher; false otherwise,
     *         including when the property cannot be read or parsed
     */
    static boolean isJDK15() {
        boolean atLeast15;
        try {
            String classVersion = System.getProperty("java.class.version");
            // 49.0 is the class version of JDK 1.5
            atLeast15 = Float.parseFloat(classVersion) >= 49.0;
        } catch (Exception e) {
            atLeast15 = false;
        }
        return atLeast15;
    }

    /**
     * Returns a string containing the Java definitions of the
     * given inverse lookup (char-to-byte) table.
     * <p>
     * Intended for design-time use: the generated text can be pasted
     * into a charset's source instead of building the inverse lookup
     * table on-the-fly. Null sub-tables are emitted as {@code null}
     * (wrapped after every eighth consecutive one), and non-null
     * sub-tables are emitted as hex rows of eight values each.
     *
     * @param tables the inverse lookup (char-to-byte) table
     * @return the Java definitions of the inverse lookup
     *         (char-to-byte) table
     */
    public static String toInverseLookupTableDefinition(int[][] tables) {
        StringBuilder sb = new StringBuilder("static final int[][] CHAR_TO_BYTE = {\n\t");
        int nulls = 0; // length of the current run of null sub-tables
        int high = 0; // index of the current sub-table (the char's high byte)
        for (int[] table : tables) {
            if (table == null) {
                // start a new line after every 8 consecutive nulls
                boolean wrap = nulls > 0 && nulls % 8 == 0;
                nulls++;
                if (wrap)
                    sb.append("\n\t");
                sb.append("null, ");
            } else {
                // a run of nulls ends: put the sub-table on its own line
                if (nulls > 0)
                    sb.append("\n\t");
                nulls = 0;
                sb.append("{ // high byte = 0x");
                appendHex(sb, high);
                sb.append("\n\t");
                int column = 0;
                for (int value : table) {
                    if (value == -1) {
                        sb.append(" -1, ");
                    } else {
                        sb.append("0x");
                        appendHex(sb, value);
                        sb.append(", ");
                    }
                    // eight values per row
                    if (++column % 8 == 0)
                        sb.append("\n\t");
                }
                sb.append("}, \n\t");
            }
            high++;
        }
        return sb.append("\n\t};").toString();
    }

    /**
     * Appends the hex form of the given value, prefixed with a
     * single '0' when the value is below 0x10.
     *
     * @param sb the builder to append to
     * @param value the value to append
     */
    private static void appendHex(StringBuilder sb, int value) {
        if (value < 0x10)
            sb.append('0');
        sb.append(Integer.toHexString(value));
    }

    /**
     * Prints the name and aliases of every available charset whose
     * implementation class name starts with the given prefix.
     *
     * @param so the stream to print to
     * @param classNamePrefix the required class name prefix
     *        (an empty string matches all charsets)
     */
    private static void listCharsets(PrintStream so, String classNamePrefix) {
        for (Charset charset : Charset.availableCharsets().values()) {
            String className = charset.getClass().getName();
            if (className.startsWith(classNamePrefix))
                so.println(charset.name() + " " + charset.aliases());
        }
    }

    /**
     * Prints the command-line usage text.
     *
     * @param so the stream to print to
     */
    private static void printUsage(PrintStream so) {
        so.println("Usage: java -jar jcharset.jar [options] [inputFiles...]\n");
        so.println("Converts the charset encoding of one or more (concatenated) input files.");
        so.println("If no files or '-' (dash) is specified, input is read from stdin.");
        so.println("\nOptions:");
        so.println(" -f <fromCharset>\tthe name of the input charset [default UTF-8]");
        so.println(" -t <toCharset>\tthe name of the output charset [default UTF-8]");
        so.println(" -o <outputFile>\tthe output file name [default stdout]");
        so.println(" -l\t\t\tlist all available charset names and aliases");
        so.println(" -ll\t\t\tlist all JCharset charset names and aliases");
        so.println(" -h, -?\t\tshow this help information");
    }

    /**
     * Copies all remaining chars from the reader to the writer.
     *
     * @param in the reader to drain
     * @param out the writer receiving the chars
     * @param buf the transfer buffer
     * @throws IOException if reading or writing fails
     */
    private static void pump(Reader in, Writer out, char[] buf) throws IOException {
        for (int n = in.read(buf); n > -1; n = in.read(buf))
            out.write(buf, 0, n);
    }

    /**
     * Main entry point for command-line utility.
     * <p>
     * Options are only recognized before the first input file; a missing
     * option value or an unknown option ends the program with status 2.
     *
     * @param args the command line arguments
     * @throws IOException if an error occurs
     */
    public static void main(String[] args) throws IOException {
        // defaults, possibly overridden by the arguments
        String from = "UTF-8";
        String to = "UTF-8";
        String outputFile = null;
        List<String> inputFiles = new ArrayList<String>();
        PrintStream so = System.out;
        try {
            int i = 0;
            while (i < args.length) {
                String arg = args[i++];
                boolean option = inputFiles.isEmpty() && arg.length() > 1 && arg.startsWith("-");
                if (!option) {
                    inputFiles.add(arg); // all remaining args are input files
                } else if (arg.equals("-o")) {
                    outputFile = args[i++]; // throws IOOBE if the value is missing
                } else if (arg.equals("-f")) {
                    from = args[i++]; // throws IOOBE if the value is missing
                } else if (arg.equals("-t")) {
                    to = args[i++]; // throws IOOBE if the value is missing
                } else if (arg.equals("-l") || arg.equals("-ll")) {
                    // -ll restricts the listing to classes of this package
                    String filter = arg.equals("-ll") ? Utils.class.getPackage().getName() : "";
                    listCharsets(so, filter);
                    System.exit(0);
                } else if (arg.equals("-?") || arg.equals("-h")) {
                    printUsage(so);
                    System.exit(1);
                } else {
                    // unknown option: handled like a missing option value
                    throw new IndexOutOfBoundsException();
                }
            }
        } catch (IndexOutOfBoundsException ioobe) {
            System.err.println("Error: invalid argument");
            System.err.println("Use the -h option for help");
            System.exit(2);
        }
        if (inputFiles.isEmpty())
            inputFiles.add("-"); // no input files means stdin

        // perform conversion
        OutputStream os;
        if (outputFile == null)
            os = so;
        else
            os = new FileOutputStream(outputFile);
        OutputStreamWriter writer = null;
        try {
            writer = new OutputStreamWriter(os, to);
            char[] buf = new char[16384];
            for (String inputFile : inputFiles) {
                InputStream is = "-".equals(inputFile) ? System.in : new FileInputStream(inputFile);
                try {
                    pump(new InputStreamReader(is, from), writer, buf);
                } finally {
                    is.close();
                }
            }
        } finally {
            if (writer != null)
                writer.close(); // also flushes encoder
            os.close();
        }
    }

}
|
||||
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.gsm;
|
||||
|
||||
/**
|
||||
* The <b>CCGSMCharset</b> class handles the encoding and decoding of the
|
||||
* GSM default encoding charset. In this variant, byte 0x09 is mapped
|
||||
* to the LATIN CAPITAL LETTER C WITH CEDILLA character.
|
||||
* <p>
|
||||
* The encoding and decoding are based on the mapping at
|
||||
* http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2007-03-26
|
||||
*/
|
||||
public class CCGSMCharset extends GSMCharset {
|
||||
|
||||
static final String NAME = "CCGSM";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
/**
|
||||
* Constructs an instance of the CCGSMCharset.
|
||||
*/
|
||||
public CCGSMCharset() {
|
||||
super(NAME, ALIASES,
|
||||
BYTE_TO_CHAR_CAPITAL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT,
|
||||
CHAR_TO_BYTE_CAPITAL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.gsm;
|
||||
|
||||
/**
|
||||
* The <b>CCPackedGSMCharset</b> class handles the encoding and decoding of the
|
||||
* GSM default encoding charset. In this variant, byte 0x09 is mapped
|
||||
* to the LATIN CAPITAL LETTER C WITH CEDILLA character.
|
||||
* <p>
|
||||
* The encoding and decoding are based on the mapping at
|
||||
* http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2007-03-26
|
||||
*/
|
||||
public class CCPackedGSMCharset extends PackedGSMCharset {
|
||||
|
||||
static final String NAME = "CCPGSM";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
/**
|
||||
* Constructs an instance of the CCPackedGSMCharset.
|
||||
*/
|
||||
public CCPackedGSMCharset() {
|
||||
super(NAME, ALIASES,
|
||||
BYTE_TO_CHAR_CAPITAL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT,
|
||||
CHAR_TO_BYTE_CAPITAL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT,
|
||||
false);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.gsm;
|
||||
|
||||
/**
|
||||
* The <b>CRCCPackedGSMCharset</b> class handles the encoding and decoding of the
|
||||
* GSM default encoding charset. In this variant, byte 0x09 is mapped
|
||||
* to the LATIN CAPITAL LETTER C WITH CEDILLA character.
|
||||
* It also uses {@link PackedGSMCharset CR-padding} instead of
|
||||
* zero-padding to avoid ambiguous interpretation of an '@' character.
|
||||
* <p>
|
||||
* The encoding and decoding are based on the mapping at
|
||||
* http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2019-03-31
|
||||
*/
|
||||
public class CRCCPackedGSMCharset extends PackedGSMCharset {
|
||||
|
||||
static final String NAME = "CRCCPGSM";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
/**
|
||||
* Constructs an instance of the CRCCPackedGSMCharset.
|
||||
*/
|
||||
public CRCCPackedGSMCharset() {
|
||||
super(NAME, ALIASES,
|
||||
BYTE_TO_CHAR_CAPITAL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT,
|
||||
CHAR_TO_BYTE_CAPITAL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT,
|
||||
true);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.gsm;
|
||||
|
||||
/**
|
||||
* The <b>CRSCPackedGSMCharset</b> class handles the encoding and decoding of the
|
||||
* GSM default encoding charset. In this variant, byte 0x09 is mapped
|
||||
* to the LATIN SMALL LETTER C WITH CEDILLA character.
|
||||
* It also uses {@link PackedGSMCharset CR-padding} instead of
|
||||
* zero-padding to avoid ambiguous interpretation of an '@' character.
|
||||
* <p>
|
||||
* The encoding and decoding are based on the mapping at
|
||||
* http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2019-03-31
|
||||
*/
|
||||
public class CRSCPackedGSMCharset extends PackedGSMCharset {
|
||||
|
||||
static final String NAME = "CRSCPGSM";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
/**
|
||||
* Constructs an instance of the CRSCPackedGSMCharset.
|
||||
*/
|
||||
public CRSCPackedGSMCharset() {
|
||||
super(NAME, ALIASES,
|
||||
BYTE_TO_CHAR_SMALL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT,
|
||||
CHAR_TO_BYTE_SMALL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT,
|
||||
true);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,147 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.gsm;
|
||||
|
||||
import static net.freeutils.charset.ByteLookupCharset.createInverseLookupTable;
|
||||
import static net.freeutils.charset.ByteLookupCharset.mutate;
|
||||
import net.freeutils.charset.EscapedByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>GSMCharset</b> class handles the encoding and decoding of the
|
||||
* GSM default encoding charset.
|
||||
* <p>
|
||||
* The encoding and decoding are based on the mapping at
|
||||
* http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2005-05-26
|
||||
*/
|
||||
public class GSMCharset extends EscapedByteLookupCharset {
|
||||
|
||||
static final byte ESCAPE = 0x1B;
|
||||
|
||||
static final int[] BYTE_TO_CHAR_SMALL_C_CEDILLA = {
|
||||
0x0040, 0x00A3, 0x0024, 0x00A5, 0x00E8, 0x00E9, 0x00F9, 0x00EC,
|
||||
0x00F2, 0x00E7, 0x000A, 0x00D8, 0x00F8, 0x000D, 0x00C5, 0x00E5,
|
||||
0x0394, 0x005F, 0x03A6, 0x0393, 0x039B, 0x03A9, 0x03A0, 0x03A8,
|
||||
0x03A3, 0x0398, 0x039E, -1, 0x00C6, 0x00E6, 0x00DF, 0x00C9,
|
||||
0x0020, 0x0021, 0x0022, 0x0023, 0x00A4, 0x0025, 0x0026, 0x0027,
|
||||
0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
|
||||
0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
|
||||
0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
|
||||
0x00A1, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
|
||||
0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
|
||||
0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
|
||||
0x0058, 0x0059, 0x005A, 0x00C4, 0x00D6, 0x00D1, 0x00DC, 0x00A7,
|
||||
0x00BF, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
|
||||
0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
|
||||
0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
|
||||
0x0078, 0x0079, 0x007A, 0x00E4, 0x00F6, 0x00F1, 0x00FC, 0x00E0,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
static final int[] BYTE_TO_CHAR_ESCAPED_DEFAULT = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, 0x000C, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, 0x005E, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
0x007B, 0x007D, -1, -1, -1, -1, -1, 0x005C,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, 0x005B, 0x007E, 0x005D, -1,
|
||||
0x007C, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, 0x20AC, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1,
|
||||
};
|
||||
|
||||
static final int[][] CHAR_TO_BYTE_SMALL_C_CEDILLA =
|
||||
createInverseLookupTable(BYTE_TO_CHAR_SMALL_C_CEDILLA);
|
||||
|
||||
static final int[][] CHAR_TO_BYTE_ESCAPED_DEFAULT =
|
||||
createInverseLookupTable(BYTE_TO_CHAR_ESCAPED_DEFAULT);
|
||||
|
||||
static final int[] BYTE_TO_CHAR_CAPITAL_C_CEDILLA =
|
||||
mutate(BYTE_TO_CHAR_SMALL_C_CEDILLA, new int[] { 9 }, new int[] { 0x00C7 });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE_CAPITAL_C_CEDILLA =
|
||||
createInverseLookupTable(BYTE_TO_CHAR_CAPITAL_C_CEDILLA);
|
||||
|
||||
/**
 * Creates a charset with the given names and lookup tables, passing them
 * to the superclass together with the {@code ESCAPE} constant.
 *
 * @param canonicalName the canonical name of this charset
 * @param aliases this charset's aliases, or null if it has none
 * @param byteToChar the byte-to-char conversion table
 * @param byteToCharEscaped the byte-to-char conversion table used for
 *        the escaped characters
 * @param charToByte the char-to-byte conversion table; it can be built
 *        by calling createInverseLookupTable(byteToChar)
 * @param charToByteEscaped the char-to-byte conversion table used for
 *        the escaped characters
 * @throws java.nio.charset.IllegalCharsetNameException
 *         if the canonical name or any of the aliases are illegal
 */
protected GSMCharset(String canonicalName, String[] aliases,
        int[] byteToChar, int[] byteToCharEscaped,
        int[][] charToByte, int[][] charToByteEscaped) {
    super(
        canonicalName,
        aliases,
        ESCAPE,
        byteToChar,
        byteToCharEscaped,
        charToByte,
        charToByteEscaped);
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,463 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.gsm;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.CharBuffer;
|
||||
import java.nio.charset.Charset;
|
||||
import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
|
||||
/**
|
||||
* The <b>PackedGSMCharset</b> class handles the encoding and decoding of the
|
||||
* GSM default encoding charset, with packing as per GSM 03.38 / ETSI TS 123 038 spec.
|
||||
* <p>
|
||||
* When there are 8*n-1 encoded bytes, there is ambiguity
|
||||
* since it's impossible to distinguish whether the final byte
|
||||
* contains a trailing '@' character (which is mapped to 0)
|
||||
* or 7 zero bits of padding following 7 data bytes.
|
||||
* <p>
|
||||
* When decoding, we opt for the latter interpretation
|
||||
* since it's far more likely, at the cost of losing a
|
||||
* trailing '@' character in strings whose unpacked size
|
||||
* is a multiple of 8, and whose last character is '@'.
|
||||
* <p>
|
||||
* An application that wishes to handle this rare case
|
||||
* properly must disambiguate this case externally, such
|
||||
* as by obtaining the original string length, and
|
||||
* appending the trailing '@' if the length
|
||||
* shows that there is one character missing.
|
||||
* <p>
|
||||
* Alternatively, the spec supports replacing the zero
|
||||
* padding in such a case with a CR character, which is
|
||||
* then removed by the receiver, but is harmless also on
|
||||
* devices that display it as-is since a CR is invisible.
|
||||
* This implementation has configurable support for CR padding.
|
||||
* <p>
|
||||
* However, this CR padding introduces a new ambiguity, with
|
||||
* a string that really does end with a CR character on an
|
||||
* 8-byte boundary, so in this case an extra CR is appended
|
||||
* to it, and due to the semantics of CR in the spec, a double
|
||||
* CR is equivalent to a single CR, so this is harmless as well.
|
||||
* <p>
|
||||
* The encoding and decoding are based on the mapping at
|
||||
* http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2007-03-20
|
||||
*/
|
||||
public class PackedGSMCharset extends GSMCharset {
|
||||
|
||||
static final int BUFFER_SIZE = 256;
|
||||
static final byte CR = 0x0D;
|
||||
|
||||
/**
|
||||
* Specifies whether to use CR padding instead of zero padding
|
||||
* when encoding/decoding in order to disambiguate the 7 padding
|
||||
* zero bits in strings whose length is 8*n-1 bytes from a
|
||||
* trailing '@' character in strings of length 8*n.
|
||||
*/
|
||||
final boolean padWithCR;
|
||||
|
||||
/**
 * Creates a packed charset with the given names, lookup tables
 * and padding mode.
 *
 * @param canonicalName the canonical name of this charset
 * @param aliases this charset's aliases, or null if it has none
 * @param byteToChar the byte-to-char conversion table
 * @param byteToCharEscaped the byte-to-char conversion table used for
 *        the escaped characters
 * @param charToByte the char-to-byte conversion table; it can be built
 *        by calling createInverseLookupTable(byteToChar)
 * @param charToByteEscaped the char-to-byte conversion table used for
 *        the escaped characters
 * @param padWithCR true to apply {@link PackedGSMCharset CR padding},
 *        false to use the original (but ambiguous) zero padding
 * @throws java.nio.charset.IllegalCharsetNameException
 *         if the canonical name or any of the aliases are illegal
 */
protected PackedGSMCharset(String canonicalName, String[] aliases,
        int[] byteToChar, int[] byteToCharEscaped,
        int[][] charToByte, int[][] charToByteEscaped,
        boolean padWithCR) {
    super(
        canonicalName,
        aliases,
        byteToChar,
        byteToCharEscaped,
        charToByte,
        charToByteEscaped);
    this.padWithCR = padWithCR;
}
|
||||
|
||||
/**
|
||||
* Constructs a new decoder for this charset.
|
||||
*
|
||||
* @return a new decoder for this charset
|
||||
*/
|
||||
@Override
|
||||
public CharsetDecoder newDecoder() {
|
||||
return new Decoder(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a new encoder for this charset.
|
||||
*
|
||||
* @return a new encoder for this charset
|
||||
*/
|
||||
@Override
|
||||
public CharsetEncoder newEncoder() {
|
||||
return new Encoder(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* The <b>Encoder</b> inner class handles the encoding of the
|
||||
* Packed GSM default encoding charset.
|
||||
*/
|
||||
protected class Encoder extends GSMCharset.Encoder {
|
||||
|
||||
int bitpos;
|
||||
byte current;
|
||||
ByteBuffer buf;
|
||||
|
||||
/**
|
||||
* Constructs an Encoder.
|
||||
*
|
||||
* @param charset the charset that created this encoder
|
||||
*/
|
||||
protected Encoder(Charset charset) {
|
||||
super(charset, 7 / 8f, 2f);
|
||||
buf = ByteBuffer.allocate(BUFFER_SIZE);
|
||||
implReset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this encoder, clearing any charset-specific internal state.
|
||||
*/
|
||||
@Override
|
||||
protected void implReset() {
|
||||
bitpos = 0;
|
||||
current = 0;
|
||||
buf.limit(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Flushes this encoder.
|
||||
*
|
||||
* @param out the output byte buffer
|
||||
*
|
||||
* @return a coder-result object, either {@link CoderResult#UNDERFLOW} or
|
||||
* {@link CoderResult#OVERFLOW}
|
||||
*/
|
||||
@Override
|
||||
protected CoderResult implFlush(ByteBuffer out) {
|
||||
// flush buffer
|
||||
CoderResult result = pack(buf, out);
|
||||
// handle CR padding if necessary
|
||||
if (padWithCR && bitpos <= 1) { // bitpos is 0 or 1
|
||||
if (bitpos == 1) {
|
||||
// if the output is 8*n-1 bytes long, the last byte has 7 padding zero
|
||||
// bits which may be ambiguously interpreted as an '@' character,
|
||||
// so in this case we replace the padding with a harmless CR
|
||||
current |= (CR << 1);
|
||||
} else if (out.position() > 0 && out.get(out.position() - 1) >>> 1 == CR) {
|
||||
// if the output is 8*n bytes long and really does end with a CR,
|
||||
// we need to disambiguate this from the CR padding,
|
||||
// so we add an extra CR (due to the spec's definition of CR,
|
||||
// this is equivalent to a single CR and thus also harmless)
|
||||
current = CR;
|
||||
bitpos = 7;
|
||||
}
|
||||
}
|
||||
// flush last (current) partial byte if it exists
|
||||
if (bitpos != 0) {
|
||||
if (!out.hasRemaining())
|
||||
return CoderResult.OVERFLOW;
|
||||
out.put(current); // write final leftover byte
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Encodes one or more characters into one or more bytes.
|
||||
*
|
||||
* @param in the input character buffer
|
||||
* @param out the output byte buffer
|
||||
* @return a coder-result object describing the reason for termination
|
||||
*/
|
||||
@Override
|
||||
protected CoderResult encodeLoop(CharBuffer in, ByteBuffer out) {
|
||||
CoderResult result;
|
||||
while (true) {
|
||||
// output buffered data
|
||||
if (buf.hasRemaining()) {
|
||||
result = pack(buf, out);
|
||||
if (result == CoderResult.OVERFLOW)
|
||||
return result;
|
||||
}
|
||||
// process new data into buffer
|
||||
buf.clear();
|
||||
result = super.encodeLoop(in, buf);
|
||||
buf.flip();
|
||||
// stop if out of input or error
|
||||
if (!buf.hasRemaining() || result.isError())
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Packs the given data into full bytes.
|
||||
*
|
||||
* @param in the input byte buffer
|
||||
* @param out the output byte buffer
|
||||
* @return a coder-result object, either {@link CoderResult#UNDERFLOW} or
|
||||
* {@link CoderResult#OVERFLOW}
|
||||
*/
|
||||
protected CoderResult pack(ByteBuffer in, ByteBuffer out) {
|
||||
int remaining = in.remaining();
|
||||
while (remaining-- > 0) {
|
||||
if (!out.hasRemaining())
|
||||
return CoderResult.OVERFLOW;
|
||||
byte b = (byte)(in.get() & 0x7F); // remove top bit
|
||||
// assign first group of partial bits
|
||||
current |= b << bitpos;
|
||||
// assign second group of partial bits (if exist)
|
||||
if (bitpos > 0) { // if packed byte is full
|
||||
out.put(current);
|
||||
current = (byte)(b >> (8 - bitpos)); // keep left-over bits (if any)
|
||||
}
|
||||
bitpos = (bitpos + 7) % 8;
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* The <b>Decoder</b> inner class handles the decoding of the
|
||||
* Packed GSM default encoding charset.
|
||||
*/
|
||||
protected class Decoder extends GSMCharset.Decoder {
|
||||
|
||||
int bitpos;
|
||||
byte current;
|
||||
byte prev;
|
||||
int unpackedCount;
|
||||
ByteBuffer buf;
|
||||
|
||||
/**
|
||||
* Constructs a Decoder.
|
||||
*
|
||||
* @param charset the charset that created this decoder
|
||||
*/
|
||||
protected Decoder(Charset charset) {
|
||||
super(charset, 8 / 7f, 2f);
|
||||
buf = ByteBuffer.allocate(BUFFER_SIZE);
|
||||
implReset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets this decoder, clearing any charset-specific internal state.
|
||||
*/
|
||||
@Override
|
||||
protected void implReset() {
|
||||
bitpos = 0;
|
||||
current = 0;
|
||||
prev = 0;
|
||||
unpackedCount = 0;
|
||||
buf.limit(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Flushes this decoder.
|
||||
*
|
||||
* @param out the output character buffer
|
||||
*
|
||||
* @return a coder-result object, either {@link CoderResult#UNDERFLOW} or
|
||||
* {@link CoderResult#OVERFLOW}
|
||||
*/
|
||||
@Override
|
||||
protected CoderResult implFlush(CharBuffer out) {
|
||||
// fix output edge cases caused by ambiguous padding,
|
||||
// depending on the CR padding configuration:
|
||||
// either remove a trailing '@' character if the string length is 8*n,
|
||||
// or remove a trailing CR character if the string length is 8*n
|
||||
// or if the string length is 8*n+1 and it ends with two CR characters
|
||||
int mod = unpackedCount % 8;
|
||||
if (mod <= 1) { // mod is 0 or 1
|
||||
int pos = out.position() - 1;
|
||||
if (pos > 0) {
|
||||
char c = out.get(pos);
|
||||
if (c == '@' && !padWithCR && mod == 0 ||
|
||||
c == CR && padWithCR && (mod == 0 || out.get(pos - 1) == CR))
|
||||
out.position(pos); // remove last character
|
||||
}
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decodes one or more bytes into one or more characters.
|
||||
*
|
||||
* @param in the input byte buffer
|
||||
* @param out the output character buffer
|
||||
* @return a coder-result object describing the reason for termination
|
||||
*/
|
||||
@Override
|
||||
protected CoderResult decodeLoop(ByteBuffer in, CharBuffer out) {
|
||||
while (true) {
|
||||
// unpack input data into buffer
|
||||
unpackedCount -= buf.remaining(); // it will be counted again after unpack
|
||||
buf.compact(); // move data to beginning and prepare to write more
|
||||
CoderResult unpackResult = unpack(in, buf);
|
||||
buf.flip(); // prepare to read
|
||||
if (!buf.hasRemaining())
|
||||
return unpackResult; // underflow
|
||||
unpackedCount += buf.remaining();
|
||||
// decode buffered unpacked data to output
|
||||
CoderResult decodeResult = super.decodeLoop(buf, out);
|
||||
// handle out of output space and buffer still has data in it
|
||||
if (buf.hasRemaining() || decodeResult.isError()) {
|
||||
if (decodeResult.isUnderflow()) { // last byte is escape byte
|
||||
// if there's more input or at least another unpacked byte
|
||||
// (the 8th doesn't require reading from input), just continue
|
||||
if (in.hasRemaining() || unpackResult.isOverflow())
|
||||
continue;
|
||||
// otherwise we really need more input, so undo the last byte
|
||||
// (escape sequence which was cut in middle) so caller can
|
||||
// properly handle malformed input if there is no more input
|
||||
in.position(in.position() - 1); // unread the byte
|
||||
bitpos = (bitpos + 9) % 8; // undo its unpacking too
|
||||
current = prev;
|
||||
buf.limit(buf.position());
|
||||
unpackedCount--;
|
||||
}
|
||||
return decodeResult;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpacks the given data into original bytes.
|
||||
*
|
||||
* @param in the input byte buffer
|
||||
* @param out the output byte buffer
|
||||
* @return a coder-result object, either {@link CoderResult#UNDERFLOW} or
|
||||
* {@link CoderResult#OVERFLOW}
|
||||
*/
|
||||
protected CoderResult unpack(ByteBuffer in, ByteBuffer out) {
|
||||
int remaining = out.remaining();
|
||||
while (remaining-- > 0) {
|
||||
if (!in.hasRemaining() && bitpos != 1)
|
||||
return CoderResult.UNDERFLOW;
|
||||
if (bitpos == 0) {
|
||||
prev = current;
|
||||
current = in.get();
|
||||
}
|
||||
// remove top bit and assign first group of partial bits
|
||||
byte b = (byte)(((current & 0xFF) >> bitpos) & 0x7F);
|
||||
// remove top bit and assign second group of partial bits (if exist)
|
||||
if (bitpos >= 2) {
|
||||
prev = current;
|
||||
current = in.get();
|
||||
b |= (byte)((current << (8 - bitpos)) & 0x7F);
|
||||
}
|
||||
bitpos = (bitpos + 7) % 8;
|
||||
out.put(b);
|
||||
}
|
||||
return CoderResult.OVERFLOW;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unpacks the given data into original bytes.
|
||||
* <p>
|
||||
* This is an external utility method and is not used
|
||||
* internally by the Charset implementation.
|
||||
*
|
||||
* @param in the input bytes
|
||||
* @return the unpacked output bytes
|
||||
*/
|
||||
public static byte[] unpack(byte[] in) {
|
||||
byte[] out = new byte[(in.length * 8) / 7];
|
||||
int len = out.length;
|
||||
int current = 0;
|
||||
int bitpos = 0;
|
||||
for (int i = 0; i < len; i++) {
|
||||
// remove top bit and assign first group of partial bits
|
||||
out[i] = (byte)(((in[current] & 0xFF) >> bitpos) & 0x7F);
|
||||
// remove top bit and assign second group of partial bits (if exist)
|
||||
if (bitpos > 1)
|
||||
out[i] |= (byte)((in[++current] << (8 - bitpos)) & 0x7F);
|
||||
else if (bitpos == 1)
|
||||
current++;
|
||||
bitpos = (bitpos + 7) % 8;
|
||||
}
|
||||
// this fixes an ambiguity bug in the specs
|
||||
// where the last of 8 packed bytes is 0
|
||||
// and it's impossible to distinguish whether it is a
|
||||
// trailing '@' character (which is mapped to 0)
|
||||
// or extra zero-bit padding for 7 actual data bytes.
|
||||
//
|
||||
// we opt for the latter, since it's far more likely,
|
||||
// at the cost of losing a trailing '@' character
|
||||
// in strings whose unpacked size modulo 8 is 0,
|
||||
// and whose last character is '@'.
|
||||
//
|
||||
// an application that wishes to handle this rare case
|
||||
// properly must disambiguate this case externally, such
|
||||
// as by obtaining the original string length, and
|
||||
// appending the trailing '@' if the length
|
||||
// shows that there is one character missing.
|
||||
if (len % 8 == 0 && len > 0 && out[len - 1] == 0) {
|
||||
byte[] fixed = new byte[len - 1];
|
||||
System.arraycopy(out, 0, fixed, 0, len - 1);
|
||||
out = fixed;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
|
||||
* Packs the given data into full bytes.
|
||||
* <p>
|
||||
* This is an external utility method and is not used
|
||||
* internally by the Charset implementation.
|
||||
*
|
||||
* @param in the input bytes
|
||||
* @return the packed output bytes
|
||||
*/
|
||||
public static byte[] pack(byte[] in) {
|
||||
byte[] out = new byte[(int)Math.ceil((in.length * 7) / 8f)];
|
||||
int current = 0;
|
||||
int bitpos = 0;
|
||||
for (byte b : in) {
|
||||
b &= 0x7F; // remove top bit
|
||||
// assign first group of partial bits
|
||||
out[current] |= b << bitpos;
|
||||
// assign second group of partial bits (if exist)
|
||||
if (bitpos > 1)
|
||||
out[++current] |= b >> 8 - bitpos;
|
||||
else if (bitpos == 1) // packed byte is full (but no left-over bits)
|
||||
current++;
|
||||
bitpos = (bitpos + 7) % 8;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.gsm;
|
||||
|
||||
/**
|
||||
* The <b>SCGSMCharset</b> class handles the encoding and decoding of the
|
||||
* GSM default encoding charset. In this variant, byte 0x09 is mapped
|
||||
* to the LATIN SMALL LETTER C WITH CEDILLA character.
|
||||
* <p>
|
||||
* The encoding and decoding are based on the mapping at
|
||||
* http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2007-03-26
|
||||
*/
|
||||
public class SCGSMCharset extends GSMCharset {
|
||||
|
||||
static final String NAME = "SCGSM";
|
||||
|
||||
static final String[] ALIASES = {
|
||||
"GSM-DEFAULT-ALPHABET", "GSM_0338", "GSM_DEFAULT", "GSM7", "GSM-7BIT" };
|
||||
|
||||
/**
|
||||
* Constructs an instance of the SCGSMCharset.
|
||||
*/
|
||||
public SCGSMCharset() {
|
||||
super(NAME, ALIASES,
|
||||
BYTE_TO_CHAR_SMALL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT,
|
||||
CHAR_TO_BYTE_SMALL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.gsm;
|
||||
|
||||
/**
|
||||
* The <b>SCPackedGSMCharset</b> class handles the encoding and decoding of the
|
||||
* GSM default encoding charset. In this variant, byte 0x09 is mapped
|
||||
* to the LATIN SMALL LETTER C WITH CEDILLA character.
|
||||
* <p>
|
||||
* The encoding and decoding are based on the mapping at
|
||||
* http://www.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2007-03-26
|
||||
*/
|
||||
public class SCPackedGSMCharset extends PackedGSMCharset {
|
||||
|
||||
static final String NAME = "SCPGSM";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
/**
|
||||
* Constructs an instance of the SCPackedGSMCharset.
|
||||
*/
|
||||
public SCPackedGSMCharset() {
|
||||
super(NAME, ALIASES,
|
||||
BYTE_TO_CHAR_SMALL_C_CEDILLA, BYTE_TO_CHAR_ESCAPED_DEFAULT,
|
||||
CHAR_TO_BYTE_SMALL_C_CEDILLA, CHAR_TO_BYTE_ESCAPED_DEFAULT,
|
||||
false);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646CA2Charset</b> class handles the encoding and decoding of the
|
||||
* ISO646-CA2 national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646CA2Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-CA2";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-122" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E },
|
||||
new int[] { 0xE0, 0xE2, 0xE7, 0xEA, 0xC9, 0xF4, 0xE9, 0xF9, 0xE8, 0xFB });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646CA2Charset.
|
||||
*/
|
||||
public ISO646CA2Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646CACharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-CA national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646CACharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-CA";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-121" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E },
|
||||
new int[] { 0xE0, 0xE2, 0xE7, 0xEA, 0xEE, 0xF4, 0xE9, 0xF9, 0xE8, 0xFB });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646CACharset.
|
||||
*/
|
||||
public ISO646CACharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646CHCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-CH national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646CHCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-CH";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F, 0x60, 0x7B, 0x7C, 0x7D, 0x7E },
|
||||
new int[] { 0xF9, 0xE0, 0xE9, 0xE7, 0xEA, 0xEE, 0xE8, 0xF4, 0xE4, 0xF6, 0xFC, 0xFB });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646CHCharset.
|
||||
*/
|
||||
public ISO646CHCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646CNCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-CN national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646CNCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-CN";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-57" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x24, 0x7E },
|
||||
new int[] { 0xA5, 0xAF });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646CNCharset.
|
||||
*/
|
||||
public ISO646CNCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646CUCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-CU national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646CUCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-CU";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-151" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x24, 0x5B, 0x5C, 0x5E, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA4, 0xA1, 0xD1, 0xBF, 0xB4, 0xF1, 0x5B, 0xA8 }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646CUCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646CUCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646DECharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-DE national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646DECharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-DE";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-21", "DIN_66003" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA7, 0xC4, 0xD6, 0xDC, 0xE4, 0xF6, 0xFC, 0xDF }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646DECharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646DECharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646DKCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-DK national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646DKCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-DK";
|
||||
|
||||
static final String[] ALIASES = {}; // no aliases are declared for this charset
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xC6, 0xD8, 0xC5, 0xE6, 0xF8, 0xE5 }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646DKCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646DKCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646ES2Charset</b> class handles the encoding and decoding of the
|
||||
* ISO646-ES2 national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646ES2Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-ES2";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-85" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0x2022, 0xA1, 0xD1, 0xC7, 0xBF, 0xB4, 0xF1, 0xE7, 0xA8 }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646ES2Charset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646ES2Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646ESCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-ES national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646ESCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-ES";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-17" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA3, 0xA7, 0xA1, 0xD1, 0xBF, 0xB0, 0xF1, 0xE7 }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646ESCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646ESCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646FISECharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-FI/ISO646-SE national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646FISECharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-FI";
|
||||
|
||||
static final String[] ALIASES = { "ISO646-SE", "ISO-IR-10" }; // the SE variant is exposed as an alias of the FI name
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x24, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA4, 0xC4, 0xD6, 0xC5, 0xE4, 0xF6, 0xE5, 0xAF }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646FISECharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646FISECharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646FR1Charset</b> class handles the encoding and decoding of the
|
||||
* ISO646-FR1 national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646FR1Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-FR1";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-25" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA3, 0xE0, 0xB0, 0xE7, 0xA7, 0xE9, 0xF9, 0xE8, 0xA8 }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646FR1Charset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646FR1Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646FRCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-FR national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646FRCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-FR";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-69" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA3, 0xE0, 0xB0, 0xE7, 0xA7, 0xB5, 0xE9, 0xF9, 0xE8, 0xA8 }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646FRCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646FRCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646GBCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-GB national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646GBCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-GB";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-4" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x23, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA3, 0xAF }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646GBCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646GBCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646HUCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-HU national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646HUCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-HU";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-86" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x24, 0x40, 0x5B, 0x5C, 0x5D, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA4, 0xC1, 0xC9, 0xD6, 0xDC, 0xE1, 0xE9, 0xF6, 0xFC, 0x02DD }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646HUCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646HUCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646IECharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-IE national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646IECharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-IE";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-207" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA3, 0xD3, 0xC9, 0xCD, 0xDA, 0xC1, 0xF3, 0xE9, 0xED, 0xFA, 0xE1 }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646IECharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646IECharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646INVCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-INV national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646INVCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-INV";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-170" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x23, 0x24, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }); // every entry is -1, presumably marking these bytes as unmapped - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646INVCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646INVCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646IRVCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-IRV national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646IRVCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-IRV";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-2", "ISO_646.IRV:1983" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x24 }, // presumably the byte value to override - confirm in mutate
|
||||
new int[] { 0xA4 }); // presumably its replacement code point - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646IRVCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646IRVCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646ISCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-IS national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646ISCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-IS";
|
||||
|
||||
static final String[] ALIASES = {}; // no aliases are declared for this charset
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x40, 0x5B, 0x5D, 0x5E, 0x60, 0x7B, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xD0, 0xDE, 0xC6, 0xD6, 0xF0, 0xFE, 0xE6, 0xF6 }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646ISCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646ISCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646ITCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-IT national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646ITCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-IT";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-15" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x23, 0x40, 0x5B, 0x5C, 0x5D, 0x60, 0x7B, 0x7C, 0x7D, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0xA3, 0xA7, 0xB0, 0xE7, 0xE9, 0xF9, 0xE0, 0xF2, 0xE8, 0xEC }); // presumably their replacement code points, pairwise - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646ITCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646ITCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646JAOCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-JAO national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646JAOCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-JAO";
|
||||
|
||||
static final String[] ALIASES = { "ISO646-JP-OCR-B", "ISO-IR-92" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR, // ISO646-US table is the input to mutate
|
||||
new int[] { 0x5B, 0x5C, 0x5D, 0x60, 0x7E }, // presumably the byte values to override - confirm in mutate
|
||||
new int[] { 0x2329, 0xA5, 0x232A, -1, -1 }); // presumably their replacement code points, pairwise; -1 presumably marks a byte as unmapped - confirm
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR); // reverse table built from BYTE_TO_CHAR
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646JAOCharset from its NAME, ALIASES and the two lookup tables.
|
||||
*/
|
||||
public ISO646JAOCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646JPCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-JP national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646JPCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-JP";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-14" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x5C, 0x7E },
|
||||
new int[] { 0xA5, 0xAF });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646JPCharset.
|
||||
*/
|
||||
public ISO646JPCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646KRCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-KR national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646KRCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-KR";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x5C },
|
||||
new int[] { 0x20A9 });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646KRCharset.
|
||||
*/
|
||||
public ISO646KRCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646MTCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-MT national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646MTCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-MT";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x5B, 0x5C, 0x5D, 0x60, 0x7B, 0x7C, 0x7D, 0x7E },
|
||||
new int[] { 0x0121, 0x017C, 0x0127, 0x010B, 0x0120, 0x017B, 0x0126, 0x010A });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646MTCharset.
|
||||
*/
|
||||
public ISO646MTCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646NO2Charset</b> class handles the encoding and decoding of the
|
||||
* ISO646-NO2 national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646NO2Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-NO2";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-61" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x23, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E },
|
||||
new int[] { 0xA7, 0xC6, 0xD8, 0xC5, 0xE6, 0xF8, 0xE5, 0x7C });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646NO2Charset.
|
||||
*/
|
||||
public ISO646NO2Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646NOCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-NO national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646NOCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-NO";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-60" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E },
|
||||
new int[] { 0xC6, 0xD8, 0xC5, 0xE6, 0xF8, 0xE5, 0xAF });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646NOCharset.
|
||||
*/
|
||||
public ISO646NOCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646PT2Charset</b> class handles the encoding and decoding of the
|
||||
* ISO646-PT2 national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646PT2Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-PT2";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-84" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D },
|
||||
new int[] { 0xB4, 0xC3, 0xC7, 0xD5, 0xE3, 0xE7, 0xF5 });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646PT2Charset.
|
||||
*/
|
||||
public ISO646PT2Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646PTCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-PT national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646PTCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-PT";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-16" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x7B, 0x7C, 0x7D, 0x7E },
|
||||
new int[] { 0xA7, 0xC3, 0xC7, 0xD5, 0xE3, 0xE7, 0xF5, 0xB0 });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646PTCharset.
|
||||
*/
|
||||
public ISO646PTCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646SE2Charset</b> class handles the encoding and decoding of the
|
||||
* ISO646-SE2 national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646SE2Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-SE2";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-11" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x24, 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E },
|
||||
new int[] { 0xA4, 0xC9, 0xC4, 0xD6, 0xC5, 0xDC, 0xE9, 0xE4, 0xF6, 0xE5, 0xFC });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646SE2Charset.
|
||||
*/
|
||||
public ISO646SE2Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646T61Charset</b> class handles the encoding and decoding of the
|
||||
* ISO646-T61 national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646T61Charset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-T61";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-102" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x24, 0x5C, 0x5E, 0x60, 0x7B, 0x7D, 0x7E },
|
||||
new int[] { 0xA4, -1, -1, -1, -1, -1, -1 });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646T61Charset.
|
||||
*/
|
||||
public ISO646T61Charset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646TWCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-TW national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646TWCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-TW";
|
||||
|
||||
static final String[] ALIASES = {};
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x7E },
|
||||
new int[] { 0xAF });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646TWCharset.
|
||||
*/
|
||||
public ISO646TWCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646USCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-US national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646USCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-US";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-6", "ISO_646.irv:1991" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR;
|
||||
|
||||
static {
|
||||
BYTE_TO_CHAR = createTable();
|
||||
for (int i = 0; i < 128; i++)
|
||||
BYTE_TO_CHAR[i] = i;
|
||||
}
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646USCharset.
|
||||
*/
|
||||
public ISO646USCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright © 2005-2019 Amichai Rothman
|
||||
*
|
||||
* This file is part of JCharset - the Java Charset package.
|
||||
*
|
||||
* JCharset is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* JCharset is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with JCharset. If not, see <http://www.gnu.org/licenses/>.
|
||||
*
|
||||
* For additional info see http://www.freeutils.net/source/jcharset/
|
||||
*/
|
||||
|
||||
package net.freeutils.charset.iso646;
|
||||
|
||||
import net.freeutils.charset.ByteLookupCharset;
|
||||
|
||||
/**
|
||||
* The <b>ISO646YUCharset</b> class handles the encoding and decoding of the
|
||||
* ISO646-YU national variant of the ISO/IEC 646 charset.
|
||||
*
|
||||
* @author Amichai Rothman
|
||||
* @since 2015-08-18
|
||||
*/
|
||||
public class ISO646YUCharset extends ByteLookupCharset {
|
||||
|
||||
static final String NAME = "ISO646-YU";
|
||||
|
||||
static final String[] ALIASES = { "ISO-IR-141" };
|
||||
|
||||
static final int[] BYTE_TO_CHAR = mutate(ISO646USCharset.BYTE_TO_CHAR,
|
||||
new int[] { 0x40, 0x5B, 0x5C, 0x5D, 0x5E, 0x60, 0x7B, 0x7C, 0x7D, 0x7E },
|
||||
new int[] { 0x017D, 0x0160, 0x0110, 0x0106, 0x010C, 0x017E, 0x0161, 0x0111, 0x0107, 0x010D });
|
||||
|
||||
static final int[][] CHAR_TO_BYTE = createInverseLookupTable(BYTE_TO_CHAR);
|
||||
|
||||
/**
|
||||
* Constructs an instance of the ISO646YUCharset.
|
||||
*/
|
||||
public ISO646YUCharset() {
|
||||
super(NAME, ALIASES, BYTE_TO_CHAR, CHAR_TO_BYTE);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
net.freeutils.charset.CharsetProvider
|
||||
@@ -4,6 +4,5 @@ import java.util.ArrayList;
|
||||
|
||||
/**
 * Holder class declaring a single package-private list of strings.
 */
public class Testfile {
|
||||
// Package-private list of strings; not initialized at its declaration.
ArrayList<String> lsTestfile;
|
||||
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user