/*

                        Copyright (c) 1994 by
                        Advanced Visual Systems Inc.
                        All Rights Reserved

        This software comprises unpublished confidential information of
        Advanced Visual Systems Inc. and may not be used, copied or made
        available to anyone, except in accordance with the license
        under which it is furnished.

        This file is under Perforce control
        $Id: //depot/express/fcs70/include/avs/kanji.h#1 $
*/

/*---------------------------------------------------------------------*
 *
 * kanji.h - kanji utilities header file
 *
 *---------------------------------------------------------------------*/

#ifndef XP_KANJI_INCLUDED
#define XP_KANJI_INCLUDED

/*---------------------------------------------------------------------*/

#include <avs/port.h>

/*---------------------------------------------------------------------*/
/*                          Encoding types                             */
/*---------------------------------------------------------------------*/

/*
 * Encodings handled by the kanji utilities; this is the type
 * returned by KANJIencoding().
 */
typedef enum
{
  KANJI_UNDEF     = 0,   /* encoding undefined      */
  KANJI_EUC       = 1,   /* EUC                     */
  KANJI_SHIFT_JIS = 2,   /* Shift-JIS               */
  KANJI_JIS       = 3    /* JIS (escape sequences)  */
} KanjiEncoding;

/*---------------------------------------------------------------------*/
/*                     JIS escape characters                           */
/*								       */
/* tri-code syntax:   <ESC_CODE>  <'$'or'('>  <charset code>           */
/* quad-code syntax:  <ESC_CODE>    $    (    <charset code>           */
/*---------------------------------------------------------------------*/

/* first byte of every JIS escape sequence (ASCII ESC) */
#define ESC_CODE    0x1b

/*
 * The JIS_* values below are the <charset code> byte, i.e. the final
 * byte of an escape sequence.  Several values are shared between
 * one-byte and two-byte sets (e.g. 'B'); presumably the preceding
 * '$' or '(' byte tells them apart -- confirm against the decoder.
 */

/*-- input codes recognised --*/

/* two-byte character escapes */
#define JIS_6226     '@'     /* JIS C 6226 1978 Old-JIS */
#define JIS_0208     'B'     /* JIS X 0208 1983 New-JIS */
#define JIS_0212     'D'     /* JIS X 0212-1990 extended kanji (quad-code) */

/* one-byte character escapes */
#define JIS_ASCII    'B'     /* ASCII     */
#define JIS_JISR     'J'     /* JIS-Roman */
#define JIS_JISR2    'H'     /* JIS-Roman (alternative code) */

/*-- non-Japanese input codes --*/

/* one-byte character escapes */
#define JIS_GBR      'T'     /* GB-Roman */

/* two-byte character escapes (quad-codes) */
#define JIS_KS5601   'C'     /* Korea KS C 5601-1992 kanji */
#define JIS_GB2312   'A'     /* China GB 2312-80 kanji     */

/*-- input codes not recognised --*/

/* half-width katakana (HWK) */
#define JIS_HWK      'I'     /* 1-byte HWK escape   */
#define JIS_JIS8_IN  0x0f    /* a sub-mode of HWK   */
#define JIS_JIS8_OUT 0x0e    /* return to basic HWK */

/*---------------------------------------------------------------------*/
/*                      EUC 'escape' characters                        */
/*---------------------------------------------------------------------*/

/*
 * Byte values that mark special EUC characters.  CHAR_TYPE() tests the
 * first byte of a character against EUC_EXT to report CHAR_3BYTE.
 */
#define EUC_HWK   0x8e       /* half-width katakana      */
#define EUC_EXT   0x8f       /* 0212-1990 extended kanji */

/*---------------------------------------------------------------------*/
/*                          Character types                            */
/*---------------------------------------------------------------------*/

/*
 * Kinds of character (or escape sequence) found in a kanji string.
 * The values index an array, so they must start at 0 and stay
 * contiguous.
 */

typedef enum
{
  CHAR_1BYTE = 0,  /* ASCII, JIS-Roman, GB-Roman, SJIS/JIS hw katakana */
  CHAR_2BYTE = 1,  /* all 0208-1990, EUC hw katakana, JIS 0212-1990    */
  CHAR_3BYTE = 2,  /* EUC encoding of JIS X 0212-1990                  */
  CHAR_3ESC  = 3,  /* JIS 3-byte escapes for all ASCII and Japanese    */
  CHAR_4ESC  = 4   /* JIS 4-byte escapes for 0212-90, Korean, Chinese  */
} CharType;

/*
 * CHAR_TYPE(c) - classify the character that starts at byte pointer c.
 *
 *   c : unsigned char * addressing the first byte of the character
 *
 * Evaluates to one of the CharType values:
 *
 *   first byte is ESC_CODE  ->  CHAR_4ESC if the third byte is '(',
 *                               otherwise CHAR_3ESC
 *   first byte is EUC_EXT   ->  CHAR_3BYTE
 *   first byte has bit 0x80 ->  CHAR_2BYTE
 *   anything else           ->  CHAR_1BYTE
 *
 * This is a simple test on the first byte for selecting between valid
 * codes.  It ignores 1-byte and 2-byte half-width katakana, and a
 * random escape byte will probably return CHAR_3ESC.
 *
 * The argument is fully parenthesised, so any pointer expression may be
 * passed, but it is evaluated more than once and must therefore be free
 * of side effects.  When the first byte is ESC_CODE the byte at c+2 is
 * read as well, so it must be readable.
 */

/* unsigned char *c */
#define CHAR_TYPE(c)  (*(c) == ESC_CODE ? \
                        (*((c) + 2) == '(' ? CHAR_4ESC : \
                                             CHAR_3ESC ) \
                        : \
                        (*(c) == EUC_EXT ? CHAR_3BYTE : \
                            (*(c) & 0x80 ? CHAR_2BYTE : \
                                           CHAR_1BYTE )))

/*---------------------------------------------------------------------*/
/*                         Character modes                             */
/*---------------------------------------------------------------------*/

/* Character-width mode: one, two or three bytes per character. */
typedef enum
{
  MODE_1BYTE = 0,   /* ASCII Roman etc.     */
  MODE_2BYTE = 1,   /* general kana & kanji */
  MODE_3BYTE = 2    /* EUC JIS X 0212-1990  */
} CharMode;

/*---------------------------------------------------------------------*/
/*                          Dump formats                               */
/*---------------------------------------------------------------------*/

/*
 * Output formats for dumping a two-byte character; PRINT_2BYTE()
 * switches on these values.
 */
typedef enum
{
  DUMP_DECIMAL = 0,   /* both bytes in decimal             */
  DUMP_HEX     = 1,   /* both bytes in hexadecimal         */
  DUMP_OCTAL   = 2,   /* both bytes as octal escapes       */
  DUMP_KUTEN   = 3,   /* both bytes in decimal, minus 0xa0 */
  DUMP_ASCII   = 4    /* both bytes as plain characters    */
} DumpFormat;

/*
 * Dump macros - print one character to stdout with printf().
 *
 *   c, C : unsigned char * lvalue addressing the current character
 *   f    : DumpFormat selecting how a two-byte character is printed
 *
 * PRINT_DECIMAL/HEX/OCTAL/ASCII/KUTEN print the two bytes at c and leave
 * the pointer alone.  Their expansion keeps its trailing ';' so that
 * existing callers written without one still compile.
 *
 * PRINT_1BYTE, PRINT_2BYTE, PRINT_3BYTE, PRINT_3ESC and PRINT_4ESC print
 * and then advance the pointer past what they consumed (1, 2, 3, 3 and 4
 * bytes respectively).  Each expands to a single brace-enclosed block so
 * the pointer advance stays with the print when the macro is the body of
 * an unbraced 'if' or loop.
 *
 * All arguments are parenthesised in the expansions, but they are
 * evaluated more than once, so they must not have side effects.
 */
#define PRINT_DECIMAL(c)  printf( "%u.%u ",        *(c), *((c)+1) );
#define PRINT_HEX(c)      printf( "0x%02x%02x ",   *(c), *((c)+1) );
#define PRINT_OCTAL(c)    printf( "\\%03o\\%03o ", *(c), *((c)+1) );
#define PRINT_ASCII(C)    printf( "%c%c ",         *(C), *((C)+1) );
/* kuten = byte value minus 0xa0 */
#define PRINT_KUTEN(c)    printf( "%u.%u ", *(c)-0xa0, *((c)+1)-0xa0 );

#define PRINT_1BYTE(C)    { printf( "%c ", *(C) ); (C)++; }

/* an unknown format prints nothing but still skips the two bytes */
#define PRINT_2BYTE(c,f) { \
   switch(f) { \
     case DUMP_DECIMAL: PRINT_DECIMAL(c); break; \
     case DUMP_HEX:     PRINT_HEX(c);     break; \
     case DUMP_OCTAL:   PRINT_OCTAL(c);   break; \
     case DUMP_KUTEN:   PRINT_KUTEN(c);   break; \
     case DUMP_ASCII:   PRINT_ASCII(c);   break; \
     default:                             break; } \
   (c) += 2; }

/* skip the first byte, then dump the remaining two in format f */
#define PRINT_3BYTE(c,f) { printf( "<ext>" ); (c)++; \
                           PRINT_2BYTE(c,f); }

/* skip the escape byte, then show the two code bytes as characters */
#define PRINT_3ESC(c) { printf( "<ESC>" ); (c)++; \
                        PRINT_2BYTE(c,DUMP_ASCII); }

/* skip the escape byte and the '$', then show the last two bytes */
#define PRINT_4ESC(c) { printf( "<ESC>$" ); (c) += 2; \
                        PRINT_2BYTE(c,DUMP_ASCII); }

/*---------------------------------------------------------------------*/
/*                    Encoding tests for kanji bytes                   */
/*---------------------------------------------------------------------*/

/*
 * JIS_P(c) - true when byte c lies strictly between 0x20 and 0x7f.
 * Half-width katakana and the JIS8 modality are ignored.
 * c is evaluated twice.
 */
#define JIS_P(c)   ((c) > 0x20 && (c) < 0x7f)

/*
 * EUC_P(c) - true when byte c lies strictly between 0xa0 and 0xff.
 * Not covered: half-width katakana, 0212 extended kanji, the boundary
 * codes 0xa0 and 0xff, and the extra trail byte block 0x40-0x7e for
 * Big Five.
 * c is evaluated twice.
 */
#define EUC_P(c)   ((c) > 0xa0 && (c) < 0xff)

/*
 * SJIS_P(c) - true when byte c is in 0x80-0x9f or 0xe0-0xef (inclusive).
 * There is a slight ambiguity about 0x80, 0xa0 and 0xe0, which is
 * properly resolved by considering lead and trail bytes separately.
 * c is evaluated up to four times.
 */
#define SJIS_P(c)  (((c) >= 0x80 && (c) <= 0x9f) || \
                    ((c) >= 0xe0 && (c) <= 0xef))

/*---------------------------------------------------------------------*/
/*                       Character conversion macros                   */
/*---------------------------------------------------------------------*/

/*
 * SJIS_TO_EUC(i1, i2, c1, c2) - convert one two-byte Shift-JIS
 * character to EUC.
 *
 *   i1, i2 : Shift-JIS lead and trail byte values (input)
 *   c1, c2 : lvalues receiving the EUC first and second byte
 *
 * A trail byte of 0x9f or above selects the even row of the pair
 * encoded by the lead byte; lower trail bytes select the odd row.
 * Lead bytes from 0xe0 upwards use a different row offset than the
 * 0x80-0x9f range.
 *
 * Every argument is parenthesised, so expressions may be passed.  The
 * inputs are read more than once and must be free of side effects;
 * c1 and c2 are each assigned exactly once, c1 first.  Converting in
 * place (c1 the same object as i1, c2 the same as i2) is safe because
 * each input is tested before its output is written.
 *
 * The expansion is a brace-enclosed block, as it always was, so
 * callers that do not write a trailing ';' keep compiling.
 */
#define SJIS_TO_EUC(i1, i2, c1, c2) \
{ \
  if ((i2) >= 0x9f) { \
    if ((i1) >= 0xe0) (c1) = (i1)*2 - 0xe0; \
    else              (c1) = (i1)*2 - 0x60; \
    (c2) = (i2) + 2; \
  } else { \
    if ((i1) >= 0xe0) (c1) = (i1)*2 - 0xe1; \
    else              (c1) = (i1)*2 - 0x61; \
    if ((i2) >= 0x7f) (c2) = (i2) + 0x60; \
    else              (c2) = (i2) + 0x61; \
  } \
}

/*
 * EUC_TO_SJIS(i1, i2, c1, c2) - convert one two-byte EUC character to
 * Shift-JIS.
 *
 *   i1, i2 : EUC first and second byte values (input)
 *   c1, c2 : lvalues receiving the Shift-JIS lead and trail byte
 *
 * Two consecutive EUC rows share one Shift-JIS lead byte: an odd first
 * byte maps to the low trail range, an even one to the high trail
 * range.  First bytes from 0xdf upwards use a different lead offset.
 *
 * Every argument is parenthesised, so expressions may be passed.  The
 * inputs are read more than once and must be free of side effects;
 * c1 and c2 are each assigned exactly once, c1 first.
 *
 * The expansion is a brace-enclosed block, as it always was, so
 * callers that do not write a trailing ';' keep compiling.
 */
#define EUC_TO_SJIS(i1, i2, c1, c2) \
{ \
  if ((i1) & 1) { \
    if ((i1) < 0xdf)  (c1) = (i1)/2 + 0x31; \
    else              (c1) = (i1)/2 + 0x71; \
    if ((i2) >= 0xe0) (c2) = (i2) - 0x60; \
    else              (c2) = (i2) - 0x61; \
  } else { \
    if ((i1) < 0xdf)  (c1) = (i1)/2 + 0x30; \
    else              (c1) = (i1)/2 + 0x70; \
    (c2) = (i2) - 2; \
  } \
}

/*
 * An EUC byte and the corresponding raw JIS byte differ by a fixed
 * offset; these two macros move a single byte value in either direction.
 */
#define EUC_JIS_DELTA     0x80

#define EUC_TO_RAW_JIS(c) ((c) - EUC_JIS_DELTA)

#define RAW_JIS_TO_EUC(c) ((c) + EUC_JIS_DELTA)

/*---------------------------------------------------------------------*/
/*                          X font test macro                          */
/*---------------------------------------------------------------------*/

#ifndef MSDOS
/*
 * XFONT_IS_16BIT(pFont) - non-zero when the font's max_byte1 field is
 * greater than zero.
 *
 *   pFont : XFontStruct *
 */
#define XFONT_IS_16BIT(pFont)    (0 < (pFont)->max_byte1)
#endif

/*---------------------------------------------------------------------*/
/*                          Function prototypes                        */
/*---------------------------------------------------------------------*/

#ifdef __cplusplus
extern "C" {
#endif

/*
 * NOTE(review): only the declarations are visible here.  The meaning of
 * the int results and the ownership of the char * returned by
 * KANJIto_euc() must be confirmed against the implementation.
 */

/* Find the encoding of a string. */
KanjiEncoding KANJIencoding(const char *str);

/*
 * Code conversion between encodings.  The first argument is read-only;
 * the second is writable and presumably receives the converted text.
 */
int KANJIeuc_to_jis(const unsigned char *src, unsigned char *dst);
int KANJIjis_to_euc(const unsigned char *src, unsigned char *dst);
int KANJIsjis_to_euc(const unsigned char *src, unsigned char *dst);
int KANJIeuc_to_sjis(const unsigned char *src, unsigned char *dst);
char *KANJIto_euc(const char *str);

/* Dump a kanji string to an ASCII terminal in the given format. */
int KANJIprint(const char *str, DumpFormat format);

#ifdef __cplusplus
}
#endif

/*---------------------------------------------------------------------*/

#endif /* XP_KANJI_INCLUDED */
