src/cmd/auto_ef/auto_ef_util.c
author jenda
Mon, 28 Nov 2011 09:40:57 +0100
changeset 636 e4ac77666de3
parent 0 542988ea726d
permissions -rw-r--r--
7115607 cmd/auto_ef compilation fails on b175a - wrong iconv args (backport of s11u1:fb701ba7adfd)
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
     1
/*
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
     2
 * CDDL HEADER START
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
     3
 *
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
     4
 * The contents of this file are subject to the terms of the
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
     5
 * Common Development and Distribution License (the "License").  
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
     6
 * You may not use this file except in compliance with the License.
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
     7
 *
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
     8
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
     9
 * or http://www.opensolaris.org/os/licensing.
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    10
 * See the License for the specific language governing permissions
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    11
 * and limitations under the License.
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    12
 *
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    13
 * When distributing Covered Code, include this CDDL HEADER in each
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    14
 * file and include the License file at src/OPENSOLARIS.LICENSE.
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    15
 * If applicable, add the following below this CDDL HEADER, with the
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    16
 * fields enclosed by brackets "[]" replaced with your own identifying
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    17
 * information: Portions Copyright [yyyy] [name of copyright owner]
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    18
 *
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    19
 * CDDL HEADER END
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    20
 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    21
/*
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    22
 * Copyright (c) 2003, by Sun Microsystems, Inc.
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    23
 * All rights reserved.
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    24
 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    25
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    26
#ident  "@(#)auto_ef_util.c 1.18 07/04/12 SMI"
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    27
#include "auto_ef_lib.h"
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    28
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    29
int IdentfyEncoding(int, size_t, int *, char *, char *, const char *,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    30
	_auto_ef_t *, int *, char *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    31
int IsSingleByte_buf(const char *, double *, char *, int, srd *,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    32
	double *, double *, _auto_ef_t *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    33
int IsHKSCSOrBIG5(char *, const char *, char *, size_t, _auto_ef_t *, double);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    34
int RegistBIG5(char *, size_t, char *, const char *, _auto_ef_t *,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    35
	srd *, double *, double *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    36
int RegistEUC(char *, int, char *, _auto_ef_t *, srd *, double *, double *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    37
int RegistHashTable(unsigned char, unsigned char, srd *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    38
int HashTableOpen(char *, srd *, double *, double *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    39
void get_hash_name(char *, char *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    40
int Regist_ASCII_ISO2022JP(int, char *, _auto_ef_t *);
636
e4ac77666de3 7115607 cmd/auto_ef compilation fails on b175a - wrong iconv args (backport of s11u1:fb701ba7adfd)
jenda
parents: 0
diff changeset
    41
int IsAsciiOr2022_buf(char *, int, char *);
0
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    42
int Is2022KROrCN(int, char *, char *, size_t, _auto_ef_t *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    43
int Hash(unsigned char, unsigned char);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    44
int TotalScore_buf(const char *, double *, int, srd *, double *, double *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    45
int FindKeyWord(unsigned char, unsigned char, srd *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    46
int GetScore(unsigned char, unsigned char, srd *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    47
double Calc_SD(int, double *, double *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    48
char *chopbuf(char *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    49
void FreeHashTable(srd *);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    50
int CheckISO2022CN(unsigned char, unsigned char, unsigned char, unsigned char);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    51
int CheckISO2022KR(unsigned char, unsigned char, unsigned char, unsigned char);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    52
void ThaiSpecificCheck(const char *, char *, size_t);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    53
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    54
/*
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    55
 Map from Single Byte encoding to Language
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    56
 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    57
const char single_byte_langs[SINGLE_ENCODING_MAX][ICONV_LOCALE_MAX][ENCODING_LENGTH] = {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    58
  /* ISO-8859-1 : 0 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    59
  {"Germany", "Spain", "France", "Italy", "Sweden", "Denmark", "Finland",
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    60
   "Iceland", "Catalonia", "Netherland", "Norway", "Portugal"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    61
  /* ISO-8859-2 : 1 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    62
  {"Croatia", "Hungary", "Poland", "Serbia", "Slovakia", "Slovenia"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    63
  /* ISO-8859-5 : 2 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    64
  {"Bulgaria", "Russia"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    65
  /* ISO-8859-6 : 3 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    66
  {"Arabia"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    67
  /* ISO-8859-7 : 4 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    68
  {"Greece"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    69
  /* ISO-8859-8 : 5 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    70
  {"Hebrew"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    71
  /* koi8_r : 6 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    72
  {"Russia"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    73
  /* CP1250 : 7 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    74
  {"Croatia", "Hungary", "Poland", "Serbia", "Slovakia", "Slovenia"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    75
  /* CP1251 : 8 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    76
  {"Bulgaria", "Russia"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    77
  /* CP1252 : 9 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    78
  {"Germany", "Spain", "France", "Italy", "Sweden", "Denmark", "Finland", 
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    79
   "Iceland", "Catalonia", "Netherland", "Norway", "Portugal"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    80
  /* CP1253 : 10 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    81
  {"Greece"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    82
  /* CP1255 : 11 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    83
  {"Hebrew"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    84
  /* CP1256 : 12 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    85
  {"Arabia"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    86
  /* CP874 : 14 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    87
  {"Thai"},
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    88
  /* TIS620.2533 : 15 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    89
  {"Thai"} 
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    90
};
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    91
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    92
/* Defined in another translation unit. */
extern const char *to_code;

/* Fixed 64-byte buffer holding the path prefix of the hash table files. */
const char roothash[64] = "/usr/lib/auto_ef/hashtable.";
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    96
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    97
int IdentfyEncoding(int code_num, size_t buf_size,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    98
	int *found_target, char *from_code, char *inputp, const char *to_code,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
    99
	_auto_ef_t *root_autoef, int *end_auto_ef, char *input_buf) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   100
	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   101
	int i, utf_flag;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   102
	char hashfilename[PATH_MAX];
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   103
	srd hashtable[HASHSIZE];
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   104
	double average = 0.0, SD = 0.0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   105
	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   106
	for (i = 0; i < HASHSIZE; i++)
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   107
		hashtable[i] = NULL;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   108
	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   109
	for (i = 0; i < PATH_MAX; i++)
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   110
		hashfilename[i] = '\0';
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   111
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   112
	switch (code_num) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   113
	case 1: 
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   114
		/* UTF-8 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   115
		*found_target = 1;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   116
		utf_flag = 0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   117
		for (i = 0; i < (int)buf_size; i++) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   118
			if (inputp[i] == '\0')
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   119
				break;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   120
			if ((unsigned char)inputp[i] > 127) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   121
				utf_flag = 1;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   122
				break;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   123
			}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   124
		}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   125
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   126
		if (Is2022KROrCN(utf_flag, from_code, input_buf, buf_size, root_autoef) != -1)
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   127
				*end_auto_ef = 1;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   128
					
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   129
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   130
		break;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   131
		
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   132
	case 2: /* ISO-2022-JP or ASCII */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   133
		*found_target = 1;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   134
		
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   135
		i = IsAsciiOr2022_buf(inputp, buf_size, from_code);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   136
		if (Regist_ASCII_ISO2022JP(i, from_code, root_autoef) == -1)
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   137
			return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   138
			
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   139
		*end_auto_ef = 1;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   140
		break;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   141
	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   142
	case 3: /* EUC series */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   143
		get_hash_name(hashfilename, from_code);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   144
		if (HashTableOpen(hashfilename, hashtable,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   145
			&average, &SD) >= 0) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   146
			
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   147
			*found_target = RegistEUC(from_code, buf_size, inputp, 
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   148
				root_autoef, hashtable, &average, &SD);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   149
				
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   150
			if (*found_target == -1)
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   151
				return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   152
		} else {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   153
			errno = EACCES;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   154
			return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   155
		}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   156
		FreeHashTable(hashtable);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   157
		break;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   158
	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   159
	case 7: /* PCK, zh_HK.hkscs, GB18030, ISO-2022-KR, zh_CN.iso2022-CN */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   160
		get_hash_name(hashfilename, from_code);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   161
		if (HashTableOpen(hashfilename, hashtable,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   162
			&average, &SD) >= 0) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   163
			
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   164
			if (RegistBIG5(from_code, buf_size,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   165
				inputp, to_code, root_autoef, hashtable,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   166
				&average, &SD) == -1)
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   167
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   168
				return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   169
		} else {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   170
			errno = EACCES;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   171
			return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   172
		}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   173
		FreeHashTable(hashtable);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   174
		break;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   175
	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   176
	case 8: /* 8859 or CP series */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   177
		if (!*found_target) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   178
			double total_score = 0.0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   179
			double single_byte_score = 0.0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   180
			double highest_score = -3.0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   181
			int i;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   182
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   183
			if (IsSingleByte_buf(inputp, &total_score,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   184
				from_code, buf_size, hashtable,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   185
				&average, &SD, root_autoef) == -1) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   186
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   187
				return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   188
			}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   189
		}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   190
		break;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   191
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   192
	default:
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   193
		errno = EACCES;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   194
		return (-1);	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   195
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   196
	}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   197
	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   198
	return (0);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   199
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   200
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   201
/*
 * Score input_buf against every language table that belongs to the given
 * single-byte encoding and register each non-zero score on root_autoef.
 *
 * For each language listed in single_byte_langs for the encoding, the
 * hash table named "<get_hash_name(encoding)>_<language>" is opened, the
 * buffer is scored with TotalScore_buf, and the table is released again.
 * *total_score holds the score of the last language processed.
 *
 * Returns 0 on success.  Returns -1 with errno set to:
 *   EINVAL  encoding is not one of the known single-byte encodings
 *   EACCES  a hash table could not be opened
 *   ENOMEM  Regist_AUTOEF failed
 */
int IsSingleByte_buf(const char *input_buf,
	double *total_score, char *encoding, int buf_size, srd *hashtable,
	double *average, double *SD, _auto_ef_t *root_autoef)
{
	int i;
	int sflag = -1;
	char tablename[PATH_MAX];

	/* sflag is the row of single_byte_langs that matches the encoding. */
	if (strcmp(encoding, I8859_1) == 0) sflag = 0;
	if (strcmp(encoding, I8859_2) == 0) sflag = 1;
	if (strcmp(encoding, I8859_5) == 0) sflag = 2;
	if (strcmp(encoding, I8859_6) == 0) sflag = 3;
	if (strcmp(encoding, I8859_7) == 0) sflag = 4;
	if (strcmp(encoding, I8859_8) == 0) sflag = 5;
	if (strcmp(encoding, KOI8)    == 0) sflag = 6;
	if (strcmp(encoding, CP1250)  == 0) sflag = 7;
	if (strcmp(encoding, CP1251)  == 0) sflag = 8;
	if (strcmp(encoding, CP1252)  == 0) sflag = 9;
	if (strcmp(encoding, CP1253)  == 0) sflag = 10;
	if (strcmp(encoding, CP1255)  == 0) sflag = 11;
	if (strcmp(encoding, CP1256)  == 0) sflag = 12;
	/*
	 * The table initializer is positional, so the CP874 and TIS620 rows
	 * live at indices 13 and 14 (not 14 and 15).
	 */
	if (strcmp(encoding, CP874)   == 0) sflag = 13;
	if (strcmp(encoding, TIS620)  == 0) sflag = 14;

	/* Unknown encoding: refuse rather than index the table with -1. */
	if (sflag < 0) {
		errno = EINVAL;
		return (-1);
	}

	for (i = 0; i < ICONV_LOCALE_MAX; i++) {
		const char *lang = single_byte_langs[sflag][i];

		/* An empty name marks the end of this encoding's languages. */
		if (lang[0] == '\0')
			break;
		*total_score = 0.0;

		get_hash_name(tablename, encoding);
		strcat(tablename, "_");
		strcat(tablename, lang);

		if (HashTableOpen(tablename, hashtable, average, SD) < 0) {
			errno = EACCES;
			return (-1);
		}

		if (TotalScore_buf(input_buf, total_score, buf_size,
		    hashtable, average, SD) >= 0) {

			/* Encoding-specific step for the two Thai encodings. */
			if (sflag == 13 || sflag == 14) {
				ThaiSpecificCheck(input_buf, encoding,
				    buf_size);
			}

			if (*total_score != 0.0 &&
			    Regist_AUTOEF(encoding, *total_score,
			    (char *)lang, root_autoef) == -1) {
				/* Do not leak the table on the error path. */
				FreeHashTable(hashtable);
				errno = ENOMEM;
				return (-1);
			}
		}

		FreeHashTable(hashtable);
	}
	return (0);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   270
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   271
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   272
/*
 * Convert the NUL-terminated string `src' from encoding `from' to encoding
 * `to', storing the result in a newly malloc'ed, NUL-terminated buffer.
 *
 * Returns:
 *    0  success; *dstp points at the new buffer, which the caller frees.
 *    1  iconv() rejected the input; *dstp is NULL.
 *   -1  iconv_open() failed (errno as set by iconv_open()) or the buffer
 *       could not be allocated (errno = ENOMEM); *dstp is NULL.
 */
static int
ConvertToNewBuf(const char *to, const char *from, char *src, char **dstp)
{
	iconv_t cd;
	char *in = src;
	char *buf, *out;
	size_t inleft, outleft;
	size_t rv;

	*dstp = NULL;

	if ((cd = iconv_open(to, from)) == (iconv_t)-1) {
		/* Use iconv_open errno */
		return (-1);
	}

	errno = 0;
	inleft = strlen(src);
	outleft = inleft * 4;

	/*
	 * Allocate one byte more than is offered to iconv() so that the
	 * terminator written below always fits, even for empty input or
	 * when the conversion fills the whole output area.
	 */
	if ((buf = (char *)malloc(outleft + 1)) == NULL) {
		iconv_close(cd);
		errno = ENOMEM;
		return (-1);
	}
	buf[0] = '\0';
	out = buf;

	rv = iconv(cd, &in, &inleft, &out, &outleft);
	iconv_close(cd);

	if (rv == (size_t)-1) {
		free(buf);
		return (1);
	}

	*out = '\0';
	*dstp = buf;
	return (0);
}

/*
 * Decide whether `inputp' should be registered as BIG5 or as `from_code'.
 *
 * The input is converted from BIG5 to `to_code' and then back to BIG5.
 * If the round trip reproduces the input byte for byte, BIG5 is registered
 * in root_autoef; otherwise `from_code' is registered.  Both registrations
 * use `total_score' and the language returned by M_FromCodeToLang(from_code).
 * If either iconv() conversion fails, nothing is registered.
 *
 * `buf_size' is accepted for interface compatibility and is not used.
 *
 * Returns 0 on success (including the "nothing registered" case) and -1 if
 * iconv_open(), memory allocation or Regist_AUTOEF() fails.
 */
int IsHKSCSOrBIG5(char *from_code, const char *to_code,
	char *inputp, size_t buf_size, _auto_ef_t *root_autoef, double total_score)
{
	/* Both start as NULL so the common cleanup below is always safe. */
	char *tbuf = NULL;
	char *retbuf = NULL;
	int rc;

	rc = ConvertToNewBuf(to_code, BIG5, inputp, &tbuf);
	if (rc == 0)
		rc = ConvertToNewBuf(BIG5, to_code, tbuf, &retbuf);

	if (rc == 0) {
		if (strcmp(inputp, retbuf) == 0) {
			if (Regist_AUTOEF(BIG5, total_score,
				M_FromCodeToLang(from_code),
				root_autoef) == -1)
				rc = -1;
		} else {
			if (Regist_AUTOEF(from_code, total_score,
				M_FromCodeToLang(from_code),
				root_autoef) == -1)
				rc = -1;
		}
	} else if (rc > 0) {
		/* A failed conversion is not reported as an error. */
		rc = 0;
	}

	free(tbuf);
	free(retbuf);
	return (rc);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   375
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   376
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   377
int RegistBIG5(char *from_code, size_t buf_size,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   378
	char *inputp, const char *to_code, _auto_ef_t *root_autoef,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   379
	srd *hashtable, double *average, double *SD)
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   380
{
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   381
	double total_score = 0.0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   382
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   383
	if (TotalScore_buf(inputp, &total_score, buf_size, 
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   384
		hashtable, average, SD) >= 0) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   385
		if (total_score != 0.0) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   386
			/* If the encoding is zh_HK.hkscs, have to check */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   387
			/* the buf have extended code point from zh_TW.BIG5 */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   388
			if (strcmp(from_code, HKSCS) == 0) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   389
				if (IsHKSCSOrBIG5(from_code, to_code, 
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   390
					inputp, buf_size, root_autoef, total_score) == -1)
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   391
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   392
					return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   393
			} else {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   394
				if (Regist_AUTOEF(from_code, total_score,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   395
					M_FromCodeToLang(from_code),
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   396
					root_autoef) == -1) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   397
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   398
					return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   399
				}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   400
			}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   401
		}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   402
	}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   403
	return (0);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   404
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   405
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   406
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   407
int RegistEUC(char *from_code, int buf_size, char *inputp,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   408
	_auto_ef_t *root_autoef, srd *hashtable,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   409
	double *average, double *SD) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   410
	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   411
	double total_score = 0.0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   412
	int found_target = 0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   413
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   414
	if (TotalScore_buf(inputp, &total_score, buf_size,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   415
		hashtable, average, SD) >= 0) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   416
		if (total_score != 0.0) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   417
			found_target = 1;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   418
			if (Regist_AUTOEF(from_code, total_score,
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   419
				M_FromCodeToLang(from_code),
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   420
				root_autoef) == -1) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   421
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   422
				return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   423
			}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   424
		}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   425
	}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   426
	return (found_target);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   427
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   428
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   429
/*
 * Allocate a record for the byte pair (a, b) with an initial score of 1
 * and append it to the end of the chain in bucket Hash(a, b) of
 * `hashtable'.
 *
 * Returns 0 on success, or -1 with errno set to ENOMEM if the record
 * cannot be allocated.
 */
int RegistHashTable(unsigned char a, unsigned char b, srd *hashtable) {
	srd node;
	srd *link;
	int slot;

	node = (srd) malloc(sizeof (SRD));
	if (node == NULL) {
		errno = ENOMEM;
		return (-1);
	}

	node->keyword[0] = a;
	node->keyword[1] = b;
	node->score = 1;
	node->nextsrd = NULL;

	/*
	 * Follow the chain through its link fields until the terminating
	 * NULL is found; that is either the bucket head itself (empty
	 * bucket) or the nextsrd of the last record.
	 */
	slot = Hash(a, b);
	link = &hashtable[slot];
	while (*link != NULL)
		link = &(*link)->nextsrd;
	*link = node;

	return (0);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   459
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   460
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   461
/*
 * Load the score table file `table' into `hashtable' and compute summary
 * statistics over the scores.
 *
 * The file is read in groups of three lines:
 *   line 0  ignored;
 *   line 1  four hex digits; the first two give keyword byte a and the
 *           next two give keyword byte b.  A record for (a, b) is added
 *           with RegistHashTable();
 *   line 2  decimal score, stored in the first record in the bucket whose
 *           keyword matches (a, b).
 *
 * On return *average holds the mean of the scores read from the file and
 * *SD the square root of the summed squared deviations of every record in
 * `hashtable' divided by (number of scores read - 1).
 *
 * Returns 0 on success.  Returns -1 with errno set to EACCES if the file
 * cannot be opened or a record cannot be added.
 */
int HashTableOpen(char *table, srd *hashtable, double *average,
	double *SD) {

	FILE *fp;
	char buf[LONG_BIT];
	int i;
	int tableline = 0;
	int hash_score = 0;
	int total_ent = 0;
	double sum_of_score = 0.0;
	double sum_of_deviation = 0.0;
	double deviation;
	srd srdp;
	unsigned char point[3];
	/*
	 * The keyword bytes are parsed on line 1 of a group and consumed on
	 * line 2, i.e. in the following loop iteration, so they must live
	 * outside the loop body.
	 */
	unsigned char keyword_a = 0;
	unsigned char keyword_b = 0;

	if ((fp = fopen(table, "r")) == NULL) {
		errno = EACCES;
		return (-1);
	}

	while (fgets(buf, sizeof (buf), fp) != NULL) {
		chopbuf(buf);

		if (tableline == 3)
			tableline = 0;

		switch (tableline) {
		case 0:
			break;
		case 1:
			point[0] = buf[0]; point[1] = buf[1]; point[2] = '\0';
			keyword_a =
				(unsigned char)strtol((const char *)point,
				(char **)NULL, 16);

			point[0] = buf[2]; point[1] = buf[3]; point[2] = '\0';
			keyword_b =
				(unsigned char)strtol((const char *)point,
				(char **)NULL, 16);

			if (RegistHashTable(keyword_a, keyword_b,
				hashtable) == -1) {

				/* Do not leak the stream on failure. */
				fclose(fp);
				/*
				 * EACCES is what this path has always
				 * reported; it is set after fclose() so that
				 * fclose() cannot overwrite it.
				 */
				errno = EACCES;
				return (-1);
			}
			break;
		case 2:
			hash_score = atoi(buf);
			for (srdp = hashtable[Hash(keyword_a, keyword_b)];
				srdp != NULL; srdp = srdp->nextsrd) {
				if ((srdp->keyword[0] == keyword_a) &&
					(srdp->keyword[1] == keyword_b)) {

					srdp->score = hash_score;
					total_ent++;
					sum_of_score += hash_score;
					break;
				}
			}
			break;
		}
		tableline++;
	}

	/*
	 * NOTE(review): if the file yields fewer than two scores, the
	 * divisions below are by zero (or by -1 for an empty file).  This
	 * is unchanged from the original; confirm what callers expect
	 * before adding a fallback.
	 */
	*average = sum_of_score / (double)total_ent;

	for (i = 0; i < HASHSIZE; i++) {
		for (srdp = hashtable[i]; srdp != NULL; srdp = srdp->nextsrd) {
			deviation = (double)srdp->score - *average;
			sum_of_deviation += deviation * deviation;
		}
	}
	*SD = sqrt(sum_of_deviation/(total_ent -1));
	fclose(fp);
	return (0);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   542
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   543
void get_hash_name(char *hashfile, char *encoding) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   544
	strcpy(hashfile, roothash);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   545
	strcat(hashfile, "\0");
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   546
	strcat(hashfile, encoding);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   547
	strcat(hashfile, "\0");
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   548
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   549
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   550
/*
 * Register an encoding according to the result code `i':
 *
 *    0     register ASCII
 *    1     register from_code, with the language obtained from
 *          M_FromCodeToLang(from_code)
 *   -1     nothing is registered; the error is passed on
 *   other  nothing is registered
 *
 * Returns -1 when `i' is -1 or when Regist_AUTOEF() reports -1,
 * otherwise 0.
 */
int Regist_ASCII_ISO2022JP(int i, char *from_code, _auto_ef_t *root_autoef) {
	if (i == -1) {
		/*
		 * errno is from IsAsciiOr2022_buf
		 */
		return (-1);
	}

	if (i == 0) {
		if (Regist_AUTOEF(ASCII, FULL, ASCII, root_autoef) == -1)
			return (-1);
	} else if (i == 1) {
		if (Regist_AUTOEF(from_code, FULL, M_FromCodeToLang(from_code), root_autoef) == -1)
			return (-1);
	}

	return (0);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   568
636
e4ac77666de3 7115607 cmd/auto_ef compilation fails on b175a - wrong iconv args (backport of s11u1:fb701ba7adfd)
jenda
parents: 0
diff changeset
   569
int IsAsciiOr2022_buf(char *input_buf, int buf_size,
0
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   570
	char *from_encoding) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   571
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   572
	char *tbuf;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   573
	iconv_t cd;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   574
	char *convert;
636
e4ac77666de3 7115607 cmd/auto_ef compilation fails on b175a - wrong iconv args (backport of s11u1:fb701ba7adfd)
jenda
parents: 0
diff changeset
   575
	char *context;
e4ac77666de3 7115607 cmd/auto_ef compilation fails on b175a - wrong iconv args (backport of s11u1:fb701ba7adfd)
jenda
parents: 0
diff changeset
   576
	const char *p;
0
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   577
	size_t fsize, tsize, ret;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   578
	int i;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   579
	int tmp_bufsize = 0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   580
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   581
	if ((cd = iconv_open(UTF8, from_encoding)) == (iconv_t)-1) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   582
		/* Use iconv_open errno */
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   583
		iconv_close(cd);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   584
		return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   585
	}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   586
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   587
	errno = 0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   588
	context = &input_buf[0];
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   589
	tmp_bufsize = buf_size;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   590
	p = lengthbuf(input_buf, &tmp_bufsize);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   591
	fsize = p - &input_buf[0];
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   592
	tsize = fsize * 4;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   593
	if ((tbuf = (char *) malloc(tsize)) == NULL) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   594
			errno = ENOMEM;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   595
			iconv_close(cd);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   596
			return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   597
		}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   598
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   599
	tbuf[0] = '\0';
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   600
	convert = &tbuf[0];
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   601
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   602
	ret = iconv(cd, &context, &fsize, &convert, &tsize);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   603
	if (ret == (size_t) -1) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   604
		iconv_close(cd);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   605
		errno = EINVAL;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   606
		free(tbuf);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   607
		return (-1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   608
	}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   609
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   610
	*convert = '\0';
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   611
	for (i = 0; i < buf_size; i++) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   612
		if (tbuf[i] == '\0') break;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   613
		if ((unsigned char)tbuf[i] > 127) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   614
			iconv_close(cd);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   615
			free(tbuf);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   616
			return (1);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   617
		}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   618
	}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   619
	iconv_close(cd);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   620
	free(tbuf);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   621
	return (0);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   622
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   623
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   624
/*
 * Register the encoding of `inputp' depending on `utf_flag'.
 *
 * utf_flag 0: scan inputp for a four-byte sequence accepted by
 *             CheckISO2022KR() or CheckISO2022CN() and register ISOKR
 *             or ISOCN for the first one found.
 * utf_flag 1: register from_code directly (the original comment says
 *             this is the UTF-8 case).
 *
 * from_code   - encoding name registered when utf_flag is 1
 * inputp      - buffer to scan when utf_flag is 0
 * buf_size    - number of valid bytes in inputp
 * root_autoef - passed through to Regist_AUTOEF()
 *
 * Returns 0 when Regist_AUTOEF() did not report -1; returns -1 when
 * nothing matched, registration failed, or utf_flag is neither 0 nor 1.
 */
int Is2022KROrCN(int utf_flag, char *from_code, char *inputp,
	size_t buf_size, _auto_ef_t *root_autoef) {

	size_t i;

	switch (utf_flag) {
	case 0:
		/* For ISO-2022-KR, CN/CN-EXT encoding */
		/*
		 * Each step looks at four consecutive bytes, so stop while
		 * inputp[i+3] is still inside the buffer.
		 */
		for (i = 0; i + 3 < buf_size; i++) {
			unsigned char fst, snd, trd, fth;

			fst = (unsigned char)inputp[i];
			snd = (unsigned char)inputp[i+1];
			trd = (unsigned char)inputp[i+2];
			fth = (unsigned char)inputp[i+3];
			if (CheckISO2022KR(fst, snd, trd, fth) == 1) {
				if (Regist_AUTOEF(ISOKR, FULL, M_FromCodeToLang(ISOKR),
					root_autoef) != -1) {

					return (0);
				}
				/* registration failed: give up, return -1 below */
				break;
			} else if (CheckISO2022CN(fst, snd, trd, fth) == 1) {
				if (Regist_AUTOEF(ISOCN, FULL, M_FromCodeToLang(ISOCN),
					root_autoef) != -1) {

					return (0);
				}
				/* registration failed: give up, return -1 below */
				break;
			}
		}
		break;

	case 1: /* Not ISO-2022-KR, CN/CN-EXT is UTF-8 */
		if (Regist_AUTOEF(from_code, FULL, from_code, root_autoef) != -1) {
			return (0);
		}
		break;

	default:
		break;
	}
	return (-1);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   666
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   667
int Hash(unsigned char a, unsigned char b) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   668
	unsigned int hashval = 0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   669
	hashval = (unsigned int)a + (unsigned int)b;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   670
	return (hashval % HASHSIZE);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   671
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   672
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   673
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   674
/*
 * Look up the byte pair (a, b) in hashtable and, when it carries a score
 * other than -1, add Calc_SD() of that score to *total_score.
 *
 * GetScore() returns -1 for a pair that is not in the table, so one
 * lookup covers both "not present" and "score is -1".
 *
 * Returns 1 when *total_score was updated, 0 otherwise.
 */
static int add_pair_score(unsigned char a, unsigned char b, srd *hashtable,
	double *total_score, double *average, double *SD) {

	int score;

	score = GetScore(a, b, hashtable);
	if (score == -1)
		return (0);

	*total_score += Calc_SD(score, average, SD);
	return (1);
}

/*
 * Accumulate the score of input_buf into *total_score.
 *
 * The buffer is walked up to buf_size - 1 bytes or the first NUL.  For
 * every byte that is not below MSBFLAG, two byte pairs are looked up in
 * hashtable: the pair starting at that byte (when the next byte is not
 * NUL) and the pair ending at that byte (when there is a previous byte).
 * Each pair found contributes Calc_SD(score, average, SD).
 *
 * input_buf   - bytes to score
 * total_score - out: reset to 0.0, then receives the accumulated value
 * buf_size    - size of input_buf
 * hashtable   - keyword table searched through GetScore()
 * average, SD - passed through to Calc_SD()
 *
 * Returns 1 when at least one pair contributed, 0 otherwise.
 */
int TotalScore_buf(const char *input_buf, double *total_score,
	int buf_size, srd *hashtable, double *average, double *SD) {

	int i;
	int found = 0;

	*total_score = 0.0;

	for (i = 0; i < buf_size - 1; i++) {
		if (input_buf[i] == '\0') break;

		/*
		 * Where char is signed, a byte with the top bit set is
		 * negative and the cast turns it into a large unsigned
		 * value, so it is not skipped here.
		 */
		if ((unsigned)input_buf[i] < MSBFLAG)
			continue;

		/* pair that starts at this byte */
		if (input_buf[i+1] != '\0' &&
		    add_pair_score(input_buf[i], input_buf[i+1], hashtable,
		    total_score, average, SD)) {
			found = 1;
		}

		/*
		 * pair that ends at this byte; the first byte has no
		 * predecessor, so input_buf[i-1] must not be read for it
		 */
		if (i > 0 &&
		    add_pair_score(input_buf[i-1], input_buf[i], hashtable,
		    total_score, average, SD)) {
			found = 1;
		}
	}
	return (found);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   721
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   722
/*
 * Report whether the two-byte keyword (a, b) is in hashtable.
 *
 * Walks the chain of the bucket selected by Hash(a, b) and compares both
 * keyword bytes of each entry.
 *
 * Returns TRUE when a matching entry exists, FALSE otherwise.
 */
int FindKeyWord(unsigned char a, unsigned char b, srd *hashtable) {
	srd entry = hashtable[Hash(a, b)];

	while (entry != NULL) {
		if (entry->keyword[0] == a && entry->keyword[1] == b)
			return (TRUE);
		entry = entry->nextsrd;
	}
	return (FALSE);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   733
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   734
/*
 * Return the score stored for the two-byte keyword (a, b).
 *
 * Searches the chain of the bucket selected by Hash(a, b); the score of
 * the first entry whose keyword bytes both match is returned.
 *
 * Returns -1 when no entry matches.
 */
int GetScore(unsigned char a, unsigned char b, srd *hashtable) {
	srd entry = hashtable[Hash(a, b)];

	for (; entry != NULL; entry = entry->nextsrd) {
		if (entry->keyword[0] != a)
			continue;
		if (entry->keyword[1] != b)
			continue;
		return (entry->score);
	}
	return (-1);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   745
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   746
/*
 * Express `score' as its distance from *average in units of *SD,
 * i.e. (score - *average) / *SD.
 *
 * score   - raw score
 * average - pointer to the mean to subtract
 * SD      - pointer to the divisor
 *
 * NOTE(review): *SD is not checked for zero; in that case the result is
 * whatever floating-point division by zero yields.
 */
double Calc_SD(int score, double *average, double *SD) {
	return (((double)score - *average) / (*SD));
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   753
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   754
/*
 * Cut the string in `buf' at its first newline.
 *
 * The first '\n', if any, is overwritten with a NUL; the buffer is not
 * written to when it holds no newline.
 *
 * Returns a pointer to the terminating NUL of the resulting string.
 */
char *chopbuf(char *buf) {
	char *cur;

	for (cur = buf; *cur != '\0'; cur++) {
		if (*cur == '\n') {
			*cur = '\0';
			break;
		}
	}

	return (cur);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   769
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   770
/*
 * Release every entry in hashtable.
 *
 * For each of the HASHSIZE buckets the chain linked through nextsrd is
 * freed entry by entry, and the bucket head is then reset to NULL.  The
 * hashtable array itself is not freed.
 */
void FreeHashTable(srd *hashtable) {

	int bucket;
	srd node, next;

	for (bucket = 0; bucket < HASHSIZE; bucket++) {
		node = hashtable[bucket];
		while (node != NULL) {
			/* fetch the successor before the entry goes away */
			next = node->nextsrd;
			free(node);
			node = next;
		}
		hashtable[bucket] = (srd)NULL;
	}
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   789
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   790
/*
 * Test whether the four bytes (a, b, c, d) form one of the escape
 * sequences this file treats as an ISO-2022-CN marker.
 *
 * Accepted sequences all start with 0x1b 0x24 (ESC '$'), followed by:
 *   0x29 (')') and 0x41 ('A'), 0x45 ('E') or 0x47 ('G')
 *   0x2a ('*') and 0x48 ('H')
 *   0x2b ('+') and 0x49..0x4d ('I'..'M')
 *
 * Returns 1 on a match, 0 otherwise.
 */
int CheckISO2022CN(unsigned char a, unsigned char b,
	unsigned char c, unsigned char d)
{
	if (a != 0x1b || b != 0x24)
		return (0);

	switch (c) {
	case 0x29:
		return (d == 0x41 || d == 0x45 || d == 0x47);
	case 0x2a:
		return (d == 0x48);
	case 0x2b:
		return (d >= 0x49 && d <= 0x4d);
	default:
		return (0);
	}
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   808
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   809
/*
 * Test whether the four bytes (a, b, c, d) are exactly
 * 0x1b 0x24 0x29 0x43 (ESC '$' ')' 'C'), the sequence this file treats
 * as the ISO-2022-KR marker.
 *
 * Returns 1 on a match, 0 otherwise.
 */
int CheckISO2022KR(unsigned char a, unsigned char b,
	unsigned char c, unsigned char d)
{
	if (a != 0x1b)
		return (0);
	if (b != 0x24)
		return (0);
	if (c != 0x29)
		return (0);

	return (d == 0x43 ? 1 : 0);
}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   818
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   819
void ThaiSpecificCheck(const char *input_buf, char *encoding, size_t buf_size)
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   820
{
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   821
	int i = 0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   822
	unsigned char a = 0;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   823
	for (i=0; i < buf_size; i++) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   824
		if (input_buf[i] == '\0') break;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   825
		a = (unsigned char) input_buf[i];
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   826
		if (a == 0x80 || a == 0x85 || a == 0x91 ||
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   827
			a == 0x92 || a == 0x93 || a == 0x94 ||
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   828
			a == 0x95 || a == 0x96 || a == 0x97) {
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   829
			strlcpy(encoding, CP874, ENCODING_LENGTH);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   830
			return;
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   831
		}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   832
	}
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   833
	
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   834
	strlcpy(encoding, TIS620, ENCODING_LENGTH);
542988ea726d initial version of Nevada G11N repository
simford
parents:
diff changeset
   835
}