Open Chinese Convert  0.4.3
A project for conversion between Traditional and Simplified Chinese
 All Data Structures Files Functions Variables Groups Pages
opencc.c
Go to the documentation of this file.
1 
23 #include "common.h"
24 #include "config_reader.h"
25 #include "converter.h"
26 #include "dict_group.h"
27 #include "dict_chain.h"
28 #include "encoding.h"
29 #include "opencc.h"
30 
31 typedef struct {
32  DictChain* dict_chain;
33  Converter* converter;
34 } OpenccDesc;
35 
36 static opencc_error errnum = OPENCC_ERROR_VOID;
37 static int lib_initialized = 0;
38 
39 static void lib_initialize(void) {
40 #ifdef ENABLE_GETTEXT
41  bindtextdomain(PACKAGE_NAME, LOCALEDIR);
42 #endif /* ifdef ENABLE_GETTEXT */
43  lib_initialized = 1;
44 }
45 
46 size_t opencc_convert(opencc_t t_opencc,
47  ucs4_t** inbuf,
48  size_t* inbuf_left,
49  ucs4_t** outbuf,
50  size_t* outbuf_left) {
51  if (!lib_initialized) {
52  lib_initialize();
53  }
54  OpenccDesc* opencc = (OpenccDesc*)t_opencc;
55  size_t retval = converter_convert(opencc->converter,
56  inbuf,
57  inbuf_left,
58  outbuf,
59  outbuf_left);
60  if (retval == (size_t)-1) {
61  errnum = OPENCC_ERROR_CONVERTER;
62  }
63  return retval;
64 }
65 
66 char* opencc_convert_utf8(opencc_t t_opencc, const char* inbuf, size_t length) {
67  if (!lib_initialized) {
68  lib_initialize();
69  }
70  size_t actual_length = strlen(inbuf);
71  if ((length == (size_t)-1) || (length > actual_length)) {
72  length = actual_length;
73  }
74  ucs4_t* winbuf = utf8_to_ucs4(inbuf, length);
75  if (winbuf == (ucs4_t*)-1) {
76  /* Can not convert input UTF8 to UCS4 */
77  errnum = OPENCC_ERROR_ENCODING;
78  return (char*)-1;
79  }
80  /* Set up UTF8 buffer */
81  size_t outbuf_len = length;
82  size_t outsize = outbuf_len;
83  char* original_outbuf = (char*)malloc(sizeof(char) * (outbuf_len + 1));
84  char* outbuf = original_outbuf;
85  original_outbuf[0] = '\0';
86  /* Set conversion buffer */
87  size_t wbufsize = length + 64;
88  ucs4_t* woutbuf = (ucs4_t*)malloc(sizeof(ucs4_t) * (wbufsize + 1));
89  ucs4_t* pinbuf = winbuf;
90  ucs4_t* poutbuf = woutbuf;
91  size_t inbuf_left, outbuf_left;
92  inbuf_left = ucs4len(winbuf);
93  outbuf_left = wbufsize;
94  while (inbuf_left > 0) {
95  size_t retval = opencc_convert(t_opencc,
96  &pinbuf,
97  &inbuf_left,
98  &poutbuf,
99  &outbuf_left);
100  if (retval == (size_t)-1) {
101  free(outbuf);
102  free(winbuf);
103  free(woutbuf);
104  return (char*)-1;
105  }
106  *poutbuf = L'\0';
107  char* ubuff = ucs4_to_utf8(woutbuf, (size_t)-1);
108  if (ubuff == (char*)-1) {
109  free(outbuf);
110  free(winbuf);
111  free(woutbuf);
112  errnum = OPENCC_ERROR_ENCODING;
113  return (char*)-1;
114  }
115  size_t ubuff_len = strlen(ubuff);
116  while (ubuff_len > outsize) {
117  size_t outbuf_offset = outbuf - original_outbuf;
118  outsize += outbuf_len;
119  outbuf_len += outbuf_len;
120  original_outbuf =
121  (char*)realloc(original_outbuf, sizeof(char) * outbuf_len);
122  outbuf = original_outbuf + outbuf_offset;
123  }
124  strncpy(outbuf, ubuff, ubuff_len);
125  free(ubuff);
126  outbuf += ubuff_len;
127  *outbuf = '\0';
128  outbuf_left = wbufsize;
129  poutbuf = woutbuf;
130  }
131  free(winbuf);
132  free(woutbuf);
133  original_outbuf = (char*)realloc(original_outbuf,
134  sizeof(char) * (strlen(original_outbuf) + 1));
135  return original_outbuf;
136 }
137 
/*
 * Releases a string previously returned by opencc_convert_utf8().
 * The buffer is handed directly to free().
 */
void opencc_convert_utf8_free(char *buf)
{
  free(buf);
}
141 
142 opencc_t opencc_open(const char* config_file) {
143  if (!lib_initialized) {
144  lib_initialize();
145  }
146  OpenccDesc* opencc;
147  opencc = (OpenccDesc*)malloc(sizeof(OpenccDesc));
148  opencc->dict_chain = NULL;
149  opencc->converter = converter_open();
150  converter_set_conversion_mode(opencc->converter, OPENCC_CONVERSION_FAST);
151  if (config_file == NULL) {
152  /* TODO load default */
153  assert(0);
154  } else {
155  /* Load config */
156  Config* config = config_open(config_file);
157  if (config == (Config*)-1) {
158  errnum = OPENCC_ERROR_CONFIG;
159  return (opencc_t)-1;
160  }
161  opencc->dict_chain = config_get_dict_chain(config);
162  converter_assign_dictionary(opencc->converter, opencc->dict_chain);
163  config_close(config);
164  }
165  return (opencc_t)opencc;
166 }
167 
168 int opencc_close(opencc_t t_opencc) {
169  if (!lib_initialized) {
170  lib_initialize();
171  }
172  OpenccDesc* opencc = (OpenccDesc*)t_opencc;
173  converter_close(opencc->converter);
174  if (opencc->dict_chain != NULL) {
175  dict_chain_delete(opencc->dict_chain);
176  }
177  free(opencc);
178  return 0;
179 }
180 
181 int opencc_dict_load(opencc_t t_opencc,
182  const char* dict_filename,
183  opencc_dictionary_type dict_type) {
184  if (!lib_initialized) {
185  lib_initialize();
186  }
187  OpenccDesc* opencc = (OpenccDesc*)t_opencc;
189  if (opencc->dict_chain == NULL) {
190  opencc->dict_chain = dict_chain_new(NULL);
191  DictGroup = dict_chain_add_group(opencc->dict_chain);
192  } else {
193  DictGroup = dict_chain_get_group(opencc->dict_chain, 0);
194  }
195  int retval = dict_group_load(DictGroup, dict_filename, dict_type);
196  if (retval == -1) {
197  errnum = OPENCC_ERROR_DICTLOAD;
198  return -1;
199  }
200  converter_assign_dictionary(opencc->converter, opencc->dict_chain);
201  return retval;
202 }
203 
204 void opencc_set_conversion_mode(opencc_t t_opencc,
205  opencc_conversion_mode conversion_mode) {
206  if (!lib_initialized) {
207  lib_initialize();
208  }
209  OpenccDesc* opencc = (OpenccDesc*)t_opencc;
210  converter_set_conversion_mode(opencc->converter, conversion_mode);
211 }
212 
213 opencc_error opencc_errno(void) {
214  if (!lib_initialized) {
215  lib_initialize();
216  }
217  return errnum;
218 }
219 
220 void opencc_perror(const char* spec) {
221  if (!lib_initialized) {
222  lib_initialize();
223  }
224  perr(spec);
225  perr("\n");
226  switch (errnum) {
227  case OPENCC_ERROR_VOID:
228  break;
229  case OPENCC_ERROR_DICTLOAD:
230  dictionary_perror(_("Dictionary loading error"));
231  break;
232  case OPENCC_ERROR_CONFIG:
233  config_perror(_("Configuration error"));
234  break;
235  case OPENCC_ERROR_CONVERTER:
236  converter_perror(_("Converter error"));
237  break;
238  case OPENCC_ERROR_ENCODING:
239  perr(_("Encoding error"));
240  break;
241  default:
242  perr(_("Unknown"));
243  }
244  perr("\n");
245 }