Open Chinese Convert  0.4.3
A project for conversion between Traditional and Simplified Chinese
 All Data Structures Files Functions Variables Groups Pages
opencc.c
1 /*
2  * Open Chinese Convert
3  *
4  * Copyright 2010-2013 BYVoid <byvoid@byvoid.com>
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #include "../opencc.h"
20 #include "../utils.h"
21 #include <getopt.h>
22 #include <locale.h>
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 
27 #ifndef VERSION
28 #define VERSION ""
29 #endif
30 
31 #define BUFFER_SIZE 65536
32 
33 void convert(const char* input_file,
34  const char* output_file,
35  const char* config_file) {
36  opencc_t od = opencc_open(config_file);
37  if (od == (opencc_t)-1) {
38  opencc_perror(_("OpenCC initialization error"));
39  exit(1);
40  }
41  FILE* fp = stdin;
42  FILE* fpo = stdout;
43  if (input_file) {
44  fp = fopen(input_file, "r");
45  if (!fp) {
46  fprintf(stderr, _("Can not read file: %s\n"), input_file);
47  exit(1);
48  }
49  skip_utf8_bom(fp);
50  }
51  if (output_file) {
52  fpo = fopen(output_file, "w");
53  if (!fpo) {
54  fprintf(stderr, _("Can not write file: %s\n"), output_file);
55  exit(1);
56  }
57  }
58  size_t size = BUFFER_SIZE;
59  char* buffer_in = NULL, * buffer_out = NULL;
60  buffer_in = (char*)malloc(size * sizeof(char));
61  char* lookahead = (char*)malloc(size * sizeof(char));
62  size_t lookahead_size = 0;
63  while (!feof(fp)) {
64  size_t read;
65  if (lookahead_size > 0) {
66  memcpy(buffer_in, lookahead, lookahead_size);
67  read =
68  fread(buffer_in + lookahead_size, 1, size - lookahead_size,
69  fp) + lookahead_size;
70  lookahead_size = 0;
71  } else {
72  read = fread(buffer_in, 1, size, fp);
73  }
74  // If we haven't finished reading after filling the entire buffer,
75  // then it could be that we broke within an UTF-8 character, in
76  // that case we must backtrack and find the boundary
77  if (read == size) {
78  // Find the boundary of last UTF-8 character
79  int i;
80  for (i = read - 1; i >= 0; i--) {
81  char c = buffer_in[i];
82  if (!(c & 0x80) || ((c & 0xC0) == 0xC0)) {
83  break;
84  }
85  }
86  assert(i >= 0);
87  memcpy(lookahead, buffer_in + i, read - i);
88  lookahead_size = read - i;
89  buffer_in[i] = '\0';
90  } else {
91  buffer_in[read] = '\0';
92  }
93  buffer_out = opencc_convert_utf8(od, buffer_in, (size_t)-1);
94  if (buffer_out != (char*)-1) {
95  fprintf(fpo, "%s", buffer_out);
96  opencc_convert_utf8_free(buffer_out);
97  } else {
98  opencc_perror(_("OpenCC error"));
99  break;
100  }
101  }
102 
103  if (lookahead_size > 0) {
104  assert(lookahead_size < size);
105  lookahead[lookahead_size] = '\0';
106  buffer_out = opencc_convert_utf8(od, lookahead, (size_t)-1);
107  if (buffer_out != (char*)-1) {
108  fprintf(fpo, "%s", buffer_out);
109  opencc_convert_utf8_free(buffer_out);
110  } else {
111  opencc_perror(_("OpenCC error"));
112  }
113  }
114  opencc_close(od);
115  free(lookahead);
116  free(buffer_in);
117  fclose(fp);
118  fclose(fpo);
119 }
120 
121 void show_version() {
122  printf(_("\n"));
123  printf(_("Open Chinese Convert (OpenCC) Command Line Tool\n"));
124  printf(_("Version %s\n"), VERSION);
125  printf(_("\n"));
126  printf(_("Author: %s\n"), "BYVoid <byvoid@byvoid.com>");
127  printf(_("Bug Report: %s\n"), "http://github.com/BYVoid/OpenCC/issues");
128  printf(_("\n"));
129 }
130 
131 void show_usage() {
132  show_version();
133  printf(_("Usage:\n"));
134  printf(_(" opencc [Options]\n"));
135  printf(_("\n"));
136  printf(_("Options:\n"));
137  printf(_(" -i [file], --input=[file] Read original text from [file].\n"));
138  printf(_(" -o [file], --output=[file] Write converted text to [file].\n"));
139  printf(_(
140  " -c [file], --config=[file] Load configuration of conversion from [file].\n"));
141  printf(_(" -v, --version Print version and build information.\n"));
142  printf(_(" -h, --help Print this help.\n"));
143  printf(_("\n"));
144  printf(_(
145  "With no input file, reads standard input and writes converted stream to standard output.\n"));
146  printf(_(
147  "Default configuration(%s) will be loaded if not set.\n"),
149  printf(_("\n"));
150 }
151 
152 int main(int argc, char** argv) {
153 #ifdef ENABLE_GETTEXT
154  setlocale(LC_ALL, "");
155  bindtextdomain(PACKAGE_NAME, LOCALEDIR);
156 #endif /* ifdef ENABLE_GETTEXT */
157  static struct option longopts[] =
158  {
159  { "version", no_argument, NULL, 'v' },
160  { "help", no_argument, NULL, 'h' },
161  { "input", required_argument, NULL, 'i' },
162  { "output", required_argument, NULL, 'o' },
163  { "config", required_argument, NULL, 'c' },
164  { 0, 0, 0, 0 },
165  };
166  static int oc;
167  static char* input_file, * output_file, * config_file;
168  while ((oc = getopt_long(argc, argv, "vh?i:o:c:", longopts, NULL)) != -1) {
169  switch (oc) {
170  case 'v':
171  show_version();
172  return 0;
173  case 'h':
174  case '?':
175  show_usage();
176  return 0;
177  case 'i':
178  input_file = mstrcpy(optarg);
179  break;
180  case 'o':
181  output_file = mstrcpy(optarg);
182  break;
183  case 'c':
184  config_file = mstrcpy(optarg);
185  break;
186  }
187  }
188  if (config_file == NULL) {
189  config_file = mstrcpy(OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD);
190  }
191  convert(input_file, output_file, config_file);
192  free(input_file);
193  free(output_file);
194  free(config_file);
195  return 0;
196 }