Skip to content

Commit 7596445

Browse files
committed
Merge branch 'break_iterator'
* break_iterator:
  - Fix typo in error message
  - BreakIterator: fix compat with old ICU versions
  - Fix build error one ext/intl
  - BreakIterator::getPartsIterator: new optional arg
  - Added IntlCodePointBreakIterator.
  - Add Intl prefix to BreakIterator/RuleBasedBI
  - Remove trailing space
  - Replaced zend_parse_method_params with plain zpp
  - BreakIter: Removed getAvailableLocales/getHashCode
  - Change in BreakIterator::getPartsIterator()
  - BreakIterator: add rules status constants
  - Tests for (RuleBased)BreakIterator.
  - BreakIterator and RuleBasedBreakiterator added
2 parents 715e59a + 0df73a8 commit 7596445

File tree

57 files changed

+3375
-122
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+3375
-122
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,397 @@
1+
/*
2+
+----------------------------------------------------------------------+
3+
| PHP Version 5 |
4+
+----------------------------------------------------------------------+
5+
| This source file is subject to version 3.01 of the PHP license, |
6+
| that is bundled with this package in the file LICENSE, and is |
7+
| available through the world-wide-web at the following url: |
8+
| http://www.php.net/license/3_01.txt |
9+
| If you did not receive a copy of the PHP license and are unable to |
10+
| obtain it through the world-wide-web, please send a note to |
11+
| license@php.net so we can mail you a copy immediately. |
12+
+----------------------------------------------------------------------+
13+
| Authors: Gustavo Lopes <cataphract@php.net> |
14+
+----------------------------------------------------------------------+
15+
*/
16+
17+
#ifdef HAVE_CONFIG_H
18+
#include "config.h"
19+
#endif
20+
21+
#include <unicode/brkiter.h>
22+
#include <unicode/rbbi.h>
23+
#include "codepointiterator_internal.h"
24+
25+
#include "breakiterator_iterators.h"
26+
27+
#include <typeinfo>
28+
29+
extern "C" {
30+
#define USE_BREAKITERATOR_POINTER 1
31+
#include "breakiterator_class.h"
32+
#include "breakiterator_methods.h"
33+
#include "rulebasedbreakiterator_methods.h"
34+
#include "codepointiterator_methods.h"
35+
#include <zend_exceptions.h>
36+
#include <zend_interfaces.h>
37+
#include <assert.h>
38+
}
39+
40+
using PHP::CodePointBreakIterator;
41+
42+
/* {{{ Global variables */
43+
/* Class entry of the class registered as "IntlBreakIterator"; assigned in
 * breakiterator_register_BreakIterator_class(). */
zend_class_entry *BreakIterator_ce_ptr;
44+
/* Class entry of the class registered as "IntlRuleBasedBreakIterator"
 * (registered with BreakIterator_ce_ptr as parent). */
zend_class_entry *RuleBasedBreakIterator_ce_ptr;
45+
/* Class entry of the class registered as "IntlCodePointBreakIterator"
 * (registered with BreakIterator_ce_ptr as parent). */
zend_class_entry *CodePointBreakIterator_ce_ptr;
46+
/* Object handlers returned by BreakIterator_object_create(): a copy of the
 * standard handlers with compare_objects, clone_obj and get_debug_info
 * overridden during class registration. */
zend_object_handlers BreakIterator_handlers;
47+
/* }}} */
48+
49+
/* Initializes `object` as an instance of the PHP class that matches the
 * dynamic ICU class of `biter`, then attaches `biter` to it.
 *
 *   RuleBasedBreakIterator  -> RuleBasedBreakIterator_ce_ptr
 *   CodePointBreakIterator  -> CodePointBreakIterator_ce_ptr
 *   anything else           -> BreakIterator_ce_ptr
 *
 * `biter` is dereferenced unconditionally, so it must not be NULL.
 */
U_CFUNC void breakiterator_object_create(zval *object,
										 BreakIterator *biter TSRMLS_DC)
{
	const UClassID dynamic_id = biter->getDynamicClassID();
	/* Fall back to the base class unless a more specific match is found. */
	zend_class_entry *target_ce = BreakIterator_ce_ptr;

	if (dynamic_id == RuleBasedBreakIterator::getStaticClassID()) {
		target_ce = RuleBasedBreakIterator_ce_ptr;
	} else if (dynamic_id == CodePointBreakIterator::getStaticClassID()) {
		target_ce = CodePointBreakIterator_ce_ptr;
	}

	object_init_ex(object, target_ce);
	breakiterator_object_construct(object, biter TSRMLS_CC);
}
66+
67+
/* Attaches the ICU iterator `biter` to the BreakIterator_object behind
 * `object`. The object must not already hold an iterator (checked with
 * assert). The stored pointer is later deleted by
 * BreakIterator_objects_free().
 */
U_CFUNC void breakiterator_object_construct(zval *object,
68+
BreakIterator *biter TSRMLS_DC)
69+
{
70+
BreakIterator_object *bio;
71+
72+
BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK; /* presumably populates `bio` from `object`; macro is defined outside this file -- TODO confirm */
73+
assert(bio->biter == NULL);
74+
bio->biter = biter;
75+
}
76+
77+
/* {{{ compare handler for BreakIterator */
78+
static int BreakIterator_compare_objects(zval *object1,
79+
zval *object2 TSRMLS_DC)
80+
{
81+
BreakIterator_object *bio1,
82+
*bio2;
83+
84+
bio1 = (BreakIterator_object*)zend_object_store_get_object(object1 TSRMLS_CC);
85+
bio2 = (BreakIterator_object*)zend_object_store_get_object(object2 TSRMLS_CC);
86+
87+
if (bio1->biter == NULL || bio2->biter == NULL) {
88+
return bio1->biter == bio2->biter ? 0 : 1;
89+
}
90+
91+
return *bio1->biter == *bio2->biter ? 0 : 1;
92+
}
93+
/* }}} */
94+
95+
/* {{{ clone handler for BreakIterator */
96+
static zend_object_value BreakIterator_clone_obj(zval *object TSRMLS_DC)
97+
{
98+
BreakIterator_object *bio_orig,
99+
*bio_new;
100+
zend_object_value ret_val;
101+
102+
bio_orig = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
103+
intl_errors_reset(INTL_DATA_ERROR_P(bio_orig) TSRMLS_CC);
104+
105+
ret_val = BreakIterator_ce_ptr->create_object(Z_OBJCE_P(object) TSRMLS_CC);
106+
bio_new = (BreakIterator_object*)zend_object_store_get_object_by_handle(
107+
ret_val.handle TSRMLS_CC);
108+
109+
zend_objects_clone_members(&bio_new->zo, ret_val,
110+
&bio_orig->zo, Z_OBJ_HANDLE_P(object) TSRMLS_CC);
111+
112+
if (bio_orig->biter != NULL) {
113+
BreakIterator *new_biter;
114+
115+
new_biter = bio_orig->biter->clone();
116+
if (!new_biter) {
117+
char *err_msg;
118+
intl_errors_set_code(BREAKITER_ERROR_P(bio_orig),
119+
U_MEMORY_ALLOCATION_ERROR TSRMLS_CC);
120+
intl_errors_set_custom_msg(BREAKITER_ERROR_P(bio_orig),
121+
"Could not clone BreakIterator", 0 TSRMLS_CC);
122+
err_msg = intl_error_get_message(BREAKITER_ERROR_P(bio_orig) TSRMLS_CC);
123+
zend_throw_exception(NULL, err_msg, 0 TSRMLS_CC);
124+
efree(err_msg);
125+
} else {
126+
bio_new->biter = new_biter;
127+
bio_new->text = bio_orig->text;
128+
if (bio_new->text) {
129+
zval_add_ref(&bio_new->text);
130+
}
131+
}
132+
} else {
133+
zend_throw_exception(NULL, "Cannot clone unconstructed BreakIterator", 0 TSRMLS_CC);
134+
}
135+
136+
return ret_val;
137+
}
138+
/* }}} */
139+
140+
/* {{{ get_debug_info handler for BreakIterator */
141+
static HashTable *BreakIterator_get_debug_info(zval *object, int *is_temp TSRMLS_DC)
142+
{
143+
zval zv = zval_used_for_init;
144+
BreakIterator_object *bio;
145+
const BreakIterator *biter;
146+
147+
*is_temp = 1;
148+
149+
array_init_size(&zv, 8);
150+
151+
bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
152+
biter = bio->biter;
153+
154+
if (biter == NULL) {
155+
add_assoc_bool_ex(&zv, "valid", sizeof("valid"), 0);
156+
return Z_ARRVAL(zv);
157+
}
158+
add_assoc_bool_ex(&zv, "valid", sizeof("valid"), 1);
159+
160+
if (bio->text == NULL) {
161+
add_assoc_null_ex(&zv, "text", sizeof("text"));
162+
} else {
163+
zval_add_ref(&bio->text);
164+
add_assoc_zval_ex(&zv, "text", sizeof("text"), bio->text);
165+
}
166+
167+
add_assoc_string_ex(&zv, "type", sizeof("type"),
168+
const_cast<char*>(typeid(*biter).name()), 1);
169+
170+
return Z_ARRVAL(zv);
171+
}
172+
/* }}} */
173+
174+
/* {{{ void breakiterator_object_init(BreakIterator_object* to)
175+
* Initialize internals of BreakIterator_object not specific to zend standard objects.
176+
*/
177+
static void breakiterator_object_init(BreakIterator_object *bio TSRMLS_DC)
178+
{
179+
intl_error_init(BREAKITER_ERROR_P(bio) TSRMLS_CC);
180+
bio->biter = NULL;
181+
bio->text = NULL;
182+
}
183+
/* }}} */
184+
185+
/* {{{ BreakIterator_objects_dtor */
186+
static void BreakIterator_objects_dtor(void *object,
187+
zend_object_handle handle TSRMLS_DC)
188+
{
189+
zend_objects_destroy_object((zend_object*)object, handle TSRMLS_CC);
190+
}
191+
/* }}} */
192+
193+
/* {{{ BreakIterator_objects_free */
194+
static void BreakIterator_objects_free(zend_object *object TSRMLS_DC)
195+
{
196+
BreakIterator_object* bio = (BreakIterator_object*) object;
197+
198+
if (bio->text) {
199+
zval_ptr_dtor(&bio->text);
200+
}
201+
if (bio->biter) {
202+
delete bio->biter;
203+
bio->biter = NULL;
204+
}
205+
intl_error_reset(BREAKITER_ERROR_P(bio) TSRMLS_CC);
206+
207+
zend_object_std_dtor(&bio->zo TSRMLS_CC);
208+
209+
efree(bio);
210+
}
211+
/* }}} */
212+
213+
/* {{{ BreakIterator_object_create */
214+
static zend_object_value BreakIterator_object_create(zend_class_entry *ce TSRMLS_DC)
215+
{
216+
zend_object_value retval;
217+
BreakIterator_object* intern;
218+
219+
intern = (BreakIterator_object*)ecalloc(1, sizeof(BreakIterator_object));
220+
221+
zend_object_std_init(&intern->zo, ce TSRMLS_CC);
222+
#if PHP_VERSION_ID < 50399
223+
zend_hash_copy(intern->zo.properties, &(ce->default_properties),
224+
(copy_ctor_func_t) zval_add_ref, NULL, sizeof(zval*));
225+
#else
226+
object_properties_init((zend_object*) intern, ce);
227+
#endif
228+
breakiterator_object_init(intern TSRMLS_CC);
229+
230+
retval.handle = zend_objects_store_put(
231+
intern,
232+
BreakIterator_objects_dtor,
233+
(zend_objects_free_object_storage_t) BreakIterator_objects_free,
234+
NULL TSRMLS_CC);
235+
236+
retval.handlers = &BreakIterator_handlers;
237+
238+
return retval;
239+
}
240+
/* }}} */
241+
242+
/* {{{ BreakIterator/RuleBasedBreakIterator methods arguments info */
243+
244+
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_void, 0, 0, 0)
245+
ZEND_END_ARG_INFO()
246+
247+
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_locale, 0, 0, 0)
248+
ZEND_ARG_INFO(0, "locale")
249+
ZEND_END_ARG_INFO()
250+
251+
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_setText, 0, 0, 1)
252+
ZEND_ARG_INFO(0, "text")
253+
ZEND_END_ARG_INFO()
254+
255+
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_next, 0, 0, 0)
256+
ZEND_ARG_INFO(0, "offset")
257+
ZEND_END_ARG_INFO()
258+
259+
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_offset, 0, 0, 1)
260+
ZEND_ARG_INFO(0, "offset")
261+
ZEND_END_ARG_INFO()
262+
263+
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_get_locale, 0, 0, 1)
264+
ZEND_ARG_INFO(0, "locale_type")
265+
ZEND_END_ARG_INFO()
266+
267+
ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_getPartsIterator, 0, 0, 0)
268+
ZEND_ARG_INFO(0, "key_type")
269+
ZEND_END_ARG_INFO()
270+
271+
ZEND_BEGIN_ARG_INFO_EX(ainfo_rbbi___construct, 0, 0, 1)
272+
ZEND_ARG_INFO(0, "rules")
273+
ZEND_ARG_INFO(0, "areCompiled")
274+
ZEND_END_ARG_INFO()
275+
276+
/* }}} */
277+
278+
/* {{{ BreakIterator_class_functions
279+
* Every 'BreakIterator' class method has an entry in this table
280+
*/
281+
static const zend_function_entry BreakIterator_class_functions[] = {
282+
PHP_ME(BreakIterator, __construct, ainfo_biter_void, ZEND_ACC_PRIVATE)
283+
PHP_ME_MAPPING(createWordInstance, breakiter_create_word_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
284+
PHP_ME_MAPPING(createLineInstance, breakiter_create_line_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
285+
PHP_ME_MAPPING(createCharacterInstance, breakiter_create_character_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
286+
PHP_ME_MAPPING(createSentenceInstance, breakiter_create_sentence_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
287+
PHP_ME_MAPPING(createTitleInstance, breakiter_create_title_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
288+
PHP_ME_MAPPING(createCodePointInstance, breakiter_create_code_point_instance, ainfo_biter_void, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
289+
PHP_ME_MAPPING(getText, breakiter_get_text, ainfo_biter_void, ZEND_ACC_PUBLIC)
290+
PHP_ME_MAPPING(setText, breakiter_set_text, ainfo_biter_setText, ZEND_ACC_PUBLIC)
291+
PHP_ME_MAPPING(first, breakiter_first, ainfo_biter_void, ZEND_ACC_PUBLIC)
292+
PHP_ME_MAPPING(last, breakiter_last, ainfo_biter_void, ZEND_ACC_PUBLIC)
293+
PHP_ME_MAPPING(previous, breakiter_previous, ainfo_biter_void, ZEND_ACC_PUBLIC)
294+
PHP_ME_MAPPING(next, breakiter_next, ainfo_biter_next, ZEND_ACC_PUBLIC)
295+
PHP_ME_MAPPING(current, breakiter_current, ainfo_biter_void, ZEND_ACC_PUBLIC)
296+
PHP_ME_MAPPING(following, breakiter_following, ainfo_biter_offset, ZEND_ACC_PUBLIC)
297+
PHP_ME_MAPPING(preceding, breakiter_preceding, ainfo_biter_offset, ZEND_ACC_PUBLIC)
298+
PHP_ME_MAPPING(isBoundary, breakiter_is_boundary, ainfo_biter_offset, ZEND_ACC_PUBLIC)
299+
PHP_ME_MAPPING(getLocale, breakiter_get_locale, ainfo_biter_void, ZEND_ACC_PUBLIC)
300+
PHP_ME_MAPPING(getPartsIterator, breakiter_get_parts_iterator, ainfo_biter_getPartsIterator, ZEND_ACC_PUBLIC)
301+
302+
PHP_ME_MAPPING(getErrorCode, breakiter_get_error_code, ainfo_biter_void, ZEND_ACC_PUBLIC)
303+
PHP_ME_MAPPING(getErrorMessage, breakiter_get_error_message, ainfo_biter_void, ZEND_ACC_PUBLIC)
304+
PHP_FE_END
305+
};
306+
/* }}} */
307+
308+
/* {{{ RuleBasedBreakIterator_class_functions
 * Method table used when registering the 'IntlRuleBasedBreakIterator'
 * class: a public constructor plus the rule-inspection methods.
 */
310+
static const zend_function_entry RuleBasedBreakIterator_class_functions[] = {
311+
PHP_ME(IntlRuleBasedBreakIterator, __construct, ainfo_rbbi___construct, ZEND_ACC_PUBLIC)
312+
PHP_ME_MAPPING(getRules, rbbi_get_rules, ainfo_biter_void, ZEND_ACC_PUBLIC)
313+
PHP_ME_MAPPING(getRuleStatus, rbbi_get_rule_status, ainfo_biter_void, ZEND_ACC_PUBLIC)
314+
PHP_ME_MAPPING(getRuleStatusVec, rbbi_get_rule_status_vec, ainfo_biter_void, ZEND_ACC_PUBLIC)
315+
/* getBinaryRules is only exposed when building against ICU 4.8 or newer
 * (major * 10 + minor >= 48). */
#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
316+
PHP_ME_MAPPING(getBinaryRules, rbbi_get_binary_rules, ainfo_biter_void, ZEND_ACC_PUBLIC)
317+
#endif
318+
PHP_FE_END
319+
};
320+
/* }}} */
321+
322+
/* {{{ CodePointBreakIterator_class_functions
 * Method table used when registering the 'IntlCodePointBreakIterator'
 * class; it adds a single public method, getLastCodePoint, to those of
 * the parent class.
 */
324+
static const zend_function_entry CodePointBreakIterator_class_functions[] = {
325+
PHP_ME_MAPPING(getLastCodePoint, cpbi_get_last_code_point, ainfo_biter_void, ZEND_ACC_PUBLIC)
326+
PHP_FE_END
327+
};
328+
/* }}} */
329+
330+
331+
/* {{{ breakiterator_register_BreakIterator_class
332+
* Initialize 'BreakIterator' class
333+
*/
334+
U_CFUNC void breakiterator_register_BreakIterator_class(TSRMLS_D)
335+
{
336+
zend_class_entry ce;
337+
338+
/* Create and register 'BreakIterator' class. */
339+
INIT_CLASS_ENTRY(ce, "IntlBreakIterator", BreakIterator_class_functions);
340+
ce.create_object = BreakIterator_object_create;
341+
ce.get_iterator = _breakiterator_get_iterator;
342+
BreakIterator_ce_ptr = zend_register_internal_class(&ce TSRMLS_CC);
343+
344+
memcpy(&BreakIterator_handlers, zend_get_std_object_handlers(),
345+
sizeof BreakIterator_handlers);
346+
BreakIterator_handlers.compare_objects = BreakIterator_compare_objects;
347+
BreakIterator_handlers.clone_obj = BreakIterator_clone_obj;
348+
BreakIterator_handlers.get_debug_info = BreakIterator_get_debug_info;
349+
350+
zend_class_implements(BreakIterator_ce_ptr TSRMLS_CC, 1,
351+
zend_ce_traversable);
352+
353+
zend_declare_class_constant_long(BreakIterator_ce_ptr,
354+
"DONE", sizeof("DONE") - 1, BreakIterator::DONE TSRMLS_CC );
355+
356+
/* Declare constants that are defined in the C header */
357+
#define BREAKITER_DECL_LONG_CONST(name) \
358+
zend_declare_class_constant_long(BreakIterator_ce_ptr, #name, \
359+
sizeof(#name) - 1, UBRK_ ## name TSRMLS_CC)
360+
361+
BREAKITER_DECL_LONG_CONST(WORD_NONE);
362+
BREAKITER_DECL_LONG_CONST(WORD_NONE_LIMIT);
363+
BREAKITER_DECL_LONG_CONST(WORD_NUMBER);
364+
BREAKITER_DECL_LONG_CONST(WORD_NUMBER_LIMIT);
365+
BREAKITER_DECL_LONG_CONST(WORD_LETTER);
366+
BREAKITER_DECL_LONG_CONST(WORD_LETTER_LIMIT);
367+
BREAKITER_DECL_LONG_CONST(WORD_KANA);
368+
BREAKITER_DECL_LONG_CONST(WORD_KANA_LIMIT);
369+
BREAKITER_DECL_LONG_CONST(WORD_IDEO);
370+
BREAKITER_DECL_LONG_CONST(WORD_IDEO_LIMIT);
371+
372+
BREAKITER_DECL_LONG_CONST(LINE_SOFT);
373+
BREAKITER_DECL_LONG_CONST(LINE_SOFT_LIMIT);
374+
BREAKITER_DECL_LONG_CONST(LINE_HARD);
375+
BREAKITER_DECL_LONG_CONST(LINE_HARD_LIMIT);
376+
377+
BREAKITER_DECL_LONG_CONST(SENTENCE_TERM);
378+
BREAKITER_DECL_LONG_CONST(SENTENCE_TERM_LIMIT);
379+
BREAKITER_DECL_LONG_CONST(SENTENCE_SEP);
380+
BREAKITER_DECL_LONG_CONST(SENTENCE_SEP_LIMIT);
381+
382+
#undef BREAKITER_DECL_LONG_CONST
383+
384+
385+
/* Create and register 'RuleBasedBreakIterator' class. */
386+
INIT_CLASS_ENTRY(ce, "IntlRuleBasedBreakIterator",
387+
RuleBasedBreakIterator_class_functions);
388+
RuleBasedBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce,
389+
BreakIterator_ce_ptr, NULL TSRMLS_CC);
390+
391+
/* Create and register 'CodePointBreakIterator' class. */
392+
INIT_CLASS_ENTRY(ce, "IntlCodePointBreakIterator",
393+
CodePointBreakIterator_class_functions);
394+
CodePointBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce,
395+
BreakIterator_ce_ptr, NULL TSRMLS_CC);
396+
}
397+
/* }}} */

0 commit comments

Comments
 (0)