@@ -210,6 +210,12 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
210
210
ZEND_ARG_INFO (0 , status )
211
211
ZEND_END_ARG_INFO ()
212
212
213
+ ZEND_BEGIN_ARG_INFO_EX (arginfo_mb_str_split , 0 , 0 , 1 )
214
+ ZEND_ARG_INFO (0 , str )
215
+ ZEND_ARG_INFO (0 , split_length )
216
+ ZEND_ARG_INFO (0 , encoding )
217
+ ZEND_END_ARG_INFO ()
218
+
213
219
ZEND_BEGIN_ARG_INFO_EX (arginfo_mb_strlen , 0 , 0 , 1 )
214
220
ZEND_ARG_INFO (0 , str )
215
221
ZEND_ARG_INFO (0 , encoding )
@@ -507,6 +513,7 @@ static const zend_function_entry mbstring_functions[] = {
507
513
PHP_FE (mb_parse_str , arginfo_mb_parse_str )
508
514
PHP_FE (mb_output_handler , arginfo_mb_output_handler )
509
515
PHP_FE (mb_preferred_mime_name , arginfo_mb_preferred_mime_name )
516
+ PHP_FE (mb_str_split , arginfo_mb_str_split )
510
517
PHP_FE (mb_strlen , arginfo_mb_strlen )
511
518
PHP_FE (mb_strpos , arginfo_mb_strpos )
512
519
PHP_FE (mb_strrpos , arginfo_mb_strrpos )
@@ -2173,6 +2180,169 @@ PHP_FUNCTION(mb_output_handler)
2173
2180
}
2174
2181
/* }}} */
2175
2182
2183
/* {{{ proto array mb_str_split(string str [, int split_length [, string encoding]])
   Convert a multibyte string to an array. If split_length is specified,
   break the string down into chunks each split_length characters long.
   Returns FALSE if split_length is not positive or the encoding cannot be resolved. */
2186
+
2187
+ /* structure to pass split params to the callback */
2188
+ struct mbfl_split_params {
2189
+ zval * return_value ; /* php function return value structure pointer */
2190
+ mbfl_string * result_string ; /* string to store result chunk */
2191
+ size_t mb_chunk_length ; /* actual chunk length in chars */
2192
+ size_t split_length ; /* split length in chars */
2193
+ mbfl_convert_filter * next_filter ; /* widechar to encoding converter */
2194
+ };
2195
+
2196
+ /* callback function to fill split array */
2197
+ static int mbfl_split_output (int c , void * data )
2198
+ {
2199
+ struct mbfl_split_params * params = (struct mbfl_split_params * )data ; /* cast passed data */
2200
+
2201
+ (* params -> next_filter -> filter_function )(c , params -> next_filter ); /* decoder filter */
2202
+
2203
+ if (params -> split_length == ++ params -> mb_chunk_length ) { /* if current chunk size reached defined chunk size or last char reached */
2204
+ mbfl_convert_filter_flush (params -> next_filter );/* concatenate separate decoded chars to the solid string */
2205
+ mbfl_memory_device * device = (mbfl_memory_device * )params -> next_filter -> data ; /* chars container */
2206
+ mbfl_string * chunk = params -> result_string ;
2207
+ mbfl_memory_device_result (device , chunk ); /* make chunk */
2208
+ add_next_index_stringl (params -> return_value , (const char * )chunk -> val , chunk -> len ); /* add chunk to the array */
2209
+ efree (chunk -> val );
2210
+ params -> mb_chunk_length = 0 ; /* reset mb_chunk size */
2211
+ }
2212
+ return 0 ;
2213
+ }
2214
+
2215
+ PHP_FUNCTION (mb_str_split )
2216
+ {
2217
+ zend_string * str , * encoding = NULL ;
2218
+ size_t mb_len , chunks , chunk_len ;
2219
+ const char * p , * last ; /* pointer for the string cursor and last string char */
2220
+ mbfl_string string , result_string ;
2221
+ const mbfl_encoding * mbfl_encoding ;
2222
+ zend_long split_length = 1 ;
2223
+
2224
+ ZEND_PARSE_PARAMETERS_START (1 , 3 )
2225
+ Z_PARAM_STR (str )
2226
+ Z_PARAM_OPTIONAL
2227
+ Z_PARAM_LONG (split_length )
2228
+ Z_PARAM_STR (encoding )
2229
+ ZEND_PARSE_PARAMETERS_END ();
2230
+
2231
+ if (split_length <= 0 ) {
2232
+ php_error_docref (NULL , E_WARNING , "The length of each segment must be greater than zero" );
2233
+ RETURN_FALSE ;
2234
+ }
2235
+
2236
+ /* fill mbfl_string structure */
2237
+ string .val = (unsigned char * ) ZSTR_VAL (str );
2238
+ string .len = ZSTR_LEN (str );
2239
+ string .no_language = MBSTRG (language );
2240
+ string .encoding = php_mb_get_encoding (encoding );
2241
+ if (!string .encoding ) {
2242
+ RETURN_FALSE ;
2243
+ }
2244
+
2245
+ p = ZSTR_VAL (str ); /* string cursor pointer */
2246
+ last = ZSTR_VAL (str ) + ZSTR_LEN (str ); /* last string char pointer */
2247
+
2248
+ mbfl_encoding = string .encoding ;
2249
+
2250
+ /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
2251
+ if (mbfl_encoding -> flag & MBFL_ENCTYPE_SBCS ) { /* 1 byte */
2252
+ mb_len = string .len ;
2253
+ chunk_len = (size_t )split_length ; /* chunk length in bytes */
2254
+ } else if (mbfl_encoding -> flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE )) { /* 2 bytes */
2255
+ mb_len = string .len / 2 ;
2256
+ chunk_len = split_length * 2 ;
2257
+ } else if (mbfl_encoding -> flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE )) { /* 4 bytes */
2258
+ mb_len = string .len / 4 ;
2259
+ chunk_len = split_length * 4 ;
2260
+ } else if (mbfl_encoding -> mblen_table != NULL ) {
2261
+ /* second scenario: variable width encodings with length table */
2262
+ char unsigned const * mbtab = mbfl_encoding -> mblen_table ;
2263
+
2264
+ /* assume that we have 1-bytes characters */
2265
+ array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
2266
+
2267
+ while (p < last ) { /* split cycle work until the cursor has reached the last byte */
2268
+ char const * chunk_p = p ; /* chunk first byte pointer */
2269
+ chunk_len = 0 ; /* chunk length in bytes */
2270
+ for (zend_long char_count = 0 ; char_count < split_length && p < last ; ++ char_count ) {
2271
+ char unsigned const m = mbtab [* (const unsigned char * )p ]; /* single character length table */
2272
+ chunk_len += m ;
2273
+ p += m ;
2274
+ }
2275
+ if (p >= last ) chunk_len -= p - last ; /* check if chunk is in bounds */
2276
+ add_next_index_stringl (return_value , chunk_p , chunk_len );
2277
+ }
2278
+ return ;
2279
+ } else {
2280
+ /* third scenario: other multibyte encodings */
2281
+ mbfl_convert_filter * filter , * decoder ;
2282
+
2283
+ /* assume that we have 1-bytes characters */
2284
+ array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
2285
+
2286
+ /* decoder filter to decode wchar to encoding */
2287
+ mbfl_memory_device device ;
2288
+ mbfl_memory_device_init (& device , split_length + 1 , 0 );
2289
+
2290
+ decoder = mbfl_convert_filter_new (
2291
+ & mbfl_encoding_wchar ,
2292
+ string .encoding ,
2293
+ mbfl_memory_device_output ,
2294
+ NULL ,
2295
+ & device );
2296
+ /* if something wrong with the decoded */
2297
+ if (decoder == NULL ) {
2298
+ RETURN_FALSE ;
2299
+ }
2300
+
2301
+ /* wchar filter */
2302
+ mbfl_string_init (& result_string ); /* mbfl_string to store chunk in the callback */
2303
+ struct mbfl_split_params params = { /* init callback function params structure */
2304
+ .return_value = return_value ,
2305
+ .result_string = & result_string ,
2306
+ .mb_chunk_length = 0 ,
2307
+ .split_length = (size_t )split_length ,
2308
+ .next_filter = decoder ,
2309
+ };
2310
+
2311
+ filter = mbfl_convert_filter_new (
2312
+ string .encoding ,
2313
+ & mbfl_encoding_wchar ,
2314
+ mbfl_split_output ,
2315
+ NULL ,
2316
+ & params );
2317
+ /* if something wrong with the filter */
2318
+ if (filter == NULL ){
2319
+ mbfl_convert_filter_delete (decoder ); /* this will free allocated memory for the decoded */
2320
+ RETURN_FALSE ;
2321
+ }
2322
+
2323
+ while (p < last - 1 ) { /* cycle each byte except last with callback function */
2324
+ (* filter -> filter_function )(* p ++ , filter );
2325
+ }
2326
+ params .mb_chunk_length = split_length - 1 ; /* force to finish current chunk */
2327
+ (* filter -> filter_function )(* p ++ , filter ); /*process last char */
2328
+
2329
+ mbfl_convert_filter_delete (decoder );
2330
+ mbfl_convert_filter_delete (filter );
2331
+ return ;
2332
+ }
2333
+
2334
+ /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
2335
+ chunks = (mb_len + split_length - 1 ) / split_length ; /* (round up idiom) */
2336
+ array_init_size (return_value , chunks );
2337
+ if (chunks != 0 ) {
2338
+ for (zend_long i = 0 ; i < chunks - 1 ; p += chunk_len , ++ i ) {
2339
+ add_next_index_stringl (return_value , p , chunk_len );
2340
+ }
2341
+ add_next_index_stringl (return_value , p , last - p );
2342
+ }
2343
+ }
2344
+ /* }}} */
2345
+
2176
2346
/* {{{ proto int mb_strlen(string str [, string encoding])
2177
2347
Get character numbers of a string */
2178
2348
PHP_FUNCTION (mb_strlen )
0 commit comments