Skip to content

Track HashTableIterators for copy-on-write copies of HashTables #11248

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Zend/tests/gh11222.phpt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
--TEST--
GH-112222: foreach by-ref may jump over keys during a rehash
GH-11222: foreach by-ref may jump over keys during a rehash
--FILE--
<?php

Expand Down
22 changes: 22 additions & 0 deletions Zend/tests/gh11244-001.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
--TEST--
GH-11244: Modifying a copied by-ref iterated array resets the array position (packed)
--FILE--
<?php

// Packed-array variant of the GH-11244 scenario (see the test title).
$data = [0, 1, 2];

// Iterate by reference so that the loop tracks the live array.
foreach ($data as $key => &$value) {
echo "$value\n";
if ($value === 1) {
// Create a second handle on the array while it is being iterated;
// the test title refers to this as copying the by-ref iterated array.
$cow_copy = $data;
echo "unset $value\n";
// Modify the iterated array after the copy exists. The expected output
// requires the loop to continue with 2 rather than reset its position.
unset($data[$key]);
}
}

?>
--EXPECTF--
0
1
unset 1
2
22 changes: 22 additions & 0 deletions Zend/tests/gh11244-002.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
--TEST--
GH-11244: Modifying a copied by-ref iterated array resets the array position (not packed)
--FILE--
<?php

// Non-packed variant: the string key "k" is what the test title means by
// "not packed".
$data = ["k" => 0, 1, 2];

// Iterate by reference so that the loop tracks the live array.
foreach ($data as $key => &$value) {
echo "$value\n";
if ($value === 1) {
// Create a second handle on the array while it is being iterated.
$cow_copy = $data;
echo "unset $value\n";
// Modify the iterated array after the copy exists. The expected output
// requires the loop to continue with 2 rather than reset its position.
unset($data[$key]);
}
}

?>
--EXPECTF--
0
1
unset 1
2
23 changes: 23 additions & 0 deletions Zend/tests/gh11244-003.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
--TEST--
GH-11244: Modifying a copied by-ref iterated array resets the array position (not packed with holes)
--FILE--
<?php

// Non-packed array with a hole: key 1 (value 2) is removed before iterating,
// so the remaining values are 0, 1 and 3.
$data = ["k" => 0, 1, 2, 3];
unset($data[1]);

// Iterate by reference so that the loop tracks the live array.
foreach ($data as $key => &$value) {
echo "$value\n";
if ($value === 1) {
// Create a second handle on the array while it is being iterated.
$cow_copy = $data;
echo "unset $value\n";
// Modify the iterated array after the copy exists. The expected output
// requires the loop to continue with 3 rather than reset its position.
unset($data[$key]);
}
}

?>
--EXPECTF--
0
1
unset 1
3
18 changes: 18 additions & 0 deletions Zend/tests/gh11244-004.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
--TEST--
GH-11244: Modifying a copied by-ref iterated array resets the array position (with object)
--FILE--
<?php

// Object variant: iterate over the properties of an object created from an array.
$obj = (object)[1,2,3];

foreach ($obj as $p => $v) {
echo "$p : $v\n";
// Clone the object while it is being iterated.
// NOTE(review): presumably the clone initially shares the property table
// with $obj - confirm against the engine implementation.
$clone = clone $obj;
// Take a reference to the current property of the iterated object. The
// expected output requires each property to be visited exactly once.
$ref = &$obj->$p;
}

?>
--EXPECTF--
0 : 1
1 : 2
2 : 3
48 changes: 48 additions & 0 deletions Zend/tests/gh11244-005.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
--TEST--
GH-11244: Modifying a copied by-ref iterated array resets the array position (multiple copies)
--FILE--
<?php

// Multiple-copies variant of the GH-11244 scenario (see the test title).
$data = [0, 1, 2];

// Iterate by reference so that the loop tracks the live array.
foreach ($data as $key => &$value) {
echo "$value\n";
if ($value === 1) {
// Store the iterated array three times inside another array.
$cow_copy = [$data, $data, $data];
echo "unset $value\n";
// Modify one copy, then the iterated array itself, then another copy.
// The expected output requires the loop to continue with 2, and
// $cow_copy[1] to remain untouched.
unset($cow_copy[0][$key]);
unset($data[$key]);
unset($cow_copy[2][$key]);
}
}

// Dump the three stored arrays so the expected output can verify which ones changed.
print_r($cow_copy);

?>
--EXPECTF--
0
1
unset 1
2
Array
(
[0] => Array
(
[0] => 0
[2] => 2
)

[1] => Array
(
[0] => 0
[1] => 1
[2] => 2
)

[2] => Array
(
[0] => 0
[2] => 2
)

)
114 changes: 106 additions & 8 deletions Zend/zend_hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,21 @@ ZEND_API HashPosition ZEND_FASTCALL zend_hash_get_current_pos(const HashTable *h
return _zend_hash_get_current_pos(ht);
}

static void zend_hash_remove_iterator_copies(uint32_t idx) {
HashTableIterator *iterators = EG(ht_iterators);

HashTableIterator *iter = iterators + idx;
uint32_t next_idx = iter->next_copy;
while (next_idx != idx) {
uint32_t cur_idx = next_idx;
HashTableIterator *cur_iter = iterators + cur_idx;
next_idx = cur_iter->next_copy;
cur_iter->next_copy = cur_idx; // avoid recursion in zend_hash_iterator_del
zend_hash_iterator_del(cur_idx);
}
iter->next_copy = idx;
}

ZEND_API uint32_t ZEND_FASTCALL zend_hash_iterator_add(HashTable *ht, HashPosition pos)
{
HashTableIterator *iter = EG(ht_iterators);
Expand All @@ -528,6 +543,7 @@ ZEND_API uint32_t ZEND_FASTCALL zend_hash_iterator_add(HashTable *ht, HashPositi
iter->ht = ht;
iter->pos = pos;
idx = iter - EG(ht_iterators);
iter->next_copy = idx;
if (idx + 1 > EG(ht_iterators_used)) {
EG(ht_iterators_used) = idx + 1;
}
Expand All @@ -547,16 +563,49 @@ ZEND_API uint32_t ZEND_FASTCALL zend_hash_iterator_add(HashTable *ht, HashPositi
iter->pos = pos;
memset(iter + 1, 0, sizeof(HashTableIterator) * 7);
idx = iter - EG(ht_iterators);
iter->next_copy = idx;
EG(ht_iterators_used) = idx + 1;
return idx;
}

// Arrays may be separated while they are iterated, so all copies are tracked together.
// This looks through the copies chained to iterator `idx` for one that belongs to `ht`.
// On a match the iterator adopts that copy's HashTable and position and true is returned;
// otherwise false is returned. In both cases the chained copies are released afterwards.
static zend_always_inline bool zend_hash_iterator_find_copy_pos(uint32_t idx, HashTable *ht) {
	HashTableIterator *iter = EG(ht_iterators) + idx;
	uint32_t candidate_idx = iter->next_copy;
	bool matched = false;

	if (UNEXPECTED(candidate_idx == idx)) {
		// The list only contains the iterator itself: there are no copies to inspect
		return false;
	}

	do {
		HashTableIterator *candidate = EG(ht_iterators) + candidate_idx;

		if (candidate->ht == ht) {
			// This copy belongs to the hashtable that is actually being iterated:
			// move the iterator count from the previously tracked table over to it
			HashTable *previous = iter->ht;

			if (EXPECTED(previous) && EXPECTED(previous != HT_POISONED_PTR)
					&& EXPECTED(!HT_ITERATORS_OVERFLOW(previous))) {
				HT_DEC_ITERATORS_COUNT(previous);
			}
			if (EXPECTED(!HT_ITERATORS_OVERFLOW(ht))) {
				HT_INC_ITERATORS_COUNT(ht);
			}
			iter->ht = candidate->ht;
			iter->pos = candidate->pos;
			matched = true;
			break;
		}

		candidate_idx = candidate->next_copy;
	} while (candidate_idx != idx);

	// Whether or not a match was found, the intermediate copies are no longer needed
	zend_hash_remove_iterator_copies(idx);
	return matched;
}

ZEND_API HashPosition ZEND_FASTCALL zend_hash_iterator_pos(uint32_t idx, HashTable *ht)
{
HashTableIterator *iter = EG(ht_iterators) + idx;

ZEND_ASSERT(idx != (uint32_t)-1);
if (UNEXPECTED(iter->ht != ht)) {
if (UNEXPECTED(iter->ht != ht) && !zend_hash_iterator_find_copy_pos(idx, ht)) {
if (EXPECTED(iter->ht) && EXPECTED(iter->ht != HT_POISONED_PTR)
&& EXPECTED(!HT_ITERATORS_OVERFLOW(iter->ht))) {
HT_DEC_ITERATORS_COUNT(iter->ht);
Expand All @@ -576,7 +625,7 @@ ZEND_API HashPosition ZEND_FASTCALL zend_hash_iterator_pos_ex(uint32_t idx, zval
HashTableIterator *iter = EG(ht_iterators) + idx;

ZEND_ASSERT(idx != (uint32_t)-1);
if (UNEXPECTED(iter->ht != ht)) {
if (UNEXPECTED(iter->ht != ht) && !zend_hash_iterator_find_copy_pos(idx, ht)) {
if (EXPECTED(iter->ht) && EXPECTED(iter->ht != HT_POISONED_PTR)
&& EXPECTED(!HT_ITERATORS_OVERFLOW(ht))) {
HT_DEC_ITERATORS_COUNT(iter->ht);
Expand Down Expand Up @@ -605,6 +654,10 @@ ZEND_API void ZEND_FASTCALL zend_hash_iterator_del(uint32_t idx)
}
iter->ht = NULL;

if (UNEXPECTED(iter->next_copy != idx)) {
zend_hash_remove_iterator_copies(idx);
}

if (idx == EG(ht_iterators_used) - 1) {
while (idx > 0 && EG(ht_iterators)[idx - 1].ht == NULL) {
idx--;
Expand Down Expand Up @@ -2286,6 +2339,22 @@ static zend_always_inline bool zend_array_dup_element(HashTable *source, HashTab
return 1;
}

// When an array is duplicated, every iterator registered on `source` gets a twin
// registered on `target`, linked into the same circular `next_copy` list. This allows
// a later lookup to search all copy-on-write copies and recover the HashTable (and
// position) that is actually being iterated.
//
// source: the HashTable being duplicated
// target: the freshly created duplicate
static void zend_array_dup_ht_iterators(HashTable *source, HashTable *target) {
	// Snapshot the bound up front: slots added below refer to `target`, never to
	// `source`, so they do not need to be visited.
	uint32_t end_index = EG(ht_iterators_used);

	// Walk by index rather than by pointer: zend_hash_iterator_add() may grow (and
	// thereby move) EG(ht_iterators), which would leave a cached pointer dangling.
	for (uint32_t iter_index = 0; iter_index != end_index; iter_index++) {
		if (EG(ht_iterators)[iter_index].ht != source) {
			continue;
		}

		uint32_t copy_idx = zend_hash_iterator_add(target, EG(ht_iterators)[iter_index].pos);

		// Fetch both slots only after the add, so they refer to the current storage.
		HashTableIterator *iter = EG(ht_iterators) + iter_index;
		HashTableIterator *copy_iter = EG(ht_iterators) + copy_idx;

		// Splice the copy into the circular list right behind the original.
		copy_iter->next_copy = iter->next_copy;
		iter->next_copy = copy_idx;
	}
}

static zend_always_inline void zend_array_dup_packed_elements(HashTable *source, HashTable *target, bool with_holes)
{
zval *p = source->arPacked;
Expand All @@ -2300,6 +2369,10 @@ static zend_always_inline void zend_array_dup_packed_elements(HashTable *source,
}
p++; q++;
} while (p != end);

if (UNEXPECTED(HT_HAS_ITERATORS(source))) {
zend_array_dup_ht_iterators(source, target);
}
}

static zend_always_inline uint32_t zend_array_dup_elements(HashTable *source, HashTable *target, bool static_keys, bool with_holes)
Expand All @@ -2309,19 +2382,44 @@ static zend_always_inline uint32_t zend_array_dup_elements(HashTable *source, Ha
Bucket *q = target->arData;
Bucket *end = p + source->nNumUsed;

if (UNEXPECTED(HT_HAS_ITERATORS(source))) {
zend_array_dup_ht_iterators(source, target);
}

do {
if (!zend_array_dup_element(source, target, idx, p, q, 0, static_keys, with_holes)) {
uint32_t target_idx = idx;

idx++; p++;
while (p != end) {
if (zend_array_dup_element(source, target, target_idx, p, q, 0, static_keys, with_holes)) {
if (source->nInternalPointer == idx) {
target->nInternalPointer = target_idx;
if (EXPECTED(!HT_HAS_ITERATORS(target))) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may be optimized for arrays without holes (note that the function is always inlined)

			if (!with_holes || EXPECTED(!HT_HAS_ITERATORS(target))) {

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point, changed :-)

Copy link
Member Author

@bwoebi bwoebi May 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, zend_array_dup_value has a branch where, even if the array is not packed and has no holes, it may still return 0 when an indirect element is undef. Because of that edge case, I'm afraid we cannot do that.

Copy link
Member Author

@bwoebi bwoebi May 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably fine to do it given no copy-on-writes are ever supposed to happen on arrays containing IS_INDIRECT elements, but ... meh.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like my suggestion was wrong. It's better to be safe.
It's also possible to insert empty Buckets instead of compaction when iterators are used.

while (p != end) {
if (zend_array_dup_element(source, target, target_idx, p, q, 0, static_keys, with_holes)) {
if (source->nInternalPointer == idx) {
target->nInternalPointer = target_idx;
}
target_idx++; q++;
}
idx++; p++;
}
} else {
target->nNumUsed = source->nNumOfElements;
uint32_t iter_pos = zend_hash_iterators_lower_pos(target, idx);

while (p != end) {
if (zend_array_dup_element(source, target, target_idx, p, q, 0, static_keys, with_holes)) {
if (source->nInternalPointer == idx) {
target->nInternalPointer = target_idx;
}
if (UNEXPECTED(idx >= iter_pos)) {
do {
zend_hash_iterators_update(target, iter_pos, target_idx);
iter_pos = zend_hash_iterators_lower_pos(target, iter_pos + 1);
} while (iter_pos < idx);
}
target_idx++; q++;
}
target_idx++; q++;
idx++; p++;
}
idx++; p++;
}
return target_idx;
}
Expand Down
1 change: 1 addition & 0 deletions Zend/zend_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ typedef uint32_t HashPosition;
/* One slot of the EG(ht_iterators) table: records which HashTable an iterator
 * walks and where it currently stands. */
typedef struct _HashTableIterator {
/* HashTable this iterator is registered on; set to NULL when the slot is released */
HashTable *ht;
/* Current position of the iterator within ht */
HashPosition pos;
/* Index of the next iterator tracking a copy of the same array; equals the
 * slot's own index when there are no copies */
uint32_t next_copy; // circular linked list via index into EG(ht_iterators)
} HashTableIterator;

struct _zend_object {
Expand Down