Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[12.x] Add Eloquent memory optimization features for large datasets #55215

Draft
wants to merge 2 commits into
base: 12.x
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions src/Illuminate/Database/Concerns/BuildsQueries.php
Original file line number Diff line number Diff line change
Expand Up @@ -611,4 +611,120 @@ public function pipe($callback)
{
return $callback($this) ?? $this;
}

/**
 * Chunk the results of the query, adapting the chunk size to observed memory usage.
 *
 * Memory growth is measured with memory_get_usage() relative to the value
 * sampled when this method starts. After every chunk the size of the next
 * chunk is reduced by 25% (never below 10 rows) when the growth exceeds the
 * budget, or increased by 25% (never above 10000 rows) when the growth is
 * below half of the budget.
 *
 * NOTE(review): no ordering is enforced here; offset-based paging is only
 * stable when the query itself has a deterministic order — confirm callers
 * order their queries.
 *
 * @param  int  $maxMemoryUsage  Maximum memory usage in megabytes
 * @param  callable(\Illuminate\Support\Collection<int, TValue>, int): mixed  $callback  Receives the chunk and its 1-based sequence number; returning false stops the iteration
 * @return bool  False when the callback returned false, true otherwise
 */
public function chunkByMemory($maxMemoryUsage, callable $callback)
{
    $maxMemoryBytes = $maxMemoryUsage * 1024 * 1024; // Convert MB to bytes
    $initialMemory = memory_get_usage();

    // Start with a reasonable chunk size
    $chunkSize = 1000;
    $page = 1;

    // Rows are addressed through an explicit running offset. A page-based
    // offset is derived from the *current* chunk size, so resizing the chunk
    // between iterations would skip or repeat rows.
    $offset = 0;

    do {
        // Remember the size that was actually requested for this iteration,
        // because $chunkSize may be adjusted before the loop condition runs.
        $requestedSize = $chunkSize;

        // Clone the query so the offset / limit never leak onto the original
        $results = (clone $this)->offset($offset)->limit($requestedSize)->get();

        $countResults = $results->count();

        if ($countResults === 0) {
            break;
        }

        if ($callback($results, $page) === false) {
            return false;
        }

        $offset += $countResults;
        $page++;

        // Measure while the chunk is still referenced so its footprint is
        // part of the delta that drives the resize decision.
        $memoryDelta = memory_get_usage() - $initialMemory;

        if ($memoryDelta > $maxMemoryBytes) {
            // Over budget: shrink the next chunk
            $chunkSize = max(10, (int) ($chunkSize * 0.75));
        } elseif ($memoryDelta < ($maxMemoryBytes * 0.5)) {
            // Under half of the budget: grow the next chunk
            $chunkSize = min(10000, (int) ($chunkSize * 1.25));
        }

        // Drop the chunk first, then collect cycles, so the collector can
        // actually reclaim the models that were just processed.
        unset($results);

        if (function_exists('gc_collect_cycles')) {
            gc_collect_cycles();
        }

        // A short chunk means the result set is exhausted, which saves one
        // extra query that would only return an empty collection.
    } while ($countResults === $requestedSize);

    return true;
}

/**
 * Create a lazy collection from the query with automatic memory management.
 *
 * Rows are fetched in chunks and yielded one at a time. After each chunk has
 * been consumed, memory growth (memory_get_usage() relative to the value
 * sampled when iteration starts) decides the size of the next chunk: it is
 * reduced by 25% (never below 10 rows) when growth exceeds the budget, or
 * increased by 25% (never above 10000 rows) when growth is below half of it.
 *
 * NOTE(review): no ordering is enforced here; offset-based paging is only
 * stable when the query itself has a deterministic order — confirm callers
 * order their queries.
 *
 * @param  int  $maxMemoryUsage  Maximum memory usage in megabytes
 * @return \Illuminate\Support\LazyCollection<int, TValue>
 */
public function lazyByMemory($maxMemoryUsage = 100)
{
    return new LazyCollection(function () use ($maxMemoryUsage) {
        $maxMemoryBytes = $maxMemoryUsage * 1024 * 1024; // Convert MB to bytes
        $initialMemory = memory_get_usage();

        // Start with a reasonable chunk size
        $chunkSize = 1000;

        // Rows are addressed through an explicit running offset. A page-based
        // offset is derived from the *current* chunk size, so resizing the
        // chunk between iterations would skip or repeat rows.
        $offset = 0;

        while (true) {
            // Clone the query so the offset / limit never leak onto the original
            $results = (clone $this)->offset($offset)->limit($chunkSize)->get();

            $countResults = $results->count();

            if ($countResults === 0) {
                break;
            }

            foreach ($results as $item) {
                yield $item;
            }

            // A short chunk means the result set is exhausted, which saves one
            // extra query that would only return an empty collection.
            if ($countResults < $chunkSize) {
                break;
            }

            $offset += $countResults;

            // Measure while the chunk is still referenced so its footprint is
            // part of the delta that drives the resize decision.
            $memoryDelta = memory_get_usage() - $initialMemory;

            if ($memoryDelta > $maxMemoryBytes) {
                // Over budget: shrink the next chunk
                $chunkSize = max(10, (int) ($chunkSize * 0.75));
            } elseif ($memoryDelta < ($maxMemoryBytes * 0.5)) {
                // Under half of the budget: grow the next chunk
                $chunkSize = min(10000, (int) ($chunkSize * 1.25));
            }

            // Drop the chunk (and the last yielded item) first, then collect
            // cycles, so the collector can actually reclaim them.
            unset($results, $item);

            if (function_exists('gc_collect_cycles')) {
                gc_collect_cycles();
            }
        }
    });
}
}
114 changes: 114 additions & 0 deletions src/Illuminate/Database/Eloquent/Builder.php
Original file line number Diff line number Diff line change
Expand Up @@ -854,11 +854,80 @@ protected function eagerLoadRelation(array $models, $name, Closure $constraints)
// using the relationship instance. Then we just return the finished arrays
// of models which have been eagerly hydrated and are readied for return.
return $relation->match(
$models,
$relation->initRelation($models, $name),
$relation->getEager(), $name
);
}

/**
 * Eagerly load the relationship on a set of models, reading the related rows in chunks.
 *
 * Chunking bounds the size of each individual result set that is fetched and
 * hydrated. All related models are still accumulated before matching, because
 * matching needs the complete set, so peak memory for the related models
 * themselves is not reduced.
 *
 * @param  array  $models
 * @param  string  $name
 * @param  int  $chunkSize
 * @param  \Closure|null  $constraints  Optional callback that receives the relation and may constrain its query
 * @return array
 */
protected function eagerLoadRelationChunked(array $models, $name, $chunkSize = 500, ?\Closure $constraints = null)
{
    $relation = $this->getRelation($name);

    $relation->addEagerConstraints($models);

    // Apply the caller's constraints; without this the chunked path would
    // load a different (unconstrained) result set than the regular path.
    if ($constraints !== null) {
        $constraints($relation);
    }

    // Collect into a plain list: merging keyed collections chunk by chunk
    // copies the growing set on every iteration and can collapse entries
    // that share a key.
    $related = [];

    // Chunk through the relation rather than its bare query.
    // NOTE(review): relation types that define their own chunk() (e.g.
    // many-to-many pivot hydration) are expected to need this — confirm.
    $relation->chunk($chunkSize, function ($chunk) use (&$related) {
        foreach ($chunk as $model) {
            $related[] = $model;
        }
    });

    // match() takes (models, results, relation name); the models argument is
    // the array returned by initRelation(), and the results must be an
    // Eloquent collection rather than a base support collection.
    return $relation->match(
        $relation->initRelation($models, $name),
        $relation->getRelated()->newCollection($related),
        $name
    );
}

/**
 * Eager load relation with low memory footprint.
 *
 * Delegates to the chunked loader when chunking is enabled and the number of
 * parent models exceeds the chunk size; otherwise performs a regular eager
 * load. The constraints are applied on both paths.
 *
 * @param  array  $models
 * @param  string  $name
 * @param  \Closure  $constraints  Callback that receives the relation and may constrain its query
 * @param  bool  $useChunking
 * @param  int  $chunkSize
 * @return array
 */
protected function eagerLoadLowMemory(array $models, $name, \Closure $constraints, $useChunking = true, $chunkSize = 500)
{
    if ($useChunking && count($models) > $chunkSize) {
        return $this->eagerLoadRelationChunked($models, $name, $chunkSize, $constraints);
    }

    $relation = $this->getRelation($name);

    $relation->addEagerConstraints($models);

    $constraints($relation);

    // match() takes (models, results, relation name); passing $models as an
    // additional leading argument would shift the initRelation() array into
    // the results parameter.
    return $relation->match(
        $relation->initRelation($models, $name),
        $relation->getEager(),
        $name
    );
}

/**
* Get the relation instance for the given relation name.
*
Expand Down Expand Up @@ -2238,4 +2307,49 @@ public function __clone()
$onCloneCallback($this);
}
}

/**
 * Eager load relations with a limit on each relation to reduce memory usage.
 *
 * Limits are looked up by the full relation name as produced by
 * parseWithRelations() (including dot notation for nested relations).
 * Relations without a numeric entry in $limits are eager loaded unchanged.
 *
 * To load several relations pass them as an array; the array type on the
 * second parameter means a second relation name cannot be given positionally.
 *
 * @param  mixed  $relations  A single relation name or an array accepted by with()
 * @param  array  $limits  Array with relation names as keys and their limits as values
 * @return $this
 */
public function withLimited($relations, array $limits = [])
{
    // A lone relation name is wrapped directly. Re-reading the raw argument
    // list here would let stray arguments (or the limits array itself) leak
    // into the list of relation names.
    if (is_string($relations)) {
        $relations = [$relations];
    }

    $limitedEagerLoad = [];

    foreach ($this->parseWithRelations($relations) as $name => $constraints) {
        if (! isset($limits[$name]) || ! is_numeric($limits[$name])) {
            // No usable limit for this relation: keep its constraints as-is
            $limitedEagerLoad[$name] = $constraints;

            continue;
        }

        $limitValue = (int) $limits[$name];

        $limitedEagerLoad[$name] = function ($builder) use ($constraints, $limitValue) {
            // Apply the original constraints first
            if ($constraints instanceof Closure) {
                $constraints($builder);
            }

            // Then apply the limit
            return $builder->limit($limitValue);
        };
    }

    return $this->with($limitedEagerLoad);
}
}
85 changes: 85 additions & 0 deletions src/Illuminate/Database/Eloquent/Model.php
Original file line number Diff line number Diff line change
Expand Up @@ -2459,4 +2459,89 @@ public function __wakeup()

$this->initializeTraits();
}

/**
 * Optimize memory usage by unloading unused relationships and attributes.
 *
 * Passing null for either list leaves that part of the model untouched.
 * Passing a list keeps only the named entries; the primary key attribute is
 * always kept when attributes are filtered.
 *
 * @param  array|string|null  $relations  Relations to keep, others will be unloaded
 * @param  array|string|null  $attributes  Attributes to keep, others will be unloaded
 * @param  bool  $collectGarbage  Whether to run the cycle collector afterwards; disable when optimizing many models in a loop and collect once at the end
 * @return $this
 */
public function optimizeMemory($relations = null, $attributes = null, $collectGarbage = true)
{
    // Keep only specific relations if requested. Filtering by key gives an
    // exact name match in a single pass instead of a loose in_array() scan
    // per loaded relation.
    if ($relations !== null) {
        $keptRelations = array_flip(is_array($relations) ? $relations : [$relations]);

        $this->relations = array_intersect_key($this->relations, $keptRelations);
    }

    // Keep only specific attributes if requested
    if ($attributes !== null) {
        $keptAttributes = array_flip(is_array($attributes) ? $attributes : [$attributes]);

        // Always keep primary key
        $keptAttributes[$this->getKeyName()] = true;

        $this->attributes = array_intersect_key($this->attributes, $keptAttributes);
    }

    // A full cycle collection is comparatively expensive, so callers that
    // process many models can opt out and collect once themselves.
    if ($collectGarbage && function_exists('gc_collect_cycles')) {
        gc_collect_cycles();
    }

    return $this;
}

/**
 * Create a collection of models with memory optimization after loading.
 *
 * @param  array  $models
 * @param  array|string|null  $keepRelations  Relations to keep, others will be unloaded
 * @param  array|string|null  $keepAttributes  Attributes to keep, others will be unloaded
 * @return \Illuminate\Database\Eloquent\Collection
 */
public static function optimizedCollection(array $models, $keepRelations = null, $keepAttributes = null)
{
    // An instance is needed only to obtain the model's collection type
    $collection = (new static)->newCollection($models);

    // Trim every model without collecting per model; running the cycle
    // collector once per item would dominate the cost for large collections.
    foreach ($collection as $model) {
        $model->optimizeMemory($keepRelations, $keepAttributes, false);
    }

    // Collect once after all models have been trimmed
    if (function_exists('gc_collect_cycles')) {
        gc_collect_cycles();
    }

    return $collection;
}

/**
 * Cleanup model to free memory.
 *
 * Releases the loaded relations, which are the only potentially large data
 * this method touches. The hidden, visible, appends, touches and observables
 * lists are deliberately left intact: they are small configuration arrays
 * that drive serialization, parent touching and custom events, and emptying
 * them would change how the model behaves afterwards (for example, hidden
 * attributes would start appearing in serialized output) while freeing a
 * negligible amount of memory.
 *
 * @return $this
 */
public function cleanup()
{
    $this->relations = [];

    // Force garbage collection
    if (function_exists('gc_collect_cycles')) {
        gc_collect_cycles();
    }

    return $this;
}
}
Loading
Loading