OptimizingTable.php
PHP
Path: src/Table/Wrappers/OptimizingTable.php
<?php
namespace mini\Table\Wrappers;
use mini\Table\AbstractTable;
use mini\Table\ColumnDef;
use mini\Table\Contracts\TableInterface;
use mini\Table\Index\TreapIndex;
use mini\Table\InMemoryTable;
use mini\Table\Types\IndexType;
use Traversable;
/**
* Adaptive optimization wrapper that measures and improves table access patterns
*
* Wraps any table and transparently optimizes repeated operations:
* - Builds TreapIndex for frequently accessed columns
* - Escalates to SQLite (InMemoryTable) for large datasets
* - Measures actual performance to make decisions
*
* Use hints when caller knows access patterns upfront:
* ```php
* $optimized = OptimizingTable::from($table)
* ->withExpectedHasCalls(10000)
* ->withIndexOn('user_id', 'order_id');
* ```
*/
class OptimizingTable extends AbstractTableWrapper
{
// Thresholds
private const MEASURE_CALLS = 3; // Minimum calls before deciding
private const INDEX_BENEFIT_THRESHOLD_MS = 50.0; // Build index if saves >50ms total
private const SQLITE_ROW_THRESHOLD = 500_000;
private const SQLITE_TIME_THRESHOLD = 2.0; // >2s estimated = use SQLite
// Hints
private int $expectedHasCalls = 0;
private int $expectedEqCalls = 0;
private array $indexColumns = [];
// Measurement state
private int $hasCallCount = 0;
private float $hasTotalTime = 0;
private array $eqCallCounts = []; // column => count
private array $eqTotalTimes = []; // column => total time
// Optimization structures
private ?TreapIndex $hasIndex = null; // Full-row index for has()
private array $columnIndexes = []; // column => TreapIndex
private ?InMemoryTable $materialized = null;
// Strategy: 'measure', 'indexed', 'sqlite'
private string $hasStrategy = 'measure';
private array $eqStrategies = []; // column => strategy
// Cache for index building
private ?array $rowCache = null;
/**
* Wrap a table for optimization, or return if already wrapped
*/
public static function from(TableInterface $table): self
{
if ($table instanceof self) {
return $table;
}
if (!$table instanceof AbstractTable) {
throw new \InvalidArgumentException(
'OptimizingTable requires AbstractTable, got ' . get_class($table)
);
}
return new self($table);
}
/**
* Hint: expected number of has() calls
*
* Used after measurement to estimate total time and decide on optimization.
* Does NOT prebuild - always measure first, then decide.
*/
public function withExpectedHasCalls(int $n): self
{
$c = clone $this;
$c->expectedHasCalls = $n;
return $c;
}
/**
* Hint: expected number of eq() calls
*
* Used after measurement to estimate total time and decide on optimization.
*/
public function withExpectedEqCalls(int $n): self
{
$c = clone $this;
$c->expectedEqCalls = $n;
return $c;
}
/**
* Hint: columns that will be used for eq() lookups
*
* Noted for potential indexing, but only built after measurement shows need.
*/
public function withIndexOn(string ...$columns): self
{
$c = clone $this;
$c->indexColumns = array_unique([...$c->indexColumns, ...$columns]);
return $c;
}
// =========================================================================
// has() optimization
// =========================================================================
public function has(object $member): bool
{
return match ($this->hasStrategy) {
'measure' => $this->measureHas($member),
'indexed' => $this->indexedHas($member),
'sqlite' => $this->materialized->has($member),
default => parent::has($member),
};
}
private function measureHas(object $member): bool
{
$this->hasCallCount++;
$t = microtime(true);
$result = parent::has($member);
$elapsed = microtime(true) - $t;
$this->hasTotalTime += $elapsed;
// Check if we should build index (after minimum measurements)
if ($this->hasCallCount >= self::MEASURE_CALLS && $this->expectedHasCalls > 0) {
$avgTime = $this->hasTotalTime / $this->hasCallCount;
$remainingCalls = $this->expectedHasCalls - $this->hasCallCount;
$estimatedRemainingMs = $remainingCalls * $avgTime * 1000;
// Build index if it would save significant time
// Index build cost is roughly O(n) iteration, which we estimate from current measurements
if ($estimatedRemainingMs > self::INDEX_BENEFIT_THRESHOLD_MS) {
$this->buildHasOptimization();
}
}
return $result;
}
private function indexedHas(object $member): bool
{
// Build key from all columns
$key = $this->buildRowKey($member);
return $this->hasIndex->has($key);
}
private function buildHasOptimization(): void
{
// For has(), TreapIndex is a hash on all columns - always efficient
// Only use SQLite if TreapIndex would be too large for PHP memory
$this->prebuildHasIndex();
}
private function prebuildHasIndex(): void
{
$columns = array_keys($this->source->getColumns());
$this->hasIndex = TreapIndex::fromGenerator(function () use ($columns) {
foreach ($this->getAllRows() as $id => $row) {
yield [$this->buildRowKey($row), $id];
}
});
// Check if too large for PHP
if ($this->hasIndex->rowCount() > self::SQLITE_ROW_THRESHOLD) {
$this->materializeToSqlite();
return;
}
$this->hasStrategy = 'indexed';
}
private function buildRowKey(object $row): string
{
$parts = [];
foreach ($this->source->getColumns() as $col => $_) {
$parts[] = $row->$col ?? "\0";
}
return serialize($parts);
}
// =========================================================================
// eq() optimization
// =========================================================================
public function eq(string $column, int|float|string|null $value): TableInterface
{
$strategy = $this->eqStrategies[$column] ?? 'measure';
return match ($strategy) {
'measure' => $this->measureEq($column, $value),
'indexed' => $this->indexedEq($column, $value),
'sqlite' => $this->materialized->eq($column, $value),
default => parent::eq($column, $value),
};
}
private function measureEq(string $column, mixed $value): TableInterface
{
$this->eqCallCounts[$column] = ($this->eqCallCounts[$column] ?? 0) + 1;
$t = microtime(true);
$result = parent::eq($column, $value);
// Force iteration to measure actual time
if ($this->eqCallCounts[$column] <= self::MEASURE_CALLS) {
iterator_count($result);
$result = parent::eq($column, $value); // Get fresh iterator
}
$elapsed = microtime(true) - $t;
$this->eqTotalTimes[$column] = ($this->eqTotalTimes[$column] ?? 0) + $elapsed;
// Check if we should build index (after minimum measurements)
if ($this->eqCallCounts[$column] >= self::MEASURE_CALLS && $this->expectedEqCalls > 0) {
$avgTime = $this->eqTotalTimes[$column] / $this->eqCallCounts[$column];
$remainingCalls = $this->expectedEqCalls - $this->eqCallCounts[$column];
$estimatedRemainingMs = $remainingCalls * $avgTime * 1000;
if ($estimatedRemainingMs > self::INDEX_BENEFIT_THRESHOLD_MS) {
$this->ensureColumnIndex($column);
}
}
return $result;
}
private function indexedEq(string $column, mixed $value): TableInterface
{
$key = (string)($value ?? "\0");
$index = $this->columnIndexes[$column];
$matchingIds = iterator_to_array($index->eq($key));
if (empty($matchingIds)) {
return \mini\Table\Utility\EmptyTable::from($this->source);
}
// Return filtered view using the matching row IDs
return new \mini\Table\GeneratorTable(
function () use ($matchingIds) {
$rows = $this->getAllRows();
foreach ($matchingIds as $id) {
if (isset($rows[$id])) {
yield $id => $rows[$id];
}
}
},
...array_values($this->source->getColumns())
);
}
private function ensureColumnIndex(string $column): void
{
if (isset($this->columnIndexes[$column])) {
return;
}
$this->columnIndexes[$column] = TreapIndex::fromGenerator(function () use ($column) {
foreach ($this->getAllRows() as $id => $row) {
$key = (string)($row->$column ?? "\0");
yield [$key, $id];
}
});
// Check if too large
if ($this->columnIndexes[$column]->rowCount() > self::SQLITE_ROW_THRESHOLD) {
$this->materializeToSqlite();
return;
}
$this->eqStrategies[$column] = 'indexed';
}
// =========================================================================
// SQLite materialization (nuclear option)
// =========================================================================
private function materializeToSqlite(): void
{
$sourceColumns = $this->source->getColumns();
// Build columns, adding indexes only for columns we know are being queried
$indexedColumns = array_unique([
...$this->indexColumns, // Explicit hints
...array_keys($this->eqCallCounts), // Observed eq() usage
]);
$columns = [];
foreach ($sourceColumns as $name => $col) {
if ($col->index !== IndexType::None) {
// Keep existing indexes
$columns[] = $col;
} elseif (in_array($name, $indexedColumns, true)) {
// Add index for queried columns
$columns[] = new ColumnDef($name, $col->type, IndexType::Unique);
} else {
$columns[] = $col;
}
}
$this->materialized = new InMemoryTable(...$columns);
foreach ($this->getAllRows() as $row) {
$this->materialized->insert((array)$row);
}
$this->hasStrategy = 'sqlite';
foreach (array_keys($this->eqStrategies) as $col) {
$this->eqStrategies[$col] = 'sqlite';
}
// Clear PHP indexes to free memory
$this->hasIndex = null;
$this->columnIndexes = [];
$this->rowCache = null;
}
// =========================================================================
// Helpers
// =========================================================================
private function sourceHasUsefulIndex(): bool
{
foreach ($this->source->getColumns() as $col) {
if ($col->index !== IndexType::None) {
return true;
}
}
return false;
}
/**
* Get all rows, caching for reuse during index building
*/
private function getAllRows(): array
{
if ($this->rowCache === null) {
$this->rowCache = [];
foreach ($this->source as $id => $row) {
$this->rowCache[$id] = $row;
}
}
return $this->rowCache;
}
// =========================================================================
// Forward other filter methods (they benefit from our indexes too)
// =========================================================================
public function lt(string $column, int|float|string $value): TableInterface
{
if ($this->materialized) {
return $this->materialized->lt($column, $value);
}
return parent::lt($column, $value);
}
public function lte(string $column, int|float|string $value): TableInterface
{
if ($this->materialized) {
return $this->materialized->lte($column, $value);
}
return parent::lte($column, $value);
}
public function gt(string $column, int|float|string $value): TableInterface
{
if ($this->materialized) {
return $this->materialized->gt($column, $value);
}
return parent::gt($column, $value);
}
public function gte(string $column, int|float|string $value): TableInterface
{
if ($this->materialized) {
return $this->materialized->gte($column, $value);
}
return parent::gte($column, $value);
}
// =========================================================================
// Diagnostics
// =========================================================================
/**
* Get current optimization state for debugging
*/
public function getOptimizationState(): array
{
return [
'hasStrategy' => $this->hasStrategy,
'eqStrategies' => $this->eqStrategies,
'hasCallCount' => $this->hasCallCount,
'eqCallCounts' => $this->eqCallCounts,
'hasIndexSize' => $this->hasIndex?->rowCount(),
'columnIndexSizes' => array_map(
fn($idx) => $idx->rowCount(),
$this->columnIndexes
),
'materialized' => $this->materialized !== null,
];
}
}