1
0
Fork 0
mirror of synced 2024-05-09 15:22:33 +12:00
appwrite/app/tasks/usage.php

616 lines
30 KiB
PHP
Raw Normal View History

2021-08-10 20:44:31 +12:00
<?php
global $cli, $register;
2021-08-10 20:44:31 +12:00
use Utopia\App;
use Utopia\Cache\Adapter\Redis;
use Utopia\Cache\Cache;
2021-08-10 20:44:31 +12:00
use Utopia\CLI\Console;
use Utopia\Database\Adapter\MariaDB;
use Utopia\Database\Database;
use Utopia\Database\Document;
use Utopia\Database\Validator\Authorization;
2021-08-10 20:44:31 +12:00
2021-08-19 18:54:32 +12:00
/**
* Metrics we collect
2021-08-19 20:01:22 +12:00
*
2021-08-30 19:19:29 +12:00
* General
*
2021-08-19 18:54:32 +12:00
* requests
* network
* executions
*
2021-08-30 19:19:29 +12:00
* Database
*
2021-08-19 18:54:32 +12:00
* database.collections.create
* database.collections.read
* database.collections.update
* database.collections.delete
* database.documents.create
* database.documents.read
* database.documents.update
* database.documents.delete
* database.collections.{collectionId}.documents.create
* database.collections.{collectionId}.documents.read
* database.collections.{collectionId}.documents.update
* database.collections.{collectionId}.documents.delete
*
2021-08-30 19:19:29 +12:00
* Storage
*
2021-08-19 18:54:32 +12:00
* storage.buckets.{bucketId}.files.create
* storage.buckets.{bucketId}.files.read
* storage.buckets.{bucketId}.files.update
* storage.buckets.{bucketId}.files.delete
*
2021-08-30 19:19:29 +12:00
* Users
*
2021-08-19 18:54:32 +12:00
* users.create
* users.read
* users.update
* users.delete
* users.sessions.create
2021-08-24 20:36:46 +12:00
* users.sessions.{provider}.create
2021-08-19 18:54:32 +12:00
* users.sessions.delete
2021-08-19 20:01:22 +12:00
*
2021-08-20 17:55:23 +12:00
* Functions
*
* functions.{functionId}.executions
* functions.{functionId}.failures
* functions.{functionId}.compute
*
2021-08-19 18:54:32 +12:00
* Counters
2021-08-19 20:01:22 +12:00
*
2021-08-19 18:54:32 +12:00
* users.count
2021-08-19 20:01:22 +12:00
* storage.files.count
* database.collections.count
* database.documents.count
* database.collections.{collectionId}.documents.count
*
2021-08-19 20:14:23 +12:00
* Totals
*
* storage.total
*
2021-08-19 18:54:32 +12:00
*/
2021-08-10 20:44:31 +12:00
$cli
    ->task('usage')
    ->desc('Schedules syncing data from influxdb to Appwrite console db')
    ->action(function () use ($register) {
        Console::title('Usage Aggregation V1');
        Console::success(APP_NAME . ' usage aggregation process v1 has started');

        $interval = (int) App::getEnv('_APP_USAGE_AGGREGATION_INTERVAL', '30'); // 30 seconds (by default)

        // Aggregation windows: 'key' is the InfluxDB GROUP BY time() bucket size,
        // 'startTime' is how far back the first query of a metric reaches.
        $periods = [
            [
                'key' => '30m',
                'startTime' => '-24 hours',
            ],
            [
                'key' => '1d',
                'startTime' => '-90 days',
            ],
        ];

        // All the metrics that we are collecting at the moment.
        //   'table'   - InfluxDB measurement to query
        //   'groupBy' - optional tag to group by; its value replaces the tag name inside the metric key
        //   'filters' - optional tag => value pairs added to the WHERE clause
        $globalMetrics = [
            'requests' => [
                'table' => 'appwrite_usage_requests_all',
            ],
            'network' => [
                'table' => 'appwrite_usage_network_all',
            ],
            'executions' => [
                'table' => 'appwrite_usage_executions_all',
            ],
            'database.collections.create' => [
                'table' => 'appwrite_usage_database_collections_create',
            ],
            'database.collections.read' => [
                'table' => 'appwrite_usage_database_collections_read',
            ],
            'database.collections.update' => [
                'table' => 'appwrite_usage_database_collections_update',
            ],
            'database.collections.delete' => [
                'table' => 'appwrite_usage_database_collections_delete',
            ],
            'database.documents.create' => [
                'table' => 'appwrite_usage_database_documents_create',
            ],
            'database.documents.read' => [
                'table' => 'appwrite_usage_database_documents_read',
            ],
            'database.documents.update' => [
                'table' => 'appwrite_usage_database_documents_update',
            ],
            'database.documents.delete' => [
                'table' => 'appwrite_usage_database_documents_delete',
            ],
            'database.collections.collectionId.documents.create' => [
                'table' => 'appwrite_usage_database_documents_create',
                'groupBy' => 'collectionId',
            ],
            'database.collections.collectionId.documents.read' => [
                'table' => 'appwrite_usage_database_documents_read',
                'groupBy' => 'collectionId',
            ],
            'database.collections.collectionId.documents.update' => [
                'table' => 'appwrite_usage_database_documents_update',
                'groupBy' => 'collectionId',
            ],
            'database.collections.collectionId.documents.delete' => [
                'table' => 'appwrite_usage_database_documents_delete',
                'groupBy' => 'collectionId',
            ],
            'storage.buckets.bucketId.files.create' => [
                'table' => 'appwrite_usage_storage_files_create',
                'groupBy' => 'bucketId',
            ],
            'storage.buckets.bucketId.files.read' => [
                'table' => 'appwrite_usage_storage_files_read',
                'groupBy' => 'bucketId',
            ],
            'storage.buckets.bucketId.files.update' => [
                'table' => 'appwrite_usage_storage_files_update',
                'groupBy' => 'bucketId',
            ],
            'storage.buckets.bucketId.files.delete' => [
                'table' => 'appwrite_usage_storage_files_delete',
                'groupBy' => 'bucketId',
            ],
            'users.create' => [
                'table' => 'appwrite_usage_users_create',
            ],
            'users.read' => [
                'table' => 'appwrite_usage_users_read',
            ],
            'users.update' => [
                'table' => 'appwrite_usage_users_update',
            ],
            'users.delete' => [
                'table' => 'appwrite_usage_users_delete',
            ],
            'users.sessions.create' => [
                'table' => 'appwrite_usage_users_sessions_create',
            ],
            'users.sessions.provider.create' => [
                'table' => 'appwrite_usage_users_sessions_create',
                'groupBy' => 'provider',
            ],
            'users.sessions.delete' => [
                'table' => 'appwrite_usage_users_sessions_delete',
            ],
            'functions.functionId.executions' => [
                'table' => 'appwrite_usage_executions_all',
                'groupBy' => 'functionId',
            ],
            'functions.functionId.compute' => [
                'table' => 'appwrite_usage_executions_time',
                'groupBy' => 'functionId',
            ],
            'functions.functionId.failures' => [
                'table' => 'appwrite_usage_executions_all',
                'groupBy' => 'functionId',
                'filters' => [
                    'functionStatus' => 'failed',
                ],
            ],
        ];

        // Collections whose documents are counted per project. 'metricPrefix' is
        // prepended to the metric key; 'subCollections' lists child collections
        // that are counted once per parent document (e.g. documents per collection).
        $collections = [
            'users' => [
                'namespace' => 'internal',
            ],
            'collections' => [
                'metricPrefix' => 'database',
                'namespace' => 'internal',
                'subCollections' => [ // Some collections, like collections and later buckets have child collections that need counting
                    'documents' => [
                        'namespace' => 'external',
                    ],
                ],
            ],
            'files' => [
                'metricPrefix' => 'storage',
                'namespace' => 'internal',
            ],
        ];

        // TODO Maybe move this to the setResource method, and reuse in the http.php file
        $attempts = 0;
        $max = 10;
        $sleep = 1;

        do { // connect to db
            try {
                $attempts++;
                $db = $register->get('db');
                $redis = $register->get('cache');
                break; // leave the do-while if successful
            } catch (\Exception $e) {
                Console::warning("Database not ready. Retrying connection ({$attempts})...");
                if ($attempts >= $max) {
                    throw new \Exception('Failed to connect to database: ' . $e->getMessage(), 0, $e);
                }
                sleep($sleep);
            }
        } while ($attempts < $max);

        // TODO use inject
        $cacheAdapter = new Cache(new Redis($redis));
        $dbForProject = new Database(new MariaDB($db), $cacheAdapter);
        $dbForConsole = new Database(new MariaDB($db), $cacheAdapter);
        $dbForProject->setDefaultDatabase(App::getEnv('_APP_DB_SCHEMA', 'appwrite'));
        $dbForConsole->setDefaultDatabase(App::getEnv('_APP_DB_SCHEMA', 'appwrite'));
        $dbForConsole->setNamespace('_project_console');

        /**
         * Create or update a single stats document in the namespace currently
         * selected on $dbForProject.
         *
         * The document id is md5("{time}_{period}_{metric}"), so writing the same
         * time bucket again overwrites the stored value instead of adding a row.
         *
         * @param string $period bucket key ('30m' or '1d')
         * @param int    $time   unix timestamp of the bucket
         * @param string $metric fully resolved metric key
         * @param mixed  $value  numeric value to store
         * @param int    $type   stored as the 'type' attribute (0 for InfluxDB aggregates, 1 for counters/totals)
         */
        $saveStat = function (string $period, int $time, string $metric, $value, int $type) use ($dbForProject): void {
            $id = \md5($time . '_' . $period . '_' . $metric);
            $document = $dbForProject->getDocument('stats', $id);

            if ($document->isEmpty()) {
                $dbForProject->createDocument('stats', new Document([
                    '$id' => $id,
                    'period' => $period,
                    'time' => $time,
                    'metric' => $metric,
                    'value' => $value,
                    'type' => $type,
                ]));

                return;
            }

            $dbForProject->updateDocument(
                'stats',
                $document->getId(),
                $document->setAttribute('value', $value)
            );
        };

        /**
         * Store a counter/total (type 1) for both the current 30 minute bucket and
         * the current day bucket. Bucket times are rounded DOWN to the bucket start.
         */
        $saveCounter = function (string $metric, $value) use ($saveStat): void {
            $saveStat('30m', (int) (floor(time() / 1800) * 1800), $metric, $value, 1);
            $saveStat('1d', (int) (floor(time() / 86400) * 86400), $metric, $value, 1);
        };

        $latestTime = []; // [metric][period key] => timestamp of the last point saved, used as the next query start
        Authorization::disable();

        $iterations = 0;

        Console::loop(function () use ($interval, $register, $dbForProject, $dbForConsole, $globalMetrics, $periods, $collections, $saveStat, $saveCounter, &$latestTime, &$iterations) {
            $now = date('d-m-Y H:i:s', time());
            Console::info("[{$now}] Aggregating usage data every {$interval} seconds");
            $loopStart = microtime(true);

            /**
             * Aggregate InfluxDB on every loop iteration (every 30 seconds by default)
             *
             * @var InfluxDB\Client $client
             */
            $client = $register->get('influxdb');

            if ($client) {
                $attempts = 0;
                $max = 10;
                $sleep = 1;

                do { // check if telegraf database is ready
                    try {
                        $attempts++;
                        $database = $client->selectDB('telegraf');
                        if (in_array('telegraf', $client->listDatabases(), true)) {
                            break; // leave the do-while if successful
                        }

                        // Not listed yet: go through the same warn / sleep / give-up path
                        // as a connection failure instead of spinning without a pause.
                        throw new \Exception('telegraf database is not listed yet');
                    } catch (\Throwable $th) {
                        Console::warning("InfluxDB not ready. Retrying connection ({$attempts})...");
                        if ($attempts >= $max) {
                            throw new \Exception('InfluxDB database not ready yet', 0, $th);
                        }
                        sleep($sleep);
                    }
                } while ($attempts < $max);

                // sync data
                foreach ($globalMetrics as $metric => $options) { // for each metric
                    foreach ($periods as $period) { // aggregate data for each period
                        $periodKey = $period['key'];

                        // Resume from the last saved point when there is one, otherwise
                        // start from the period's look-back window.
                        $startTimestamp = !empty($latestTime[$metric][$periodKey])
                            ? $latestTime[$metric][$periodKey]
                            : \strtotime($period['startTime']);

                        $start = \DateTime::createFromFormat('U', (string) $startTimestamp)->format(\DateTime::RFC3339);
                        $end = \DateTime::createFromFormat('U', (string) \strtotime('now'))->format(\DateTime::RFC3339);

                        $table = $options['table']; // Which influxdb table to query for this metric
                        $groupBy = empty($options['groupBy']) ? '' : ', "' . $options['groupBy'] . '"'; // Some sub level metrics may be grouped by other tags like collectionId, bucketId, etc

                        $filters = $options['filters'] ?? []; // Some metrics might have additional filters, like function's status
                        $filters = empty($filters)
                            ? ''
                            : ' AND ' . implode(' AND ', array_map(fn ($filter, $value) => "\"{$filter}\"='{$value}'", array_keys($filters), array_values($filters)));

                        $query = "SELECT sum(value) AS \"value\" FROM \"{$table}\" WHERE \"time\" > '{$start}' AND \"time\" < '{$end}' AND \"metric_type\"='counter' {$filters} GROUP BY time({$periodKey}), \"projectId\" {$groupBy} FILL(null)";
                        $result = $database->query($query);
                        $points = $result->getPoints();

                        foreach ($points as $point) {
                            $projectId = $point['projectId'];

                            if (empty($projectId) || $projectId === 'console') {
                                continue;
                            }

                            $dbForProject->setNamespace('_project_' . $projectId);

                            $metricUpdated = $metric;

                            if (!empty($groupBy)) {
                                $groupedBy = $point[$options['groupBy']] ?? '';
                                if (empty($groupedBy)) {
                                    continue;
                                }
                                // e.g. functions.functionId.executions -> functions.<actual id>.executions
                                $metricUpdated = str_replace($options['groupBy'], $groupedBy, $metric);
                            }

                            $time = \strtotime($point['time']);
                            $value = (!empty($point['value'])) ? $point['value'] : 0;

                            try {
                                $saveStat($periodKey, $time, $metricUpdated, $value, 0);
                                $latestTime[$metric][$periodKey] = $time;
                            } catch (\Exception $e) { // if projects are deleted this might fail
                                Console::warning("Failed to save data for project {$projectId} and metric {$metricUpdated}: {$e->getMessage()}");
                            }
                        }
                    }
                }
            }

            /**
             * Aggregate MariaDB every 30 iterations (15 minutes at the default 30 second interval)
             * Some of the queries here might contain full-table scans.
             */
            if ($iterations % 30 === 0) { // Aggregate number of objects in database
                $latestProject = null;

                do { // Loop over all the projects, 100 at a time
                    $attempts = 0;
                    $max = 10;
                    $sleep = 1;

                    do { // list projects
                        try {
                            $attempts++;
                            $projects = $dbForConsole->find('projects', [], 100, cursor: $latestProject);
                            break; // leave the do-while if successful
                        } catch (\Exception $e) {
                            Console::warning("Console DB not ready yet. Retrying ({$attempts})...");
                            if ($attempts >= $max) {
                                throw new \Exception('Failed access console db: ' . $e->getMessage(), 0, $e);
                            }
                            sleep($sleep);
                        }
                    } while ($attempts < $max);

                    if (empty($projects)) {
                        break; // no more pages
                    }

                    $latestProject = $projects[array_key_last($projects)];

                    foreach ($projects as $project) {
                        $projectId = $project->getId();
                        $dbForProject->setNamespace("_project_{$projectId}");

                        // Get total storage. Guarded so that one broken or deleted
                        // project does not abort aggregation for all the others.
                        try {
                            $storageTotal = $dbForProject->sum('files', 'sizeOriginal') + $dbForProject->sum('tags', 'size');
                            $saveCounter('storage.total', $storageTotal);
                        } catch (\Exception $e) {
                            Console::warning("Failed to save storage total for project {$projectId}: {$e->getMessage()}");
                        }

                        foreach ($collections as $collection => $options) {
                            try {
                                $metricPrefix = $options['metricPrefix'] ?? '';

                                // Project level count, e.g. users.count or database.collections.count
                                $count = $dbForProject->count($collection);
                                $metric = empty($metricPrefix) ? "{$collection}.count" : "{$metricPrefix}.{$collection}.count";
                                $saveCounter($metric, $count);

                                $subCollections = $options['subCollections'] ?? [];
                                if (empty($subCollections)) {
                                    continue;
                                }

                                $latestParent = null;
                                $subCollectionCounts = []; // total project level count of sub collections

                                do { // Loop over all the parent collection documents for each sub collection
                                    $parents = $dbForProject->find($collection, [], 100, cursor: $latestParent); // Get all the parents for the sub collections for example for documents, this will get all the collections

                                    if (empty($parents)) {
                                        break; // no more pages
                                    }

                                    $latestParent = $parents[array_key_last($parents)];

                                    foreach ($parents as $parent) {
                                        $parentId = $parent->getId();

                                        foreach (array_keys($subCollections) as $subCollection) { // Sub collection counts, like database.collections.collectionId.documents.count
                                            $count = $dbForProject->count($parentId);

                                            $subCollectionCounts[$subCollection] = ($subCollectionCounts[$subCollection] ?? 0) + $count; // Project level counts for sub collections like database.documents.count

                                            $metric = empty($metricPrefix)
                                                ? "{$collection}.{$parentId}.{$subCollection}.count"
                                                : "{$metricPrefix}.{$collection}.{$parentId}.{$subCollection}.count";
                                            $saveCounter($metric, $count);
                                        }
                                    }
                                } while (!empty($parents));

                                /**
                                 * Inserting project level counts for sub collections like database.documents.count
                                 */
                                foreach ($subCollectionCounts as $subCollection => $count) {
                                    $metric = empty($metricPrefix) ? "{$subCollection}.count" : "{$metricPrefix}.{$subCollection}.count";
                                    $saveCounter($metric, $count);
                                }
                            } catch (\Exception $e) {
                                Console::warning("Failed to save database counters data for project {$projectId} and collection {$collection}: {$e->getMessage()}");
                            }
                        }
                    }
                } while (!empty($projects));
            }

            $iterations++;
            $loopTook = microtime(true) - $loopStart;
            $now = date('d-m-Y H:i:s', time());
            Console::info("[{$now}] Aggregation took {$loopTook} seconds");
        }, $interval);
    });