Skip to content

Commit e327700

Browse files
repo: add structure topology and path-depth metrics
Track additional structure-oriented maxima that are useful when diagnosing unusually complex histories: commit parent fanout, tree entry count, blob path length/depth, and annotated tag chain depth. These counters are gathered while traversing reachable objects and are reported in both table and keyvalue output, so both humans and scripts can consume the same topology signals. Signed-off-by: Eslam reda ragheb <eslam.reda.div@gmail.com>
1 parent 983659f commit e327700

File tree

1 file changed

+170
-1
lines changed

1 file changed

+170
-1
lines changed

builtin/repo.c

Lines changed: 170 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "string-list.h"
1818
#include "shallow.h"
1919
#include "submodule.h"
20+
#include "tree-walk.h"
2021
#include "utf8.h"
2122

2223
static const char *const repo_usage[] = {
@@ -429,6 +430,11 @@ struct object_stats {
429430
struct object_values max_inflated_sizes;
430431
struct object_values disk_sizes;
431432
struct object_values max_disk_sizes;
433+
size_t max_commit_parent_count;
434+
size_t max_tree_entry_count;
435+
size_t max_blob_path_length;
436+
size_t max_blob_path_depth;
437+
size_t max_tag_chain_depth;
432438
};
433439

434440
struct repo_structure {
@@ -545,6 +551,116 @@ static inline size_t get_max_object_value(struct object_values *values)
545551
return max;
546552
}
547553

554+
static size_t get_commit_parent_count(struct repository *repo,
555+
const struct object_id *oid)
556+
{
557+
unsigned long size = 0;
558+
const char *cur;
559+
const char *end;
560+
void *buf;
561+
size_t count = 0;
562+
563+
buf = odb_read_object_peeled(repo->objects, oid, OBJ_COMMIT, &size, NULL);
564+
if (!buf)
565+
return 0;
566+
567+
cur = buf;
568+
end = cur + size;
569+
while (cur < end) {
570+
const char *newline = memchr(cur, '\n', end - cur);
571+
size_t line_len;
572+
573+
if (!newline)
574+
break;
575+
line_len = newline - cur;
576+
if (!line_len)
577+
break;
578+
579+
if (line_len > 7 && !memcmp(cur, "parent ", 7))
580+
count++;
581+
582+
cur = newline + 1;
583+
}
584+
585+
free(buf);
586+
return count;
587+
}
588+
589+
static size_t get_tree_entry_count(struct repository *repo,
590+
const struct object_id *oid)
591+
{
592+
struct tree_desc desc;
593+
struct name_entry entry;
594+
unsigned long size = 0;
595+
void *buf;
596+
size_t count = 0;
597+
598+
buf = odb_read_object_peeled(repo->objects, oid, OBJ_TREE, &size, NULL);
599+
if (!buf)
600+
return 0;
601+
602+
init_tree_desc(&desc, oid, buf, size);
603+
while (tree_entry(&desc, &entry))
604+
count++;
605+
606+
free(buf);
607+
return count;
608+
}
609+
610+
/*
 * Return the number of non-empty '/'-separated components in `path`.
 *
 * A NULL or empty path has depth 0. Leading, trailing and repeated slashes
 * do not add levels, so "a/b", "/a/b", "a/b/" and "a//b" all have depth 2,
 * and a path made only of slashes has depth 0. For paths without such
 * redundant separators the result equals the number of '/' characters
 * plus one.
 */
static size_t get_path_depth(const char *path)
{
	size_t depth = 0;
	int in_component = 0;

	if (!path)
		return 0;

	for (const char *cur = path; *cur; cur++) {
		if (*cur == '/') {
			in_component = 0;
		} else if (!in_component) {
			/* First character of a new component. */
			in_component = 1;
			depth++;
		}
	}

	return depth;
}
624+
625+
static size_t get_tag_chain_depth(struct repository *repo,
626+
const struct object_id *oid)
627+
{
628+
struct object_id current = *oid;
629+
size_t depth = 0;
630+
631+
while (1) {
632+
enum object_type type;
633+
unsigned long size = 0;
634+
struct object_id next;
635+
const char *p, *end;
636+
void *buf = odb_read_object(repo->objects, &current, &type, &size);
637+
638+
if (!buf)
639+
break;
640+
if (type != OBJ_TAG) {
641+
free(buf);
642+
break;
643+
}
644+
645+
p = buf;
646+
if (!skip_prefix(p, "object ", &p) ||
647+
parse_oid_hex_algop(p, &next, &end, repo->hash_algo) ||
648+
*end != '\n') {
649+
free(buf);
650+
break;
651+
}
652+
653+
depth++;
654+
free(buf);
655+
656+
if (oideq(&next, &current))
657+
break;
658+
oidcpy(&current, &next);
659+
}
660+
661+
return depth;
662+
}
663+
548664
static void stats_table_setup_structure(struct stats_table *table,
549665
struct repo_structure *stats)
550666
{
@@ -619,6 +735,17 @@ static void stats_table_setup_structure(struct stats_table *table,
619735
" * %s", _("Blobs"));
620736
stats_table_size_addf(table, objects->max_disk_sizes.tags,
621737
" * %s", _("Tags"));
738+
739+
stats_table_count_addf(table, objects->max_commit_parent_count,
740+
" * %s", _("Largest parent count"));
741+
stats_table_count_addf(table, objects->max_tree_entry_count,
742+
" * %s", _("Largest tree entries"));
743+
stats_table_count_addf(table, objects->max_blob_path_length,
744+
" * %s", _("Longest blob path"));
745+
stats_table_count_addf(table, objects->max_blob_path_depth,
746+
" * %s", _("Deepest blob path"));
747+
stats_table_count_addf(table, objects->max_tag_chain_depth,
748+
" * %s", _("Deepest tag chain"));
622749
}
623750

624751
static void stats_table_print_structure(const struct stats_table *table)
@@ -749,6 +876,17 @@ static void structure_keyvalue_print(struct repo_structure *stats,
749876
printf("objects.tags.max_disk_size%c%" PRIuMAX "%c", key_delim,
750877
(uintmax_t)stats->objects.max_disk_sizes.tags, value_delim);
751878

879+
printf("objects.commits.max_parent_count%c%" PRIuMAX "%c", key_delim,
880+
(uintmax_t)stats->objects.max_commit_parent_count, value_delim);
881+
printf("objects.trees.max_entry_count%c%" PRIuMAX "%c", key_delim,
882+
(uintmax_t)stats->objects.max_tree_entry_count, value_delim);
883+
printf("objects.blobs.max_path_length%c%" PRIuMAX "%c", key_delim,
884+
(uintmax_t)stats->objects.max_blob_path_length, value_delim);
885+
printf("objects.blobs.max_path_depth%c%" PRIuMAX "%c", key_delim,
886+
(uintmax_t)stats->objects.max_blob_path_depth, value_delim);
887+
printf("objects.tags.max_chain_depth%c%" PRIuMAX "%c", key_delim,
888+
(uintmax_t)stats->objects.max_tag_chain_depth, value_delim);
889+
752890
printf("objects.commits.disk_size%c%" PRIuMAX "%c", key_delim,
753891
(uintmax_t)stats->objects.disk_sizes.commits, value_delim);
754892
printf("objects.trees.disk_size%c%" PRIuMAX "%c", key_delim,
@@ -826,7 +964,7 @@ struct count_objects_data {
826964
struct progress *progress;
827965
};
828966

829-
static int count_objects(const char *path UNUSED, struct oid_array *oids,
967+
static int count_objects(const char *path, struct oid_array *oids,
830968
enum object_type type, void *cb_data)
831969
{
832970
struct count_objects_data *data = cb_data;
@@ -862,6 +1000,13 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
8621000

8631001
switch (type) {
8641002
case OBJ_TAG:
1003+
for (size_t i = 0; i < oids->nr; i++) {
1004+
size_t tag_chain_depth = get_tag_chain_depth(data->odb->repo,
1005+
&oids->oid[i]);
1006+
if (tag_chain_depth > stats->max_tag_chain_depth)
1007+
stats->max_tag_chain_depth = tag_chain_depth;
1008+
}
1009+
8651010
stats->type_counts.tags += oids->nr;
8661011
stats->inflated_sizes.tags += inflated_total;
8671012
if (max_inflated > stats->max_inflated_sizes.tags)
@@ -871,6 +1016,13 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
8711016
stats->max_disk_sizes.tags = max_disk;
8721017
break;
8731018
case OBJ_COMMIT:
1019+
for (size_t i = 0; i < oids->nr; i++) {
1020+
size_t parent_count = get_commit_parent_count(data->odb->repo,
1021+
&oids->oid[i]);
1022+
if (parent_count > stats->max_commit_parent_count)
1023+
stats->max_commit_parent_count = parent_count;
1024+
}
1025+
8741026
stats->type_counts.commits += oids->nr;
8751027
stats->inflated_sizes.commits += inflated_total;
8761028
if (max_inflated > stats->max_inflated_sizes.commits)
@@ -880,6 +1032,13 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
8801032
stats->max_disk_sizes.commits = max_disk;
8811033
break;
8821034
case OBJ_TREE:
1035+
for (size_t i = 0; i < oids->nr; i++) {
1036+
size_t entry_count = get_tree_entry_count(data->odb->repo,
1037+
&oids->oid[i]);
1038+
if (entry_count > stats->max_tree_entry_count)
1039+
stats->max_tree_entry_count = entry_count;
1040+
}
1041+
8831042
stats->type_counts.trees += oids->nr;
8841043
stats->inflated_sizes.trees += inflated_total;
8851044
if (max_inflated > stats->max_inflated_sizes.trees)
@@ -889,6 +1048,16 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
8891048
stats->max_disk_sizes.trees = max_disk;
8901049
break;
8911050
case OBJ_BLOB:
1051+
if (path && *path) {
1052+
size_t path_len = strlen(path);
1053+
size_t path_depth = get_path_depth(path);
1054+
1055+
if (path_len > stats->max_blob_path_length)
1056+
stats->max_blob_path_length = path_len;
1057+
if (path_depth > stats->max_blob_path_depth)
1058+
stats->max_blob_path_depth = path_depth;
1059+
}
1060+
8921061
stats->type_counts.blobs += oids->nr;
8931062
stats->inflated_sizes.blobs += inflated_total;
8941063
if (max_inflated > stats->max_inflated_sizes.blobs)

0 commit comments

Comments
 (0)