Skip to content

Commit 983659f

Browse files
repo: add structure max object size metrics
Extend `git repo structure` with maximum inflated and on-disk object sizes, reported both per object type and as overall maxima. This complements the existing totals by highlighting outliers, which often drive repository bloat analysis. The implementation updates object counting to track per-type maxima while walking reachable objects, and exposes those values in both the table and keyvalue formats for human and script consumption. Signed-off-by: Eslam reda ragheb <eslam.reda.div@gmail.com>
1 parent 0a43ed7 commit 983659f

File tree

1 file changed

+86
-1
lines changed

1 file changed

+86
-1
lines changed

builtin/repo.c

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -426,7 +426,9 @@ struct object_values {
426426
/*
 * Per-object-type statistics accumulated by count_objects(). Each member
 * holds one value per object type (tags, commits, trees, blobs):
 *
 *  - type_counts:        number of objects counted for the type
 *  - inflated_sizes:     running sum of inflated sizes for the type
 *  - max_inflated_sizes: largest single inflated size seen for the type
 *  - disk_sizes:         running sum of on-disk sizes for the type
 *  - max_disk_sizes:     largest single on-disk size seen for the type
 */
struct object_stats {
427427
struct object_values type_counts;
428428
struct object_values inflated_sizes;
429+
struct object_values max_inflated_sizes;
429430
struct object_values disk_sizes;
431+
struct object_values max_disk_sizes;
430432
};
431433

432434
struct repo_structure {
@@ -529,6 +531,20 @@ static inline size_t get_total_object_values(struct object_values *values)
529531
return values->tags + values->commits + values->trees + values->blobs;
530532
}
531533

534+
/*
 * Return the largest of the four per-type values (commits, trees, blobs,
 * tags) stored in `values`.
 *
 * Callers pass the per-type maxima (max_inflated_sizes / max_disk_sizes)
 * to obtain the overall largest object size across all types.
 */
static inline size_t get_max_object_value(struct object_values *values)
535+
{
536+
size_t max = values->commits;
537+
538+
if (values->trees > max)
539+
max = values->trees;
540+
if (values->blobs > max)
541+
max = values->blobs;
542+
if (values->tags > max)
543+
max = values->tags;
544+
545+
return max;
546+
}
547+
532548
static void stats_table_setup_structure(struct stats_table *table,
533549
struct repo_structure *stats)
534550
{
@@ -583,6 +599,26 @@ static void stats_table_setup_structure(struct stats_table *table,
583599
" * %s", _("Blobs"));
584600
stats_table_size_addf(table, objects->disk_sizes.tags,
585601
" * %s", _("Tags"));
602+
603+
stats_table_size_addf(table, objects->max_inflated_sizes.commits,
604+
" * %s", _("Largest commit"));
605+
stats_table_size_addf(table, objects->max_inflated_sizes.trees,
606+
" * %s", _("Largest tree"));
607+
stats_table_size_addf(table, objects->max_inflated_sizes.blobs,
608+
" * %s", _("Largest blob"));
609+
stats_table_size_addf(table, objects->max_inflated_sizes.tags,
610+
" * %s", _("Largest tag"));
611+
612+
stats_table_size_addf(table, get_max_object_value(&objects->max_disk_sizes),
613+
" * %s", _("Largest disk size"));
614+
stats_table_size_addf(table, objects->max_disk_sizes.commits,
615+
" * %s", _("Commits"));
616+
stats_table_size_addf(table, objects->max_disk_sizes.trees,
617+
" * %s", _("Trees"));
618+
stats_table_size_addf(table, objects->max_disk_sizes.blobs,
619+
" * %s", _("Blobs"));
620+
stats_table_size_addf(table, objects->max_disk_sizes.tags,
621+
" * %s", _("Tags"));
586622
}
587623

588624
static void stats_table_print_structure(const struct stats_table *table)
@@ -661,6 +697,9 @@ static void stats_table_clear(struct stats_table *table)
661697
static void structure_keyvalue_print(struct repo_structure *stats,
662698
char key_delim, char value_delim)
663699
{
700+
size_t max_inflated_size = get_max_object_value(&stats->objects.max_inflated_sizes);
701+
size_t max_disk_size = get_max_object_value(&stats->objects.max_disk_sizes);
702+
664703
printf("references.branches.count%c%" PRIuMAX "%c", key_delim,
665704
(uintmax_t)stats->refs.branches, value_delim);
666705
printf("references.tags.count%c%" PRIuMAX "%c", key_delim,
@@ -688,6 +727,28 @@ static void structure_keyvalue_print(struct repo_structure *stats,
688727
printf("objects.tags.inflated_size%c%" PRIuMAX "%c", key_delim,
689728
(uintmax_t)stats->objects.inflated_sizes.tags, value_delim);
690729

730+
printf("objects.max_inflated_size%c%" PRIuMAX "%c", key_delim,
731+
(uintmax_t)max_inflated_size, value_delim);
732+
printf("objects.commits.max_inflated_size%c%" PRIuMAX "%c", key_delim,
733+
(uintmax_t)stats->objects.max_inflated_sizes.commits, value_delim);
734+
printf("objects.trees.max_inflated_size%c%" PRIuMAX "%c", key_delim,
735+
(uintmax_t)stats->objects.max_inflated_sizes.trees, value_delim);
736+
printf("objects.blobs.max_inflated_size%c%" PRIuMAX "%c", key_delim,
737+
(uintmax_t)stats->objects.max_inflated_sizes.blobs, value_delim);
738+
printf("objects.tags.max_inflated_size%c%" PRIuMAX "%c", key_delim,
739+
(uintmax_t)stats->objects.max_inflated_sizes.tags, value_delim);
740+
741+
printf("objects.max_disk_size%c%" PRIuMAX "%c", key_delim,
742+
(uintmax_t)max_disk_size, value_delim);
743+
printf("objects.commits.max_disk_size%c%" PRIuMAX "%c", key_delim,
744+
(uintmax_t)stats->objects.max_disk_sizes.commits, value_delim);
745+
printf("objects.trees.max_disk_size%c%" PRIuMAX "%c", key_delim,
746+
(uintmax_t)stats->objects.max_disk_sizes.trees, value_delim);
747+
printf("objects.blobs.max_disk_size%c%" PRIuMAX "%c", key_delim,
748+
(uintmax_t)stats->objects.max_disk_sizes.blobs, value_delim);
749+
printf("objects.tags.max_disk_size%c%" PRIuMAX "%c", key_delim,
750+
(uintmax_t)stats->objects.max_disk_sizes.tags, value_delim);
751+
691752
printf("objects.commits.disk_size%c%" PRIuMAX "%c", key_delim,
692753
(uintmax_t)stats->objects.disk_sizes.commits, value_delim);
693754
printf("objects.trees.disk_size%c%" PRIuMAX "%c", key_delim,
@@ -772,6 +833,8 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
772833
struct object_stats *stats = data->stats;
773834
size_t inflated_total = 0;
774835
size_t disk_total = 0;
836+
size_t max_inflated = 0;
837+
size_t max_disk = 0;
775838
size_t object_count;
776839

777840
for (size_t i = 0; i < oids->nr; i++) {
@@ -786,31 +849,53 @@ static int count_objects(const char *path UNUSED, struct oid_array *oids,
786849
OBJECT_INFO_SKIP_FETCH_OBJECT |
787850
OBJECT_INFO_QUICK) < 0)
788851
continue;
852+
if (disk < 0)
853+
continue;
789854

790855
inflated_total += inflated;
791-
disk_total += disk;
856+
disk_total += (size_t)disk;
857+
if (inflated > max_inflated)
858+
max_inflated = inflated;
859+
if ((size_t)disk > max_disk)
860+
max_disk = (size_t)disk;
792861
}
793862

794863
switch (type) {
795864
case OBJ_TAG:
796865
stats->type_counts.tags += oids->nr;
797866
stats->inflated_sizes.tags += inflated_total;
867+
if (max_inflated > stats->max_inflated_sizes.tags)
868+
stats->max_inflated_sizes.tags = max_inflated;
798869
stats->disk_sizes.tags += disk_total;
870+
if (max_disk > stats->max_disk_sizes.tags)
871+
stats->max_disk_sizes.tags = max_disk;
799872
break;
800873
case OBJ_COMMIT:
801874
stats->type_counts.commits += oids->nr;
802875
stats->inflated_sizes.commits += inflated_total;
876+
if (max_inflated > stats->max_inflated_sizes.commits)
877+
stats->max_inflated_sizes.commits = max_inflated;
803878
stats->disk_sizes.commits += disk_total;
879+
if (max_disk > stats->max_disk_sizes.commits)
880+
stats->max_disk_sizes.commits = max_disk;
804881
break;
805882
case OBJ_TREE:
806883
stats->type_counts.trees += oids->nr;
807884
stats->inflated_sizes.trees += inflated_total;
885+
if (max_inflated > stats->max_inflated_sizes.trees)
886+
stats->max_inflated_sizes.trees = max_inflated;
808887
stats->disk_sizes.trees += disk_total;
888+
if (max_disk > stats->max_disk_sizes.trees)
889+
stats->max_disk_sizes.trees = max_disk;
809890
break;
810891
case OBJ_BLOB:
811892
stats->type_counts.blobs += oids->nr;
812893
stats->inflated_sizes.blobs += inflated_total;
894+
if (max_inflated > stats->max_inflated_sizes.blobs)
895+
stats->max_inflated_sizes.blobs = max_inflated;
813896
stats->disk_sizes.blobs += disk_total;
897+
if (max_disk > stats->max_disk_sizes.blobs)
898+
stats->max_disk_sizes.blobs = max_disk;
814899
break;
815900
default:
816901
BUG("invalid object type");

0 commit comments

Comments
 (0)