lib/Gsg/Gather.pm
1	package Gsg::Gather;
2 use strict;
3 use warnings;
4 use Log::Log4perl qw(:easy);
5 use Shellex::Shellex qw(shellex findBin);
6 use Exporter qw(import);
7 our @EXPORT_OK = qw(get_file_tree get_projects trim_project_paths get_diff_stat);
8
# Return the `git diff --stat` summary between two commits of a repository.
#
# Arguments:
#   $project_dir    - path handed to git as --git-dir
#   $newest_commit  - first commit given to `git diff`
#   $compare_commit - second commit given to `git diff`
#   $logger         - logger object, passed through to findBin() and shellex()
#
# Returns whatever shellex() returns for the command.
sub get_diff_stat($$$$) {

    my $project_dir    = shift;
    my $newest_commit  = shift;
    my $compare_commit = shift;
    my $logger         = shift;

    # Example of the command being built:
    # git --git-dir=/home/git/git-site-gen.git/ diff --stat 37f54811d49d41a4d794594e5bbaaee2271d82ad 1afd193eda9a6bc703011a72afa273e560355713
    my $gitCmd = findBin("git",$logger);

    # The "diff --stat" subcommand has to be spelled out: without it git is
    # asked to run the first commit id as if it were a subcommand.
    # The directory is quoted the same way get_file_tree() quotes it.
    my $diff_stat = shellex("$gitCmd --git-dir=\"$project_dir\" diff --stat $newest_commit $compare_commit",$logger);

    return $diff_stat;

}
23
# List the git project directories found directly under a directory.
#
# Arguments:
#   $git_dir              - directory whose immediate subdirectories are listed
#   $ignored_projects_ref - array ref of directory paths to leave out; an
#                           entry is compared against the whole listed path
#   $logger               - logger object, passed to findBin()/shellex() and
#                           used to report skipped projects via info()
#
# Returns an array ref holding the kept paths, exactly as they appear in the
# output of `ls -d $git_dir/*/`.
sub get_projects($$$) {

    my $git_dir              = shift;
    my $ignored_projects_ref = shift;
    my $logger               = shift;

    my $ls_cmd = findBin("ls",$logger);
    my @git_project_dirs;

    foreach my $dir ( split("\n", shellex("$ls_cmd -d $git_dir/*/",$logger)) ) {

        # Only paths containing ".git" are treated as projects
        next if $dir !~ m/\.git/;

        # \Q...\E makes the path match literally. Unescaped, the "." in
        # "project.git" matches any character and characters such as "+" or
        # "(" change the meaning of the pattern altogether.
        if ( grep { /^\Q$dir\E$/ } @$ignored_projects_ref ) {
            $logger->info("Found $dir in ignore list, skipping...");
            next;
        }

        push(@git_project_dirs,$dir);
    }

    return \@git_project_dirs;

}
46
# Reduce project paths to the part that is needed under the web root.
#
# Arguments:
#   $projects_ref - array ref of project paths
#   $logger       - logger object; info() is called once before returning
#
# Returns an array ref of the shortened paths, in input order. Paths the
# pattern does not match are left out.
sub trim_project_paths($$) {

    my ( $projects_ref, $logger ) = @_;

    # /some/path/project.git/ -> project.git/
    # A match in list context yields its single capture on success and an
    # empty list on failure, so non-matching paths simply disappear.
    my @web_paths = map { m/\/?([^\/]+\.[^\.]+$)/ } @$projects_ref;

    $logger->info("Returning trimmed project paths");

    return \@web_paths;

}
65
# Collect the tracked files, their contents and the commit history of a git
# repository.
#
# Arguments:
#   $projectDir - path handed to git as --git-dir
#   $logger     - logger object, passed through to findBin() and shellex()
#
# Returns a four element list:
#   \%file_tree    - file name => object id, parsed from `git ls-tree`
#   \%file_content - file name => output of `git show <object id>`, or the
#                    string "Binary file" when the file(1) probe does not
#                    report "text"
#   \%commits      - commit id => output of `git show <commit id>`
#   \@commit_ids   - commit ids in the order `git log` printed them
sub get_file_tree($$) {

    my $projectDir = shift;
    my $logger     = shift;
    my $gitCmd     = findBin("git",$logger);

    # Get files
    my %file_tree;
    foreach my $file ( split("\n", shellex("$gitCmd --git-dir=\"$projectDir\" ls-tree --full-tree -r HEAD",$logger)) ) {
        chomp $file;
        # Skip lines that do not parse: after a failed match $1/$2 do not
        # describe this line, so storing them would corrupt the tree.
        next unless $file =~ /([a-z0-9]{40})\t(.*)$/;
        # Name - object id
        $file_tree{$2} = $1;
    }

    # Get file content
    my %file_content;
    my ( $file_cmd, $scratch, $scratch_path );
    foreach my $filename ( keys %file_tree ) {
        my $object_id = $file_tree{$filename};
        my $content = shellex("$gitCmd --git-dir=\"$projectDir\" show $object_id",$logger);

        # - TODO -
        # A hack -- interested in a better way to detect if git files are binary
        # Also increases run time noticeably, since every blob is shown twice
        #
        # The probe is set up once, on first use. The scratch file comes from
        # File::Temp instead of a fixed /tmp path, so concurrent runs cannot
        # clobber each other and the name cannot be prepared by someone else.
        # File::Temp removes the file when $scratch goes out of scope.
        if ( !defined $scratch ) {
            require File::Temp;
            $file_cmd     = findBin("file",$logger);
            $scratch      = File::Temp->new();
            $scratch_path = $scratch->filename;
        }

        my $bin_test = shellex("$gitCmd --git-dir=\"$projectDir\" show $object_id > \"$scratch_path\" && $file_cmd -i \"$scratch_path\"",$logger);

        # file(1) echoes the path it inspected; drop it so that a random
        # scratch name can never be what satisfies the "text" check below.
        $bin_test =~ s/\Q$scratch_path\E//g if defined $bin_test;

        if ( !defined $bin_test || $bin_test !~ m/text/ ) {
            $content = "Binary file";
        }

        chomp $content;
        # Name - file content
        $file_content{$filename} = $content;
    }

    # Get logs
    my @commit_ids;
    foreach my $log_line ( split("\n",shellex("$gitCmd --git-dir=\"$projectDir\" log",$logger)) ) {
        # Anchored to the start of the line: the default `git log` output
        # indents commit messages, so a message such as
        # "This reverts commit <id>" is not mistaken for a commit header.
        if ( $log_line =~ m/^commit\ ([a-z0-9]{40})/ ) {
            push(@commit_ids,$1);
        }
    }

    my %commits;
    foreach my $commit_id ( @commit_ids ) {
        # Uses the resolved git binary, like every other call in this sub
        my $commit_info = shellex("$gitCmd --git-dir=\"$projectDir\" show $commit_id",$logger);
        chomp $commit_info;
        $commits{$commit_id} = $commit_info;
    }

    # We return commit_ids as well to preserve ordering
    return ( \%file_tree, \%file_content, \%commits, \@commit_ids );

}
121
122 1;