Skip to content

Commit

Permalink
handle Java text block start in comments, #806
Browse files Browse the repository at this point in the history
  • Loading branch information
AlDanial committed Feb 7, 2024
1 parent f833b18 commit 7b824a0
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 0 deletions.
176 changes: 176 additions & 0 deletions Unix/cloc
Original file line number Diff line number Diff line change
Expand Up @@ -7539,6 +7539,7 @@ sub replace_between_regex { # {{{1
push @save_lines, $_;
}

print "[", join("][", @{$ra_lines}), "]\n" if $opt_v > 4;
print "<- replace_between_regex\n" if $opt_v > 2;
return @save_lines;
} # 1}}}
Expand Down Expand Up @@ -7913,6 +7914,14 @@ sub docstring_rm_comments { # {{{1
# replace /*, */, // with xx
substr($_, $i_start, $i_end-$i_start) =~ s{(/\*|\*/|//)}{xx}g;
next;
} elsif (m{/\*.*?((""")|(''')).*?\*/}) {
# docstring start or end within /* */ comments
my $i_start = $-[0]+2;
substr($_, $i_start, 3) = "xxx";
} elsif (m{//.*?((""")|('''))}) {
# docstring start or end after //
my $i_start = $-[0]+2;
substr($_, $i_start, 3) = "xxx";
} elsif (/^(.*?)((""")|('''))/ and $in_docstring) {
$in_docstring = 0;
my $i_end = length $1;
Expand All @@ -7928,6 +7937,7 @@ sub docstring_rm_comments { # {{{1
}
}

print "[", join("][", @{$ra_lines}), "]\n" if $opt_v > 4;
print "<- docstring_rm_comments\n" if $opt_v > 2;
return @{$ra_lines};
} # 1}}}
Expand Down Expand Up @@ -11503,6 +11513,7 @@ sub call_regexp_common { # {{{1
# a bogus use of %RE to avoid:
# Name "main::RE" used only once: possible typo at cloc line xx.
print scalar keys %RE if $opt_v < -20;
print "[", join("][", @{$ra_lines}), "]\n" if $opt_v > 4;
print "<- call_regexp_common\n" if $opt_v > 2;
return split("\n", $all_lines);
} # 1}}}
Expand Down Expand Up @@ -14340,6 +14351,171 @@ sub glob2regex { # {{{
$re =~ s{\cy}{[^/]*}g;
return '^' . $re . '$';
} # }}}
sub load_json {                              # {{{1
    #
    # Load a cloc-generated JSON file into %contents
    #     $contents{filename}{blank|comment|code|language} = value
    # and return the pieces needed to print it as a table.
    #
    # Argument:
    #     $file   path of the JSON file to read
    #
    # Returns the list
    #     ($file_len, $lang_len, $header, %contents)
    # where $file_len and $lang_len are the lengths of the longest file
    # name and language name, $header is a one-line summary built from the
    # JSON "header" section, and %contents holds the per-file entries (the
    # "header" and "SUM" sections are removed).
    #
    # Dies if the file cannot be opened.
    #
    my ($file) = @_;

    my %contents = ();
    my $heading  = undef;

    # Three-argument open with a lexical handle:  the file name is taken
    # literally (no mode characters, pipes, or whitespace stripping) and
    # no global bareword handle is left behind.
    open my $fh, '<', $file or die "failed load_json($file): $!";

    # Read into a lexical so the caller's $_ is not clobbered.
    while (my $line = <$fh>) {
        if ($line =~ /^\{?"(.*?)"/) {
            # a quoted name in column one (optionally after the opening
            # brace) starts a new section:  "header", a file name, "SUM"
            $heading = $1;
        } elsif ($line =~ /^\s+"(.*?)"\s*:\s+(\d+(?:\.\d+)?)\b/) {
            # indented key with a numeric value
            $contents{$heading}{$1} = $2;
        } elsif ($line =~ /^\s+"(.*?)"\s*:\s+"(.*?)"/) {
            # indented key with a quoted string value
            $contents{$heading}{$1} = $2;
        }
    }
    close $fh;

    my $url    = $contents{'header'}{'cloc_url'};
    my $ver    = $contents{'header'}{'cloc_version'};
    my $sec    = $contents{'header'}{'elapsed_seconds'};
    my $n_file = $contents{'header'}{'n_files'};
    my $n_line = $contents{'header'}{'n_lines'};

    # avoid dividing by zero when computing the rates below
    $sec = $sec == 0 ? 1.0e-3 : $sec;
    my $header = sprintf "%s v %s T=%.2f s (%.1f files/s, %.1f lines/s)",
                         $url, $ver, $sec, $n_file/$sec, $n_line/$sec;

    # only per-file entries remain after this
    delete $contents{'header'};
    delete $contents{'SUM'};

    # Determine column widths for output
    my $file_len = 0;
    my $lang_len = 0;
    foreach my $name (keys %contents) {
        my $flen = length $name;
        my $llen = length $contents{$name}{'language'};
        $file_len = $file_len > $flen ? $file_len : $flen;
        $lang_len = $lang_len > $llen ? $lang_len : $llen;
    }
    return $file_len, $lang_len, $header, %contents;
} # 1}}}
sub print_format_n {                         # {{{1
    #
    # Print the counts in %contents as a plain-text table on the
    # currently selected output handle, using one of five layouts:
    #
    #   format 1 : Language | files | blank | comment | code
    #   format 2 : Language | files | blank | comment | code | total
    #   format 3 : File | Language | blank | comment | code
    #   format 4 : File | blank | comment | code | total
    #   format 5 : File | Language | blank | comment | code | total
    #
    # Arguments (the same list load_json() returns, preceded by $format):
    #   $format     layout number from the table above
    #   $file_len   width of the File column
    #   $lang_len   width of the Language column
    #   $header     line printed above the table
    #   %contents   $contents{filename}{blank|comment|code|language}
    #
    # Formats 1 and 2 print one row per language, formats 3 to 5 one row
    # per file; rows are ordered by descending code count.  A SUM row is
    # printed only when %contents holds more than one file.
    #
    my ($format, $file_len, $lang_len, $header, %contents) = @_;

    # printf templates for the heading row (every column is a string) ...
    my %str_fmt = (
        1 => sprintf("%%-%ds %%7s %%7s %%7s %%7s\n", $lang_len),
        2 => sprintf("%%-%ds %%7s %%7s %%7s %%7s %%7s\n", $lang_len),
        3 => sprintf("%%-%ds %%-%ds %%7s %%7s %%7s\n", $file_len, $lang_len),
        4 => sprintf("%%-%ds %%7s %%7s %%7s %%7s\n", $file_len),
        5 => sprintf("%%-%ds %%-%ds %%7s %%7s %%7s %%7s\n", $file_len, $lang_len),
    );
    # ... and for the data and SUM rows (names as strings, counts as integers)
    my %val_fmt = (
        1 => sprintf("%%-%ds %%7d %%7d %%7d %%7d\n", $lang_len),
        2 => sprintf("%%-%ds %%7d %%7d %%7d %%7d %%7d\n", $lang_len),
        3 => sprintf("%%-%ds %%-%ds %%7d %%7d %%7d\n", $file_len, $lang_len),
        4 => sprintf("%%-%ds %%7d %%7d %%7d %%7d\n", $file_len),
        5 => sprintf("%%-%ds %%-%ds %%7d %%7d %%7d %%7d\n", $file_len, $lang_len),
    );
    # horizontal rule for each layout
    my %hyphens = (
        1 => "-" x ($lang_len + 4*9),
        2 => "-" x ($lang_len + 5*9),
        3 => "-" x ($lang_len + $file_len + 2 + 3*9),
        4 => "-" x ($file_len + 4*9),
        5 => "-" x ($lang_len + $file_len + 2 + 4*9),
    );
    my %col_headings = (
        1 => ["Language", "files", "blank", "comment", "code"],
        2 => ["Language", "files", "blank", "comment", "code", "Total"],
        3 => ["File", "Language", "blank", "comment", "code"],
        4 => ["File", "blank", "comment", "code", "Total"],
        5 => ["File", "Language", "blank", "comment", "code", "Total"],
    );

    # files by descending code count; also decides whether SUM is shown
    my @file_list = sort { $contents{$b}{'code'} <=>
                           $contents{$a}{'code'} } keys %contents;

    print "$header\n";
    print "$hyphens{$format}\n";
    printf $str_fmt{$format}, @{$col_headings{$format}};
    print "$hyphens{$format}\n";

    my ($n_files, $n_blank, $n_comment, $n_code, $n_total) = (0, 0, 0, 0, 0);
    my @out;
    if ($format < 3) {
        # by language:  roll the per-file counts up into per-language
        # counts (only needed for these two layouts)
        my %language = ();
        foreach my $file (keys %contents) {
            my $lang = $contents{$file}{'language'};
            $language{$lang}{'files'} += 1;
            foreach my $category ('blank', 'comment', 'code') {
                $language{$lang}{$category} += $contents{$file}{$category};
            }
        }
        my @lang_list = sort { $language{$b}{'code'} <=>
                               $language{$a}{'code'} } keys %language;

        foreach my $lang (@lang_list) {
            my ($nF, $nB, $nCm, $nCo) =
                @{ $language{$lang} }{ 'files', 'blank', 'comment', 'code' };
            my $row_total = $nB + $nCm + $nCo;
            if ($format == 1) {
                @out = ($lang, $nF, $nB, $nCm, $nCo);
            } else {
                @out = ($lang, $nF, $nB, $nCm, $nCo, $row_total);
            }
            printf $val_fmt{$format}, @out;
            $n_files   += $nF;
            $n_blank   += $nB;
            $n_comment += $nCm;
            $n_code    += $nCo;
            $n_total   += $row_total;
        }
    } else {
        # by file
        foreach my $file (@file_list) {
            my ($nB, $nCm, $nCo) =
                @{ $contents{$file} }{ 'blank', 'comment', 'code' };
            my $lang      = $contents{$file}{'language'};
            my $row_total = $nB + $nCm + $nCo;
            if ($format == 3) {
                @out = ($file, $lang, $nB, $nCm, $nCo);
            } elsif ($format == 4) {
                @out = ($file, $nB, $nCm, $nCo, $row_total);
            } else {
                @out = ($file, $lang, $nB, $nCm, $nCo, $row_total);
            }
            printf $val_fmt{$format}, @out;
            $n_blank   += $nB;
            $n_comment += $nCm;
            $n_code    += $nCo;
            $n_total   += $row_total;
        }
    }
    print "$hyphens{$format}\n";

    # a SUM row adds nothing when there is only a single file
    if (scalar @file_list > 1) {
        if ($format == 1) {
            @out = ( "SUM", $n_files, $n_blank, $n_comment, $n_code );
        } elsif ($format == 2) {
            @out = ( "SUM", $n_files, $n_blank, $n_comment, $n_code, $n_total );
        } elsif ($format == 3) {
            # the Language column is left blank on the SUM row
            @out = ( "SUM", " ", $n_blank, $n_comment, $n_code );
        } elsif ($format == 4) {
            @out = ( "SUM", $n_blank, $n_comment, $n_code, $n_total );
        } else {
            @out = ( "SUM", " ", $n_blank, $n_comment, $n_code, $n_total );
        }
        printf $val_fmt{$format}, @out;
        print "$hyphens{$format}\n";
    }
} # 1}}}
# really_is_pascal, really_is_incpascal, really_is_php from SLOCCount
my %php_files = (); # really_is_php()
sub really_is_pascal { # {{{1
Expand Down
7 changes: 7 additions & 0 deletions Unix/t/01_opts.t
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,13 @@ my @Tests = (
'ref' => '../tests/outputs/issues/805/text_block.java.yaml',
},

{
'name' => 'Java text block start in comments #806',
'cd' => '../tests/inputs/issues/806',
'args' => 'huffman.java',
'ref' => '../tests/outputs/issues/806/results.yaml',
},

);

# Special cases:
Expand Down
11 changes: 11 additions & 0 deletions cloc
Original file line number Diff line number Diff line change
Expand Up @@ -7554,6 +7554,7 @@ sub replace_between_regex { # {{{1
push @save_lines, $_;
}

print "[", join("][", @{$ra_lines}), "]\n" if $opt_v > 4;
print "<- replace_between_regex\n" if $opt_v > 2;
return @save_lines;
} # 1}}}
Expand Down Expand Up @@ -7928,6 +7929,14 @@ sub docstring_rm_comments { # {{{1
# replace /*, */, // with xx
substr($_, $i_start, $i_end-$i_start) =~ s{(/\*|\*/|//)}{xx}g;
next;
} elsif (m{/\*.*?((""")|(''')).*?\*/}) {
# docstring start or end within /* */ comments
my $i_start = $-[0]+2;
substr($_, $i_start, 3) = "xxx";
} elsif (m{//.*?((""")|('''))}) {
# docstring start or end after //
my $i_start = $-[0]+2;
substr($_, $i_start, 3) = "xxx";
} elsif (/^(.*?)((""")|('''))/ and $in_docstring) {
$in_docstring = 0;
my $i_end = length $1;
Expand All @@ -7943,6 +7952,7 @@ sub docstring_rm_comments { # {{{1
}
}

print "[", join("][", @{$ra_lines}), "]\n" if $opt_v > 4;
print "<- docstring_rm_comments\n" if $opt_v > 2;
return @{$ra_lines};
} # 1}}}
Expand Down Expand Up @@ -11518,6 +11528,7 @@ sub call_regexp_common { # {{{1
# a bogus use of %RE to avoid:
# Name "main::RE" used only once: possible typo at cloc line xx.
print scalar keys %RE if $opt_v < -20;
print "[", join("][", @{$ra_lines}), "]\n" if $opt_v > 4;
print "<- call_regexp_common\n" if $opt_v > 2;
return split("\n", $all_lines);
} # 1}}}
Expand Down
11 changes: 11 additions & 0 deletions tests/inputs/issues/806/huffman.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
public class Huffman
{
static final int[][] CODES =
{
/*'"' ( 34) |11111110|01 */ {0x3f9, 10},
/*''' ( 39) |11111111|010 */ {0x7fa, 11},
};
// Huffman decode tree stored in a flattened char array for good
// locality of reference.
// Build the Huffman lookup tree and LC TABLE
}
21 changes: 21 additions & 0 deletions tests/outputs/issues/806/results.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
# github.com/AlDanial/cloc
header :
cloc_url : github.com/AlDanial/cloc
cloc_version : 1.99
elapsed_seconds : 0.00264716148376465
n_files : 1
n_lines : 11
files_per_second : 377.76312708277
lines_per_second : 4155.39439791047
report_file : ../../../outputs/issues/806/results.yaml
'Java' :
nFiles: 1
blank: 0
comment: 3
code: 8
SUM:
blank: 0
comment: 3
code: 8
nFiles: 1

0 comments on commit 7b824a0

Please sign in to comment.