Skip to content

Commit

Permalink
handle comments in Java text blocks, #805
Browse files Browse the repository at this point in the history
  • Loading branch information
AlDanial committed Feb 4, 2024
1 parent 2640c18 commit f833b18
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 0 deletions.
34 changes: 34 additions & 0 deletions Unix/cloc
Original file line number Diff line number Diff line change
Expand Up @@ -1002,6 +1002,7 @@ $opt_git = 1 if defined($opt_git_diff_all) or
defined($opt_git_diff_rel) or
(defined($opt_vcs) and ($opt_vcs eq "git"));
$opt_by_file = 1 if defined $opt_by_file_by_lang;
$opt_fmt = 0 unless defined $opt_fmt;
if ($opt_fmt) {
$opt_by_file = 1;
$opt_json = 1;
Expand Down Expand Up @@ -7898,6 +7899,38 @@ sub docstring_to_C { # {{{1
print "<- docstring_to_C\n" if $opt_v > 2;
return @{$ra_lines};
} # 1}}}
sub docstring_rm_comments {                  # {{{1
    my ($ra_lines, ) = @_;
    # Neutralize C/C++ style comment markers found inside triple-quoted
    # blocks (Java text blocks, docstrings) so that later comment-removal
    # filters do not mistake string content for comments.  Each of the
    # two-character tokens  /*  */  //  inside such a block is replaced by
    # 'xx', which keeps line lengths and line counts unchanged.
    #
    # Input : $ra_lines - reference to an array of source lines.  The
    #                     lines are modified in place.
    # Output: the list of (modified) lines.
    #
    # A block opened by """ is only closed by an unescaped """ (likewise
    # for ''').  The other quote style, or a backslash-escaped delimiter
    # such as \""", is ordinary block text.  Several blocks may open and
    # close on one line.
    #
    # Known limitation: a triple quote that appears inside a real comment
    # or an ordinary string literal is still taken as a block opener; this
    # routine does not parse the host language.

    print "-> docstring_rm_comments()\n" if $opt_v > 2;

    my $open_delim = '';    # delimiter of the block we are inside; '' if none
    foreach my $line (@{$ra_lines}) {
        my $pos = 0;        # scan offset within $line
        while ($pos < length($line)) {
            if ($open_delim) {
                # Inside a block: the text runs up to the first matching
                # delimiter that is not backslash-escaped, or to the end
                # of the line if the block continues on the next line.
                my $closed = 0;
                my $n_text = length($line) - $pos;
                if (substr($line, $pos) =~
                        m{\A((?:\\.|(?!\Q$open_delim\E)[^\\])*)\Q$open_delim\E}s) {
                    $n_text = length $1;
                    $closed = 1;
                }
                # replace /*, */, // with xx
                substr($line, $pos, $n_text) =~ s{(?:/\*|\*/|//)}{xx}g
                    if $n_text;
                last unless $closed;
                $open_delim = '';
                $pos += $n_text + 3;     # step over the closing delimiter
            } else {
                # Outside a block: skip ahead to the next opener, if any.
                last unless substr($line, $pos) =~ m{("""|''')};
                $open_delim = $1;
                $pos += $+[0];           # step over the opening delimiter
            }
        }
    }

    print "<- docstring_rm_comments\n" if $opt_v > 2;
    return @{$ra_lines};
} # 1}}}
sub jupyter_nb { # {{{1
my ($ra_lines, ) = @_;
# Translate .ipynb file content into an equivalent set of code
Expand Down Expand Up @@ -9814,6 +9847,7 @@ sub set_constants { # {{{1
[ 'call_regexp_common' , 'C++' ],
],
'Java' => [
[ 'docstring_rm_comments', ],
[ 'replace_regex', '\\\\$', ' '],
# Java seems to have more path globs in strings
# than other languages. The variations makes
Expand Down
7 changes: 7 additions & 0 deletions Unix/t/01_opts.t
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,13 @@ my @Tests = (
'ref' => '../tests/outputs/issues/804/infoSQL.java.yaml',
},

{
'name' => 'comments in Java text blocks #805',
'cd' => '../tests/inputs/issues/805',
'args' => 'text_block.java',
'ref' => '../tests/outputs/issues/805/text_block.java.yaml',
},

);

# Special cases:
Expand Down
34 changes: 34 additions & 0 deletions cloc
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,7 @@ $opt_git = 1 if defined($opt_git_diff_all) or
defined($opt_git_diff_rel) or
(defined($opt_vcs) and ($opt_vcs eq "git"));
$opt_by_file = 1 if defined $opt_by_file_by_lang;
$opt_fmt = 0 unless defined $opt_fmt;
if ($opt_fmt) {
$opt_by_file = 1;
$opt_json = 1;
Expand Down Expand Up @@ -7913,6 +7914,38 @@ sub docstring_to_C { # {{{1
print "<- docstring_to_C\n" if $opt_v > 2;
return @{$ra_lines};
} # 1}}}
sub docstring_rm_comments {                  # {{{1
    my ($ra_lines, ) = @_;
    # Neutralize C/C++ style comment markers found inside triple-quoted
    # blocks (Java text blocks, docstrings) so that later comment-removal
    # filters do not mistake string content for comments.  Each of the
    # two-character tokens  /*  */  //  inside such a block is replaced by
    # 'xx', which keeps line lengths and line counts unchanged.
    #
    # Input : $ra_lines - reference to an array of source lines.  The
    #                     lines are modified in place.
    # Output: the list of (modified) lines.
    #
    # A block opened by """ is only closed by an unescaped """ (likewise
    # for ''').  The other quote style, or a backslash-escaped delimiter
    # such as \""", is ordinary block text.  Several blocks may open and
    # close on one line.
    #
    # Known limitation: a triple quote that appears inside a real comment
    # or an ordinary string literal is still taken as a block opener; this
    # routine does not parse the host language.

    print "-> docstring_rm_comments()\n" if $opt_v > 2;

    my $open_delim = '';    # delimiter of the block we are inside; '' if none
    foreach my $line (@{$ra_lines}) {
        my $pos = 0;        # scan offset within $line
        while ($pos < length($line)) {
            if ($open_delim) {
                # Inside a block: the text runs up to the first matching
                # delimiter that is not backslash-escaped, or to the end
                # of the line if the block continues on the next line.
                my $closed = 0;
                my $n_text = length($line) - $pos;
                if (substr($line, $pos) =~
                        m{\A((?:\\.|(?!\Q$open_delim\E)[^\\])*)\Q$open_delim\E}s) {
                    $n_text = length $1;
                    $closed = 1;
                }
                # replace /*, */, // with xx
                substr($line, $pos, $n_text) =~ s{(?:/\*|\*/|//)}{xx}g
                    if $n_text;
                last unless $closed;
                $open_delim = '';
                $pos += $n_text + 3;     # step over the closing delimiter
            } else {
                # Outside a block: skip ahead to the next opener, if any.
                last unless substr($line, $pos) =~ m{("""|''')};
                $open_delim = $1;
                $pos += $+[0];           # step over the opening delimiter
            }
        }
    }

    print "<- docstring_rm_comments\n" if $opt_v > 2;
    return @{$ra_lines};
} # 1}}}
sub jupyter_nb { # {{{1
my ($ra_lines, ) = @_;
# Translate .ipynb file content into an equivalent set of code
Expand Down Expand Up @@ -9829,6 +9862,7 @@ sub set_constants { # {{{1
[ 'call_regexp_common' , 'C++' ],
],
'Java' => [
[ 'docstring_rm_comments', ],
[ 'replace_regex', '\\\\$', ' '],
# Java seems to have more path globs in strings
# than other languages. The variations makes
Expand Down
21 changes: 21 additions & 0 deletions tests/inputs/issues/805/text_block.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// regular comment
public String getBlockOfHtml() {
String request = """
GET /*cho/foo HTT*/1.1
Host: local
Accept: */*
Co//ection: closed
""";

return """
<html>
/*
* NOT comment
*/
<body>
<span>example text</span>
</body>
</html>
""";
}
21 changes: 21 additions & 0 deletions tests/outputs/issues/805/text_block.java.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
---
# github.com/AlDanial/cloc
header :
cloc_url : github.com/AlDanial/cloc
cloc_version : 1.99
elapsed_seconds : 0.00261998176574707
n_files : 1
n_lines : 21
files_per_second : 381.682045682046
lines_per_second : 8015.32295932296
report_file : ../../../outputs/issues/805/text_block.java.yaml
'Java' :
nFiles: 1
blank: 2
comment: 1
code: 18
SUM:
blank: 2
comment: 1
code: 18
nFiles: 1

0 comments on commit f833b18

Please sign in to comment.