sub docstring_rm_comments { # {{{1
    my ($ra_lines, ) = @_;
    # Neutralize C/C++ style comment markers that appear inside
    # triple-quoted text (""" ... """ or ''' ... ''').  Within such
    # text every occurrence of /*, */ or // is overwritten with "xx".
    # The replacement has the same length as the marker, so line
    # count and column positions are unchanged.
    #
    # Input:   $ra_lines  reference to an array of source lines;
    #                     the array is modified in place.
    # Returns: the (modified) list of lines.
    #
    # The scan walks each line delimiter by delimiter and remembers
    # which delimiter opened the current block, therefore:
    #   - a ''' inside a """ block (or vice versa) is plain text and
    #     does not terminate the block;
    #   - several blocks on one line are all processed;
    #   - a line that closes one block and opens another is handled
    #     segment by segment, and code between the two blocks is
    #     left untouched.
    # Limitation: a delimiter that appears inside an ordinary string
    # or inside a real comment is still taken as the start of a block.

    print "-> docstring_rm_comments()\n" if $opt_v > 2;

    # Delimiter of the block currently open; empty string while the
    # scan position is in ordinary code.  Persists across lines.
    my $open_delim = '';
    foreach my $line (@{$ra_lines}) {
        next unless defined $line;
        my $rest    = $line;   # part of the line not yet scanned
        my $rebuilt = '';      # processed part of the line
        while (length $rest) {
            if ($open_delim eq '') {
                # Ordinary code:  copy verbatim up to and including
                # the next opening delimiter, if there is one.
                if ($rest =~ m{\A(.*?)("""|''')(.*)\z}s) {
                    my ($code, $delim, $tail) = ($1, $2, $3);
                    $rebuilt   .= $code . $delim;
                    $open_delim = $delim;
                    $rest       = $tail;
                } else {
                    $rebuilt .= $rest;
                    $rest     = '';
                }
            } else {
                # Inside a block:  only the delimiter that opened it
                # can close it.
                my $i_close = index($rest, $open_delim);
                my $text    = $i_close < 0 ? $rest
                                           : substr($rest, 0, $i_close);
                # replace /*, */, // with xx
                $text =~ s{(/\*|\*/|//)}{xx}g;
                $rebuilt .= $text;
                if ($i_close < 0) {
                    # block continues on the next line
                    $rest = '';
                } else {
                    $rebuilt   .= $open_delim;
                    $rest       = substr($rest,
                                         $i_close + length $open_delim);
                    $open_delim = '';
                }
            }
        }
        # foreach aliases the array element, so this updates the
        # caller's array; skip the store when nothing changed.
        $line = $rebuilt if $rebuilt ne $line;
    }

    print "<- docstring_rm_comments\n" if $opt_v > 2;
    return @{$ra_lines};
} # 1}}}
sub docstring_rm_comments { # {{{1
    my ($ra_lines, ) = @_;
    # Neutralize C/C++ style comment markers that appear inside
    # triple-quoted text (""" ... """ or ''' ... ''').  Within such
    # text every occurrence of /*, */ or // is overwritten with "xx".
    # The replacement has the same length as the marker, so line
    # count and column positions are unchanged.
    #
    # Input:   $ra_lines  reference to an array of source lines;
    #                     the array is modified in place.
    # Returns: the (modified) list of lines.
    #
    # The scan walks each line delimiter by delimiter and remembers
    # which delimiter opened the current block, therefore:
    #   - a ''' inside a """ block (or vice versa) is plain text and
    #     does not terminate the block;
    #   - several blocks on one line are all processed;
    #   - a line that closes one block and opens another is handled
    #     segment by segment, and code between the two blocks is
    #     left untouched.
    # Limitation: a delimiter that appears inside an ordinary string
    # or inside a real comment is still taken as the start of a block.

    print "-> docstring_rm_comments()\n" if $opt_v > 2;

    # Delimiter of the block currently open; empty string while the
    # scan position is in ordinary code.  Persists across lines.
    my $open_delim = '';
    foreach my $line (@{$ra_lines}) {
        next unless defined $line;
        my $rest    = $line;   # part of the line not yet scanned
        my $rebuilt = '';      # processed part of the line
        while (length $rest) {
            if ($open_delim eq '') {
                # Ordinary code:  copy verbatim up to and including
                # the next opening delimiter, if there is one.
                if ($rest =~ m{\A(.*?)("""|''')(.*)\z}s) {
                    my ($code, $delim, $tail) = ($1, $2, $3);
                    $rebuilt   .= $code . $delim;
                    $open_delim = $delim;
                    $rest       = $tail;
                } else {
                    $rebuilt .= $rest;
                    $rest     = '';
                }
            } else {
                # Inside a block:  only the delimiter that opened it
                # can close it.
                my $i_close = index($rest, $open_delim);
                my $text    = $i_close < 0 ? $rest
                                           : substr($rest, 0, $i_close);
                # replace /*, */, // with xx
                $text =~ s{(/\*|\*/|//)}{xx}g;
                $rebuilt .= $text;
                if ($i_close < 0) {
                    # block continues on the next line
                    $rest = '';
                } else {
                    $rebuilt   .= $open_delim;
                    $rest       = substr($rest,
                                         $i_close + length $open_delim);
                    $open_delim = '';
                }
            }
        }
        # foreach aliases the array element, so this updates the
        # caller's array; skip the store when nothing changed.
        $line = $rebuilt if $rebuilt ne $line;
    }

    print "<- docstring_rm_comments\n" if $opt_v > 2;
    return @{$ra_lines};
} # 1}}}
The variations makes diff --git a/tests/inputs/issues/805/text_block.java b/tests/inputs/issues/805/text_block.java new file mode 100644 index 00000000..5f25353a --- /dev/null +++ b/tests/inputs/issues/805/text_block.java @@ -0,0 +1,21 @@ +// regular comment +public String getBlockOfHtml() { +String request = """ + GET /*cho/foo HTT*/1.1 + Host: local + Accept: */* + Co//ection: closed + + """; + + return """ + + /* + * NOT comment + */ + + example text + + + """; + } diff --git a/tests/outputs/issues/805/text_block.java.yaml b/tests/outputs/issues/805/text_block.java.yaml new file mode 100644 index 00000000..4b6be197 --- /dev/null +++ b/tests/outputs/issues/805/text_block.java.yaml @@ -0,0 +1,21 @@ +--- +# github.com/AlDanial/cloc +header : + cloc_url : github.com/AlDanial/cloc + cloc_version : 1.99 + elapsed_seconds : 0.00261998176574707 + n_files : 1 + n_lines : 21 + files_per_second : 381.682045682046 + lines_per_second : 8015.32295932296 + report_file : ../../../outputs/issues/805/text_block.java.yaml +'Java' : + nFiles: 1 + blank: 2 + comment: 1 + code: 18 +SUM: + blank: 2 + comment: 1 + code: 18 + nFiles: 1