# R.Muralikrishnan, MPI for Empirical Aesthetics.
# This work is licensed under a Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International Licence.
#
# Purpose
#   Reads hyphenated text that is delimited by "[]" markers, joins it into
#   one string, and reports per-sentence word statistics.
#
# Input
#   Only records inside a range are used: the range opens on a record whose
#   first field is "[]" and closes on a record whose last field is "[]".
#   Words are expected to carry "-" between syllables (e.g. "ex-am-ple").
#
# Output (one line per "."-separated piece of the collected text)
#   S-<i>: <n> words. W-<j>: <chars>,<syllables>; ...
#   where <chars> is the word length with hyphens removed and <syllables>
#   is the number of "-"-separated parts. Pieces without any word produce
#   an empty line.

BEGIN {
    # Regex that matches a single leading field separator.
    # Adapted from REF:
    # http://www.linuxquestions.org/questions/linux-general-1/awk-delete-defined-columns-4175426481/#post4777125
    # A trailing FS would be (FS "$"); the REF combines both as (FS "$|^" FS).
    Edges = ("^" FS)
}

# Range pattern: from a record starting with "[]" up to and including a
# record ending with "[]".
($1 == "[]"), ($NF == "[]") {
    # Skip blank records only. The previous test (NF > 1) also discarded
    # records consisting of a single field, e.g. a lone word fragment on a
    # wrapped line, which silently lost text.
    if (NF > 0) {
        # Opening record: blank out the "[]" marker and the field after it
        # (per the original note, the "/T1..." information), then strip a
        # leading separator left behind by the rebuild of $0. Any blank that
        # still remains is harmless with the default FS, because END splits
        # words on whitespace.
        if ($1 == "[]") {
            $1 = $2 = ""
            gsub(Edges, "")
        }

        # Closing record: nothing on it is kept.
        if ($NF == "[]") {
            $0 = ""
        }

        # Records are joined without a separator, exactly as they appear.
        HyphenatedText = HyphenatedText $0
    }
}

END {
    N_Sentences_in_Paragraph = split(HyphenatedText, A_Sentences, ".")

    for (i = 1; i <= N_Sentences_in_Paragraph; i++) {
        N_Words_in_Sentence = split(A_Sentences[i], A_Words, " ")

        if (N_Words_in_Sentence > 0) {
            printf "S-%d: %d words. ", i, N_Words_in_Sentence

            for (j = 1; j <= N_Words_in_Sentence; j++) {
                # Count syllables before the hyphens are removed.
                N_of_Syllables_in_Word = split(A_Words[j], A_Syllables, "-")

                # Word length is measured on the de-hyphenated word.
                gsub(/-/, "", A_Words[j])
                Word_Length = length(A_Words[j])

                printf "W-%d: %d,%d; ", j, Word_Length, N_of_Syllables_in_Word
            }
        }

        # A newline is emitted for every piece, including empty ones.
        printf "\n"
    }
}