# R.Muralikrishnan, MPI for Empirical Aesthetics.
# This work is licensed under a Creative Commons
# Attribution-NonCommercial-ShareAlike 4.0 International Licence.
#
# Purpose: strip digits and most punctuation from every input record,
# normalise sentence-final "?" and "!" to ".", and print each record that is
# still non-empty afterwards.
#
# Usage: awk -f <this-file> input.txt > output.txt

BEGIN {
    # Regular expression matching one field separator at the very end or at
    # the very beginning of a record. With the default FS (a single space)
    # this evaluates to " $|^ ".
    #
    # Adapted from:
    # http://www.linuxquestions.org/questions/linux-general-1/awk-delete-defined-columns-4175426481/#post4777125
    #   leading FS only  : "^" FS
    #   trailing FS only : FS "$"
    #   both (used here) : FS "$|^" FS
    # ("^" FS alone also appeared to work, but the original combination is
    # retained.)
    #
    # The main rule passes this variable to gsub() to replace the matched
    # separator with nothing. Note that FS is used as a regular expression
    # here, and that only ONE leading and ONE trailing separator is removed.
    Edges = (FS "$|^" FS);
}

{
    # Remove all digits.
    gsub(/[0-9]/, "");

    # Collapse " . " to a single space. This pattern is what remains in German
    # texts once the digits of an ordinal number are gone ("der 63. Tag").
    gsub(/ \. /, " ");

    # Hyphens: the order of these three substitutions matters.
    gsub(/--/, " ");    # Eduard--so nennen wir ihn   => Eduard so nennen wir ihn
    gsub(/- /, " ");    # Dokumentar- und andere Filme => Dokumentar und andere Filme
    gsub(/-/, " ");     # Doku-Serie                   => Doku Serie

    # Remove typographic apostrophes and German quotation marks.
    # Alternation (rather than a bracket expression) keeps these multi-byte
    # characters intact even when awk runs in a byte-oriented locale.
    gsub(/’|«|»/, "");  # father’s => fathers

    # Remove commas, semi-colons, colons and double quotes.
    gsub(/[,;:"]/, "");

    # Question marks and exclamation marks become full stops.
    # Inside a bracket expression neither character needs escaping.
    gsub(/[?!]/, ".");

    # Remove one leading and one trailing field separator.
    gsub(Edges, "");

    # Suppress records that ended up empty.
    if (length($0) != 0) {
        print $0;
    }
}