# Creative Commons License. R. Muralikrishnan, MPI for Empirical Aesthetics. This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Licence.
##############################
# Awk script to recode the NEO questionnaire responses
#
# Input: NEO-Data.log (the summarised data extracted from the participant logfiles)
# Variable: File1, the path to the NEO lookup table with the recoding/inversion scheme for each question
# Execute as: awk -f Recode-Q-NEO-Responses.awk -v File1="Neo-lookup-table.txt" NEO-Data.log
# Output files: NEO-Data-Recoded.out, NEO-Data-Summary.txt (both are appended to, not overwritten)
#
# Author: R. Muralikrishnan
##############################
# BEGIN: load the question lookup table named by the File1 variable
# (passed with -v File1=...) and set the names of the two output files.
BEGIN{

  # Sample rows from Neo-lookup-table.txt
  # Q1 N1 RI
  # Q2 E1 RO
  # Q3 O1 RO

  # Read the look-up table and store the facet and the recoding scheme
  # of each question in associative arrays keyed by the question ID.
  # The getline status is kept so that a read failure (-1) can be told
  # apart from a normal end-of-file (0).
  LookupRows = 0;
  while ((LookupStatus = (getline < File1)) > 0)
  {

    NeoFacet[$1] = $2;
    NeoRecodeScheme[$1] = $3;
    LookupRows++;

  }

  if (LookupStatus < 0)
  { # The table could not be opened or read. Report it instead of
    # silently carrying on with an empty recoding scheme.
    # Processing is deliberately not aborted here: exit in BEGIN would
    # still run the END rule.
    print "Warning: cannot read lookup table '" File1 "': " ERRNO > "/dev/stderr";
  }
  else if (LookupRows == 0)
  {
    print "Warning: lookup table '" File1 "' contains no rows" > "/dev/stderr";
  }

  close(File1); # The table is fully loaded; release the file.

  OutputFile1 = "NEO-Data-Recoded.out";
  OutputFile2 = "NEO-Data-Summary.txt";


}

# Sample Row from NEO-Data.log
# 0001 Q1  Response 2
# 0001 Q2  Response 3
# 0001 Q3  Response 4
# 0001 Q4  Response 3

# Q_Abs(x): return the magnitude of the signed number x
# (negative values are negated, everything else is returned unchanged).
# Ref: http://unix.stackexchange.com/questions/220588/how-to-take-the-absolute-value-using-awk
function Q_Abs(x)
{
  if (x < 0)
    return -x;

  return x;
}


# Main rule, run once per input row ($1 = participant, $2 = question ID,
# $4 = raw response): recode the response according to the scheme stored
# for the question, append the recoded row to OutputFile1, and add the
# recoded value to the per-facet and per-dimension running totals.
{

  # Nothing to recode on an empty line.
  if (NF == 0)
    next;

  # Use "in" so that looking up an unknown question does not create an
  # empty entry in the lookup arrays.
  Scheme = ($2 in NeoRecodeScheme) ? NeoRecodeScheme[$2] : "";

  if (Scheme == "RI")
  { # For RI, Recode and Invert: new response = abs(old response - 5)
    # 5 -> 0, 4 -> 1....1 -> 4

    RecodedResponse = Q_Abs($4-5);

  }
  else if (Scheme == "RO")
  { # For RO, Recode only: new response = old response - 1
    # 5 -> 4, 4 -> 3....1 -> 0

    RecodedResponse = $4-1;

  }
  else
  { # No usable scheme for this question. Skip the row rather than
    # reusing the RecodedResponse left over from the previous row.

    print "Warning: " FILENAME ":" FNR ": no RI/RO recoding scheme for question '" $2 "'; row skipped" > "/dev/stderr";
    next;

  }

  Facet = NeoFacet[$2];

  print $1, $2, RecodedResponse, Facet, Scheme >>  OutputFile1;

  # Running total per participant and facet (e.g. "0001 N1").
  SumResponse[$1 " " Facet] += RecodedResponse;

  # The dimension is the first character of the facet (e.g. "N" for "N1").
  Dimension = substr(Facet,1,1);

  # Running total per participant and dimension (e.g. "0001 N").
  SumResponseDim[$1 " " Dimension] += RecodedResponse;

}
# END: append the per-facet totals and then the per-dimension totals to
# OutputFile2, each group sorted separately in plain ASCII order.
END{

  # LC_ALL=C ensures the traditional Unix ASCII sorting order.
  # Ref: https://www.gnu.org/software/gawk/manual/gawk.html#Two_002dway-I_002fO
  Command = "LC_ALL=C sort";

  Q_AppendSorted(SumResponse, Command, OutputFile2);
  Q_AppendSorted(SumResponseDim, Command, OutputFile2);

}

# Q_AppendSorted(Totals, SortCommand, OutFile):
#   Send every "key value" pair of the array Totals through the
#   coprocess SortCommand and append the sorted lines to OutFile.
#   Returns the number of pairs written.
#   Key, Line and Count are local variables, not arguments.
function Q_AppendSorted(Totals, SortCommand, OutFile,    Key, Line, Count)
{
  Count = 0;

  for (Key in Totals)
  {
    print Key, Totals[Key] |& SortCommand;
    Count++;
  }

  # With nothing written, the coprocess was never started. Reading from
  # it now would start sort with its input still open, and both
  # processes would wait for each other forever.
  if (Count == 0)
    return 0;

  # Close only the write end of the two-way pipe. This is the EOF
  # indication sort needs before it can emit its (whole-input) result;
  # without it both processes keep waiting for each other.
  close(SortCommand, "to");

  # Get the sorted output and append it one line at a time.
  while ((SortCommand |& getline Line) > 0)
  {
    print Line >> OutFile;
  }

  # Close the two-way pipe fully so the same command can be started again.
  close(SortCommand);

  return Count;
}