# R.Muralikrishnan, MPI for Empirical Aesthetics.
# This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International Licence.

##############################
# Awk script to recode the NEO questionnaire responses
#
# Input: NEO-Data.log (the summarised data extracted from the participant logfiles)
# Variable: NEO-lookup table, with the recoding/inversion scheme for each question
# Execute as: awk -f Recode-Q-NEO-Responses.awk -v File1="Neo-lookup-table.txt" NEO-Data.out
#             (the two-way pipe operator |& used below requires GNU awk)
# Output files: NEO-Data-Recoded.out,
#               NEO-Data-Summary-Facets.txt,
#               NEO-Data-Summary-Dimensions.txt
#               All three are opened in append mode (>>), so remove files left
#               over from an earlier run before running the script again.
#
# Author: R. Muralikrishnan
##############################

BEGIN {
    # Sample rows from Neo-lookup-table.txt
    # Q1 1-N1 RI
    # Q2 2-E1 RO
    # Q3 3-O1 RO

    if (File1 == "") {
        print "Recode-Q-NEO-Responses: no lookup table given (use -v File1=...)" > "/dev/stderr";
        exit 1;
    }

    # Read the look-up table and store the facet and the recoding scheme
    # for each question in associative arrays keyed by the question ID.
    LookupCount = 0;
    while ((ReadStatus = (getline < File1)) > 0) {
        if (NF < 3)
            continue;               # Blank or incomplete row: nothing to store.
        NeoFacet[$1] = $2;
        NeoRecodeScheme[$1] = $3;
        LookupCount++;
    }
    close(File1);

    # getline returns a negative value when the file cannot be read.
    if (ReadStatus < 0 || LookupCount == 0) {
        print "Recode-Q-NEO-Responses: cannot read any lookup rows from " File1 > "/dev/stderr";
        exit 1;
    }

    OutputFile1 = "NEO-Data-Recoded.out";
    OutputFile2 = "NEO-Data-Summary-Facets.txt";
    OutputFile3 = "NEO-Data-Summary-Dimensions.txt";
}

# Function definition to return the absolute value of a signed number.
# http://unix.stackexchange.com/questions/220588/how-to-take-the-absolute-value-using-awk
function Q_Abs(x)
{
    return x < 0 ? -x : x;
}

# Send every "key value" pair of Totals through sort and append the sorted
# lines to OutFile. The names after the extra spaces are local variables.
function WriteSorted(Totals, OutFile,    Key, Count, SortCmd, Line)
{
    # Sort by column 1 and then by column 2 as key.
    # LC_ALL=C ensures the traditional Unix ASCII sorting order.
    # Ref: https://www.gnu.org/software/gawk/manual/gawk.html#Two_002dway-I_002fO
    SortCmd = "LC_ALL=C sort -k1 -k2";

    Count = 0;
    for (Key in Totals) {
        print Key, Totals[Key] |& SortCmd;
        Count++;
    }

    # Nothing was written, so the coprocess was never started. Reading from it
    # now would start sort and wait for output that never comes, because sort
    # in turn would be waiting for input.
    if (Count == 0)
        return;

    # Close only the write end of the pipe: this is the end-of-input signal
    # sort needs before it can emit anything. Without it both processes keep
    # waiting for each other.
    close(SortCmd, "to");

    # Get the sorted output and append it as such, one line at a time.
    while ((SortCmd |& getline Line) > 0)
        print Line >> OutFile;

    # Close the two-way pipe fully so the same command can be started again.
    close(SortCmd);
}

# Sample rows from NEO-Data.log
# 0001 Q1 Response 2
# 0001 Q2 Response 3
# 0001 Q3 Response 4
# 0001 Q4 Response 3
{
    if (NF == 0)
        next;                       # Ignore blank lines.

    # A question that is missing from the lookup table cannot be recoded.
    # Skip the row instead of reusing the value computed for the previous row.
    if (!($2 in NeoRecodeScheme)) {
        print "Recode-Q-NEO-Responses: " FILENAME ":" FNR ": unknown question \"" $2 "\", row skipped" > "/dev/stderr";
        next;
    }

    Scheme = NeoRecodeScheme[$2];
    if (Scheme == "RI") {
        # For RI, Recode and Invert: new response = abs(old response - 5)
        # 5 -> 0, 4 -> 1 .... 1 -> 4
        RecodedResponse = Q_Abs($4 - 5);
    } else if (Scheme == "RO") {
        # For RO, Recode only: new response = old response - 1
        # 5 -> 4, 4 -> 3 .... 1 -> 0
        RecodedResponse = $4 - 1;
    } else {
        print "Recode-Q-NEO-Responses: " FILENAME ":" FNR ": unknown recoding scheme \"" Scheme "\" for " $2 ", row skipped" > "/dev/stderr";
        next;
    }

    Facet = NeoFacet[$2];
    print $1, $2, RecodedResponse, Facet, Scheme >> OutputFile1;

    # Running totals per participant and facet ...
    SumResponse[$1 " " Facet] += RecodedResponse;

    # ... and per participant and dimension, where the dimension label is the
    # first three characters of the facet label (e.g. "1-N" from "1-N1").
    Dimension = substr(Facet, 1, 3);
    SumResponseDim[$1 " " Dimension] += RecodedResponse;
}

END {
    WriteSorted(SumResponse, OutputFile2);
    WriteSorted(SumResponseDim, OutputFile3);
}