It looks like this specific .sav file has a particular encoding that I'm not able to understand.
Every other .sav file I've tried to decode has always worked, but this one in particular seems tricky.
Code I've tried to retrieve all the data from it:
Pandas SPSS
import pandas as pd
import json

# Attempt 1: treat the file as a genuine SPSS .sav.
# NOTE(review): pandas.read_spss delegates to pyreadstat and only works on
# real SPSS files — a game's PlayerPrefs .sav is unlikely to be one, which
# would explain this path failing. TODO confirm the actual container format.

# Load the .sav file without specifying encoding.
df = pd.read_spss('/home/seizou/KURO_PLAYER_PREFS.sav')

# Convert the DataFrame to JSON, keeping non-ASCII characters intact.
json_data = df.to_json(orient='records', force_ascii=False)

# Save the JSON data to a file, ensuring UTF-8 encoding.
# BUG FIX: the write call must be indented inside the `with` block.
with open('output_file.json', 'w', encoding='utf-8') as json_file:
    json_file.write(json_data)
Manual Decoding
import struct
import json
import os
import datetime
def decode_string(encoded_string):
    """Decode raw bytes as UTF-8 text that is safe to embed in JSON.

    Undecodable byte sequences become U+FFFD (via ``errors='replace'``),
    and every non-printable character is mapped to a single space.

    Args:
        encoded_string: the raw ``bytes`` payload to decode.

    Returns:
        The cleaned, printable ``str``.
    """
    # errors='replace' can never raise UnicodeDecodeError, so the original
    # try/except (and its "<UNDECODABLE BINARY DATA>" placeholder) was
    # unreachable dead code and has been removed.
    decoded = encoded_string.decode('utf-8', errors='replace')
    return ''.join(ch if ch.isprintable() else ' ' for ch in decoded)
def determine_data_type_and_length(binary_data, offset):
    """Read the one-byte type tag at *offset* and classify the entry.

    Args:
        binary_data: the whole file as ``bytes``.
        offset: index of the type-tag byte.

    Returns:
        ``(data_type, length, new_offset)`` where ``data_type`` is one of
        'integer'/'float'/'date'/'string' (or ``None`` for an unknown tag),
        ``length`` is the payload size in bytes, and ``new_offset`` points
        just past the consumed header bytes.
    """
    # BUG FIX: the keys were written as b'x01' — a THREE-byte literal
    # ('x','0','1') — which can never equal the single-byte slice taken
    # below, so every tag fell through to the "unknown" branch. They must
    # be escape sequences like b'\x01'.
    IDENTIFIER_TO_TYPE = {
        b'\x01': ('integer', 4),
        b'\x02': ('float', 4),
        b'\x03': ('date', 8),
        b'\x04': ('string', None),  # string length is stored after the tag
    }
    identifier = binary_data[offset:offset + 1]
    offset += 1
    if identifier in IDENTIFIER_TO_TYPE:
        data_type, length = IDENTIFIER_TO_TYPE[identifier]
        if data_type == 'string':
            # The string's byte length is a little-endian uint16 right
            # after the tag; consume it here so callers get the real length.
            length = struct.unpack_from('<H', binary_data, offset)[0]
            offset += 2
        return data_type, length, offset
    print(f"Skipping unknown identifier {identifier} at offset {offset-1}.")
    # Heuristic skip for unrecognized tags — presumably wrong for this
    # format; adjust once the real record layout is known.
    skip_length = 4  # Placeholder value
    offset += skip_length
    return None, None, offset
def read_sav_file(file_path):
    """Parse *file_path* as a stream of tagged binary records.

    Uses determine_data_type_and_length() for each record header and
    decode_string() for string payloads.

    Args:
        file_path: path to the binary .sav file.

    Returns:
        A list of ``{'type': ..., 'value': ...}`` dicts, one per record.
    """
    data = []
    with open(file_path, 'rb') as file:
        binary_data = file.read()
    total = len(binary_data)
    offset = 0
    while offset < total:
        data_type, data_length, offset = determine_data_type_and_length(binary_data, offset)
        if data_type is None:
            continue  # unknown tag: the header reader already skipped past it
        if data_length is not None and offset + data_length > total:
            # Truncated payload at end of file — stop cleanly instead of
            # letting struct.unpack_from raise.
            break
        data_item = {'type': data_type}
        if data_type == 'integer':
            value = struct.unpack_from('<i', binary_data, offset)[0]
            offset += 4
        elif data_type == 'float':
            value = struct.unpack_from('<f', binary_data, offset)[0]
            offset += 4
        elif data_type == 'date':
            unix_timestamp = struct.unpack_from('<Q', binary_data, offset)[0]
            try:
                value = datetime.datetime.fromtimestamp(
                    unix_timestamp, datetime.timezone.utc
                ).strftime('%Y-%m-%d %H:%M:%S')
            except (OverflowError, OSError, ValueError):
                # fromtimestamp raises OverflowError/OSError/ValueError for
                # out-of-range timestamps depending on platform.
                value = "Invalid Date"
            offset += 8
        else:  # 'string'
            # BUG FIX: the original read a SECOND uint16 length here, but
            # determine_data_type_and_length() has already consumed the
            # length field — data_length IS the string's byte length. The
            # double read desynchronized the offset for every string record.
            value = decode_string(binary_data[offset:offset + data_length])
            offset += data_length
        data_item['value'] = value
        data.append(data_item)
    return data
def convert_to_json(sav_data, output_file_path):
    """Pair up decoded records and write them to *output_file_path* as JSON.

    Consecutive items of *sav_data* are treated as (integer, string) pairs;
    a value that does not match its expected type becomes ``null``.

    Args:
        sav_data: list of ``{'type': ..., 'value': ...}`` dicts from
            read_sav_file().
        output_file_path: destination path for the UTF-8 JSON document.
    """
    # Build the paired entries first so the metadata can report the real count.
    entries = []
    for i in range(0, len(sav_data), 2):
        first = sav_data[i]
        second = sav_data[i + 1] if i + 1 < len(sav_data) else None
        entries.append({
            "integer": first['value'] if first['type'] == 'integer' else None,
            "string": second['value'] if second is not None and second['type'] == 'string' else None,
        })
    structured_data = {
        "metadata": {
            # NOTE(review): this is the basename of the *output* path — the
            # original .sav path is not passed in. Thread it through if the
            # true source name is wanted.
            "source_file": os.path.basename(output_file_path),
            # BUG FIX: len(sav_data) // 2 undercounts when the record count
            # is odd (the pairing loop emits ceil(n/2) entries); report the
            # actual number of entries written.
            "total_entries": len(entries),
            "extraction_date": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        },
        "entries": entries
    }
    with open(output_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(structured_data, json_file, indent=4)
def main():
    """Drive the conversion: read the .sav file and dump its records as JSON."""
    # Input .sav path and output JSON path.
    sav_file_path = '/path/to/sav_file.sav'
    json_file_path = '/path/to/output_file.json'

    # Bail out early if the input file is missing.
    if not os.path.exists(sav_file_path):
        print(f"File {sav_file_path} not found!")
        return

    # Parse the binary records, then convert and save as JSON.
    convert_to_json(read_sav_file(sav_file_path), json_file_path)
    print(f"JSON Saved at: {json_file_path}")


if __name__ == "__main__":
    main()
Nothing has worked so far; the best output I've gotten was:
{
"metadata": {
"source_file": "output_file.json",
"total_entries": 49,
"extraction_date": "2024-06-16 10:07:19"
},
"entries": [
{
"integer": 1396790855,
"string": " "
},
{
"integer": 522,
"string": " "
},
{
"integer": 107041437,
"string": " ++UE4+Release-4.2"
},
{
"integer": 196662,
"string": ""
},
{
"integer": 58,
"string": null
},
{
"integer": -1180047784,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": -80436955,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": 1396724469,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": 529092365,
"string": null
},
{
"integer": 10,
"string": null
},
{
"integer": -1553166714,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": -2064644380,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": -1573791221,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": 350176176,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": -600192975,
"string": null
},
{
"integer": 4070542,
"string": ""
},
{
"integer": -266652928,
"string": null
},
{
"integer": -1185242439,
"string": null
},
{
"integer": 1296293990,
"string": null
},
{
"integer": 105,
"string": null
},
{
"integer": 2094599304,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": -1731039156,
"string": null
},
{
"integer": 402653184,
"string": null
},
{
"integer": -2070027676,
"string": null
},
{
"integer": -20799150,
"string": null
},
{
"integer": 22224375,
"string": ""
},
{
"integer": -1398549504,
"string": null
},
{
"integer": 1116718519,
"string": " ufffduufffd)'Fufffdufffdvufffd ufffdufffdufffd,# ]ufffdCufffdGIufffd ufffd>sufffd ufffdufffdufffd ufffdl&kufffdKufffd ufffdufffd ufffd ufffdB a=ufffd ufffdG?ufffdufffd'ufffdufffdIA ufffd `ufffdOdufffdufffdufffd ufffdufffdufffdufffd + hc ufffdXL#kp 9ufffdufffd^& u05bcufffdufffdX OIufffd !u2228ufffd< ufffdu042eufffdoAufffdufffd ufffddufffdufffdufffd&ufffd O ufffdEu01b4ufffd.?ufffd}ufffdufffdufffd ufffdJufffd5l@Xufffdufffd ufffd|$ ufffd( nufffdufffdufffdufffdB ufffd\!SufffdufffdDufffd ufffdufffdufffdufffdsBufffdufffdNufffd ufffdufffdu 6ufffdufffddufffdB ufffdufffdrufffdufffdNufffdufffdufffd ufffdufffd}iufffdAOufffdufffdQJufffd(ufffdufffdufffd B^ufffdufffdFMufffd$ufffdufffd ufffdy dufffd 'ufffd oufffdH ufffdufffdufffdufffdufffdufffd ufffd ufffd q:Iufffdufffd2ufffdufffdufffd ufffd8 P2hTufffdHufffdufffdufffdufffdu020bufffdufffdufffdufffd M CpITqiufffdiufffdu5c10ufffd ufffd2ufffdufffd LufffdS%^jufffdufffdufffd2 ufffdufffd#XNufffdLRufffdaufffdufffdufffdSufffd ufffdbufffd:Nufffdufffd ufffdufffdufffdufffdufffd$ufffd ufffdufffdufffd. Mufffd ufffdufffd6ufffdu06a5ufffd3 O5ufffdP/IufffdufffdufffdufffdIufffd c< @ufffdVJufffd ufffd ~4u04d2ufffdju0272 Jufffdu05d7FXufffd ufffdufffdufffdF}H ufffdufffdyU L:ufffd{ ufffdufffd/ufffdac Rufffd/a @Su0691O ufffd|ufffdufffdufffd 6z#ufffdufffdAufffdufffdufffd ufffdufffdufffd hX u?NufffdIKufffdp ufffdu05a4u0736~< ufffdHufffd hL./ufffdSu0409- ufffdufffd ufffd hufffdufffdKufffdYufffd ufffdufffd=Dufffds ufffdPufffd N ufffd ufffdu033bufffd ufffdW ufffdQufffdufffdufffd ufffdEufffd" ufffdVufffdx 2 /Script/KuroRenderingRuntimeBPPlugin.KuroSaveGame StringMap MapProperty ? "
},
{
"integer": 1951596544,
"string": null
},
{
"integer": 786553,
"string": ""
},
{
"integer": 1349678163,
"string": null
},
{
"integer": 0,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": 1769366852,
"string": null
},
{
"integer": null,
"string": null
},
{
"integer": 875771193,
"string": null
},
{
"integer": 875906370,
"string": null
},
{
"integer": 3290435,
"string": " Non"
},
{
"integer": 101,
"string": ""
}
]
}
which to me looks like just a corrupted conversion.
Any help is appreciated — here is a link so you can download the .sav file and try it for yourselves:
https://zoux.net/static/KURO_PLAYER_PREFS.sav