initial commit
This commit is contained in:
commit
940fe79caf
3 changed files with 336 additions and 0 deletions
18
LICENSE.md
Normal file
18
LICENSE.md
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
Copyright (c) 2019 Jan Wolff
|
||||||
|
|
||||||
|
This software is provided 'as-is', without any express or implied
|
||||||
|
warranty. In no event will the authors be held liable for any damages
|
||||||
|
arising from the use of this software.
|
||||||
|
|
||||||
|
Permission is granted to anyone to use this software for any purpose,
|
||||||
|
including commercial applications, and to alter it and redistribute it
|
||||||
|
freely, subject to the following restrictions:
|
||||||
|
|
||||||
|
1. The origin of this software must not be misrepresented; you must not
|
||||||
|
claim that you wrote the original software. If you use this software
|
||||||
|
in a product, an acknowledgment in the product documentation would be
|
||||||
|
appreciated but is not required.
|
||||||
|
2. Altered source versions must be plainly marked as such, and must not be
|
||||||
|
misrepresented as being the original software.
|
||||||
|
3. This notice may not be removed or altered from any source distribution.
|
||||||
|
|
15
README.md
Normal file
15
README.md
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
VMA extractor
|
||||||
|
=============
|
||||||
|
|
||||||
|
`vma.py` implements a VMA extraction tool in Python 3.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
```sh
|
||||||
|
./vma.py path/to/source.vma path/to/target/directory
|
||||||
|
```
|
||||||
|
|
||||||
|
I think it is pretty important to be able to read Proxmox backups outside of a
|
||||||
|
Proxmox environment. Yet, porting their VMA implementation to a standalone
|
||||||
|
tool proved difficult. VMA-Reader and VMA-Writer are implemented as patches to
|
||||||
|
the Proxmox-patched version and Qemu and are thus very difficult to compile on
|
||||||
|
non-Proxmox systems.
|
303
vma.py
Executable file
303
vma.py
Executable file
|
@ -0,0 +1,303 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import hashlib
|
||||||
|
import struct
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
|
class VmaHeader():
|
||||||
|
def __init__(self, fo):
|
||||||
|
# 0 - 3: magic
|
||||||
|
# VMA magic string ("VMA\x00")
|
||||||
|
magic = fo.read(4)
|
||||||
|
assert magic == b'VMA\0'
|
||||||
|
|
||||||
|
# 4 - 7: version
|
||||||
|
# Version number (valid value is 1)
|
||||||
|
version = int.from_bytes(fo.read(4), 'big')
|
||||||
|
assert version == 1
|
||||||
|
|
||||||
|
# 8 - 23: uuid
|
||||||
|
# Unique ID, Same uuid is used to mark extents.
|
||||||
|
self.uuid = fo.read(16)
|
||||||
|
|
||||||
|
# 24 - 31: ctime
|
||||||
|
# Backup time stamp (seconds since epoch)
|
||||||
|
self.ctime = int.from_bytes(fo.read(8), 'big')
|
||||||
|
|
||||||
|
# 32 - 47: md5sum
|
||||||
|
# Header checksum (from byte 0 to header_size). This field
|
||||||
|
# is filled with zero to generate the checksum.
|
||||||
|
self.md5sum = fo.read(16)
|
||||||
|
|
||||||
|
# 48 - 51: blob_buffer_offset
|
||||||
|
# Start of blob buffer (multiple of 512)
|
||||||
|
self.blob_buffer_offset = int.from_bytes(fo.read(4), 'big')
|
||||||
|
|
||||||
|
# 52 - 55: blob_buffer_size
|
||||||
|
# Size of blob buffer (multiple of 512)
|
||||||
|
self.blob_buffer_size = int.from_bytes(fo.read(4), 'big')
|
||||||
|
|
||||||
|
# 56 - 59: header_size
|
||||||
|
# Overall size of this header (multiple of 512)
|
||||||
|
self.header_size = int.from_bytes(fo.read(4), 'big')
|
||||||
|
|
||||||
|
# 60 - 2043: reserved
|
||||||
|
fo.seek(1984, os.SEEK_CUR)
|
||||||
|
|
||||||
|
# 2044 - 3067: uint32_t config_names[256]
|
||||||
|
# Offsets into blob_buffer table
|
||||||
|
self.config_names = []
|
||||||
|
for i in range(256):
|
||||||
|
self.config_names.append(int.from_bytes(fo.read(4), 'big'))
|
||||||
|
|
||||||
|
# 3068 - 4091: uint32_t config_data[256]
|
||||||
|
# Offsets into blob_buffer table
|
||||||
|
self.config_data = []
|
||||||
|
for i in range(256):
|
||||||
|
self.config_data.append(int.from_bytes(fo.read(4), 'big'))
|
||||||
|
|
||||||
|
# 4092 - 4095: reserved
|
||||||
|
fo.seek(4, os.SEEK_CUR)
|
||||||
|
|
||||||
|
# 4096 - 12287: VmaDeviceInfoHeader dev_info[256]
|
||||||
|
# The offset in this table is used as 'dev_id' inside
|
||||||
|
# the data streams.
|
||||||
|
self.dev_info = []
|
||||||
|
for i in range(256):
|
||||||
|
self.dev_info.append(VmaDeviceInfoHeader(fo, self))
|
||||||
|
|
||||||
|
# 12288 - header_size: Blob buffer
|
||||||
|
|
||||||
|
# the blob buffer layout is very odd. there appears to be an additional
|
||||||
|
# byte of padding at the beginning
|
||||||
|
fo.seek(1, os.SEEK_CUR)
|
||||||
|
# since byte-wise offsets are used to address the blob buffer, the
|
||||||
|
# blob metadata is stored in a hashmap, with the offsets as the keys
|
||||||
|
self.blob_buffer = {}
|
||||||
|
blob_buffer_current_offset = 1
|
||||||
|
while(fo.tell() < self.blob_buffer_offset + self.blob_buffer_size):
|
||||||
|
self.blob_buffer[blob_buffer_current_offset] = Blob(fo)
|
||||||
|
blob_buffer_current_offset = fo.tell() - self.blob_buffer_offset
|
||||||
|
|
||||||
|
# make sure the file object points at the end of the vma header
|
||||||
|
fo.seek(self.header_size, os.SEEK_SET)
|
||||||
|
|
||||||
|
|
||||||
|
class VmaDeviceInfoHeader():
|
||||||
|
def __init__(self, fo, vma_header):
|
||||||
|
self.__vma_header = vma_header
|
||||||
|
|
||||||
|
# 0 - 3: devive name (offsets into blob_buffer table)
|
||||||
|
self.device_name = int.from_bytes(fo.read(4), 'big')
|
||||||
|
|
||||||
|
# 4 - 7: reserved
|
||||||
|
fo.seek(4, os.SEEK_CUR)
|
||||||
|
|
||||||
|
# 8 - 15: device size in bytes
|
||||||
|
self.device_size = int.from_bytes(fo.read(8), 'big')
|
||||||
|
|
||||||
|
# 16 - 31: reserved
|
||||||
|
fo.seek(16, os.SEEK_CUR)
|
||||||
|
|
||||||
|
|
||||||
|
def get_name(self):
|
||||||
|
name = self.__vma_header.blob_buffer[self.device_name].data
|
||||||
|
return name.split(b'\0')[0].decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
|
class VmaExtentHeader():
|
||||||
|
def __init__(self, fo, vma_header):
|
||||||
|
# 0 - 3: magic
|
||||||
|
# VMA extent magic string ("VMAE")
|
||||||
|
magic = fo.read(4)
|
||||||
|
assert magic == b'VMAE'
|
||||||
|
|
||||||
|
# 4 - 5: reserved
|
||||||
|
fo.seek(2, os.SEEK_CUR)
|
||||||
|
|
||||||
|
# 6 - 7: block_count
|
||||||
|
# Overall number of contained 4K block
|
||||||
|
self.block_count = int.from_bytes(fo.read(2), 'big')
|
||||||
|
|
||||||
|
# 8 - 23: uuid
|
||||||
|
# Unique ID, Same uuid as used in the VMA header.
|
||||||
|
self.uuid = fo.read(16)
|
||||||
|
|
||||||
|
# 24 - 39: md5sum
|
||||||
|
# Header checksum (from byte 0 to header_size). This field
|
||||||
|
# is filled with zero to generate the checksum.
|
||||||
|
self.md5sum = fo.read(16)
|
||||||
|
|
||||||
|
# 40 - 511: blockinfo[59]
|
||||||
|
self.blockinfo = []
|
||||||
|
for i in range(59):
|
||||||
|
self.blockinfo.append(Blockinfo(fo, vma_header))
|
||||||
|
|
||||||
|
|
||||||
|
class Blob():
|
||||||
|
def __init__(self, fo):
|
||||||
|
# the size of a blob is a two-byte int in LITTLE endian
|
||||||
|
# source: original c code of vma-reader
|
||||||
|
# uint32_t size = vmar->head_data[bstart] +
|
||||||
|
# (vmar->head_data[bstart+1] << 8);
|
||||||
|
self.size = int.from_bytes(fo.read(2), 'little')
|
||||||
|
self.data = fo.read(self.size)
|
||||||
|
|
||||||
|
|
||||||
|
class Blockinfo():
|
||||||
|
CLUSTER_SIZE = 65536
|
||||||
|
|
||||||
|
def __init__(self, fo, vma_header):
|
||||||
|
self.__vma_header = vma_header
|
||||||
|
|
||||||
|
# 0 - 1: mask
|
||||||
|
self.mask = int.from_bytes(fo.read(2), 'big')
|
||||||
|
|
||||||
|
# 2: reserved
|
||||||
|
fo.seek(1, os.SEEK_CUR)
|
||||||
|
|
||||||
|
# 3: dev_id
|
||||||
|
# Device ID (offset into dev_info table)
|
||||||
|
self.dev_id = int.from_bytes(fo.read(1), 'big')
|
||||||
|
|
||||||
|
# 4 - 7: cluster_num
|
||||||
|
self.cluster_num = int.from_bytes(fo.read(4), 'big')
|
||||||
|
|
||||||
|
|
||||||
|
def extract_configs(fo, args, vma_header):
|
||||||
|
"""
|
||||||
|
Configs in VMA are composed of two blobs. One specifies the config's
|
||||||
|
filename and the other contains the config's content.
|
||||||
|
The filename seems to be a null-terminated string, while the content is not
|
||||||
|
terminated.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if args.verbose: print('extracting configs...')
|
||||||
|
|
||||||
|
for i in range(256):
|
||||||
|
if vma_header.config_names[i] == 0: continue
|
||||||
|
config_name = vma_header.blob_buffer[vma_header.config_names[i]].data
|
||||||
|
# interpret filename as a null-terminated utf-8 string
|
||||||
|
config_name = config_name.split(b'\0')[0].decode('utf-8')
|
||||||
|
|
||||||
|
if args.verbose: print(f'{config_name}...', end='')
|
||||||
|
|
||||||
|
config_data = vma_header.blob_buffer[vma_header.config_data[i]].data
|
||||||
|
|
||||||
|
with open(os.path.join(args.destination, config_name), 'wb') as config_fo:
|
||||||
|
config_fo.write(config_data)
|
||||||
|
|
||||||
|
if args.verbose: print(' OK')
|
||||||
|
|
||||||
|
|
||||||
|
def extract(fo, args):
|
||||||
|
os.makedirs(args.destination, exist_ok=True)
|
||||||
|
|
||||||
|
fo.seek(0, os.SEEK_END)
|
||||||
|
filesize = fo.tell()
|
||||||
|
fo.seek(0, os.SEEK_SET)
|
||||||
|
|
||||||
|
vma_header = VmaHeader(fo)
|
||||||
|
|
||||||
|
extract_configs(fo, args, vma_header)
|
||||||
|
|
||||||
|
# extract_configs may move the read head somewhere into the blob buffer
|
||||||
|
# make sure we are back at the end of the header
|
||||||
|
fo.seek(vma_header.header_size, os.SEEK_SET)
|
||||||
|
|
||||||
|
if args.verbose: print('extracting devices...')
|
||||||
|
|
||||||
|
# open file handlers for all devices within the VMA
|
||||||
|
# so we can easily append data to arbitrary devices
|
||||||
|
device_fos = {}
|
||||||
|
for dev_id, dev_info in enumerate(vma_header.dev_info):
|
||||||
|
if dev_info.device_size > 0:
|
||||||
|
if args.verbose: print(dev_info.get_name())
|
||||||
|
device_fos[dev_id] = open(os.path.join(args.destination, dev_info.get_name()), 'wb')
|
||||||
|
|
||||||
|
if args.verbose: print('this may take a while...')
|
||||||
|
|
||||||
|
# used for sanity checking
|
||||||
|
cluster_num_prev = -1
|
||||||
|
|
||||||
|
while(fo.tell() < filesize):
|
||||||
|
# when there is data to read at this point, we can safely expect a full
|
||||||
|
# extent header with additional clusters
|
||||||
|
extent_header = VmaExtentHeader(fo, vma_header)
|
||||||
|
assert vma_header.uuid == extent_header.uuid
|
||||||
|
|
||||||
|
for blockinfo in extent_header.blockinfo:
|
||||||
|
if blockinfo.dev_id == 0: continue
|
||||||
|
|
||||||
|
device_fo = device_fos[blockinfo.dev_id]
|
||||||
|
|
||||||
|
# non-sequential clusters encountered, handle this case
|
||||||
|
if blockinfo.cluster_num != cluster_num_prev + 1:
|
||||||
|
if args.verbose: print('non sequential cluster encountered...')
|
||||||
|
|
||||||
|
cluster_pos = blockinfo.cluster_num * Blockinfo.CLUSTER_SIZE
|
||||||
|
if blockinfo.cluster_num > cluster_num_prev:
|
||||||
|
# special case: cluster num is larger than current,
|
||||||
|
# seek forward into file AND, if needed, fill missing size
|
||||||
|
# with zeros
|
||||||
|
device_fo.seek(0, os.SEEK_END)
|
||||||
|
written_size = device_fo.tell()
|
||||||
|
|
||||||
|
if written_size < cluster_pos:
|
||||||
|
# add padding for missing clusters
|
||||||
|
if args.verbose:
|
||||||
|
print(f'{blockinfo.cluster_num}')
|
||||||
|
print(f'adding {cluster_pos - written_size} bytes'
|
||||||
|
+ 'of padding...')
|
||||||
|
|
||||||
|
# write padding in chucks of 4096 bytes to avoid
|
||||||
|
# memory errors
|
||||||
|
padding = cluster_pos - written_size
|
||||||
|
while padding > 0:
|
||||||
|
device_fo.write(b'\0' * min(padding, 4096))
|
||||||
|
padding -= 4096
|
||||||
|
|
||||||
|
# seek to start of new cluster
|
||||||
|
device_fo.seek(cluster_pos, os.SEEK_SET)
|
||||||
|
|
||||||
|
cluster_num_prev = blockinfo.cluster_num
|
||||||
|
|
||||||
|
for i in range(16):
|
||||||
|
# a 2-bytes wide bitmask indicates 4k blocks with only zeros
|
||||||
|
if (1 << i) & blockinfo.mask:
|
||||||
|
device_fo.write(fo.read(4096))
|
||||||
|
else:
|
||||||
|
device_fo.write(b'\0' * 4096)
|
||||||
|
|
||||||
|
if args.verbose: print('closing file handles...')
|
||||||
|
for device_fo in device_fos.values():
|
||||||
|
device_fo.close()
|
||||||
|
|
||||||
|
if args.verbose: print('done')
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument('filename', type=str)
|
||||||
|
parser.add_argument('destination', type=str)
|
||||||
|
parser.add_argument('-v', '--verbose', default=False, action='store_true')
|
||||||
|
parser.add_argument('-f', '--force', default=False, action='store_true')
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if(not os.path.exists(args.filename)):
|
||||||
|
print('Error! Source file does not exist!')
|
||||||
|
return 1
|
||||||
|
|
||||||
|
if(os.path.exists(args.destination) and not args.force):
|
||||||
|
print('Error! Destination path exists!')
|
||||||
|
return 1
|
||||||
|
|
||||||
|
with open(args.filename, 'rb') as fo:
|
||||||
|
extract(fo, args)
|
||||||
|
|
||||||
|
return 0
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
Loading…
Reference in a new issue