#-
# ==========================================================================
# Copyright (C) 1995 - 2006 Autodesk, Inc. and/or its licensors.  All 
# rights reserved.
#
# The coded instructions, statements, computer programs, and/or related 
# material (collectively the "Data") in these files contain unpublished 
# information proprietary to Autodesk, Inc. ("Autodesk") and/or its 
# licensors, which is protected by U.S. and Canadian federal copyright 
# law and by international treaties.
#
# The Data is provided for use exclusively by You. You have the right 
# to use, modify, and incorporate this Data into other products for 
# purposes authorized by the Autodesk software license agreement, 
# without fee.
#
# The copyright notices in the Software and this entire statement, 
# including the above license grant, this restriction and the 
# following disclaimer, must be included in all copies of the 
# Software, in whole or in part, and all derivative works of 
# the Software, unless such copies or derivative works are solely 
# in the form of machine-executable object code generated by a 
# source language processor.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND. 
# AUTODESK DOES NOT MAKE AND HEREBY DISCLAIMS ANY EXPRESS OR IMPLIED 
# WARRANTIES INCLUDING, BUT NOT LIMITED TO, THE WARRANTIES OF 
# NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR 
# PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE, OR 
# TRADE PRACTICE. IN NO EVENT WILL AUTODESK AND/OR ITS LICENSORS 
# BE LIABLE FOR ANY LOST REVENUES, DATA, OR PROFITS, OR SPECIAL, 
# DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES, EVEN IF AUTODESK 
# AND/OR ITS LICENSORS HAS BEEN ADVISED OF THE POSSIBILITY 
# OR PROBABILITY OF SUCH DAMAGES.
#
# ==========================================================================
#+

import os
import os.path
import getopt
import sys
import xml.dom.minidom
import string
import re
import array

"""

This example shows how to parse data from a Maya cache file, up to version 2.0.
It produces an ascii dump of the first few elements of data for every channel 
at every time.  It parses the XML file in addition to the cache data files and 
handles caches that are one file per frame as well as one file. 
To use: 

python cacheFileExample.py -f mayaCacheFile.xml


Overview of Maya Caches:
========================

Conceptually, a Maya cache consists of 1 or more channels of data.  
Each channel has a number of properties, such as:

- start/end time 
- data type of the channel (eg. "DoubleVectorArray" to represent a point array)
- interpretation (eg. "positions" the vector array represents position data, as opposed to per vertex normals, for example)
- sampling type (eg. "regular" or "irregular")
- sampling rate (meaningful only if sampling type is "regular")

Each channel has a number of data points in time, not necessarily regularly spaced, 
and not necessarily co-incident in time with data in other channels.  
At the highest level, a Maya cache is simply made up of channels and their data in time.

On disk, the Maya cache is made up of a XML description file, and 1 or more data files.  
The description file provides a high level overview of what the cache contains, 
such as the cache type (one file, or one file per frame), channel names, interpretation, etc.  
The data files contain the actual data for the channels.  
In the case of one file per frame, a naming convention is used so the cache can check its 
available data at runtime.

Here is a visualization of the data format of the OneFile case:

//  |---CACH (Group)	// Header
//  |     |---VRSN		// Version Number (char*)
//  |     |---STIM		// Start Time of the Cache File (int)
//  |     |---ETIM		// End Time of the Cache File (int)
//  |
//  |---MYCH (Group)	// 1st Time 
//  |     |---TIME		// Time (int)
//  |     |---CHNM		// 1st Channel Name (char*)
//  |     |---SIZE		// 1st Channel Size
//  |     |---DVCA		// 1st Channel Data (Double Vector Array)
//  |     |---CHNM		// n-th Channel Name
//  |     |---SIZE		// n-th Channel Size
//  |     |---DVCA		// n-th Channel Data (Double Vector Array)
//  |     |..
//  |
//  |---MYCH (Group)	// 2nd Time 
//  |     |---TIME		// Time
//  |     |---CHNM		// 1st Channel Name
//  |     |---SIZE		// 1st Channel Size
//  |     |---DVCA		// 1st Channel Data (Double Vector Array)
//  |     |---CHNM		// n-th Channel Name
//  |     |---SIZE		// n-th Channel Size
//  |     |---DVCA		// n-th Channel Data (Double Vector Array)
//  |     |..
//  |
//  |---..
//	|
//

In multiple-file caches, the only difference is that after the 
header "CACH" group, there is only one MYCH group and there is no 
TIME chunk.	In the case of one file per frame, the time is part of 
the file name - allowing Maya to scan at run time to see what data 
is actually available, and it allows users to move data in time by 
manipulating the file name.  

!Note that it's not necessary to have data for every channel at every time.  

"""

class CacheChannel:
    """Description of a single channel listed in the cache XML file.

    The constructor stores the seven properties it is given on the
    instance and prints a one-line summary of them to stdout.
    """
    # Class-level defaults; every one of them is overwritten per instance
    # by __init__.
    m_channelName = ""
    m_channelType = ""
    m_channelInterp = ""
    m_sampleType = ""
    m_sampleRate = 0
    m_startTime = 0
    m_endTime = 0

    def __init__(self,channelName,channelType,interpretation,samplingType,samplingRate,startTime,endTime):
        """Store the channel properties and echo them to stdout.

        samplingRate, startTime and endTime are formatted with %d, so
        they must be numeric.
        """
        self.m_channelName = channelName
        self.m_channelType = channelType
        self.m_channelInterp = interpretation
        self.m_sampleType = samplingType
        self.m_sampleRate = samplingRate
        self.m_startTime = startTime
        self.m_endTime = endTime
        # Single-argument print(...) produces the same output on Python 2
        # and is valid syntax on Python 3.
        print("Channel Name =%s,type=%s,interp=%s,sampleType=%s,rate=%d,start=%d,end=%d\n"
              % (channelName,channelType,interpretation,samplingType,samplingRate,startTime,endTime))
        
class CacheFile:
    """In-memory view of a Maya cache XML description file.

    Constructing an instance locates the .xml file, parses it and fills
    in the m_* members below, including one CacheChannel per channel
    element found under <Channels>.
    """
    m_baseFileName = ""
    m_directory = ""
    m_cacheType = ""
    m_cacheStartTime = 0
    m_cacheEndTime = 0
    m_timePerFrame = 0
    m_version = 0.0
    # Class-level placeholder only.  __init__ gives each instance its own
    # list; appending to this shared one would leak channels between
    # instances.
    m_channels = []

    ########################################################################
    #   Description:
    #       Class constructor - tries to figure out full path to cache
    #       xml description file before calling parseDescriptionFile()
    #
    def __init__(self,fileName):
        """Locate and parse the description file.

        fileName can be the full path to the .xml description file, or
        just the filename of the .xml file, with or without extension,
        if it is in the current directory.  When a bare filename cannot
        be found, a message is printed and the process exits with
        status 2.
        """
        self.m_channels = []

        directory = os.path.dirname(fileName)
        if directory == "":
            currDir = os.getcwd()
            fullPath = os.path.join(currDir,fileName)
            if not os.path.exists(fullPath):
                # Retry with the .xml extension appended.
                fileName = fileName + '.xml'
                fullPath = os.path.join(currDir,fileName)
                if not os.path.exists(fullPath):
                    print("Sorry, can't find the file %s to be opened\n" % fullPath)
                    sys.exit(2)
        else:
            fullPath = fileName

        # Strip only the final extension so that names which themselves
        # contain dots are not truncated at the first one.
        self.m_baseFileName = os.path.splitext(os.path.basename(fileName))[0]
        self.m_directory = os.path.dirname(fullPath)
        self.parseDescriptionFile(fullPath)

    ########################################################################
    # Description:
    #   Given the full path to the xml cache description file, this
    #   method parses its contents and sets the relevant member variables
    #
    def parseDescriptionFile(self,fullPath):
        """Parse the XML file at fullPath into the m_* members.

        Exits with status 2 if the document has no Autodesk_Cache_File
        element.  Raises ValueError if a numeric field cannot be parsed.
        """
        dom = xml.dom.minidom.parse(fullPath)
        root = dom.getElementsByTagName("Autodesk_Cache_File")
        if len(root) == 0:
            print("Error: %s is not a Maya cache description file\n" % fullPath)
            sys.exit(2)

        for node in root[0].childNodes:
            nodeName = node.nodeName
            if nodeName == "cacheType":
                self.m_cacheType = self._attributeValue(node,"Type")
            elif nodeName == "time":
                timeRange = self._parseTimeRange(self._attributeValue(node,"Range"))
                self.m_cacheStartTime = timeRange[0]
                self.m_cacheEndTime = timeRange[1]
            elif nodeName == "cacheTimePerFrame":
                self.m_timePerFrame = int(self._attributeValue(node,"TimePerFrame"))
            elif nodeName == "cacheVersion":
                self.m_version = float(self._attributeValue(node,"Version"))
            elif nodeName == "Channels":
                self.parseChannels(node.childNodes)

    ########################################################################
    # Description:
    #   helper method to extract channel information
    #
    def parseChannels(self,channels):
        """Append a CacheChannel for every child whose name starts with "channel".

        Attributes missing from a channel element fall back to "" for
        the text fields and 0 for the numeric ones.
        """
        for channel in channels:
            # Text and comment nodes have names such as "#text", so this
            # test also filters out everything that is not an element.
            if not channel.nodeName.startswith("channel"):
                continue

            # getAttribute returns "" when the attribute is absent.
            text = channel.getAttribute
            channelObj = CacheChannel(text("ChannelName"),
                                      text("ChannelType"),
                                      text("ChannelInterpretation"),
                                      text("SamplingType"),
                                      int(text("SamplingRate") or 0),
                                      int(text("StartTime") or 0),
                                      int(text("EndTime") or 0))
            self.m_channels.append(channelObj)

    ########################################################################
    # Description:
    #   helper method returning the value of the named attribute of an
    #   element
    #
    def _attributeValue(self,node,attrName):
        """Return node's attrName attribute, or its first attribute if absent.

        Looking the attribute up by name keeps the result independent of
        the order in which the DOM happens to store attributes.  The
        positional fallback preserves the earlier behaviour for files
        that use a different attribute name.
        """
        if node.hasAttribute(attrName):
            return node.getAttribute(attrName)
        return node.attributes.item(0).nodeValue

    ########################################################################
    # Description:
    #   helper method to split a "start-end" time range into two ints
    #
    def _parseTimeRange(self,rangeText):
        """Return (start, end) parsed from a "start-end" string.

        Either bound may itself be negative (for example "-250-6000"),
        which a plain split on '-' cannot handle.  Raises ValueError if
        the text does not have this shape.
        """
        found = re.match(r"\s*(-?\d+)\s*-\s*(-?\d+)\s*$",rangeText)
        if found is None:
            raise ValueError("unrecognized cache time range: %r" % (rangeText,))
        return (int(found.group(1)),int(found.group(2)))
                    


def fileFormatError():
    """Report an unreadable cache file on stdout and exit with status 2."""
    # Single-argument print(...) behaves identically on Python 2 and is
    # valid syntax on Python 3.
    print("Error: unable to read cache format\n")
    sys.exit(2)
    
def readInt(fd,needSwap):
    """Read one signed 32 bit integer from the binary file object fd.

    needSwap: when true, the four bytes are byte-swapped after reading,
    i.e. the file's byte order is the opposite of the host's.

    Returns the integer.  Raises EOFError if fewer than four bytes are
    available.
    """
    # The 'l' typecode is 8 bytes wide on 64 bit Linux and macOS, which
    # would consume two fields per call.  Pick a typecode that is
    # exactly 4 bytes on this platform instead.
    for typeCode in ('i','l','h'):
        intArray = array.array(typeCode)
        if intArray.itemsize == 4:
            break
    else:
        raise RuntimeError("no 4 byte integer array type on this platform")

    intArray.fromfile(fd,1)
    if needSwap:
        intArray.byteswap()
    return intArray[0]

# Cache times are stored in ticks; 1 tick = 1/6000 of a second.
_TICKS_PER_SECOND = 6000.0

# Maximum number of points printed for each channel.
_MAX_POINTS_TO_PRINT = 5

# Supported data format tags:
#   tag -> (array typecode, bytes per component, name used in the output)
_VECTOR_FORMATS = {
    b"FVCA": ('f', 4, "float"),     # FVCA == Float Vector Array
    b"DVCA": ('d', 8, "double"),    # DVCA == Double Vector Array
}


def _needsByteSwap():
    """Return True when values read from a cache file must be byte-swapped.

    The data is swapped on little-endian hosts and left alone otherwise.
    Asking the interpreter for its byte order covers every platform,
    unlike matching on the platform name.
    """
    return sys.byteorder == "little"


def _skipHeader(fd,needSwap):
    """Check the leading FOR4 tag and skip over the header block."""
    #blockTag must be FOR4
    if fd.read(4) != b"FOR4":
        fileFormatError()
    headerSize = readInt(fd,needSwap)
    #The 1st block is the header, not used.
    #ignore the header for now.
    fd.read(headerSize)


def _readBlockStart(fd,needSwap):
    """Read the FOR4 tag, the block size and the MYCH tag of a data block.

    Returns the block size, or None if the file is already at EOF.
    The 4 bytes of the MYCH tag count towards the block size, so the
    caller starts its byte count at 4.
    """
    blockTag = fd.read(4)
    if not blockTag:
        return None
    if blockTag != b"FOR4":
        fileFormatError()
    blockSize = readInt(fd,needSwap)
    if fd.read(4) != b"MYCH":
        fileFormatError()
    return blockSize


def _dumpChannels(fd,needSwap,blockSize,bytesRead):
    """Print every channel in the current block.

    bytesRead is the number of bytes of the block already consumed by
    the caller; channels are read until blockSize bytes are used up.
    """
    while bytesRead < blockSize:
        #channel name is next.
        #the tag for this must be CHNM
        if fd.read(4) != b"CHNM":
            fileFormatError()

        #Next comes a 32 bit int that tells us how long the
        #channel name is
        chnmSize = readInt(fd,needSwap)

        #The string is padded out to 32 bit boundaries,
        #so we may need to read more than chnmSize
        chnmSizeToRead = (chnmSize + 3) & (~3)
        channelName = fd.read(chnmSizeToRead)[:chnmSize]
        bytesRead += 8 + chnmSizeToRead

        # Drop any trailing NUL bytes (presumably a C string terminator
        # counted in the stored length) so they do not end up in the
        # printed output.
        channelName = channelName.rstrip(b"\0")
        if not isinstance(channelName,str):
            # Python 3: turn the bytes into text so that %s prints the
            # name itself rather than its repr.
            channelName = channelName.decode("ascii","replace")

        #Next is the SIZE field, which tells us the length
        #of the data array
        if fd.read(4) != b"SIZE":
            fileFormatError()

        #Next 32 bit int is the size of the array size variable,
        #this is always 4, so we'll ignore it for now
        #though we could use it as a sanity check.
        fd.read(4)

        #finally the actual size of the array:
        arrayLength = readInt(fd,needSwap)
        bytesRead += 12

        #data format tag:
        dataFormatTag = fd.read(4)
        #buffer length - how many bytes is the actual data
        bufferLength = readInt(fd,needSwap)
        bytesRead += 8

        if dataFormatTag not in _VECTOR_FORMATS:
            fileFormatError()
        (typeCode,componentSize,typeName) = _VECTOR_FORMATS[dataFormatTag]

        # Each element is a 3 component vector.
        if bufferLength != arrayLength*3*componentSize:
            fileFormatError()
        values = array.array(typeCode)
        values.fromfile(fd,arrayLength*3)
        bytesRead += bufferLength
        if needSwap:
            values.byteswap()

        numPointsToPrint = min(_MAX_POINTS_TO_PRINT,arrayLength)
        print("Channelname = %s,Data type %s vector array,length = %d elements, First %d points:\n"
              % (channelName,typeName,arrayLength,numPointsToPrint))
        print(values[0:numPointsToPrint*3])


########################################################################
# Description:
#   method to parse and display the contents of the data file, for the
#   One large file case ("OneFile")
def parseDataOneFile(cacheFile):
    """Dump the contents of <directory>/<baseFileName>.mc to stdout.

    cacheFile supplies m_directory and m_baseFileName.  Exits with
    status 2 if the data file is missing or malformed.
    """
    dataFilePath = os.path.join(cacheFile.m_directory,cacheFile.m_baseFileName) + ".mc"
    if not os.path.exists(dataFilePath):
        print("Error: unable to open cache data file at %s\n" % dataFilePath)
        sys.exit(2)

    needSwap = _needsByteSwap()
    # The with block closes the file on every exit path, including the
    # sys.exit raised by fileFormatError.
    with open(dataFilePath,"rb") as fd:
        _skipHeader(fd,needSwap)

        while True:
            #From now on the file is organized in blocks of time
            #Each block holds the data for all the channels at that
            #time
            blockSize = _readBlockStart(fd,needSwap)
            if blockSize is None:
                #EOF condition...we are done
                return
            bytesRead = 4

            if fd.read(4) != b"TIME":
                fileFormatError()

            #Next 32 bit int is the size of the time variable,
            #this is always 4, so we'll ignore it for now
            #though we could use it as a sanity check.
            fd.read(4)

            #Next 32 bit int is the time itself, in ticks
            timeInTicks = readInt(fd,needSwap)
            bytesRead += 12

            print("Data found at time %f seconds:\n" % (timeInTicks/_TICKS_PER_SECOND))
            _dumpChannels(fd,needSwap,blockSize,bytesRead)


########################################################################
# Description:
#   method to parse and display the contents of the data file, for the
#   file per frame case ("OneFilePerFrame")
def parseDataFilePerFrame(cacheFile):
    """Dump every <baseFileName>Frame<N>[Tick<M>].mc file to stdout.

    cacheFile supplies m_directory, m_baseFileName and m_timePerFrame.
    Files in the directory that do not follow the naming convention are
    ignored; matching files are processed in order of increasing time.
    Exits with status 2 if a data file is malformed.
    """
    # Escape the base name so that it is matched literally, and anchor
    # the whole file name so that caches sharing a name prefix are not
    # picked up by mistake.
    namePattern = re.compile(re.escape(cacheFile.m_baseFileName) +
                             r"Frame(-?\d+)(?:Tick(-?\d+))?\.mc$")

    dataFiles = []
    for afile in os.listdir(cacheFile.m_directory):
        found = namePattern.match(afile)
        if found is None:
            continue
        frameNumber = int(found.group(1))
        tickNumber = int(found.group(2) or 0)
        timeInTicks = frameNumber*cacheFile.m_timePerFrame + tickNumber
        dataFiles.append((timeInTicks,afile))
    # os.listdir returns names in arbitrary order.
    dataFiles.sort()

    needSwap = _needsByteSwap()
    for (timeInTicks,afile) in dataFiles:
        print("--------------------------------------------------------------\n")
        print("Data found at time %f seconds:\n" % (timeInTicks/_TICKS_PER_SECOND))

        # os.listdir yields bare names, so the directory has to be put
        # back before the file can be opened from another working
        # directory.
        with open(os.path.join(cacheFile.m_directory,afile),"rb") as fd:
            _skipHeader(fd,needSwap)

            blockSize = _readBlockStart(fd,needSwap)
            if blockSize is None:
                # A per-frame file must contain one data block.
                fileFormatError()

            #Note that unlike the oneFile case, for file per frame there is no
            #TIME tag at this point.  The time of the data is embedded in the
            #file name itself.
            _dumpChannels(fd,needSwap,blockSize,4)
    

def usage():
    """Print a one-line hint about the -f command line option to stdout."""
    # Single-argument print(...) behaves identically on Python 2 and is
    # valid syntax on Python 3.
    print("Use -f to indicate the cache description file (.xml) you wish to parse\n")

def main():
    """Command line entry point: parse -f <file>, print a summary, dump the data.

    Exits with status 2 on bad command line arguments or when the cache
    version is newer than 2.0.
    """
    try:
        (opts, args) = getopt.getopt(sys.argv[1:], "f:")
    except getopt.error:
        # print help information and exit:
        usage()
        sys.exit(2)

    if len(opts) == 0:
        usage()
        sys.exit(2)

    fileName = ""
    for o,a in opts:
        if o == "-f":
            fileName = a

    cacheFile = CacheFile(fileName)

    if cacheFile.m_version > 2.0:
        print("Error: this script can only parse cache files of version 2 or lower\n")
        sys.exit(2)

    print("*******************************************************************************\n")
    print("Maya Cache version %f, Format = %s\n"%(cacheFile.m_version,cacheFile.m_cacheType))
    # m_timePerFrame stays 0 when the description file has no
    # cacheTimePerFrame element; avoid dividing by it in that case.
    if cacheFile.m_timePerFrame:
        print("The cache was originally created at %d FPS\n"%(6000.0/cacheFile.m_timePerFrame))
    else:
        print("The cache description does not give a time per frame, so the FPS is unknown\n")
    print("Cache has %d channels, starting at time %f seconds and ending at %f seconds\n"%(len(cacheFile.m_channels),cacheFile.m_cacheStartTime/6000.0,cacheFile.m_cacheEndTime/6000.0))
    for channel in cacheFile.m_channels:
        print("Channelname =%s, type=%s, interpretation =%s, sampling Type = %s\n"% (channel.m_channelName,channel.m_channelType,channel.m_channelInterp,channel.m_sampleType))
        # A channel without a SamplingRate attribute has a rate of 0;
        # skip the line rather than divide by zero.
        if channel.m_sampleRate:
            print("sample rate (for regular sample type only) = %f FPS\n"%(6000.0/channel.m_sampleRate))
        print("startTime=%f seconds, endTime=%f seconds\n"%(channel.m_startTime/6000.0,channel.m_endTime/6000.0))
    print("*******************************************************************************\n")

    if cacheFile.m_cacheType == "OneFilePerFrame":
        parseDataFilePerFrame(cacheFile)
    elif cacheFile.m_cacheType == "OneFile":
        parseDataOneFile(cacheFile)
    else:
        print("unknown cache type!\n")


# Run only when executed as a script, so that the classes and parsing
# functions in this file can also be imported.
if __name__ == "__main__":
    main()