#script to convert features from SVM_light format to RF format (R randomForest package)
#created Joseph Luttrell 2/21/17
#
#usage: <script> svmFeats outputFeats numFeats
#   svmFeats    - feature file in svm_light format
#   outputFeats - output filename and path (must not already exist)
#   numFeats    - number of feature columns to name in the header
import sys
import os


def build_header(numFeats):
    """Return the CSV header 'Target,f1,...,fN' (no trailing newline).

    With numFeats == 0 the result is 'Target,', matching the rows written
    for lines that carry no feature values.
    """
    return 'Target,' + ','.join('f' + str(i) for i in range(1, int(numFeats) + 1))


def convert_line(line):
    """Convert one svm_light line into a CSV row (no trailing newline).

    Returns None for lines that hold no data: blank lines and lines whose
    first token starts with '#'.

    NOTE(review): values are emitted in the order they appear on the line;
    the feature index in front of each ':' is not consulted. A sparse row
    (one that omits some features) therefore yields fewer columns than the
    header -- confirm the inputs are dense before relying on the output.
    """
    items = line.split()
    if not items or items[0].startswith('#'):
        return None

    vals = []  #will hold just the feature values
    for item in items[1:]:
        #everything from the first '#' token onward is trailing comment text,
        #whether or not a space follows the '#'
        if item.startswith('#'):
            break
        if ':' in item:
            #get just the value from the featureLabel:value pair that svm format uses
            vals.append(item.partition(':')[2])

    #the target is always followed by a comma, even when no values were found
    return items[0] + ',' + ','.join(vals)


def main(argv=None):
    """Run the conversion using command-line style arguments.

    argv defaults to sys.argv and is expected to be
    [script, svmFeats, outputFeats, numFeats]. Exits with status 1 when
    arguments are missing or when the output file already exists.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.stderr.write("usage: <script> svmFeats outputFeats numFeats\n")
        sys.exit(1)

    svmFeats = argv[1]  #feature file in svm_light format
    outputFeats = argv[2]  #output filename and path
    numFeats = int(argv[3])  #number of feature columns

    #never overwrite an existing output file
    if os.path.isfile(outputFeats):
        print("output file exists, exiting")
        sys.exit(1)

    #context managers guarantee both files are flushed and closed,
    #even if a line fails to convert part-way through
    with open(svmFeats, 'r') as svmFeatsFile:
        with open(outputFeats, 'w') as outputFeatsFile:
            outputFeatsFile.write(build_header(numFeats) + '\n')
            for line in svmFeatsFile:
                valLine = convert_line(line)
                if valLine is not None:
                    outputFeatsFile.write(valLine + '\n')


if __name__ == "__main__":
    main()