Tuesday, May 24, 2011

Sample code to create SOLR document from CSV file

Just one guy stopped by office & asked to index this legacy data. So quick & dirty solution is the following. (May be Perl etc are there, but this one gives me more flexibility.



public class CsvToSolrDoc
{
public String columnName(int i)
{
//workarounds workarounds
if ( i == 0) return "id";
if ( i == 1) return "what ever you want as field name";
return null;
}
public void csvToXML(String inputFile, String outputFile) throws java.io.FileNotFoundException, java.io.IOException
{
BufferedReader br = new BufferedReader(new FileReader(inputFile));
StreamTokenizer st = new StreamTokenizer(br);
String line = null;
FileWriter fw = new FileWriter(outputFile);
// Write the XML declaration and the root element
fw.write("\n");
fw.write("\n");
while ((line = br.readLine()) != null)
{
String[] values = line.split(",");
fw.write(" \n");
int i = 1;
for ( int j=0;j it is length; J++)
{
String colName = "field name=\""+columnName(j)+"\"";
fw.write("<" + colName + ">");
fw.write(values[j].trim());
fw.write( "\n");
}
fw.write("
\n");
}
// Now we're at the end of the file, so close the XML document,
// flush the buffer to disk, and close the newly-created file.
fw.write("
\n");
fw.flush();
fw.close();
}

public static void main(String argv[]) throws java.io.IOException
{
CsvToSolrDoc cp = new CsvToSolrDoc();
cp.csvToXML("c:\\tmp\\m2.csv", "c:\\tmp\\m2.xml");
}

No comments: