Wednesday, February 26, 2014

Personal stock portal… YQL experiments.

In general, lots of portals provide stock quote information. For example, at finance.yahoo.com you can enter a valid stock ticker and get the stock information. From that point, if you want to know more about the company profile, you have to click one more link (or make one more web service call). If you want to know more about last year's statistics, that is yet another click or web service call. A couple of years back, after learning a little bit of YQL, I did a small POC: given a stock ticker, the system displays a complete snapshot of the company in one click. It was a fun ride; however, with all the bugs in YQL, it remained a POC only.

There is a lot of code, so I will only dump a few important methods.

////
String stockSym = "AAPL";

// One SupplierSection instance is reused for every crawl of this ticker.
SupplierSection stockInfo = new SupplierSection(stockSym);
stockInfo.createHttpClient();

// Run the crawl steps in the same order as before.
stockInfo.crawlFinanceInfo(stockSym);
stockInfo.crawlProfileInfo(stockSym);
stockInfo.crawlKeyStats(stockSym);
stockInfo.crawlRssInfo(stockSym);
stockInfo.crawlQuantInfo(stockSym);

// Other outputs that were tried and are currently disabled:
//   stockInfo.printAllPublicInfo();
//   stockInfo.writeToXmlFile();
//   stockInfo.writeToXmlFile1(stockSym);
try {
    stockInfo.writehtmlFile1(stockSym);
} catch (Exception e) {
    // Best effort: report the failure and carry on.
    e.printStackTrace();
}


 ///basic company info
public  void crawlProfileInfo(String stockSym) {
   CloseableHttpClient httpclient = HttpClients.createDefault();
   try {    
         GetMethod httpGet = new GetMethod("http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20yahoo.finance.stocks%20where%20symbol%3D%22"+stockSym+"%22&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys");
         httpGet.setFollowRedirects( true );
           
     System.out.println("executing request in crawlProfileInfo " );
     int responseCode = httpClient.executeMethod(httpGet);
     byte[] data = httpGet.getResponseBody();
   

     if (responseCode >= 400) {
     System.out.println("Failed to send request "+ responseCode);
     }else{
     
    String  xmlString = new String( data, ENCODING );
    //System.out.println("xml string "+ xmlString);

       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
       DocumentBuilder db = factory.newDocumentBuilder();
       InputSource inStream = new InputSource();
       inStream.setCharacterStream(new StringReader(xmlString));
       Document doc = db.parse(inStream); 
       
       String fName = "CompanyName";
       profileInfo.put(fName,getFieldValue(doc,fName));
       fName = "Sector";
       profileInfo.put(fName,getFieldValue(doc,fName));
       fName = "Industry";
       profileInfo.put(fName,getFieldValue(doc,fName));
       
       fName = "FullTimeEmployees";
       profileInfo.put(fName,getFieldValue(doc,fName));
       
       fName = "start";
       profileInfo.put(fName,getFieldValue(doc,fName));
       
       fName = "end";
       profileInfo.put(fName,getFieldValue(doc,fName));

     }


               public  void crawlKeyStats(String stockSym) {
   try {    
         GetMethod httpGet = new GetMethod("http://query.yahooapis.com/v1/public/yql?q=SELECT%20*%20FROM%20yahoo.finance.keystats%20WHERE%20symbol%3D'"+stockSym+"'&diagnostics=false&env=store%3A%2F%2Fdatatables.org%2Falltableswithkeys");
         httpGet.setFollowRedirects( true );
           
     System.out.println("executing request in crawlKeyStats" );
     int responseCode = httpClient.executeMethod(httpGet);
     byte[] data = httpGet.getResponseBody();
   

     if (responseCode >= 400) {
     System.out.println("Failed to send request "+ responseCode);
     }else{
     
    String  xmlString = new String( data, ENCODING );
    System.out.println("xml string "+ xmlString);

       DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
       DocumentBuilder db = factory.newDocumentBuilder();
       InputSource inStream = new InputSource();
       inStream.setCharacterStream(new StringReader(xmlString));
       Document doc = db.parse(inStream); 
       processKeyStatsResponse(doc);

     }

   } catch (Exception e) {
     e.printStackTrace();
   } finally {
   }
 }
 
 
        public  void crawlRssInfo(String stockSym) {
  
   try {    
         GetMethod httpGet = new GetMethod("http://query.yahooapis.com/v1/public/yql?q=select%20*%20from%20html%20where%20url%3D'http%3A%2F%2Ffinance.yahoo.com%2Fq%3Fs%3D"+stockSym+"'%20and%20xpath%3D'%2F%2Fdiv%5B%40id%3D%22yfi_headlines%22%5D%2Fdiv%5B2%5D%2Ful%2Fli%2Fa'&diagnostics=true");
         httpGet.setFollowRedirects( true );
           
     System.out.println("executing request in crawlRssInfo" );
     int responseCode = httpClient.executeMethod(httpGet);
     byte[] data = httpGet.getResponseBody();
   

     if (responseCode >= 400) {
     System.out.println("Failed to send request "+ responseCode);
     }else{
    String  xmlString = new String( data, ENCODING );
    //System.out.println("xml string "+ xmlString);
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = factory.newDocumentBuilder();
        InputSource inStream = new InputSource();
        inStream.setCharacterStream(new StringReader(xmlString));
        Document doc = db.parse(inStream); 
        processRssResponse(doc); 
       }
     
   } catch (Exception e) {
     e.printStackTrace();
   } finally {
   }  
 
 
    }

Tuesday, February 25, 2014

Very Old: Java code convert Excel (CSV) file to generate SOLR documents

Scenario is :
   Every day a mainframe system generates a delta of product data (the Excel/CSV file contains up to one million rows, and search needs to happen across 30+ product fields). My initial thought was to use XSLT; however, after seeing that some columns need computation, I ended up writing some Java code. This code generates one SOLR document XML file for each Excel row, and with the post.jar tool we used to index the data in one go. (Back in the SOLR 1.4 days the JSON format was not that popular, so I used XML. I have an XSLT version too and will post that code very soon.)

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
/**
 * Converts a comma-separated export into Solr update documents.
 *
 * <p>The first line of the input supplies the field names. Every following
 * line whose first column parses as an integer is written to its own file,
 * {@code c:\a_edocs\<id>.xml}, as an {@code <add><doc>} document holding one
 * {@code <field name="...">} element per column. Lines are split on plain
 * commas; quoted CSV values are not supported.
 */
public class CsvToSolrXmlDoc
{
    /** Field names taken from the header line; {@code null} until {@link #initHeaders} is called. */
    public String[] columnNames = null;

    /**
     * Returns the header name for column {@code i}.
     *
     * @param i zero-based column index
     * @return the header name, or {@code "checkInputData"} when no header is
     *         known for that index (row wider than the header, negative index,
     *         or headers not initialised yet)
     */
    public String getColumnName(int i)
    {
        if (columnNames == null || i < 0 || i >= columnNames.length) {
            return "checkInputData";
        }
        return columnNames[i];
    }

    /**
     * Stores the header row used to name the fields of every later document.
     *
     * @param cols column names in file order
     */
    public void initHeaders(String[] cols)
    {
        columnNames = cols;
    }

    /**
     * Writes one Solr document for a single data row.
     *
     * <p>Rows that are empty, or whose first column is not an integer, are
     * skipped. Failures while building or writing the XML are printed and
     * swallowed so that one bad row does not stop the whole import.
     *
     * @param cols column values of the row, in header order
     * @throws Exception declared for backward compatibility; not thrown in practice
     */
    public void writeProdInfo(String[] cols) throws Exception {
        if (cols == null || cols.length == 0) {
            return;
        }
        String prodId = cols[0];
        try {
            // Use-case specific validation: the product id must be numeric.
            Integer.parseInt(prodId);
        } catch (NumberFormatException e) {
            System.out.println("\n Skiping id "+prodId);
            return;
        }
        String outputFile = "c:\\a_edocs" + "\\" + prodId + ".xml";
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document doc = builder.newDocument();
            Element addEle = doc.createElement("add");
            doc.appendChild(addEle);
            Element root = doc.createElement("doc");
            addEle.appendChild(root);

            for (int i = 0; i < cols.length; i++) {
                String cName = getColumnName(i);
                // Column-specific post-processing belongs here.
                // The null check must happen before trim(), not after it.
                String value = (cols[i] == null) ? "" : cols[i].trim();
                Element node = doc.createElement("field");
                Attr attr = doc.createAttribute("name");
                attr.setValue(cName);
                node.setAttributeNode(attr);
                node.appendChild(doc.createTextNode(value));
                root.appendChild(node);
            }

            DOMSource domSource = new DOMSource(doc);
            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            StreamResult result = new StreamResult(new File(outputFile));
            transformer.transform(domSource, result);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code inputFile} line by line: the first line initialises the
     * headers (and is echoed to stdout), every other line is passed to
     * {@link #writeProdInfo}.
     *
     * @param inputFile path of the comma-separated input file
     * @throws java.io.FileNotFoundException if the file cannot be opened
     * @throws java.io.IOException if reading fails
     */
    public void csvToXML(String inputFile) throws java.io.FileNotFoundException, java.io.IOException
    {
        BufferedReader br = new BufferedReader(new FileReader(inputFile));
        try {
            boolean headerRead = false;
            String line;
            while ((line = br.readLine()) != null) {
                String[] values = line.split(",");
                if (!headerRead) {
                    for (int j = 0; j < values.length; j++) {
                        System.out.println("\n col index,name:"+j+":"+values[j]);
                    }
                    initHeaders(values);
                    headerRead = true;
                } else {
                    try {
                        writeProdInfo(values);
                    } catch (Exception e) {
                        // Keep going: a single bad row must not abort the import.
                        e.printStackTrace();
                    }
                }
            }
        } finally {
            // Close the reader even when readLine() fails half-way through.
            br.close();
        }
    }

    /**
     * Converts the sample file {@code C:\a_work\three.csv}.
     *
     * @param argv unused
     * @throws java.io.IOException if the input file cannot be read
     */
    public static void main(String argv[]) throws java.io.IOException
    {
        CsvToSolrXmlDoc cp = new CsvToSolrXmlDoc();
        cp.csvToXML("C:\\a_work\\three.csv");
    }

}

Monday, February 24, 2014

Hierarchical supplier representation & traversal in RDBMS

For example, consider organizations like PG, Siemens and GE, where the business is organized in multiple verticals with many subsidiaries across the globe. One way of identifying all these units is the DUNS number. A brief definition of the DUNS number, at least in one standard, is:
"Dun & Bradstreet (D&B) provides a D-U-N-S Number,
a unique nine-digit identification number, for each physical location of your business."
Many global manufacturing companies use these DUNS numbers to organize their suppliers and their subsidiaries. In this sequence of posts I will explain the relational way of mapping this supplier data to an RDBMS and then to a graph database (in this prototype, I am using neo4j as the graph database).
In the relational mapping, each supplier entity contains the following minimal set of attributes:
primary_duns_number, supplier_name, address and parent_duns_number.
(In the case of the root supplier organization, primary_duns_number and parent_duns_number are the same.)

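The following is an Oracle-specific SQL query that uses hierarchical-query features such as CONNECT BY and NOCYCLE.

The intent is to bring back all parent rows for a given child supplier primary key. (Note: as written, with PRIOR on t1.PRIMARY_DUNS_NUMBER, the traversal starts at the given supplier and walks down to its subsidiaries; to walk up towards the parents instead, put PRIOR on the t1.PARENT_DUNS_NUMBER side of the CONNECT BY condition.)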

>>

-- Oracle hierarchical query over GQTS.SUPPLIER.
-- Output columns: LEVEL (depth below the start row), the primary DUNS number
-- left-padded by two spaces per level, the supplier name, parent DUNS number
-- and address, a '/'-separated path of primary DUNS numbers from the start
-- row, plus two columns from the self-joined parent row t2.
-- NOTE(review): parent_id is t2.PARENT_DUNS_NUMBER, i.e. the parent's own
-- parent DUNS number, not the parent's primary DUNS number.
SELECT LEVEL,LPAD (' ', 2 * (LEVEL - 1)) ||  t1.PRIMARY_DUNS_NUMBER, 
           t1.SUPPLIER_NAME, 
           t1.PARENT_DUNS_NUMBER,
           t1.address,
           SYS_CONNECT_BY_PATH(t1.PRIMARY_DUNS_NUMBER, '/') "Path",
           t2.SUPPLIER_NAME AS parent_name,
           t2.PARENT_DUNS_NUMBER AS parent_id
           FROM  GQTS.SUPPLIER t1
           -- Self-join: t2 is the row whose primary DUNS equals t1's parent DUNS.
           LEFT JOIN GQTS.SUPPLIER t2 ON t2.PRIMARY_DUNS_NUMBER = t1.PARENT_DUNS_NUMBER
          -- 'DUNS_CODE' is a placeholder for the DUNS number to start from.
          START WITH t1.PRIMARY_DUNS_NUMBER = 'DUNS_CODE' 
-- PRIOR is on the primary key, so each step selects the rows whose
-- PARENT_DUNS_NUMBER equals the previous row's PRIMARY_DUNS_NUMBER.
-- NOCYCLE is needed because a root supplier lists itself as its own parent.
CONNECT BY NOCYCLE  PRIOR t1.PRIMARY_DUNS_NUMBER = t1.PARENT_DUNS_NUMBER
>>

Sunday, February 23, 2014

Dhanvi with San Antonio chess Trophy

Much needed confidence. 
He had been playing well; however, in the last few tournaments he fell short.
But for this one he made the decision just one day ahead, and joined with his friends.




Saturday, February 22, 2014

Deploying Solr 4.6.1 into WebLogic

Assumptions:

   WebLogic 10.x is installed & running fine.
   Download SOLR 4.6.1 from http://lucene.apache.org/solr/ & expand the zip file.

a) Deploy the war file via console or auto-deploy folder.

You will notice exceptions in the WebLogic console saying that the logging jars are missing.

b) Now copy log jar files from \solr-4.6.1\example\lib\ext to your weblogic domain's lib
   On my local machine, this value is :  domains\base_domain\lib
  
c) After copying the jars, restart WebLogic & start accessing the Solr app.
   Now the system will display tons of exceptions
   (see the SOLR JIRA issue SOLR-4762 for the problem & its resolution).
  
   What I did was expand the solr.war file (jar -xvf ...) & update the weblogic.xml file.
   The following is my weblogic.xml file:

<!-- weblogic.xml placed inside the expanded solr.war.
     The two container-descriptor settings below are the changes made to get
     past the startup exceptions tracked in SOLR-4762. -->
<weblogic-web-app
   xmlns="http://www.bea.com/ns/weblogic/90"
   xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
   xsi:schemaLocation="http://www.bea.com/ns/weblogic/90 http://www.bea.com/ns/weblogic/90/weblogic-web-app.xsd">

   <container-descriptor>
<!-- filter-dispatched-requests-enabled is switched off; prefer-web-inf-classes is switched on. -->
<filter-dispatched-requests-enabled>false</filter-dispatched-requests-enabled> <prefer-web-inf-classes>true</prefer-web-inf-classes>
   </container-descriptor>

</weblogic-web-app>

  
  d) Note: If you are starting WebLogic with an explicit solr.solr.home parameter, you are good.
     For example, my value is
        -Dsolr.solr.home=C:\Oracle\Middleware\user_projects\domains\base_domain\solr
     However, if you are trying to set this value by changing solr\WEB-INF\web.xml,
       then you will face one more issue: the application will not start at all.

 Original web.xml contains

  <!-- People who want to hardcode their "Solr Home" directly into the
      WAR File can set the JNDI property here...
  -->

   <!-- Ordering as shipped: env-entry-value comes BEFORE env-entry-type.
        WebLogic rejected this ordering as an invalid XML file. -->
   <env-entry>
      <env-entry-name>solr/home</env-entry-name>
      <env-entry-value>C:\aa_valve_solr\solr</env-entry-value>
      <env-entry-type>java.lang.String</env-entry-type>
   </env-entry>

The above will not work: WebLogic keeps saying that it is not a valid XML file. The following version, with the element order changed, does work:

      
  <!-- People who want to hardcode their "Solr Home" directly into the
      WAR File can set the JNDI property here...
  -->

   <!-- Working ordering: env-entry-type comes BEFORE env-entry-value. -->
   <env-entry>
      <env-entry-name>solr/home</env-entry-name>
      <env-entry-type>java.lang.String</env-entry-type>
      <env-entry-value>C:\aa_valve_solr\solr</env-entry-value>
   </env-entry>


With this change (yes, the ordering of the elements makes the difference) SOLR will start successfully.
From this point on, everything runs fine (i.e. indexing/search; I also tried the data import handlers etc.).

2014 Texas Data day conference

Recently I went to the Texas Data Day conference, and there were many talks around real-time processing of data pipelines. In particular, there were lots of talks/frameworks around log file processing,
HDFS for storing information,
Lucene-based search engines for finding information,
& open-source graphics libraries to render the UI.
Everything is open source, built on massively scalable frameworks.
In particular, the Summingbird talk by Twitter's engineers was crazy & interesting.
The following is the speaker deck for that talk.


This is placeholder. I will add more content later.

Example business rules code in Java

   I used to do lots of business rule implementations via Drools.

A sample Java example is shown below.
This rule allows claim submission up to the 2nd Tuesday of the month.
There are tons of util methods, e.g. to validate date ranges, to return the month as a String,
to find the second Tuesday of a month, the first day of the month, etc.
I will add the Drools integration code separately.


       ///In case of null input date, logic uses today's date.

static public boolean  isValidClaimDate(Date inputDate ) {
  Date checkDate = null;
  if ( inputDate == null){
  Calendar c = Calendar.getInstance();
  checkDate = c.getTime();
  }else{
  checkDate = inputDate;
  }
  
  Date firstDay = getFirstDay();
  Date secondTuesday = getSecondTuesday();   
  if (firstDay.before(checkDate) && secondTuesday.after(checkDate)) {
  
  LOGGER.info("Validing current date"+ checkDate + " as  true");
return true;
  }else{
  LOGGER.info("Validing current date"+ checkDate + " as  false");
return false;
}
}
///works fine
static public Date getSecondTuesday() {
  Calendar c = Calendar.getInstance();
  int month = c.get ( Calendar.MONTH );
  int year = c.get ( Calendar.YEAR );
  Calendar cal = Calendar.getInstance(TimeZone.getTimeZone(Constants.estTimeZone));
  cal.set( year, month , 1,18,0);
   while (cal.get(Calendar.DAY_OF_WEEK) != Calendar.TUESDAY) {
       cal.add(Calendar.DAY_OF_MONTH, 1);
   }
   //now add 7 more days to get 2ed week
   cal.add(Calendar.DAY_OF_MONTH, 7);
   
  return cal.getTime();
}
///check
static public Date getFirstDay() {
  Calendar c = Calendar.getInstance();
  int month = c.get ( Calendar.MONTH );
  int year = c.get ( Calendar.YEAR );
  
  Calendar cal = Calendar.getInstance(TimeZone.getTimeZone(Constants.estTimeZone));
  cal.set( year, month ,1);
  cal.set(Calendar.HOUR_OF_DAY, 0);
  cal.set(Calendar.HOUR, 0);
      cal.set(Calendar.MINUTE, 0);
      cal.set(Calendar.SECOND, 0);
  return cal.getTime();
}
static public boolean validUploadDateRange( int month, int year ) {
  Calendar c = Calendar.getInstance(TimeZone.getTimeZone(Constants.estTimeZone));
  int currentMonth = c.get ( Calendar.MONTH ); //note: this scale starts from "0"
  int currentYear = c.get ( Calendar.YEAR );
  
  if ( month == currentMonth && currentYear==year){
  return true;
  }
  
  return false;
}
static public boolean validClaimsTimeRange( String monthStr, String yearStr ) {
   if (monthStr == null || monthStr.equals("")){
    return false;
   }
   
   if (yearStr == null || yearStr.equals("")){
    return false;
   }
   
   int month=0;
   int year=0;
   
try {
     month = Integer.parseInt(monthStr);
     year = Integer.parseInt(yearStr);
} catch (NumberFormatException e) {
                 return false;
}
return (validUploadDateRange(month,year));
}
static public String  validUploadMonthAsStr( ) {
  Calendar cal = Calendar.getInstance(TimeZone.getTimeZone(Constants.estTimeZone));
  int currentMonth = cal.get ( Calendar.MONTH ); //note: this scale starts from "0"
  int currentYear = cal.get ( Calendar.YEAR );
  
  cal.set( currentYear, currentMonth-1 ,1);
  cal.set(Calendar.HOUR_OF_DAY, 0);
  cal.set(Calendar.HOUR, 0);
      cal.set(Calendar.MINUTE, 0);
      cal.set(Calendar.SECOND, 0);
  Date pastMonth  = cal.getTime();
  String month = new SimpleDateFormat("MMMM").format(pastMonth);
  return month;
}
}