2013年12月6日 星期五

Notes Domino R7 自動抓取國衛院網頁上的公告招標資訊並匯入看板

老大說希望有新的標案就自動通知相關人員, 所以就先做了自動匯入來玩玩看

網址如下,

http://po.nhri.org.tw/po_board/po_query.jsp

代理程式如下 : (使用Java)

import lotus.domino.*;
import java.net.*;
import java.io.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.Date;
import java.util.Vector;
import java.text.*;

public class JavaAgent extends AgentBase {

private Session session;
private AgentContext agentContext;
private Database db;
private Log log;
private Document doc;

public void NotesMain() {

try {
Session session = getSession();
AgentContext agentContext = session.getAgentContext();
Agent agent = agentContext.getCurrentAgent();

// (您的程式碼移至此處)
String web1 = "http://po.nhri.org.tw/po_board/po_query.jsp" ;//UTF-8 國衛院
read1(session,agentContext,web1);


} catch(Exception e) {
e.printStackTrace();
}
}

public void read1( Session session,AgentContext agentContext,String strURL ) throws NotesException{
         int chunksize = 4096;
        byte[] chunk = new byte[chunksize];
        int count,idx;
        try  {
db=agentContext.getCurrentDatabase();
            URL pageUrl = new URL(strURL );
            // 讀入網頁(位元串流)
            //http://po.nhri.org.tw/po_board/docdownload.jsp?ticket_id=1020432&file_id=201312021140130.pdf
            BufferedInputStream bis = new BufferedInputStream(pageUrl.openStream());
            //BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream("URL1.txt", false));
            System.out.println("國衛院資料讀取中" );
            String cname;
            while ((count = bis.read(chunk, 0, chunksize)) != -1) {
               // bos.write(chunk, 0, count); // 寫入檔案
                cname=new String(chunk,"UTF-8");
                int nn=0;
View viewR = db.getView ("ViewByNoA");
Pattern pattern = Pattern.compile("<(td|a href)[^>]*>(.*?)</(td|a)>");
Matcher matcher = pattern.matcher(cname);
while(matcher.find()) {
if (matcher.group().substring(0, 2).compareTo("<a")==0 & nn>0) {
nn=nn+1;
doc.replaceItemValue ("Name",matcher.group().replaceAll("</?[a-z][a-z0-9]*[^<>]*>", ""));    
System.out.println(Integer.toString(nn)+":"+matcher.group().replaceAll("</?[a-z][a-z0-9]*[^<>]*>", ""));
Pattern pattern1 = Pattern.compile("(\"d)(.*?)\"");
Matcher matcher1 = pattern1.matcher(matcher.group());
while(matcher1.find()) {
nn=nn+1;
doc.replaceItemValue ("Hyper","http://po.nhri.org.tw/po_board/"+matcher1.group().replaceAll("\"",""));
System.out.println(Integer.toString(nn)+":"+"檔案網址:http://po.nhri.org.tw/po_board/"+matcher1.group().replaceAll("\"",""));
}
}
else {    
if (matcher.group().replaceAll("</?[a-z][a-z0-9]*[^<>]*>", "").replaceAll("&nbsp;","").compareTo("標的案號")==0 | nn>0) {
nn= nn+1;
cname=matcher.group().replaceAll("</?[a-z][a-z0-9]*[^<>]*>", "").replaceAll("&nbsp;","");
System.out.println(Integer.toString(nn)+":"+cname);
switch(nn) {
  case 1:
      break;
  case 2:
      boolean NotFound = true;
   DocumentCollection dc = db.getAllDocuments();
   Document docR = dc.getFirstDocument ();
   while (docR != null){          
        if (docR.getItemValueString("No").equals(cname) & docR.getItemValueString("Customer").equals("國衛院") ){
                NotFound = false;
            System.out.println("Find:"+cname);
break;
};
       docR = dc.getNextDocument();
    };
   if ( NotFound ) {  
      Date dt = new Date();
   SimpleDateFormat ft =  new SimpleDateFormat (" yyyy.MM.dd E");
   doc = db.createDocument ();
      doc.replaceItemValue ("Form","Form1");
      doc.replaceItemValue ("Customer","國衛院");
      doc.replaceItemValue ("IDay",ft.format(dt));
      doc.replaceItemValue ("No",cname);
      }
      else {      
      nn=0;
      }
      break;
  case 4:
      doc.replaceItemValue ("Item",cname);
      break;
  case 8:
      doc.replaceItemValue ("Type",cname);
      break;
   case 16:
      doc.replaceItemValue ("DDay",cname);
      break;
  case 18:
      doc.replaceItemValue ("Aday",cname);
      if (doc.save ()) {
System.out.println ("Document created and saved");
} else {
System.out.println ("Something went wrong");
}
nn=0;
      break;
  default:      
      break;
}      
}
    }  
};
// System.out.println(cname);
            }
            //bos.close();
            bis.close();
           System.out.println("國衛院資料讀取完成");
         }catch (IOException e) {
             e.printStackTrace();
             try{
session.setEnvironmentVar("AgentResult","國衛院資料讀取時,發生錯誤 !!");
}catch ( Exception ex){}
         }
      }
}

沒有留言:

張貼留言