import java.io.*;
import java.net.*;

/**
 * Copies an HTML document from an input stream to the local file named by
 * {@code u.getFileName()}, rewriting the links found in selected tags and
 * putting a new {@code MyURL} into the queue for every HTTP link it follows.
 *
 * <p>Document bytes are treated as ISO-8859-1 characters (one byte per char)
 * so that everything that is not a rewritten link is copied byte-for-byte.
 */
class HttpParser {
  /** Attribute names whose value is treated as a link, in search order. */
  private static final String[] LINK_ATTRIBUTES = {"href", "src", "background", "action"};

  int depth;       // depth given to the MyURLs extracted from the file
  boolean hasBase; // true once a BASE tag with an http:// address has been processed
  MyURL u;         // MyURL whose document is being parsed
  Queue q;         // receives every MyURL extracted from the document

  public HttpParser(MyURL u, Queue q) {
    this.u = u;
    this.q = q;
  }

  /**
   * Reads the document from {@code dis}, writes it to the file named by
   * {@code u.getFileName()} and passes every tag to {@link #parseTag}.
   *
   * <p>I/O errors do not propagate: they are reported on {@code System.err}
   * and the output file is closed in every case. {@code dis} is not closed.
   *
   * @param dis stream positioned at the start of the document
   */
  public void parsing(DataInputStream dis) {
    hasBase = false;
    OutputStream out = null;
    try {
      out = new BufferedOutputStream(new FileOutputStream(u.getFileName()));
      int c;
      while ((c = dis.read()) != -1) {
        out.write(c);
        if (c != '<') {
          continue;
        }

        c = dis.read();
        if (c == -1) {
          break; // '<' was the last byte of the document
        }
        // Closing tags, '<\' and '<!' constructs never carry a link to rewrite.
        if (c == '/' || c == '\\' || c == '!') {
          out.write(c);
          continue;
        }

        // Collect the tag without its '<' but including the closing '>'.
        StringBuffer tag = new StringBuffer();
        while (c != -1) {
          tag.append((char) c);
          if (c == '>') {
            break;
          }
          c = dis.read();
        }
        if (c == -1) {
          // The document ended inside a tag: copy what was read and stop.
          writeLatin1(out, tag.toString());
          break;
        }
        writeLatin1(out, parseTag(tag.toString()));
      }
    } catch (IOException ioe) {
      // Best effort: a failed download must not stop the caller.
      System.err.println("HttpParser: I/O error while saving " + u.getFileName() + ": " + ioe);
    } finally {
      if (out != null) {
        try {
          out.close(); // also flushes the buffered output
        } catch (IOException ioe) {
          System.err.println("HttpParser: could not close " + u.getFileName() + ": " + ioe);
        }
      }
    }
  }

  /**
   * Rewrites the link inside one tag.
   *
   * <p>Only tags whose name starts with a, img, image, embed, form, body,
   * frame or base are examined. The first attribute from
   * {@link #LINK_ATTRIBUTES} that occurs in the tag supplies the link, which
   * may be unquoted or enclosed in double or single quotes. A BASE tag is
   * handed to {@link #parseBase} and disabled by inserting '!' in front of
   * its name; every other link goes through {@link #parseLink}.
   *
   * @param is the tag text without the leading '<' but with the trailing '>'
   * @return the tag with its link rewritten, or {@code is} unchanged when no
   *         usable link is found
   */
  private String parseTag(String is) throws IOException {
    String lower = lower(is);
    boolean goBase = false;

    if (lower.startsWith("a")
        || lower.startsWith("img")
        || lower.startsWith("image")
        || lower.startsWith("embed")
        || lower.startsWith("form")
        || lower.startsWith("body")) {
      depth = u.getDepth() - 1;
    } else if (lower.startsWith("frame")) {
      depth = u.getDepth(); // depth must not be decremented for frames
    } else if (lower.startsWith("base")) {
      goBase = true;
    } else {
      return is;
    }

    try {
      int pos = -1;
      for (int i = 0; i < LINK_ATTRIBUTES.length && pos == -1; i++) {
        pos = lower.indexOf(LINK_ATTRIBUTES[i]);
      }
      if (pos == -1) {
        return is; // no link attribute in this tag
      }

      int equals = is.indexOf('=', pos);
      if (equals == -1) {
        return is; // attribute without a value
      }

      // stLink and endLink are the positions of the first and last character of the link.
      int stLink = equals + 1;
      while (Character.isWhitespace(is.charAt(stLink))) {
        stLink++;
      }
      int endLink;
      char first = is.charAt(stLink);
      if (first == '"' || first == '\'') {
        stLink++;
        while (Character.isWhitespace(is.charAt(stLink))) {
          stLink++;
        }
        int closing = is.indexOf(first, stLink);
        if (closing == -1) {
          return is; // unterminated quoted value
        }
        endLink = closing - 1;
        while (Character.isWhitespace(is.charAt(endLink))) {
          endLink--;
        }
      } else {
        // Unquoted value: it ends before the next whitespace or the closing '>'.
        endLink = stLink;
        while (!Character.isWhitespace(is.charAt(endLink + 1))
            && is.charAt(endLink + 1) != '>') {
          endLink++;
        }
      }
      if (stLink >= endLink) {
        return is; // empty or single-character values are left alone
      }

      String link = is.substring(stLink, endLink + 1);
      StringBuffer os = new StringBuffer(is.substring(0, stLink));
      if (goBase) {
        os.append(parseBase(link));
        // goBase implies the tag starts with "base": '!' in front of the
        // name turns <base ...> into <!base ...>.
        os.insert(0, "!");
      } else {
        try {
          os.append(parseLink(link));
        } catch (MalformedURLException moe) {
          os.append(link); // keep the link as written when no MyURL can be built from it
        }
      }
      os.append(is.substring(endLink + 1));
      return os.toString();
    } catch (StringIndexOutOfBoundsException sie) {
      // Safety net for tags too malformed for the index arithmetic above
      // (and in parseLink): leave them untouched.
      return is;
    }
  }

  /**
   * Rewrites one link for the local copy and puts the absolute address it
   * points to into the queue (with any '#' reference clipped off).
   *
   * <p>Links with a URI scheme other than {@code http://}, and links that
   * start with '#', are returned unchanged and not queued. Links containing
   * '?' or "cgi" are not queued either.
   *
   * @param is the link as written in the tag
   * @return the text that replaces the link in the saved document
   * @throws MalformedURLException declared because the original declares it;
   *         presumably raised by the {@code MyURL} constructor
   */
  private String parseLink(String is) throws MalformedURLException {
    // mailto:, ftp:, https:, javascript: ... are not followed
    if (hasForeignScheme(is)) {
      return is;
    }
    // link pointing to a reference inside the same file
    if (is.startsWith("#")) {
      return is;
    }
    if (is.startsWith("//")) {
      is = "http:" + is;
    }

    if (lower(is).startsWith("http://")) {
      if (isDynamic(is)) {
        return is;
      }
      enqueue(is);
      // absolute -> relative: climb u.getFileLevel() directories, then "server/path"
      return parentRefs(u.getFileLevel()) + is.substring(7);
    }

    // link pointing into the server's root directory
    if (is.startsWith("/")) {
      String local = parentRefs(u.getFileLevel() - 1) + is.substring(1);
      String httpName = u.getHttpName();
      // everything before the first '/' of getHttpName() is used as the server name
      String absolute = "http://" + httpName.substring(0, httpName.indexOf('/')) + is;
      if (isDynamic(is)) {
        return absolute;
      }
      enqueue(absolute);
      return local;
    }

    // relative link climbing out of the current directory
    if (is.startsWith("../")) {
      String base = baseWithoutParentRefs();

      // linkLevel is the number of leading "../" segments of the link
      int linkLevel = 0;
      while (is.startsWith("../", 3 * linkLevel)) {
        linkLevel++;
      }

      StringBuffer absolute = new StringBuffer("http://");
      if (linkLevel > u.getFileLevel() - 1) {
        // more "../" than directory levels: the link points into the root directory
        absolute.append(base.substring(0, base.indexOf('/') + 1));
        absolute.append(is.substring(3 * linkLevel));
        is = is.substring(3 * (linkLevel - u.getFileLevel() + 1));
      } else {
        absolute.append(base).append("/").append(is);
      }

      if (isDynamic(is)) {
        return absolute.toString();
      }
      enqueue(absolute.toString());
      return hasBase ? u.getBase() + is : is;
    }

    // usual relative link
    String absolute = "http://" + baseWithoutParentRefs() + is;
    if (isDynamic(is)) {
      return absolute;
    }
    enqueue(absolute);
    return hasBase ? u.getBase() + is : is;
  }

  /**
   * Takes the address from a BASE tag; when it starts with {@code http://}
   * it becomes, prefixed with one "../" per file level, the base stored in
   * {@code u} and {@code hasBase} is set.
   *
   * @param is the address found in the BASE tag
   * @return {@code is}, unchanged
   */
  private String parseBase(String is) {
    if (lower(is).startsWith("http://")) {
      u.setBase(parentRefs(u.getFileLevel()) + is.substring(7));
      hasBase = true;
    }
    return is;
  }

  /** Puts {@code url}, cut at its first '#', into the queue with the current depth. */
  private void enqueue(String url) throws MalformedURLException {
    int hash = url.indexOf('#');
    q.putURL(new MyURL(hash == -1 ? url : url.substring(0, hash), depth));
  }

  /** Returns the part of {@code u.getBase()} that follows its last "../". */
  private String baseWithoutParentRefs() {
    String base = u.getBase();
    int pos = 0;
    int next;
    while ((next = base.indexOf("../", pos)) != -1) {
      pos = next + 3;
    }
    return base.substring(pos);
  }

  /** Returns "../" repeated {@code levels} times (empty when {@code levels <= 0}). */
  private static String parentRefs(int levels) {
    StringBuffer refs = new StringBuffer();
    for (int i = levels; i > 0; i--) {
      refs.append("../");
    }
    return refs.toString();
  }

  /** Tells whether the link looks like a CGI call: it contains '?' or "cgi". */
  private static boolean isDynamic(String link) {
    return link.indexOf("?") != -1 || link.indexOf("cgi") != -1;
  }

  /**
   * Tells whether {@code link} starts with a URI scheme (a letter followed by
   * letters, digits, '+', '-' or '.', then ':') and is not an
   * {@code http://} address.
   */
  private static boolean hasForeignScheme(String link) {
    int colon = link.indexOf(':');
    if (colon <= 0) {
      return false;
    }
    for (int i = 0; i < colon; i++) {
      char ch = link.charAt(i);
      boolean letter = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
      boolean other = (ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.';
      if (!letter && !(i > 0 && other)) {
        return false; // a '/', '?' or '#' before the ':' means it is not a scheme
      }
    }
    return !lower(link).startsWith("http://");
  }

  /** Lower-cases with a fixed locale so that tag matching does not depend on the platform. */
  private static String lower(String s) {
    return s.toLowerCase(java.util.Locale.ENGLISH);
  }

  /** Writes each character of {@code s} as one byte ('?' for characters above 0xFF). */
  private static void writeLatin1(OutputStream out, String s) throws IOException {
    for (int i = 0; i < s.length(); i++) {
      char ch = s.charAt(i);
      out.write(ch <= 0xFF ? ch : '?');
    }
  }
}

