Thursday, October 14, 2010

Reading CSV content from BLOB and Querying with SQL

We can load the csv file content to blob and then query the blob content as a table data using below functions. These functions even handles field data which contrains commas.

Step 1 - Create the function to handle commas within quotes

function handle_comma_within_quotes(str IN VARCHAR2) RETURN VARCHAR2
   AS
    copy varchar2(32000);
    inQuotes boolean := false;
   begin
     FOR i IN 1..LENGTH(str) LOOP
        IF substr(str,i,1)='"' then
              inQuotes := NOT inQuotes;
 END IF;
        if substr(str,i,1) = ',' AND inQuotes THEN
            copy := copy||'|';
        else
            copy := copy||substr(str,i,1);
        END IF;
    END LOOP;
    return replace(copy,'"','');
END handle_comma_within_quotes;

Create the pipelined function to query the csv file contents as table data

function f_csv_load_data(p_filename IN VARCHAR2) return loadcsvarray pipelined
is
    l_clob clob;
    l_blob blob;
    l_n    pls_integer;
    l_line varchar2(32000);
    rowname varchar2(4000);
    i number := 1;
begin
    select blob_content into l_blob from TEMP_TABLE WHERE loadfilename = p_filename;
    l_clob := blob_to_clob(l_blob);
    if substr(l_clob,-1,1) != chr(10) then l_clob := l_clob || chr(10); end if;
    l_n := instr(l_clob,chr(10));
    l_line := substr(l_clob,1,l_n)||',';
   
    while l_n > 0 loop
     
      l_line := handle_comma_within_quotes(l_line);
     
      if i=1 then
        rowname := 'HEADROW';
      else
        rowname := 'DATAROW';
      end if;
     
      i:= i+1;
      pipe row (loadcsvobject(rowname,
        replace(substr(l_line, 1, instr(l_line,',')-1),'|',','),
        replace(substr(l_line, instr(l_line,',')+1, instr(l_line,',',1,2)-instr(l_line,',')-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,2)+1, instr(l_line,',',1,3)-instr(l_line,',',1,2)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,3)+1, instr(l_line,',',1,4)-instr(l_line,',',1,3)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,4)+1, instr(l_line,',',1,5)-instr(l_line,',',1,4)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,5)+1, instr(l_line,',',1,6)-instr(l_line,',',1,5)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,6)+1, instr(l_line,',',1,7)-instr(l_line,',',1,6)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,7)+1, instr(l_line,',',1,8)-instr(l_line,',',1,7)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,8)+1, instr(l_line,',',1,9)-instr(l_line,',',1,8)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,9)+1, instr(l_line,',',1,10)-instr(l_line,',',1,9)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,10)+1, instr(l_line,',',1,11)-instr(l_line,',',1,10)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,11)+1, instr(l_line,',',1,12)-instr(l_line,',',1,11)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,12)+1, instr(l_line,',',1,13)-instr(l_line,',',1,12)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,13)+1, instr(l_line,',',1,14)-instr(l_line,',',1,13)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,14)+1, instr(l_line,',',1,15)-instr(l_line,',',1,14)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,15)+1, instr(l_line,',',1,16)-instr(l_line,',',1,15)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,16)+1, instr(l_line,',',1,17)-instr(l_line,',',1,16)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,17)+1, instr(l_line,',',1,18)-instr(l_line,',',1,17)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,18)+1, instr(l_line,',',1,19)-instr(l_line,',',1,18)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,19)+1, instr(l_line,',',1,20)-instr(l_line,',',1,19)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,20)+1, instr(l_line,',',1,21)-instr(l_line,',',1,20)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,21)+1, instr(l_line,',',1,22)-instr(l_line,',',1,21)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,22)+1, instr(l_line,',',1,23)-instr(l_line,',',1,22)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,23)+1, instr(l_line,',',1,24)-instr(l_line,',',1,23)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,24)+1, instr(l_line,',',1,25)-instr(l_line,',',1,24)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,25)+1, instr(l_line,',',1,26)-instr(l_line,',',1,25)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,26)+1, instr(l_line,',',1,27)-instr(l_line,',',1,26)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,27)+1, instr(l_line,',',1,28)-instr(l_line,',',1,27)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,28)+1, instr(l_line,',',1,29)-instr(l_line,',',1,28)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,29)+1, instr(l_line,',',1,30)-instr(l_line,',',1,29)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,30)+1, instr(l_line,',',1,31)-instr(l_line,',',1,30)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,31)+1, instr(l_line,',',1,32)-instr(l_line,',',1,31)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,32)+1, instr(l_line,',',1,33)-instr(l_line,',',1,32)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,33)+1, instr(l_line,',',1,34)-instr(l_line,',',1,33)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,34)+1, instr(l_line,',',1,35)-instr(l_line,',',1,34)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,35)+1, instr(l_line,',',1,36)-instr(l_line,',',1,35)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,36)+1, instr(l_line,',',1,37)-instr(l_line,',',1,36)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,37)+1, instr(l_line,',',1,38)-instr(l_line,',',1,37)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,38)+1, instr(l_line,',',1,39)-instr(l_line,',',1,38)-1),'|',','),
        replace(substr(l_line, instr(l_line,',',1,39)+1, instr(l_line,',',1,40)-instr(l_line,',',1,39)-1),'|',',')
        ));
      l_clob := substr(l_clob,l_n+1);
      l_n := instr(l_clob,chr(10));
      l_line := substr(l_clob,1,l_n)||',';
    end loop;
END f_csv_load_data;


Query

select * from table(f_csv_load_data('filename'))

Will display the result in the tabular form

No comments:

Post a Comment