6. Working with Strings

SQL is not designed to perform complex string manipulation

Walking a String

We want to traverse a string to return each character as a row.

-- Oracle
WITH employee_name AS (
    SELECT 'Sundar' AS FIRST_NAME FROM DUAL
)
SELECT SUBSTR(FIRST_NAME, LEVEL, 1) AS CHARACTER
FROM employee_name
CONNECT BY LEVEL <= LENGTH(FIRST_NAME);

-- Oracle
SELECT SUBSTR(FIRST_NAME, LEVEL, 1) AS CHARACTER
FROM employees
WHERE FIRST_NAME = 'Sundar'
CONNECT BY LEVEL <= LENGTH(FIRST_NAME) AND PRIOR FIRST_NAME = FIRST_NAME AND PRIOR DBMS_RANDOM.VALUE IS NOT NULL;

Embedding Quotes Within String Literals

We want to embed quote marks within string literals.

-- Oracle
select 'g''day mate' qmarks from dual union all
select 'beavers'' teeth' from dual union all
select '''' from dual
select '''' as quote from dual;

Counting the Occurrences of a Character in a String

We want to count the number of times a character or substring occurs within a given string. Consider the following string. We want to determine how many commas are in the string.

10,CLARK,MANAGER

-- Using length
select (length('10,CLARK,MANAGER')-length(replace('10,CLARK,MANAGER',',','')))/length(',') as cnt
from dual

-- Traversing through each character
WITH sample_string AS (
    SELECT '10,CLARK,MANAGE,R' AS str FROM DUAL
)
select count(substr(str,LEVEL,1)) from sample_string where substr(str,LEVEL,1) = ','
connect by LEVEL < LENGTH(str) ; 

Removing Unwanted Characters from a String

We want to remove specific characters from data. For example, we want to remove all zeros and vowels.

-- Oracle
select ename,
replace(translate(ename,'aaaaa','AEIOU'),'a','') as stripped1,
sal,
replace(cast(sal as char(4)),'0','') as stripped2
from emp;

-- MySQL does not support TRANSLATE function, so several calls to REPLACE needed
select ename,
replace(
 replace(
  replace(
   replace(
    replace(ename,'A',''),'E',''),'I',''),'O',''),'U','')
as stripped1,
sal,
replace(sal,0,'') stripped2
from emp;

Separating Numeric and Character Data

We have numeric data stored with character data together in one column. We want to separate the character data from the numeric data.

Sample Data

SMITH800 ALLEN1600 WARD1250 JONES2975

Sample Output

ENAME SAL SMITH 800 ALLEN 1600 WARD 1250 JONES 2975

select 
  replace(translate(data,'0123456789','0000000000'),'0') ename,
  to_number(replace(translate(lower(data),'abcdefghijklmnopqrstuvwxyz',rpad('z',26,'z')),'z')) sal
from (
select ename||sal data
from emp
)

Determining Whether a String Is Alphanumeric

We want to return rows from a table only when a column contains no characters other than numbers and letters.

Sample Data

-- Oracle
select data
from V
where translate(lower(data),'0123456789abcdefghijklmnopqrstuvwxyz',rpad('a',36,'a')) = rpad('a',length(data),'a')

Extracting Initials from a Name

We want to convert a full name into initials.

Input - Stewie Griffin Output - S.G.

Step 1: S##### G###### Step 2: S G Step 3: S.G Step 4: S.G. (append . at the end)

-- Oracle
select replace(
replace(
translate(replace('Stewie Griffin', '.', ''),'abcdefghijklmnopqrstuvwxyz',rpad('#',26,'#') ), '#','' ),' ','.' ) ||'.'
from dual;

Ordering by Parts of a String

We want to order result set based on a substring. For example, ordered based on the last two characters of each name.

select ename
from emp
order by substr(ename,length(ename)-1,)

Ordering by a Number in a String

We want to order the result set based on a number within a string

Sample Data

-- Oracle
select data from V order by to_number(replace(translate(lower('SMITH 7369 RESEARCH'), 'abcdefghijklmnopqrstubvwxyz ', RPAD('#',27,'#')),'#',''))

Creating a Delimited List from Table Rows

We want to return table rows as values in a delimited list rather than in vertical columns as they usually appear.

Sample Data

DEPTNO EMPS

10 CLARK 10 KING 10 MILLER 20 SMITH 20 ADAMS 20 FORD 20 SCOTT 20 JONES 30 ALLEN

Sample Output

DEPTNO EMPS

10 CLARK,KING,MILLER 20 SMITH,JONES,SCOTT,ADAMS,FORD 30 ALLEN,WARD,MARTIN,BLAKE,TURNER,JAMES

-- Oracle Solution

-- Create test table
CREATE TABLE test (
  deptno NUMBER(2),
  empname VARCHAR2(20)
);

-- Insert test data
INSERT INTO test (deptno, empname) VALUES (10, 'CLARK');
INSERT INTO test (deptno, empname) VALUES (10, 'KING');
INSERT INTO test (deptno, empname) VALUES (10, 'MILLER');
INSERT INTO test (deptno, empname) VALUES (20, 'SMITH');
INSERT INTO test (deptno, empname) VALUES (20, 'ADAMS');
INSERT INTO test (deptno, empname) VALUES (20, 'FORD');
INSERT INTO test (deptno, empname) VALUES (20, 'SCOTT');
INSERT INTO test (deptno, empname) VALUES (20, 'JONES');
INSERT INTO test (deptno, empname) VALUES (30, 'ALLEN');
INSERT INTO test (deptno, empname) VALUES (30, 'BLAKE');
INSERT INTO test (deptno, empname) VALUES (30, 'MARTIN');
INSERT INTO test (deptno, empname) VALUES (30, 'JAMES');
INSERT INTO test (deptno, empname) VALUES (30, 'TURNER');
INSERT INTO test (deptno, empname) VALUES (30, 'WARD');


-- Run the select query
SELECT
    deptno,
    ltrim(sys_connect_by_path(empname, ','),
          ',') emps
FROM
    (
        SELECT
            deptno,
            empname,
            ROW_NUMBER()
            OVER(PARTITION BY deptno
                 ORDER BY
                     deptno
            )                         rn,
            COUNT(*)
            OVER(PARTITION BY deptno) cnt
        FROM
            test
    )
WHERE
    level = cnt
START WITH
    rn = 1
CONNECT BY PRIOR deptno = deptno
           AND PRIOR rn = rn - 1

The purpose of the rank (aliased RN in the query) is to allow us to walk the tree. Since the function ROW_NUMBER generates an enumeration starting from one with no duplicates or gaps, just subtract one (from the current value) to reference a prior (or parent) row. For example, the number prior to 3 is 3 minus 1, which equals 2. In this context, 2 is the parent of 3.

Output

Output of inner subquery

Converting Delimited Data into a Multivalued IN-List

We have delimited data that we want to pass to the IN-list iterator of a WHERE clause. Consider the following string: 7654,7698,7782,7788 We would like to use the string in a WHERE clause, but the following SQL fails because EMPNO is a numeric column:

select ename,sal,deptno from emp where empno in ( '7654,7698,7782,7788' )

This SQL fails because, while EMPNO is a numeric column, the IN list is composed of a single string value. We want that string to be treated as a comma-delimited list of numeric values like 7654,7698,7782,7788.

-- Oracle
select empno,ename,sal,deptno
from emp
where empno in (
    select to_number(
        rtrim(
            substr(emps,
            instr(emps,',',1,iter.pos)+1,
            instr(emps,',',1,iter.pos+1)
            instr(emps,',',1,iter.pos)),',')) emps
    from (select ','||'7654,7698,7782,7788'||',' emps from dual) csv,
    (select rownum pos from emp) iter
    where iter.pos <= ((length(csv.emps)-length(replace(csv.emps,',')))/length(','))-1
)

Explanation

Step 1: Walk the string

The number of rows returned represents the number of values in the list. The strings are parsed using SUBSTR and INSTR. POS is used to locate the nth occurrence of the delimiter in each string. By enclosing the strings in commas, no special checks are necessary to determine the beginning or end of a string.

select emps,pos
from (select ','||'7654,7698,7782,7788'||',' emps
from dual) csv,
(select rownum pos from emp) iter
where iter.pos <= ((length(csv.emps)-length(replace(csv.emps,',')))/length(','))-1
select substr(emps,
instr(emps,',',1,iter.pos)+1,
instr(emps,',',1,iter.pos+1)
instr(emps,',',1,iter.pos)) emps
from (select ','||'7654,7698,7782,7788'||',' emps
from dual) csv,
(select rownum pos from emp) iter
where iter.pos <=
((length(csv.emps)-length(replace(csv.emps,',')))/length(','))-1

The final step is to remove the trailing comma from each value, cast it to a number, and plug it into a subquery.

Alphabetizing a String

We want alphabetize the individual characters within strings in your tables.

Sample Date

ENAME

ADAMS ALLEN BLAKE

Result

OLD_NAME NEW_NAME

ADAMS AADMS ALLEN AELLN BLAKE ABEKL

-- Oracle
select old_name, new_name
from (
    select old_name, replace(sys_connect_by_path(c,' '),' ') new_name
    from (
    select e.ename old_name,
        row_number() over(partition by e.ename order by substr(e.ename,iter.pos,1)) rn,
        substr(e.ename,iter.pos,1) c
    from emp e,
    ( select rownum pos from emp ) iter
    where iter.pos <= length(e.ename) order by 1
) x
start with rn = 1
connect by prior rn = rn-1 and prior old_name = old_name
)
where length(old_name) = length(new_name)

Explanation

Inline view result by walking the string and then imposing order on those characters. The rest of the query attach the names back together.

The next step is to take the alphabetized characters and rebuild each name. This is done with the function SYS_CONNECT_BY_PATH by appending each character to the ones before it

The final step is to keep only the strings that have the same length as the names they were built from.

Identifying Strings That Can Be Treated as Numbers

We have a column that is defined to hold character data.

Sample Data

CL10AR KI10NG MI10LL 7369 7566

We want to return rows that are numbers only, or that contain at least one number. If the numbers are mixed with character data, we want to remove the characters and return only the numbers.

Output

10 10 10 7369 7566

-- Oracle
select to_number (
case
    when
        replace(translate(mixed,'0123456789','9999999999'),'9') is not null
    then
        replace(
            translate(mixed,
                replace(
                    translate(mixed,'0123456789','9999999999')
                    ,'9'),rpad('#',length(mixed),'#')),'#')
    else
        mixed
end
) mixed from V
where instr(translate(mixed,'0123456789','9999999999'),'9') > 0

Explanation

Extracting the nth Delimited Substring

We want to extract a specified, delimited substring from a string.

Sample Data

create view V as
select 'mo,larry,curly' as name
from t1
union all
select 'tina,gina,jaunita,regina,leena' as name
from t1

Sample Output (We want to extract the second name in each row)

-- Oracle (Solution 1)
select sub
from (
    select iter.pos,
    src.name,
    substr( src.name,
    instr( src.name,',',1,iter.pos )+1,
    instr( src.name,',',1,iter.pos+1 ) -
    instr( src.name,',',1,iter.pos )-1) sub
    from (select ','||name||',' as name from V) src,
    (select rownum pos from emp) iter
    where iter.pos < length(src.name)-length(replace(src.name,','))
    )
where pos = 2

-- Oracle (Solution 2)
select data, 
substr(data, INSTR(data,',',1,1) + 1, INSTR(data,',',1,2) - INSTR(data,',',1,1) - 1) 
from test_data 

Parsing an IP Address

We want to parse an IP address’s fields into columns

-- Oracle
select ip,
    substr(ip, 1, instr(ip,'.')-1 ) a,
    substr(ip, instr(ip,'.')+1, instr(ip,'.',1,2)-instr(ip,'.')-1 ) b,
    substr(ip, instr(ip,'.',1,2)+1,instr(ip,'.',1,3)-instr(ip,'.',1,2)-1 ) c,
    substr(ip, instr(ip,'.',1,3)+1 ) d
from (select '92.111.0.2' as ip from t1)

Comparing Strings by Sound

SQL provides a way to represent the way words sound, which allows you to find strings that sound the same even though the underlying characters aren’t identical.

For example, you have a list of authors’ names,

Sample Output

-- Oracle
select an1.a_name as name1, an2.a_name as name2,
SOUNDEX(an1.a_name) as Soundex_Name
from author_names an1
join author_names an2
on (SOUNDEX(an1.a_name)=SOUNDEX(an2.a_name)
and an1.a_name not like an2.a_name)

Finding Text Not Matching a Pattern

We have a text field that contains some structured text values (e.g., phone numbers), and want to find occurrences where those values are structured incorrectly. We want to list rows having invalidly formatted phone numbers.

Record with 7369 is invalid because its phone number uses two different separator characters.

-- Oracle
select emp_id, text
from employee_comment
where regexp_like(text, '[0-9]{3}[-. ][0-9]{3}[-. ][0-9]{4}')
and regexp_like(
    regexp_replace(text,'[0-9]{3}([-. ])[0-9]{3}\1[0-9]{4}','***'), '[0-9]{3}[-. ][0-9]{3}[-. ][0-9]{4}')

Sample Output

Last updated