SQL-Basics 7(Windwoing Functions)

                                                        Window Functions

------>It is a function, it's trying to work on a subset of the dataset(or) tries to create a specific group 

then I have to perform some sort of operations inside that group this is where windows functions 

coming to the picture.

----->whenever we try to perform a window operation it will try to create the window size and that

particular windows it is trying to apply the functions and that window we can create in multiple ways, 

not in a single way.

------>Multiple ways we can create different kinds of windows.

------>"Window" means specific span (or) specific group (or) specific interval.

------>whole data is put into a separate window.

------->we have two types of the windowing function

#1)Aggregated  based windowing function(whole data put into the separate window)(mean(),sum(),max())

#2)Analytic based  windowing function(rank(),row_number(),dense_rank(),lag(),lastname())

select database();

create database today;

use today;

create table ineuron_students(

student_id int,

student_batch varchar(40),

student_name varchar(40),

student_stream varchar(30),

students_marks int,

student_mail_id varchar(50))

insert into ineuron_students values(101,'fsda','galla','cs',80,'galla@gmail.com')

select * from ineuron_students

insert into ineuron_students values(100,'fsda','mithun','cs',81,'mithun@gmail.com'),

(102,'fsda','srinu','cs',82,'srinu@gmail.com'),

(103,'fsda','nag','cs',67,'nag@gmail.com'),

(104,'fsda','ajay','MECH',45,'ajay@gmail.com'),

(105,'fsda','satish','MECH',89,'satish@gmail.com'),

(106,'fsds','manisha','MECH',86,'manisha@gmail.com'),

(107,'fsds','karim','MECH',83,'karim@gmail.com'),

(108,'fsds','mohith','EEE',86,'mohith@gmail.com'),

(109,'fsds','jeevan','EEE',89,'jeevan@gmail.com'),

(110,'fsde','rohith','EEE',78,'rohith@gmail.com'),

(111,'fsde','rana','EEE',77,'rana@gmail.com'),

(112,'fsde','ntr','IT',76,'ntr@gmail.com'),

(113,'fsde','saisatish','65',80,'saisatish@gmail.com'),

(114,'fsde','jackma','IT',72,'jackma@gmail.com'),

(115,'fsda','elonmusk','IT',75,'elonmusk@gmail.com'),

(116,'fsda','billgates','CIVIL',77,'billgates@gmail.com'),

(117,'fsda','rgv','CIVIL',72,'rgv@gmail.com'),

(118,'fsdc','puri','CIVIL',83,'puri@gmail.com'),

(119,'fsdc','yug','CIVIL',89,'yug@gmail.com'),

(120,'fsdc','data','ECE',91,'data@gmail.com'),

(121,'fsda','sravani','ECE',94,'sravani@gmail.com')


 select * from ineuron_students;

 select count(*) from ineuron_students;


             #1)Aggregation  based windowing function(sum(),min(),max(),avg(),count())

------->The above function which aggregates the data (or) combines the data for the specific 

        group or for a specific interval then it gives you the result.

Q)Can you please try to give me the average marks that have been received as per the student_batch?

  select * from ineuron_students;

  select student_batch,sum(students_marks) from ineuron_students group by student_batch;

  select student_batch,min(students_marks) from ineuron_students group by student_batch;

  select student_batch,max(students_marks) from ineuron_students group by student_batch;

  select student_batch,avg(students_marks) from ineuron_students group by student_batch;

  select count(student_batch) from ineuron_students;

  select count(distinct student_batch) from ineuron_students;

 Q)Can you please try to give me count a number of students batch-wise?

 select student_batch,count(*) from ineuron_students group by student_batch;

 

 #2)Analytic based  windowing function(rank(),row_number(),dense_rank(),lag(),lastname()) in b/w we are using order by clause and partition clause

 select * from ineuron_students;

 ---->If Someone asking you can please create a separate particular table or window based 

 on the student_batch? and maybe give the rank as per the marks?

Q)Can you please try to apply some sort of function (or)some sort of segregation over here for 

 a particular batch system able to give the ranking for each and every dataset which is available inside

 my system?

----->In that case, I won't use the aggregation-based function, I can use the analytic-based function.

Q)who has received highest marks in fsda batch?who has received second-highest mark in fsda batch?,

who has received highest 3rd mark in fsda batch?


 Note:-

----->In interviews, people also ask more questions about windowing functions. 

select max(students_marks) from ineuron_students where student_batch='fsda';

select students_marks from ineuron_students where student_batch='fsda' ;

select max(students_marks) from ineuron_students where student_batch='fsda'; 

 select * from ineuron_students;

select student_name,student_batch,max(students_marks) from ineuron_students group by student_batch;

-----the above query is giving me wrong result.

select student_name,students_marks from ineuron_students where student_batch='fsda' ;

select student_name,max(students_marks) from ineuron_students where student_batch='fsda';

-----the above query is giving me wrong result.

Q)who has received highest marks in fsda batch?

select student_name from ineuron_students where students_marks in

(select max(students_marks) from ineuron_students) 

                             (OR)

select student_name from ineuron_students where students_marks =

(select max(students_marks) from ineuron_students) 

----->The above approach is the correct one.

Note:-

---->we can use both (in)  and (=) also

 ---in------- it is always try to check one value (or) range of the value.

   -- =--------it is always checked for a single value.

Q)Can you please tell me who has the received second highest marks from fsda batch?

select * from ineuron_students;

select student_id,student_name,students_marks from ineuron_students where student_batch='fsda';

select * from ineuron_students where student_batch='fsda';

select count(*) from ineuron_students where student_batch='fsda';

select * from ineuron_students where student_batch='fsda' order by students_marks  limit 1;

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 1 -- it will give top 1 record

                                                 --------- About  Limit 1,1,2----

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 1,1 ---it starts from 1st index record  from there it will give  1 record.

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 1,2 ---it starts from 1st index record from there it will give 2 records.

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 1,3 ---it starts from 1st index  record from there it will give 3 records.

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 2 -----it will give top 2 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 2,1---it starts from 2nd index record from there it will give 1 record

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 2,2 ---it starts from 2nd index record from there it will give 2 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 2,3 ----it starts from 2nd index record from there it will give 3 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 3 ----it will give top 3 records 

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 3,1 ----it will starts from 3rd index record from there it will give 1 record

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 3,2 ----it will starts from 3rd index record from there it will give 2 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 3,3 ----it will starts from 3rd index record from there it will give 3 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 4 ----it will give to 4 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 4,1  ----it will starts from 4th index record from there it will give 1 record

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 4,2  ----it will starts from 4th index record from there it will give 2 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 4,3 -----it will starts from 4th index record from there it will give 3 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 5 ------it will give top 5 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 0 ------it will not give any record

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 5,1 -----it will starts from 5th index record from there it will give 1 record

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 5,2------it will starts from 5th index record from there it will give 2 records

select * from ineuron_students where student_batch='fsda' order by students_marks  desc limit 5,3 -----it will starts from 5th index record from there it will give 3 records

Q)Can you please give me 3rd highest marks from FSDA batch?

select * from ineuron_students where student_batch='fsda' order by students_marks desc limit 3

    insert into ineuron_students values(125,'fsda','sandeep','cs',82,'sandeep@gmail.com'),

    (127,'fsda','kunal','cs',82,'kunal@gmail.com')

select * from ineuron_students where student_batch='fsda' order by students_marks desc limit 2,1;

---->The above approach is correct but we required 3 names here, it will give only one

 person's name.

select * from ineuron_students where student_batch='fsda' order by students_marks desc limit 2,3;

---->The above approach is correct but, If we have 1000's records how to fetch the data.

Q)But how do we get to know how many peoples are received in 3rd highest(if we have 1000's records)

select students_marks from ineuron_students where student_batch='fsda' order by students_marks desc limit 3;

select * from ineuron_students  where students_marks=(

select min(students_marks) from 

(select students_marks from ineuron_students where student_batch='fsda' order by students_marks desc limit 3) as top)

---->For this situation, the above stmnt is correct to fetch the 3rd highest marks.

Q)But if the first highest marks 94 has been received by three students then how to get the 3rd highest marks?the above approch again fail for this case?

 Question explain here:-

insert into ineuron_students values(126,'fsda','ranbeer','cs',94,'ranbeer@gmail.com'),

    (129,'fsda','cook','cs',94,'cook@gmail.com')

select students_marks from ineuron_students where student_batch='fsda' order by students_marks desc limit 3;

select * from ineuron_students  where students_marks=(select min(students_marks) from (select students_marks from ineuron_students where student_batch='fsda' order by students_marks desc limit 3) as top);

----->see here the above approach is failed to absorb properly

# student_id, student_batch, student_name, student_stream, students_marks, student_mail_id

'121',           'fsda',         'sravani',    'ECE',         '94',        'sravani@gmail.com'

'126',           'fsda',         'ranbeer',     'cs',         '94',        'ranbeer@gmail.com'

'129',           'fsda',         'cook',        'cs',         '94',         'cook@gmail.com'

                                  row_number(),rank(),dense_rank()

select * from ineuron_students

---->Here am trying to create a separate window(or) separate separate groups (or) separate bundles of rows by applying the analytical windowing function.

------>Based on that I will able to perform different kinds of operations.

select student_id,student_batch,student_stream,students_marks,row_number() over (order by 

students_marks) as 'row_number' from ineuron_students;

---->In the above we have create one window(it consider all the table itself 1 window)

                                             Row_number();

select student_id,student_batch,student_stream,students_marks,row_number() over (partition by student_batch order by students_marks) as 'row_number' from ineuron_students;

---->In the above, we have created multiple windows with the help of partition by.


Partition by:-

----->It is trying to create the chunks (or) bundle as per student_batch.

---->The partition by is approx the same as a group by operation.

----->This is the run time partition(not compile time partitioning)

Over:-

What is over in MySQL?

------->Window functions are a super powerful resource available in almost all SQL databases. They perform a specific calculation (e.g. sum, count, average, etc.) on a set of rows; this set of rows is called a “window” and is defined by the MySQL OVER clause.

Q)can you please fetch the records of who is having the highest marks(topper) from every batch? 

select * from (select student_id,student_batch,student_stream,students_marks,row_number() over 

(partition by student_batch order by students_marks desc) as 'row_num' from ineuron_students) as test 

where row_num=1;

 insert into ineuron_students values(130,'fsdc','gampa','EEE',91,'gampa@gmail.com');

select student_id,student_batch,student_stream,students_marks,row_number() over 

(partition by student_batch order by students_marks desc)  as 'row_num' from ineuron_students;

 select * from (select student_id,student_batch,student_stream,students_marks,row_number() over 

(partition by student_batch order by students_marks desc) as 'row_num' from ineuron_students) as test 

where row_num=1;

                                              Row_rank()

select student_id,student_batch,student_stream,students_marks,rank() over 

(order by students_marks desc) as 'row_rank' from ineuron_students

select student_id,student_batch,student_stream,students_marks,row_number() over (order by 

students_marks desc) as 'row_number',rank() over 

(order by students_marks desc) as 'row_rank' from ineuron_students

----->The above stmt gives the ranking as per all the batches.

select student_id,student_batch,student_stream,students_marks,row_number() over (partition by student_batch order by students_marks desc) as 'row_number',rank() over (partition by student_batch order by students_marks desc) as 'row_rank' from ineuron_students;

select * from (select student_id,student_batch,student_stream,students_marks,row_number() over (partition by student_batch order by students_marks desc) as 'row_number',rank() over (partition by student_batch order by students_marks desc) as 'row_rank' from ineuron_students) as test where row_rank=1;

                                              dense_rank()


select student_id,student_batch,student_stream,students_marks,row_number() over (order by students_marks desc) as 'row_number',rank() over (order by students_marks desc) as 'row_rank',dense_rank() over (order by students_marks desc) as 'dense_rank'from ineuron_students;

select student_id,student_batch,student_stream,students_marks,row_number() over (partition by student_batch order by students_marks desc) as 'row_number',rank() over (partition by student_batch order by students_marks desc) as 'row_rank',dense_rank() over (partition by student_batch order by students_marks desc) as 'dense_rank'from ineuron_students;


select * from (select student_id,student_batch,student_stream,students_marks,row_number() over (partition by student_batch order by students_marks desc) as 'row_number',rank() over (partition by student_batch order by students_marks desc) as 'row_rank',dense_rank() over (partition by student_batch order by students_marks desc) as 'den_rank'from ineuron_students) as test where den_rank=2;


select * from (select student_id,student_batch,student_stream,students_marks,row_number() over (partition by student_batch order by students_marks desc) as 'row_number',rank() over (partition by student_batch order by students_marks desc) as 'row_rank',dense_rank() over (partition by student_batch order by students_marks desc) as 'den_rank'from ineuron_students) as test where den_rank=3;

Q)How we can see the rank from 1 to 3 together?

Hint: with the help of between we can fetch it

       with the help of where ,or 

       with the help of where,in 


select * from (select student_id,student_batch,student_stream,students_marks,row_number() over (partition by student_batch order by students_marks desc) as 'row_number',rank() over (partition by student_batch order by students_marks desc) as 'row_rank',dense_rank() over (partition by student_batch order by students_marks desc) as 'den_rank'from ineuron_students) as test where den_rank in(1,2,3);


      -----------------One more Example-----------------

create table if not exists emp(

id int,

`groups` varchar(20),

age int,

healthy_eating int,

active_lifestyle int,

salary int);


LOAD DATA INFILE

'E:\DATA/employee_data1.csv'

into table emp

FIELDS terminated by ','

enclosed by '"'

lines terminated by '\n'

IGNORE 1 ROWS


select * from emp;

select id,age,row_number() over(order by salary ) as 'row_number' from emp;

select id,age,`groups`,salary,row_number() over(order by salary ) as 'row_number' from emp;

select id,age,`groups`,salary,row_number() over(order by salary desc) as 'row_number' from emp;

select id,age,`groups`,salary,row_number() over(partition by `groups` order by salary desc) as 'row_number',rank() over(partition by `groups` order by salary desc) as 'rank',dense_rank() over (partition by `groups` order by salary desc) as 'den_rank'from emp;


select * from (select id,age,`groups`,salary,row_number() over(partition by `groups` order by salary desc) as 'row_number',rank() over(partition by `groups` order by salary desc) as 'rank',dense_rank() over (partition by `groups` order by salary desc) as 'den_rank'from emp) as test where den_rank=1;

select * from (select id,age,`groups`,salary,row_number() over(partition by `groups` order by salary desc) as 'row_number',rank() over(partition by `groups` order by salary desc) as 'rank',dense_rank() over (partition by `groups` order by salary desc) as 'den_rank'from emp) as test where den_rank=2;

select * from (select id,age,`groups`,salary,row_number() over(partition by `groups` order by salary desc) as 'row_number',rank() over(partition by `groups` order by salary desc) as 'rank',dense_rank() over (partition by `groups` order by salary desc) as 'den_rank'from emp) as test where den_rank=3;

select * from (select id,age,`groups`,salary,row_number() over(partition by `groups` order by salary desc) as 'row_number',rank() over(partition by `groups` order by salary desc) as 'rank',dense_rank() over (partition by `groups` order by salary desc) as 'den_rank'from emp) as test where den_rank in(1,2,3,4);




Comments